openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![cfg_attr(not(test), forbid(unsafe_code))]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod serial_io;
15mod storage_builder;
16mod tracing_init;
17mod ttrpc;
18
19// `pub` so that the missing_docs warning fires for options without
20// documentation.
21pub use cli_args::Options;
22use console_relay::ConsoleLaunchOptions;
23
24use crate::cli_args::SecureBootTemplateCli;
25use anyhow::Context;
26use anyhow::bail;
27use chipset_resources::battery::HostBatteryUpdate;
28use clap::CommandFactory;
29use clap::FromArgMatches;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::AsyncWriteExt;
53use futures::FutureExt;
54use futures::StreamExt;
55use futures::executor::block_on;
56use futures::io::AllowStdIo;
57use futures_concurrency::stream::Merge;
58use gdma_resources::GdmaDeviceHandle;
59use gdma_resources::VportDefinition;
60use get_resources::ged::GuestServicingFlags;
61use guid::Guid;
62use input_core::MultiplexedInputHandle;
63use inspect::InspectMut;
64use inspect::InspectionBuilder;
65use io::Read;
66use memory_range::MemoryRange;
67use mesh::CancelContext;
68use mesh::CellUpdater;
69use mesh::error::RemoteError;
70use mesh::rpc::Rpc;
71use mesh::rpc::RpcError;
72use mesh::rpc::RpcSend;
73use mesh_worker::WorkerEvent;
74use mesh_worker::WorkerHandle;
75use meshworker::VmmMesh;
76use net_backend_resources::mac_address::MacAddress;
77use nvme_resources::NamespaceDefinition;
78use nvme_resources::NvmeControllerRequest;
79use openvmm_defs::config::Config;
80use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64;
81use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2;
82use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86;
83use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2;
84use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
85use openvmm_defs::config::DeviceVtl;
86use openvmm_defs::config::EfiDiagnosticsLogLevelType;
87use openvmm_defs::config::HypervisorConfig;
88use openvmm_defs::config::LateMapVtl0MemoryPolicy;
89use openvmm_defs::config::LoadMode;
90use openvmm_defs::config::MemoryConfig;
91use openvmm_defs::config::PcieDeviceConfig;
92use openvmm_defs::config::PcieRootComplexConfig;
93use openvmm_defs::config::PcieRootPortConfig;
94use openvmm_defs::config::PcieSwitchConfig;
95use openvmm_defs::config::ProcessorTopologyConfig;
96use openvmm_defs::config::SerialInformation;
97use openvmm_defs::config::VirtioBus;
98use openvmm_defs::config::VmbusConfig;
99use openvmm_defs::config::VpciDeviceConfig;
100use openvmm_defs::config::Vtl2BaseAddressType;
101use openvmm_defs::config::Vtl2Config;
102use openvmm_defs::rpc::PulseSaveRestoreError;
103use openvmm_defs::rpc::VmRpc;
104use openvmm_defs::worker::VM_WORKER;
105use openvmm_defs::worker::VmWorkerParameters;
106use openvmm_helpers::disk::create_disk_type;
107use openvmm_helpers::disk::open_disk_type;
108use pal_async::DefaultDriver;
109use pal_async::DefaultPool;
110use pal_async::socket::PolledSocket;
111use pal_async::task::Spawn;
112use pal_async::task::Task;
113use pal_async::timer::PolledTimer;
114use scsidisk_resources::SimpleScsiDiskHandle;
115use scsidisk_resources::SimpleScsiDvdHandle;
116use serial_16550_resources::ComPort;
117use serial_core::resources::DisconnectedSerialBackendHandle;
118use sparse_mmap::alloc_shared_memory;
119use std::cell::RefCell;
120use std::collections::BTreeMap;
121use std::fmt::Write as _;
122use std::future::pending;
123use std::io;
124#[cfg(unix)]
125use std::io::IsTerminal;
126use std::io::Write;
127use std::net::TcpListener;
128use std::path::Path;
129use std::path::PathBuf;
130use std::pin::pin;
131use std::sync::Arc;
132use std::thread;
133use std::time::Duration;
134use std::time::Instant;
135use storvsp_resources::ScsiControllerRequest;
136use storvsp_resources::ScsiDeviceAndPath;
137use storvsp_resources::ScsiPath;
138use tpm_resources::TpmDeviceHandle;
139use tpm_resources::TpmRegisterLayout;
140use tracing_helpers::AnyhowValueExt;
141use uidevices_resources::SynthKeyboardHandle;
142use uidevices_resources::SynthMouseHandle;
143use uidevices_resources::SynthVideoHandle;
144use video_core::SharedFramebufferHandle;
145use virtio_resources::VirtioPciDeviceHandle;
146use vm_manifest_builder::BaseChipsetType;
147use vm_manifest_builder::MachineArch;
148use vm_manifest_builder::VmChipsetResult;
149use vm_manifest_builder::VmManifestBuilder;
150use vm_resource::IntoResource;
151use vm_resource::Resource;
152use vm_resource::kind::DiskHandleKind;
153use vm_resource::kind::DiskLayerHandleKind;
154use vm_resource::kind::NetEndpointHandleKind;
155use vm_resource::kind::VirtioDeviceHandle;
156use vm_resource::kind::VmbusDeviceHandleKind;
157use vmbus_serial_resources::VmbusSerialDeviceHandle;
158use vmbus_serial_resources::VmbusSerialPort;
159use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
160use vmgs_resources::GuestStateEncryptionPolicy;
161use vmgs_resources::VmgsDisk;
162use vmgs_resources::VmgsFileHandle;
163use vmgs_resources::VmgsResource;
164use vmotherboard::ChipsetDeviceHandle;
165use vnc_worker_defs::VncParameters;
166
167pub fn openvmm_main() {
168    // Save the current state of the terminal so we can restore it back to
169    // normal before exiting.
170    #[cfg(unix)]
171    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
172
173    let exit_code = match do_main() {
174        Ok(_) => 0,
175        Err(err) => {
176            eprintln!("fatal error: {:?}", err);
177            1
178        }
179    };
180
181    // Restore the terminal to its initial state.
182    #[cfg(unix)]
183    if let Some(orig_termios) = orig_termios {
184        term::set_termios(orig_termios);
185    }
186
187    // Terminate the process immediately without graceful shutdown of DLLs or
188    // C++ destructors or anything like that. This is all unnecessary and saves
189    // time on Windows.
190    //
191    // Do flush stdout, though, since there may be buffered data.
192    let _ = io::stdout().flush();
193    pal::process::terminate(exit_code);
194}
195
196#[derive(Default)]
197struct VmResources {
198    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
199    framebuffer_access: Option<FramebufferAccess>,
200    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
201    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
202    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
203    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
204    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
205    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
206    #[cfg(windows)]
207    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
208}
209
210impl VmResources {
211    /// Modify the cached VTL2 settings and send them to OpenHCL via the GED.
212    ///
213    /// This follows the same pattern as petri's `modify_vtl2_settings`: the cache
214    /// is modified locally, then the entire settings are sent to OpenHCL.
215    async fn modify_vtl2_settings(
216        &mut self,
217        f: impl FnOnce(&mut vtl2_settings_proto::Vtl2Settings),
218    ) -> anyhow::Result<()> {
219        let mut settings_copy = self
220            .vtl2_settings
221            .clone()
222            .context("vtl2 settings not configured")?;
223
224        f(&mut settings_copy);
225
226        let ged_rpc = self.ged_rpc.as_ref().context("no GED configured")?;
227
228        ged_rpc
229            .call_failable(
230                get_resources::ged::GuestEmulationRequest::ModifyVtl2Settings,
231                prost::Message::encode_to_vec(&settings_copy),
232            )
233            .await?;
234
235        // Settings successfully applied, update our cache
236        self.vtl2_settings = Some(settings_copy);
237        Ok(())
238    }
239
240    /// Add a VTL0 SCSI LUN backed by a VTL2 storage device.
241    ///
242    /// This modifies the VTL2 settings to add a new LUN to the specified SCSI controller,
243    /// backed by the given VTL2 device (NVMe namespace or SCSI disk).
244    async fn add_vtl0_scsi_disk(
245        &mut self,
246        controller_guid: Guid,
247        lun: u32,
248        device_type: vtl2_settings_proto::physical_device::DeviceType,
249        device_path: Guid,
250        sub_device_path: u32,
251    ) -> anyhow::Result<()> {
252        let mut not_found = false;
253        self.modify_vtl2_settings(|settings| {
254            let dynamic = settings.dynamic.get_or_insert_with(Default::default);
255
256            // Find the SCSI controller, bail out if not found (we can't create new controllers at runtime)
257            let scsi_controller = dynamic.storage_controllers.iter_mut().find(|c| {
258                c.instance_id == controller_guid.to_string()
259                    && c.protocol
260                        == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
261            });
262
263            let Some(scsi_controller) = scsi_controller else {
264                not_found = true;
265                return;
266            };
267
268            // Add the LUN backed by the VTL2 storage device. If the LUN exists already, UH will reject the settings
269            scsi_controller.luns.push(vtl2_settings_proto::Lun {
270                location: lun,
271                device_id: Guid::new_random().to_string(),
272                vendor_id: "OpenVMM".to_string(),
273                product_id: "Disk".to_string(),
274                product_revision_level: "1.0".to_string(),
275                serial_number: "0".to_string(),
276                model_number: "1".to_string(),
277                physical_devices: Some(vtl2_settings_proto::PhysicalDevices {
278                    r#type: vtl2_settings_proto::physical_devices::BackingType::Single.into(),
279                    device: Some(vtl2_settings_proto::PhysicalDevice {
280                        device_type: device_type.into(),
281                        device_path: device_path.to_string(),
282                        sub_device_path,
283                    }),
284                    devices: Vec::new(),
285                }),
286                is_dvd: false,
287                ..Default::default()
288            });
289        })
290        .await?;
291
292        if not_found {
293            anyhow::bail!("SCSI controller {} not found", controller_guid);
294        }
295        Ok(())
296    }
297
298    /// Remove a VTL0 SCSI LUN.
299    ///
300    /// This modifies the VTL2 settings to remove a LUN from the specified SCSI controller.
301    async fn remove_vtl0_scsi_disk(
302        &mut self,
303        controller_guid: Guid,
304        lun: u32,
305    ) -> anyhow::Result<()> {
306        self.modify_vtl2_settings(|settings| {
307            let dynamic = settings.dynamic.as_mut();
308            if let Some(dynamic) = dynamic {
309                // Find the SCSI controller
310                if let Some(scsi_controller) = dynamic.storage_controllers.iter_mut().find(|c| {
311                    c.instance_id == controller_guid.to_string()
312                        && c.protocol
313                            == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
314                }) {
315                    // Remove the LUN
316                    scsi_controller.luns.retain(|l| l.location != lun);
317                }
318            }
319        })
320        .await
321    }
322
323    /// Find and remove a VTL0 SCSI LUN backed by a specific NVMe namespace.
324    ///
325    /// Returns the LUN number that was removed, or None if no matching LUN was found.
326    async fn remove_vtl0_scsi_disk_by_nvme_nsid(
327        &mut self,
328        controller_guid: Guid,
329        nvme_controller_guid: Guid,
330        nsid: u32,
331    ) -> anyhow::Result<Option<u32>> {
332        let mut removed_lun = None;
333        self.modify_vtl2_settings(|settings| {
334            let dynamic = settings.dynamic.as_mut();
335            if let Some(dynamic) = dynamic {
336                // Find the SCSI controller
337                if let Some(scsi_controller) = dynamic.storage_controllers.iter_mut().find(|c| {
338                    c.instance_id == controller_guid.to_string()
339                        && c.protocol
340                            == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
341                }) {
342                    // Find and remove the LUN backed by this NVMe namespace
343                    let nvme_controller_str = nvme_controller_guid.to_string();
344                    scsi_controller.luns.retain(|l| {
345                        let dominated_by_nsid = l.physical_devices.as_ref().is_some_and(|pd| {
346                            pd.device.as_ref().is_some_and(|d| {
347                                d.device_type
348                                    == vtl2_settings_proto::physical_device::DeviceType::Nvme as i32
349                                    && d.device_path == nvme_controller_str
350                                    && d.sub_device_path == nsid
351                            })
352                        });
353                        if dominated_by_nsid {
354                            removed_lun = Some(l.location);
355                            false // Remove this LUN
356                        } else {
357                            true // Keep this LUN
358                        }
359                    });
360                }
361            }
362        })
363        .await?;
364        Ok(removed_lun)
365    }
366}
367
368struct ConsoleState<'a> {
369    device: &'a str,
370    input: Box<dyn AsyncWrite + Unpin + Send>,
371}
372
373/// Build a flat list of switches with their parent port assignments.
374///
375/// This function converts hierarchical CLI switch definitions into a flat list
376/// where each switch specifies its parent port directly.
377fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
378    all_switches
379        .iter()
380        .map(|switch_cli| PcieSwitchConfig {
381            name: switch_cli.name.clone(),
382            num_downstream_ports: switch_cli.num_downstream_ports,
383            parent_port: switch_cli.port_name.clone(),
384            hotplug: switch_cli.hotplug,
385        })
386        .collect()
387}
388
389async fn vm_config_from_command_line(
390    spawner: impl Spawn,
391    mesh: &VmmMesh,
392    opt: &Options,
393) -> anyhow::Result<(Config, VmResources)> {
394    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
395    // Ensure the serial driver stays alive with no tasks.
396    serial_driver.spawn("leak", pending::<()>()).detach();
397
398    let openhcl_vtl = if opt.vtl2 {
399        DeviceVtl::Vtl2
400    } else {
401        DeviceVtl::Vtl0
402    };
403
404    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
405    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
406        Ok(match cli_cfg {
407            SerialConfigCli::Console => {
408                if let Some(console_state) = console_state.borrow().as_ref() {
409                    bail!("console already set by {}", console_state.device);
410                }
411                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
412                let (serial_read, serial_write) = AsyncReadExt::split(serial);
413                *console_state.borrow_mut() = Some(ConsoleState {
414                    device,
415                    input: Box::new(serial_write),
416                });
417                thread::Builder::new()
418                    .name(name.to_owned())
419                    .spawn(move || {
420                        let _ = block_on(futures::io::copy(
421                            serial_read,
422                            &mut AllowStdIo::new(term::raw_stdout()),
423                        ));
424                    })
425                    .unwrap();
426                Some(config)
427            }
428            SerialConfigCli::Stderr => {
429                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
430                thread::Builder::new()
431                    .name(name.to_owned())
432                    .spawn(move || {
433                        let _ = block_on(futures::io::copy(
434                            serial,
435                            &mut AllowStdIo::new(term::raw_stderr()),
436                        ));
437                    })
438                    .unwrap();
439                Some(config)
440            }
441            SerialConfigCli::File(path) => {
442                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
443                let file = fs_err::File::create(path).context("failed to create file")?;
444
445                thread::Builder::new()
446                    .name(name.to_owned())
447                    .spawn(move || {
448                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
449                    })
450                    .unwrap();
451                Some(config)
452            }
453            SerialConfigCli::None => None,
454            SerialConfigCli::Pipe(path) => {
455                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
456            }
457            SerialConfigCli::Tcp(addr) => {
458                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
459            }
460            SerialConfigCli::NewConsole(app, window_title) => {
461                let path = console_relay::random_console_path();
462                let config =
463                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
464                let window_title =
465                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
466
467                console_relay::launch_console(
468                    app.or_else(openvmm_terminal_app).as_deref(),
469                    &path,
470                    ConsoleLaunchOptions {
471                        window_title: Some(window_title),
472                    },
473                )
474                .context("failed to launch console")?;
475
476                Some(config)
477            }
478        })
479    };
480
481    let mut vmbus_devices = Vec::new();
482
483    let serial0_cfg = setup_serial(
484        "com1",
485        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
486        if cfg!(guest_arch = "x86_64") {
487            "ttyS0"
488        } else {
489            "ttyAMA0"
490        },
491    )?;
492    let serial1_cfg = setup_serial(
493        "com2",
494        opt.com2.clone().unwrap_or(SerialConfigCli::None),
495        if cfg!(guest_arch = "x86_64") {
496            "ttyS1"
497        } else {
498            "ttyAMA1"
499        },
500    )?;
501    let serial2_cfg = setup_serial(
502        "com3",
503        opt.com3.clone().unwrap_or(SerialConfigCli::None),
504        if cfg!(guest_arch = "x86_64") {
505            "ttyS2"
506        } else {
507            "ttyAMA2"
508        },
509    )?;
510    let serial3_cfg = setup_serial(
511        "com4",
512        opt.com4.clone().unwrap_or(SerialConfigCli::None),
513        if cfg!(guest_arch = "x86_64") {
514            "ttyS3"
515        } else {
516            "ttyAMA3"
517        },
518    )?;
519    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
520        "vmbus_com1",
521        opt.vmbus_com1_serial
522            .clone()
523            .unwrap_or(SerialConfigCli::None),
524        "vmbus_com1",
525    )? {
526        vmbus_devices.push((
527            openhcl_vtl,
528            VmbusSerialDeviceHandle {
529                port: VmbusSerialPort::Com1,
530                backend: vmbus_com1_cfg,
531            }
532            .into_resource(),
533        ));
534        true
535    } else {
536        false
537    };
538    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
539        "vmbus_com2",
540        opt.vmbus_com2_serial
541            .clone()
542            .unwrap_or(SerialConfigCli::None),
543        "vmbus_com2",
544    )? {
545        vmbus_devices.push((
546            openhcl_vtl,
547            VmbusSerialDeviceHandle {
548                port: VmbusSerialPort::Com2,
549                backend: vmbus_com2_cfg,
550            }
551            .into_resource(),
552        ));
553        true
554    } else {
555        false
556    };
557    let debugcon_cfg = setup_serial(
558        "debugcon",
559        opt.debugcon
560            .clone()
561            .map(|cfg| cfg.serial)
562            .unwrap_or(SerialConfigCli::None),
563        "debugcon",
564    )?;
565
566    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
567        setup_serial("virtio-console", serial_cfg, "hvc0")?
568    } else {
569        None
570    };
571
572    let mut resources = VmResources::default();
573    let mut console_str = "";
574    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
575        resources.console_in = Some(input);
576        console_str = device;
577    }
578
579    if opt.shared_memory {
580        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
581    }
582
583    const MAX_PROCESSOR_COUNT: u32 = 1024;
584
585    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
586        bail!("invalid proc count: {}", opt.processors);
587    }
588
589    // Total SCSI channel count should not exceed the processor count
590    // (at most, one channel per VP).
591    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
592        bail!(
593            "invalid SCSI sub-channel count: requested {}, max {}",
594            opt.scsi_sub_channels,
595            MAX_PROCESSOR_COUNT - 1
596        );
597    }
598
599    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
600
601    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
602    for &cli_args::DiskCli {
603        vtl,
604        ref kind,
605        read_only,
606        is_dvd,
607        underhill,
608        ref pcie_port,
609    } in &opt.disk
610    {
611        if pcie_port.is_some() {
612            anyhow::bail!("`--disk` is incompatible with PCIe");
613        }
614
615        storage.add(
616            vtl,
617            underhill,
618            storage_builder::DiskLocation::Scsi(None),
619            kind,
620            is_dvd,
621            read_only,
622        )?;
623    }
624
625    for &cli_args::IdeDiskCli {
626        ref kind,
627        read_only,
628        channel,
629        device,
630        is_dvd,
631    } in &opt.ide
632    {
633        storage.add(
634            DeviceVtl::Vtl0,
635            None,
636            storage_builder::DiskLocation::Ide(channel, device),
637            kind,
638            is_dvd,
639            read_only,
640        )?;
641    }
642
643    for &cli_args::DiskCli {
644        vtl,
645        ref kind,
646        read_only,
647        is_dvd,
648        underhill,
649        ref pcie_port,
650    } in &opt.nvme
651    {
652        storage.add(
653            vtl,
654            underhill,
655            storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
656            kind,
657            is_dvd,
658            read_only,
659        )?;
660    }
661
662    for &cli_args::DiskCli {
663        vtl,
664        ref kind,
665        read_only,
666        is_dvd,
667        ref underhill,
668        ref pcie_port,
669    } in &opt.virtio_blk
670    {
671        if underhill.is_some() {
672            anyhow::bail!("underhill not supported with virtio-blk");
673        }
674        storage.add(
675            vtl,
676            None,
677            storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
678            kind,
679            is_dvd,
680            read_only,
681        )?;
682    }
683
684    let floppy_disks: Vec<_> = opt
685        .floppy
686        .iter()
687        .map(|disk| -> anyhow::Result<_> {
688            let &cli_args::FloppyDiskCli {
689                ref kind,
690                read_only,
691            } = disk;
692            Ok(FloppyDiskConfig {
693                disk_type: disk_open(kind, read_only)?,
694                read_only,
695            })
696        })
697        .collect::<Result<Vec<_>, _>>()?;
698
699    let mut vpci_mana_nics = [(); 3].map(|()| None);
700    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
701    let mut underhill_nics = Vec::new();
702    let mut vpci_devices = Vec::new();
703
704    let mut nic_index = 0;
705    for cli_cfg in &opt.net {
706        if cli_cfg.pcie_port.is_some() {
707            anyhow::bail!("`--net` does not support PCIe");
708        }
709        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
710        if cli_cfg.underhill {
711            if !opt.no_alias_map {
712                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
713            }
714            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
715                let vpci_instance_id = Guid::new_random();
716                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
717                    instance_id: vpci_instance_id.to_string(),
718                    subordinate_instance_id: None,
719                    max_sub_channels: None,
720                });
721                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
722            });
723            mana.1.vports.push(VportDefinition {
724                mac_address: vport.mac_address,
725                endpoint: vport.endpoint,
726            });
727        } else {
728            vmbus_devices.push(vport.into_netvsp_handle());
729        }
730    }
731
732    if opt.nic {
733        let nic_config = parse_endpoint(
734            &NicConfigCli {
735                vtl: DeviceVtl::Vtl0,
736                endpoint: EndpointConfigCli::Consomme { cidr: None },
737                max_queues: None,
738                underhill: false,
739                pcie_port: None,
740            },
741            &mut nic_index,
742            &mut resources,
743        )?;
744        vmbus_devices.push(nic_config.into_netvsp_handle());
745    }
746
747    if opt.mcr {
748        tracing::info!("Instantiating MCR controller");
749
750        // Arbitrary but constant instance ID to be consistent across boots.
751        const MCR_INSTANCE_ID: Guid = guid::guid!("07effd8f-7501-426c-a947-d8345f39113d");
752
753        vpci_devices.push(VpciDeviceConfig {
754            vtl: DeviceVtl::Vtl0,
755            instance_id: MCR_INSTANCE_ID,
756            resource: mcr_resources::McrControllerHandle {
757                instance_id: MCR_INSTANCE_ID,
758            }
759            .into_resource(),
760        });
761    }
762
763    // Build initial PCIe devices list from CLI options. Storage devices
764    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
765    let mut pcie_devices = Vec::new();
766    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
767        tracing::info!(
768            port_name = %cli_cfg.port_name,
769            socket_addr = ?cli_cfg.socket_addr,
770            "instantiating PCIe remote device"
771        );
772
773        // Generate a deterministic instance ID based on index
774        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
775            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
776        let instance_id = Guid {
777            data1: index as u32,
778            ..PCIE_REMOTE_BASE_INSTANCE_ID
779        };
780
781        pcie_devices.push(PcieDeviceConfig {
782            port_name: cli_cfg.port_name.clone(),
783            resource: pcie_remote_resources::PcieRemoteHandle {
784                instance_id,
785                socket_addr: cli_cfg.socket_addr.clone(),
786                hu: cli_cfg.hu,
787                controller: cli_cfg.controller,
788            }
789            .into_resource(),
790        });
791    }
792
793    #[cfg(windows)]
794    let mut kernel_vmnics = Vec::new();
795    #[cfg(windows)]
796    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
797        // Pick a random MAC address.
798        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
799        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
800
801        // Pick a fixed instance ID based on the index.
802        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
803        let instance_id = Guid {
804            data1: index as u32,
805            ..BASE_INSTANCE_ID
806        };
807
808        let switch_id = if switch_id == "default" {
809            DEFAULT_SWITCH
810        } else {
811            switch_id
812        };
813        let (port_id, port) = new_switch_port(switch_id)?;
814        resources.switch_ports.push(port);
815
816        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
817            instance_id,
818            mac_address: mac_address.into(),
819            switch_port_id: port_id,
820        });
821    }
822
823    for vport in &opt.mana {
824        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
825        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
826            (vtl, None) => {
827                &mut vpci_mana_nics[vtl]
828                    .get_or_insert_with(|| {
829                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
830                    })
831                    .1
832                    .vports
833            }
834            (0, Some(pcie_port)) => {
835                &mut pcie_mana_nics
836                    .entry(pcie_port)
837                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
838                    .vports
839            }
840            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
841        };
842        vport_array.push(VportDefinition {
843            mac_address: vport.mac_address,
844            endpoint: vport.endpoint,
845        });
846    }
847
848    vpci_devices.extend(
849        vpci_mana_nics
850            .into_iter()
851            .enumerate()
852            .filter_map(|(vtl, nic)| {
853                nic.map(|(instance_id, handle)| VpciDeviceConfig {
854                    vtl: match vtl {
855                        0 => DeviceVtl::Vtl0,
856                        1 => DeviceVtl::Vtl1,
857                        2 => DeviceVtl::Vtl2,
858                        _ => unreachable!(),
859                    },
860                    instance_id,
861                    resource: handle.into_resource(),
862                })
863            }),
864    );
865
866    pcie_devices.extend(
867        pcie_mana_nics
868            .into_iter()
869            .map(|(pcie_port, handle)| PcieDeviceConfig {
870                port_name: pcie_port,
871                resource: handle.into_resource(),
872            }),
873    );
874
875    // If VTL2 is enabled, and we are not in VTL2 self allocate mode, provide an
876    // mmio gap for VTL2.
877    let use_vtl2_gap = opt.vtl2
878        && !matches!(
879            opt.igvm_vtl2_relocation_type,
880            Vtl2BaseAddressType::Vtl2Allocate { .. },
881        );
882
883    #[cfg(guest_arch = "aarch64")]
884    let arch = MachineArch::Aarch64;
885    #[cfg(guest_arch = "x86_64")]
886    let arch = MachineArch::X86_64;
887
888    let mmio_gaps: Vec<MemoryRange> = match (use_vtl2_gap, arch) {
889        (true, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86_WITH_VTL2.into(),
890        (true, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2.into(),
891        (false, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86.into(),
892        (false, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64.into(),
893    };
894
895    let mut pci_ecam_gaps = Vec::new();
896    let mut pci_mmio_gaps = Vec::new();
897
898    let mut low_mmio_start = mmio_gaps.first().context("expected mmio gap")?.start();
899    let mut high_mmio_end = mmio_gaps.last().context("expected second mmio gap")?.end();
900
901    let mut pcie_root_complexes = Vec::new();
902    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
903        let ports = opt
904            .pcie_root_port
905            .iter()
906            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
907            .map(|port_cli| PcieRootPortConfig {
908                name: port_cli.name.clone(),
909                hotplug: port_cli.hotplug,
910            })
911            .collect();
912
913        const ONE_MB: u64 = 1024 * 1024;
914        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
915        let high_mmio_size = rc_cli
916            .high_mmio
917            .checked_next_multiple_of(ONE_MB)
918            .context("high mmio rounding error")?;
919        let ecam_size = (((rc_cli.end_bus - rc_cli.start_bus) as u64) + 1) * 256 * 4096;
920
921        let low_pci_mmio_start = low_mmio_start
922            .checked_sub(low_mmio_size)
923            .context("pci low mmio underflow")?;
924        let ecam_start = low_pci_mmio_start
925            .checked_sub(ecam_size)
926            .context("pci ecam underflow")?;
927        low_mmio_start = ecam_start;
928        high_mmio_end = high_mmio_end
929            .checked_add(high_mmio_size)
930            .context("pci high mmio overflow")?;
931
932        let ecam_range = MemoryRange::new(ecam_start..ecam_start + ecam_size);
933        let low_mmio = MemoryRange::new(low_pci_mmio_start..low_pci_mmio_start + low_mmio_size);
934        let high_mmio = MemoryRange::new(high_mmio_end - high_mmio_size..high_mmio_end);
935
936        pci_ecam_gaps.push(ecam_range);
937        pci_mmio_gaps.push(low_mmio);
938        pci_mmio_gaps.push(high_mmio);
939
940        pcie_root_complexes.push(PcieRootComplexConfig {
941            index: i as u32,
942            name: rc_cli.name.clone(),
943            segment: rc_cli.segment,
944            start_bus: rc_cli.start_bus,
945            end_bus: rc_cli.end_bus,
946            ecam_range,
947            low_mmio,
948            high_mmio,
949            ports,
950        });
951    }
952
953    pci_ecam_gaps.sort();
954    pci_mmio_gaps.sort();
955
956    let pcie_switches = build_switch_list(&opt.pcie_switch);
957
958    #[cfg(windows)]
959    let vpci_resources: Vec<_> = opt
960        .device
961        .iter()
962        .map(|path| -> anyhow::Result<_> {
963            Ok(virt_whp::device::DeviceHandle(
964                whp::VpciResource::new(
965                    None,
966                    Default::default(),
967                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
968                )
969                .with_context(|| format!("opening PCI device {}", path))?,
970            ))
971        })
972        .collect::<Result<_, _>>()?;
973
974    // Create a vmbusproxy handle if needed by any devices.
975    #[cfg(windows)]
976    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
977        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
978    } else {
979        None
980    };
981
982    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
983        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
984        let (fb, fba) =
985            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
986        resources.framebuffer_access = Some(fba);
987        Some(fb)
988    } else {
989        None
990    };
991
992    let load_mode;
993    let with_hv;
994
995    let any_serial_configured = serial0_cfg.is_some()
996        || serial1_cfg.is_some()
997        || serial2_cfg.is_some()
998        || serial3_cfg.is_some();
999
1000    let has_com3 = serial2_cfg.is_some();
1001
1002    let mut chipset = VmManifestBuilder::new(
1003        if opt.igvm.is_some() {
1004            BaseChipsetType::HclHost
1005        } else if opt.pcat {
1006            BaseChipsetType::HypervGen1
1007        } else if opt.uefi {
1008            BaseChipsetType::HypervGen2Uefi
1009        } else if opt.hv {
1010            BaseChipsetType::HyperVGen2LinuxDirect
1011        } else {
1012            BaseChipsetType::UnenlightenedLinuxDirect
1013        },
1014        arch,
1015    );
1016
1017    if framebuffer.is_some() {
1018        chipset = chipset.with_framebuffer();
1019    }
1020    if opt.guest_watchdog {
1021        chipset = chipset.with_guest_watchdog();
1022    }
1023    if any_serial_configured {
1024        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
1025    }
1026    if opt.battery {
1027        let (tx, rx) = mesh::channel();
1028        tx.send(HostBatteryUpdate::default_present());
1029        chipset = chipset.with_battery(rx);
1030    }
1031    if let Some(cfg) = &opt.debugcon {
1032        chipset = chipset.with_debugcon(
1033            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
1034            cfg.port,
1035        );
1036    }
1037
1038    // TODO: load from VMGS file if it exists
1039    let bios_guid = Guid::new_random();
1040
1041    let VmChipsetResult {
1042        chipset,
1043        mut chipset_devices,
1044    } = chipset
1045        .build()
1046        .context("failed to build chipset configuration")?;
1047
1048    if opt.restore_snapshot.is_some() {
1049        // Snapshot restore: skip firmware loading entirely. Device state and
1050        // memory come from the snapshot directory.
1051        load_mode = LoadMode::None;
1052        with_hv = true;
1053    } else if let Some(path) = &opt.igvm {
1054        let file = fs_err::File::open(path)
1055            .context("failed to open igvm file")?
1056            .into();
1057        let cmdline = opt.cmdline.join(" ");
1058        with_hv = true;
1059
1060        load_mode = LoadMode::Igvm {
1061            file,
1062            cmdline,
1063            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1064            com_serial: has_com3.then(|| SerialInformation {
1065                io_port: ComPort::Com3.io_port(),
1066                irq: ComPort::Com3.irq().into(),
1067            }),
1068        };
1069    } else if opt.pcat {
1070        // Emit a nice error early instead of complaining about missing firmware.
1071        if arch != MachineArch::X86_64 {
1072            anyhow::bail!("pcat not supported on this architecture");
1073        }
1074        with_hv = true;
1075
1076        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1077        load_mode = LoadMode::Pcat {
1078            firmware,
1079            boot_order: opt
1080                .pcat_boot_order
1081                .map(|x| x.0)
1082                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1083        };
1084    } else if opt.uefi {
1085        use openvmm_defs::config::UefiConsoleMode;
1086
1087        with_hv = true;
1088
1089        let firmware = fs_err::File::open(
1090            (opt.uefi_firmware.0)
1091                .as_ref()
1092                .context("must provide uefi firmware when booting with uefi")?,
1093        )
1094        .context("failed to open uefi firmware")?;
1095
1096        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1097        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1098        load_mode = LoadMode::Uefi {
1099            firmware: firmware.into(),
1100            enable_debugging: opt.uefi_debug,
1101            enable_memory_protections: opt.uefi_enable_memory_protections,
1102            disable_frontpage: opt.disable_frontpage,
1103            enable_tpm: opt.tpm,
1104            enable_battery: opt.battery,
1105            enable_serial: any_serial_configured,
1106            enable_vpci_boot: false,
1107            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1108                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1109                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1110                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1111                UefiConsoleModeCli::None => UefiConsoleMode::None,
1112            }),
1113            default_boot_always_attempt: opt.default_boot_always_attempt,
1114            bios_guid,
1115        };
1116    } else {
1117        // Linux Direct
1118        let mut cmdline = "panic=-1 debug".to_string();
1119
1120        with_hv = opt.hv;
1121        if with_hv && opt.pcie_root_complex.is_empty() {
1122            cmdline += " pci=off";
1123        }
1124
1125        if !console_str.is_empty() {
1126            let _ = write!(&mut cmdline, " console={}", console_str);
1127        }
1128
1129        if opt.gfx {
1130            cmdline += " console=tty";
1131        }
1132        for extra in &opt.cmdline {
1133            let _ = write!(&mut cmdline, " {}", extra);
1134        }
1135
1136        let kernel = fs_err::File::open(
1137            (opt.kernel.0)
1138                .as_ref()
1139                .context("must provide kernel when booting with linux direct")?,
1140        )
1141        .context("failed to open kernel")?;
1142        let initrd = (opt.initrd.0)
1143            .as_ref()
1144            .map(fs_err::File::open)
1145            .transpose()
1146            .context("failed to open initrd")?;
1147
1148        let custom_dsdt = match &opt.custom_dsdt {
1149            Some(path) => {
1150                let mut v = Vec::new();
1151                fs_err::File::open(path)
1152                    .context("failed to open custom dsdt")?
1153                    .read_to_end(&mut v)
1154                    .context("failed to read custom dsdt")?;
1155                Some(v)
1156            }
1157            None => None,
1158        };
1159
1160        load_mode = LoadMode::Linux {
1161            kernel: kernel.into(),
1162            initrd: initrd.map(Into::into),
1163            cmdline,
1164            custom_dsdt,
1165            enable_serial: any_serial_configured,
1166            boot_mode: if opt.device_tree {
1167                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1168            } else {
1169                openvmm_defs::config::LinuxDirectBootMode::Acpi
1170            },
1171        };
1172    }
1173
1174    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1175        let disk = VmgsDisk {
1176            disk: disk_open(kind, false).context("failed to open vmgs disk")?,
1177            encryption_policy: if opt.test_gsp_by_id {
1178                GuestStateEncryptionPolicy::GspById(true)
1179            } else {
1180                GuestStateEncryptionPolicy::None(true)
1181            },
1182        };
1183        match provision {
1184            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1185            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1186            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1187        }
1188    } else {
1189        VmgsResource::Ephemeral
1190    });
1191
1192    if with_get && with_hv {
1193        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1194            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1195            fixed: Some(Default::default()),
1196            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1197                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1198                nic_devices: underhill_nics,
1199            }),
1200            namespace_settings: Vec::default(),
1201        };
1202
1203        // Cache the VTL2 settings for later modification via the interactive console.
1204        resources.vtl2_settings = Some(vtl2_settings.clone());
1205
1206        let (send, guest_request_recv) = mesh::channel();
1207        resources.ged_rpc = Some(send);
1208
1209        let vmgs = vmgs.take().unwrap();
1210
1211        vmbus_devices.extend([
1212            (
1213                openhcl_vtl,
1214                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1215            ),
1216            (
1217                openhcl_vtl,
1218                get_resources::ged::GuestEmulationDeviceHandle {
1219                    firmware: if opt.pcat {
1220                        get_resources::ged::GuestFirmwareConfig::Pcat {
1221                            boot_order: opt
1222                                .pcat_boot_order
1223                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1224                                .map(|x| match x {
1225                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1226                                        get_resources::ged::PcatBootDevice::Floppy
1227                                    }
1228                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1229                                        get_resources::ged::PcatBootDevice::HardDrive
1230                                    }
1231                                    openvmm_defs::config::PcatBootDevice::Optical => {
1232                                        get_resources::ged::PcatBootDevice::Optical
1233                                    }
1234                                    openvmm_defs::config::PcatBootDevice::Network => {
1235                                        get_resources::ged::PcatBootDevice::Network
1236                                    }
1237                                }),
1238                        }
1239                    } else {
1240                        use get_resources::ged::UefiConsoleMode;
1241
1242                        get_resources::ged::GuestFirmwareConfig::Uefi {
1243                            enable_vpci_boot: storage.has_vtl0_nvme(),
1244                            firmware_debug: opt.uefi_debug,
1245                            disable_frontpage: opt.disable_frontpage,
1246                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1247                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1248                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1249                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1250                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1251                            },
1252                            default_boot_always_attempt: opt.default_boot_always_attempt,
1253                        }
1254                    },
1255                    com1: with_vmbus_com1_serial,
1256                    com2: with_vmbus_com2_serial,
1257                    serial_tx_only: opt.serial_tx_only,
1258                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1259                    vmbus_redirection: opt.vmbus_redirect,
1260                    vmgs,
1261                    framebuffer: opt
1262                        .vtl2_gfx
1263                        .then(|| SharedFramebufferHandle.into_resource()),
1264                    guest_request_recv,
1265                    enable_tpm: opt.tpm,
1266                    firmware_event_send: None,
1267                    secure_boot_enabled: opt.secure_boot,
1268                    secure_boot_template: match opt.secure_boot_template {
1269                        Some(SecureBootTemplateCli::Windows) => {
1270                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1271                        },
1272                        Some(SecureBootTemplateCli::UefiCa) => {
1273                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1274                        }
1275                        None => {
1276                            get_resources::ged::GuestSecureBootTemplateType::None
1277                        },
1278                    },
1279                    enable_battery: opt.battery,
1280                    no_persistent_secrets: true,
1281                    igvm_attest_test_config: None,
1282                    test_gsp_by_id: opt.test_gsp_by_id,
1283                    efi_diagnostics_log_level: {
1284                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1285                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1286                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1287                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1288                        }
1289                    },
1290                    hv_sint_enabled: false,
1291                }
1292                .into_resource(),
1293            ),
1294        ]);
1295    }
1296
1297    if opt.tpm && !opt.vtl2 {
1298        let register_layout = if cfg!(guest_arch = "x86_64") {
1299            TpmRegisterLayout::IoPort
1300        } else {
1301            TpmRegisterLayout::Mmio
1302        };
1303
1304        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1305            (
1306                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1307                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1308            )
1309        } else {
1310            (
1311                EphemeralNonVolatileStoreHandle.into_resource(),
1312                EphemeralNonVolatileStoreHandle.into_resource(),
1313            )
1314        };
1315
1316        chipset_devices.push(ChipsetDeviceHandle {
1317            name: "tpm".to_string(),
1318            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1319                device: TpmDeviceHandle {
1320                    ppi_store,
1321                    nvram_store,
1322                    nvram_size: None,
1323                    refresh_tpm_seeds: false,
1324                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1325                    register_layout,
1326                    guest_secret_key: None,
1327                    logger: None,
1328                    is_confidential_vm: false,
1329                    bios_guid,
1330                }
1331                .into_resource(),
1332                worker_host: mesh.make_host("tpm", None).await?,
1333            }
1334            .into_resource(),
1335        });
1336    }
1337
1338    let custom_uefi_vars = {
1339        use firmware_uefi_custom_vars::CustomVars;
1340
1341        // load base vars from specified template, or use an empty set of base
1342        // vars if none was specified.
1343        let base_vars = match opt.secure_boot_template {
1344            Some(template) => match (arch, template) {
1345                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1346                    hyperv_secure_boot_templates::x64::microsoft_windows()
1347                }
1348                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1349                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1350                }
1351                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1352                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1353                }
1354                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1355                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1356                }
1357            },
1358            None => CustomVars::default(),
1359        };
1360
1361        // TODO: fallback to VMGS read if no command line flag was given
1362
1363        let custom_uefi_json_data = match &opt.custom_uefi_json {
1364            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1365            None => None,
1366        };
1367
1368        // obtain the final custom uefi vars by applying the delta onto the base vars
1369        match custom_uefi_json_data {
1370            Some(data) => {
1371                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1372                base_vars.apply_delta(delta)?
1373            }
1374            None => base_vars,
1375        }
1376    };
1377
1378    let vga_firmware = if opt.pcat {
1379        Some(openvmm_pcat_locator::find_svga_bios(
1380            opt.vga_firmware.as_deref(),
1381        )?)
1382    } else {
1383        None
1384    };
1385
1386    if opt.gfx {
1387        vmbus_devices.extend([
1388            (
1389                DeviceVtl::Vtl0,
1390                SynthVideoHandle {
1391                    framebuffer: SharedFramebufferHandle.into_resource(),
1392                }
1393                .into_resource(),
1394            ),
1395            (
1396                DeviceVtl::Vtl0,
1397                SynthKeyboardHandle {
1398                    source: MultiplexedInputHandle {
1399                        // Save 0 for PS/2
1400                        elevation: 1,
1401                    }
1402                    .into_resource(),
1403                }
1404                .into_resource(),
1405            ),
1406            (
1407                DeviceVtl::Vtl0,
1408                SynthMouseHandle {
1409                    source: MultiplexedInputHandle {
1410                        // Save 0 for PS/2
1411                        elevation: 1,
1412                    }
1413                    .into_resource(),
1414                }
1415                .into_resource(),
1416            ),
1417        ]);
1418    }
1419
1420    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1421        if let Some(path) = path {
1422            cleanup_socket(path.as_ref());
1423            let listener = unix_socket::UnixListener::bind(path)
1424                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1425            Ok(Some(listener))
1426        } else {
1427            Ok(None)
1428        }
1429    };
1430
1431    let vtl0_vsock_listener = vsock_listener(opt.vsock_path.as_deref())?;
1432    let vtl2_vsock_listener = vsock_listener(opt.vtl2_vsock_path.as_deref())?;
1433
1434    if let Some(path) = &opt.openhcl_dump_path {
1435        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1436        task.detach();
1437        vmbus_devices.push((openhcl_vtl, resource));
1438    }
1439
1440    #[cfg(guest_arch = "aarch64")]
1441    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1442        openvmm_defs::config::Aarch64TopologyConfig {
1443            // TODO: allow this to be configured from the command line
1444            gic_config: None,
1445            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1446        },
1447    );
1448    #[cfg(guest_arch = "x86_64")]
1449    let topology_arch =
1450        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1451            apic_id_offset: opt.apic_id_offset,
1452            x2apic: opt.x2apic,
1453        });
1454
1455    let with_isolation = if let Some(isolation) = &opt.isolation {
1456        // TODO: For now, isolation is only supported with VTL2.
1457        if !opt.vtl2 {
1458            anyhow::bail!("isolation is only currently supported with vtl2");
1459        }
1460
1461        // TODO: Alias map support is not yet implemented with isolation.
1462        if !opt.no_alias_map {
1463            anyhow::bail!("alias map not supported with isolation");
1464        }
1465
1466        match isolation {
1467            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1468        }
1469    } else {
1470        None
1471    };
1472
1473    if with_hv {
1474        let (shutdown_send, shutdown_recv) = mesh::channel();
1475        resources.shutdown_ic = Some(shutdown_send);
1476        let (kvp_send, kvp_recv) = mesh::channel();
1477        resources.kvp_ic = Some(kvp_send);
1478        vmbus_devices.extend(
1479            [
1480                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1481                    recv: shutdown_recv,
1482                }
1483                .into_resource(),
1484                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1485                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1486            ]
1487            .map(|r| (DeviceVtl::Vtl0, r)),
1488        );
1489    }
1490
1491    if let Some(hive_path) = &opt.imc {
1492        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1493        vmbus_devices.push((
1494            DeviceVtl::Vtl0,
1495            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1496        ));
1497    }
1498
1499    let mut virtio_devices = Vec::new();
1500    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1501        let bus = match bus {
1502            VirtioBusCli::Auto => {
1503                // Use VPCI when possible (currently only on Windows and macOS due
1504                // to KVM backend limitations).
1505                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1506                    None
1507                } else {
1508                    Some(VirtioBus::Pci)
1509                }
1510            }
1511            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1512            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1513            VirtioBusCli::Vpci => None,
1514        };
1515        if let Some(bus) = bus {
1516            virtio_devices.push((bus, resource));
1517        } else {
1518            vpci_devices.push(VpciDeviceConfig {
1519                vtl: DeviceVtl::Vtl0,
1520                instance_id: Guid::new_random(),
1521                resource: VirtioPciDeviceHandle(resource).into_resource(),
1522            });
1523        }
1524    };
1525
1526    for cli_cfg in &opt.virtio_net {
1527        if cli_cfg.underhill {
1528            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1529        }
1530        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1531        let resource = virtio_resources::net::VirtioNetHandle {
1532            max_queues: vport.max_queues,
1533            mac_address: vport.mac_address,
1534            endpoint: vport.endpoint,
1535        }
1536        .into_resource();
1537        if let Some(pcie_port) = &cli_cfg.pcie_port {
1538            pcie_devices.push(PcieDeviceConfig {
1539                port_name: pcie_port.clone(),
1540                resource: VirtioPciDeviceHandle(resource).into_resource(),
1541            });
1542        } else {
1543            add_virtio_device(VirtioBusCli::Auto, resource);
1544        }
1545    }
1546
1547    for args in &opt.virtio_fs {
1548        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1549            tag: args.tag.clone(),
1550            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1551                root_path: args.path.clone(),
1552                mount_options: args.options.clone(),
1553            },
1554        }
1555        .into_resource();
1556        if let Some(pcie_port) = &args.pcie_port {
1557            pcie_devices.push(PcieDeviceConfig {
1558                port_name: pcie_port.clone(),
1559                resource: VirtioPciDeviceHandle(resource).into_resource(),
1560            });
1561        } else {
1562            add_virtio_device(opt.virtio_fs_bus, resource);
1563        }
1564    }
1565
1566    for args in &opt.virtio_fs_shmem {
1567        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1568            tag: args.tag.clone(),
1569            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1570                root_path: args.path.clone(),
1571            },
1572        }
1573        .into_resource();
1574        if let Some(pcie_port) = &args.pcie_port {
1575            pcie_devices.push(PcieDeviceConfig {
1576                port_name: pcie_port.clone(),
1577                resource: VirtioPciDeviceHandle(resource).into_resource(),
1578            });
1579        } else {
1580            add_virtio_device(opt.virtio_fs_bus, resource);
1581        }
1582    }
1583
1584    for args in &opt.virtio_9p {
1585        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1586            tag: args.tag.clone(),
1587            root_path: args.path.clone(),
1588            debug: opt.virtio_9p_debug,
1589        }
1590        .into_resource();
1591        if let Some(pcie_port) = &args.pcie_port {
1592            pcie_devices.push(PcieDeviceConfig {
1593                port_name: pcie_port.clone(),
1594                resource: VirtioPciDeviceHandle(resource).into_resource(),
1595            });
1596        } else {
1597            add_virtio_device(VirtioBusCli::Auto, resource);
1598        }
1599    }
1600
1601    if let Some(pmem_args) = &opt.virtio_pmem {
1602        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1603            path: pmem_args.path.clone(),
1604        }
1605        .into_resource();
1606        if let Some(pcie_port) = &pmem_args.pcie_port {
1607            pcie_devices.push(PcieDeviceConfig {
1608                port_name: pcie_port.clone(),
1609                resource: VirtioPciDeviceHandle(resource).into_resource(),
1610            });
1611        } else {
1612            add_virtio_device(VirtioBusCli::Auto, resource);
1613        }
1614    }
1615
1616    if opt.virtio_rng {
1617        let resource: Resource<VirtioDeviceHandle> =
1618            virtio_resources::rng::VirtioRngHandle.into_resource();
1619        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1620            pcie_devices.push(PcieDeviceConfig {
1621                port_name: pcie_port.clone(),
1622                resource: VirtioPciDeviceHandle(resource).into_resource(),
1623            });
1624        } else {
1625            add_virtio_device(opt.virtio_rng_bus, resource);
1626        }
1627    }
1628
1629    if let Some(backend) = virtio_console_backend {
1630        let resource: Resource<VirtioDeviceHandle> =
1631            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1632        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1633            pcie_devices.push(PcieDeviceConfig {
1634                port_name: pcie_port.clone(),
1635                resource: VirtioPciDeviceHandle(resource).into_resource(),
1636            });
1637        } else {
1638            add_virtio_device(VirtioBusCli::Auto, resource);
1639        }
1640    }
1641
1642    let mut cfg = Config {
1643        chipset,
1644        load_mode,
1645        floppy_disks,
1646        pcie_root_complexes,
1647        pcie_devices,
1648        pcie_switches,
1649        vpci_devices,
1650        ide_disks: Vec::new(),
1651        memory: MemoryConfig {
1652            mem_size: opt.memory,
1653            mmio_gaps,
1654            prefetch_memory: opt.prefetch,
1655            private_memory: opt.private_memory,
1656            transparent_hugepages: opt.thp,
1657            pci_ecam_gaps,
1658            pci_mmio_gaps,
1659        },
1660        processor_topology: ProcessorTopologyConfig {
1661            proc_count: opt.processors,
1662            vps_per_socket: opt.vps_per_socket,
1663            enable_smt: match opt.smt {
1664                cli_args::SmtConfigCli::Auto => None,
1665                cli_args::SmtConfigCli::Force => Some(true),
1666                cli_args::SmtConfigCli::Off => Some(false),
1667            },
1668            arch: Some(topology_arch),
1669        },
1670        hypervisor: HypervisorConfig {
1671            with_hv,
1672            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1673                vtl0_alias_map: !opt.no_alias_map,
1674                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1675                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1676                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1677                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1678                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1679                        Some(LateMapVtl0MemoryPolicy::InjectException)
1680                    }
1681                },
1682            }),
1683            with_isolation,
1684            user_mode_hv_enlightenments: opt.no_enlightenments,
1685            user_mode_apic: opt.user_mode_apic,
1686        },
1687        #[cfg(windows)]
1688        kernel_vmnics,
1689        input: mesh::Receiver::new(),
1690        framebuffer,
1691        vga_firmware,
1692        vtl2_gfx: opt.vtl2_gfx,
1693        virtio_devices,
1694        vmbus: with_hv.then_some(VmbusConfig {
1695            vsock_listener: vtl0_vsock_listener,
1696            vsock_path: opt.vsock_path.clone(),
1697            vtl2_redirect: opt.vmbus_redirect,
1698            vmbus_max_version: opt.vmbus_max_version,
1699            #[cfg(windows)]
1700            vmbusproxy_handle,
1701        }),
1702        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1703            vsock_listener: vtl2_vsock_listener,
1704            vsock_path: opt.vtl2_vsock_path.clone(),
1705            ..Default::default()
1706        }),
1707        vmbus_devices,
1708        chipset_devices,
1709        #[cfg(windows)]
1710        vpci_resources,
1711        vmgs,
1712        secure_boot_enabled: opt.secure_boot,
1713        custom_uefi_vars,
1714        firmware_event_send: None,
1715        debugger_rpc: None,
1716        generation_id_recv: None,
1717        rtc_delta_milliseconds: 0,
1718        automatic_guest_reset: !opt.halt_on_reset,
1719        efi_diagnostics_log_level: {
1720            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1721                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1722                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1723                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1724            }
1725        },
1726    };
1727
1728    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1729    Ok((cfg, resources))
1730}
1731
/// Resolves the terminal emulator used to host externally launched console
/// windows.
///
/// `OPENVMM_TERM` is consulted first; `HVLITE_TERM` is only read when the
/// former is unset. Returns `None` when neither variable is set.
fn openvmm_terminal_app() -> Option<PathBuf> {
    let term = match std::env::var_os("OPENVMM_TERM") {
        Some(term) => term,
        // Fall back to the older variable name; bail out with `None` if that
        // is missing too.
        None => std::env::var_os("HVLITE_TERM")?,
    };
    Some(PathBuf::from(term))
}
1738
/// Best-effort removal of a stale Unix socket file at `path`.
///
/// The file is deleted only when it can be positively identified as a Unix
/// socket; any failure to inspect it is treated as "not a socket". Errors from
/// the removal itself are ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = matches!(pal::windows::fs::is_unix_socket(path), Ok(true));
    #[cfg(not(windows))]
    let is_socket = match path.metadata() {
        Ok(meta) => std::os::unix::fs::FileTypeExt::is_socket(&meta.file_type()),
        Err(_) => false,
    };

    if !is_socket {
        return;
    }
    let _ = std::fs::remove_file(path);
}
1752
/// Switch ID used for `dio` endpoints when the command line does not name
/// one (see the `unwrap_or(DEFAULT_SWITCH)` fallback in `parse_endpoint`).
// NOTE(review): presumably the GUID of the Hyper-V "Default Switch" — confirm.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1755
/// Creates a new kernel switch port on the switch identified by `switch_id`.
///
/// `switch_id` is parsed into the switch identifier and a freshly generated
/// random GUID is used as the port ID. The switch is opened once before the
/// port is created so that a missing switch is reported with its own error.
///
/// Returns the port ID in `openvmm_defs` form together with the created port.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // Existence probe only: the opened network is discarded right away.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    // Re-express the kernel ID in the config crate's type.
    let vmswitch::kernel::SwitchPortId { switch, port } = kernel_id;
    Ok((
        openvmm_defs::config::SwitchPortId { switch, port },
        switch_port,
    ))
}
1778
1779fn parse_endpoint(
1780    cli_cfg: &NicConfigCli,
1781    index: &mut usize,
1782    resources: &mut VmResources,
1783) -> anyhow::Result<NicConfig> {
1784    let _ = resources;
1785    let endpoint = match &cli_cfg.endpoint {
1786        EndpointConfigCli::Consomme { cidr } => {
1787            net_backend_resources::consomme::ConsommeHandle { cidr: cidr.clone() }.into_resource()
1788        }
1789        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
1790        EndpointConfigCli::Dio { id } => {
1791            #[cfg(windows)]
1792            {
1793                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
1794                resources.switch_ports.push(port);
1795                net_backend_resources::dio::WindowsDirectIoHandle {
1796                    switch_port_id: net_backend_resources::dio::SwitchPortId {
1797                        switch: port_id.switch,
1798                        port: port_id.port,
1799                    },
1800                }
1801                .into_resource()
1802            }
1803
1804            #[cfg(not(windows))]
1805            {
1806                let _ = id;
1807                bail!("cannot use dio on non-windows platforms")
1808            }
1809        }
1810        EndpointConfigCli::Tap { name } => {
1811            net_backend_resources::tap::TapHandle { name: name.clone() }.into_resource()
1812        }
1813    };
1814
1815    // Pick a random MAC address.
1816    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
1817    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
1818
1819    // Pick a fixed instance ID based on the index.
1820    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
1821    let instance_id = Guid {
1822        data1: *index as u32,
1823        ..BASE_INSTANCE_ID
1824    };
1825    *index += 1;
1826
1827    Ok(NicConfig {
1828        vtl: cli_cfg.vtl,
1829        instance_id,
1830        endpoint,
1831        mac_address: mac_address.into(),
1832        max_queues: cli_cfg.max_queues,
1833        pcie_port: cli_cfg.pcie_port.clone(),
1834    })
1835}
1836
/// A resolved NIC configuration, as produced by `parse_endpoint` from one
/// `NicConfigCli` entry.
#[derive(Debug)]
struct NicConfig {
    /// Device VTL, copied from the CLI configuration.
    vtl: DeviceVtl,
    /// Instance ID; `parse_endpoint` derives it from a fixed base GUID with
    /// `data1` set to the NIC's index.
    instance_id: Guid,
    /// MAC address; `parse_endpoint` uses `00:15:5D` plus three random bytes.
    mac_address: MacAddress,
    /// Backend endpoint resource selected by the CLI endpoint kind.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional queue limit, copied from the CLI configuration.
    max_queues: Option<u16>,
    /// Optional PCIe port name, copied from the CLI configuration. Not used by
    /// `into_netvsp_handle`.
    pcie_port: Option<String>,
}
1846
1847impl NicConfig {
1848    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1849        (
1850            self.vtl,
1851            netvsp_resources::NetvspHandle {
1852                instance_id: self.instance_id,
1853                mac_address: self.mac_address,
1854                endpoint: self.endpoint,
1855                max_queues: self.max_queues,
1856            }
1857            .into_resource(),
1858        )
1859    }
1860}
1861
/// One entry in the stack built by `disk_open_inner`: either a single layer or
/// a complete disk.
enum LayerOrDisk {
    /// A layer description, used as-is when assembling a layered disk.
    Layer(DiskLayerDescription),
    /// A complete disk resource. `disk_open` returns it directly when it is the
    /// only entry in the stack, and otherwise wraps it as a layer.
    Disk(Resource<DiskHandleKind>),
}
1866
1867fn disk_open(disk_cli: &DiskCliKind, read_only: bool) -> anyhow::Result<Resource<DiskHandleKind>> {
1868    let mut layers = Vec::new();
1869    disk_open_inner(disk_cli, read_only, &mut layers)?;
1870    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1871        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1872            unreachable!()
1873        };
1874        Ok(disk)
1875    } else {
1876        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1877            layers: layers
1878                .into_iter()
1879                .map(|layer| match layer {
1880                    LayerOrDisk::Layer(layer) => layer,
1881                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1882                        layer: DiskLayerHandle(disk).into_resource(),
1883                        read_cache: false,
1884                        write_through: false,
1885                    },
1886                })
1887                .collect(),
1888        }))
1889    }
1890}
1891
1892fn disk_open_inner(
1893    disk_cli: &DiskCliKind,
1894    read_only: bool,
1895    layers: &mut Vec<LayerOrDisk>,
1896) -> anyhow::Result<()> {
1897    fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
1898        LayerOrDisk::Layer(layer.into_resource().into())
1899    }
1900    fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
1901        LayerOrDisk::Disk(disk.into_resource())
1902    }
1903    match disk_cli {
1904        &DiskCliKind::Memory(len) => {
1905            layers.push(layer(RamDiskLayerHandle {
1906                len: Some(len),
1907                sector_size: None,
1908            }));
1909        }
1910        DiskCliKind::File {
1911            path,
1912            create_with_len,
1913        } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
1914            create_disk_type(path, *size)
1915                .with_context(|| format!("failed to create {}", path.display()))?
1916        } else {
1917            open_disk_type(path, read_only)
1918                .with_context(|| format!("failed to open {}", path.display()))?
1919        })),
1920        DiskCliKind::Blob { kind, url } => {
1921            layers.push(disk(disk_backend_resources::BlobDiskHandle {
1922                url: url.to_owned(),
1923                format: match kind {
1924                    cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
1925                    cli_args::BlobKind::Vhd1 => disk_backend_resources::BlobDiskFormat::FixedVhd1,
1926                },
1927            }))
1928        }
1929        DiskCliKind::MemoryDiff(inner) => {
1930            layers.push(layer(RamDiskLayerHandle {
1931                len: None,
1932                sector_size: None,
1933            }));
1934            disk_open_inner(inner, true, layers)?;
1935        }
1936        DiskCliKind::PersistentReservationsWrapper(inner) => layers.push(disk(
1937            disk_backend_resources::DiskWithReservationsHandle(disk_open(inner, read_only)?),
1938        )),
1939        DiskCliKind::DelayDiskWrapper {
1940            delay_ms,
1941            disk: inner,
1942        } => layers.push(disk(DelayDiskHandle {
1943            delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
1944            disk: disk_open(inner, read_only)?,
1945        })),
1946        DiskCliKind::Crypt {
1947            disk: inner,
1948            cipher,
1949            key_file,
1950        } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
1951            disk: disk_open(inner, read_only)?,
1952            cipher: match cipher {
1953                cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
1954            },
1955            key: fs_err::read(key_file).context("failed to read key file")?,
1956        })),
1957        DiskCliKind::Sqlite {
1958            path,
1959            create_with_len,
1960        } => {
1961            // FUTURE: this code should be responsible for opening
1962            // file-handle(s) itself, and passing them into sqlite via a custom
1963            // vfs. For now though - simply check if the file exists or not, and
1964            // perform early validation of filesystem-level create options.
1965            match (create_with_len.is_some(), path.exists()) {
1966                (true, true) => anyhow::bail!(
1967                    "cannot create new sqlite disk at {} - file already exists",
1968                    path.display()
1969                ),
1970                (false, false) => anyhow::bail!(
1971                    "cannot open sqlite disk at {} - file not found",
1972                    path.display()
1973                ),
1974                _ => {}
1975            }
1976
1977            layers.push(layer(SqliteDiskLayerHandle {
1978                dbhd_path: path.display().to_string(),
1979                format_dbhd: create_with_len.map(|len| {
1980                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
1981                        logically_read_only: false,
1982                        len: Some(len),
1983                    }
1984                }),
1985            }));
1986        }
1987        DiskCliKind::SqliteDiff { path, create, disk } => {
1988            // FUTURE: this code should be responsible for opening
1989            // file-handle(s) itself, and passing them into sqlite via a custom
1990            // vfs. For now though - simply check if the file exists or not, and
1991            // perform early validation of filesystem-level create options.
1992            match (create, path.exists()) {
1993                (true, true) => anyhow::bail!(
1994                    "cannot create new sqlite disk at {} - file already exists",
1995                    path.display()
1996                ),
1997                (false, false) => anyhow::bail!(
1998                    "cannot open sqlite disk at {} - file not found",
1999                    path.display()
2000                ),
2001                _ => {}
2002            }
2003
2004            layers.push(layer(SqliteDiskLayerHandle {
2005                dbhd_path: path.display().to_string(),
2006                format_dbhd: create.then_some(
2007                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2008                        logically_read_only: false,
2009                        len: None,
2010                    },
2011                ),
2012            }));
2013            disk_open_inner(disk, true, layers)?;
2014        }
2015        DiskCliKind::AutoCacheSqlite {
2016            cache_path,
2017            key,
2018            disk,
2019        } => {
2020            layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2021                read_cache: true,
2022                write_through: false,
2023                layer: SqliteAutoCacheDiskLayerHandle {
2024                    cache_path: cache_path.clone(),
2025                    cache_key: key.clone(),
2026                }
2027                .into_resource(),
2028            }));
2029            disk_open_inner(disk, read_only, layers)?;
2030        }
2031    }
2032    Ok(())
2033}
2034
2035/// Get the system page size.
2036fn system_page_size() -> u32 {
2037    sparse_mmap::SparseMapping::page_size() as u32
2038}
2039
/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg.
#[cfg(guest_arch = "x86_64")]
const GUEST_ARCH: &str = "x86_64";

/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg. Anything other than `x86_64` is reported as `aarch64`.
#[cfg(not(guest_arch = "x86_64"))]
const GUEST_ARCH: &str = "aarch64";
2046
2047/// Open a snapshot directory and validate it against the current VM config.
2048/// Returns the shared memory fd (from memory.bin) and the saved device state.
2049fn prepare_snapshot_restore(
2050    snapshot_dir: &Path,
2051    opt: &Options,
2052) -> anyhow::Result<(
2053    openvmm_defs::worker::SharedMemoryFd,
2054    mesh::payload::message::ProtobufMessage,
2055)> {
2056    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2057
2058    // Validate manifest against current VM config.
2059    openvmm_helpers::snapshot::validate_manifest(
2060        &manifest,
2061        GUEST_ARCH,
2062        opt.memory,
2063        opt.processors,
2064        system_page_size(),
2065    )?;
2066
2067    // Open memory.bin (existing file, no create, no resize).
2068    let memory_file = fs_err::OpenOptions::new()
2069        .read(true)
2070        .write(true)
2071        .open(snapshot_dir.join("memory.bin"))?;
2072
2073    // Validate file size matches expected memory size.
2074    let file_size = memory_file.metadata()?.len();
2075    if file_size != manifest.memory_size_bytes {
2076        anyhow::bail!(
2077            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2078            manifest.memory_size_bytes,
2079        );
2080    }
2081
2082    let shared_memory_fd =
2083        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2084
2085    // Reconstruct ProtobufMessage from the saved state bytes.
2086    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2087    // back to ProtobufMessage.
2088    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2089        .context("failed to decode saved state from snapshot")?;
2090
2091    Ok((shared_memory_fd, state_msg))
2092}
2093
2094/// Save a VM snapshot to the given directory.
2095///
2096/// Pauses the VM, saves device state, fsyncs the memory backing file,
2097/// and writes the snapshot directory. The VM remains paused after this
2098/// call — resuming would corrupt the snapshot.
2099async fn save_snapshot(
2100    vm_rpc: &mesh::Sender<VmRpc>,
2101    opt: &Options,
2102    dir: &Path,
2103) -> anyhow::Result<()> {
2104    let memory_file_path = opt
2105        .memory_backing_file
2106        .as_ref()
2107        .context("save-snapshot requires --memory-backing-file")?;
2108
2109    // Pause the VM.
2110    vm_rpc
2111        .call(VmRpc::Pause, ())
2112        .await
2113        .context("failed to pause VM")?;
2114
2115    // Get device state via existing VmRpc::Save.
2116    let saved_state_msg = vm_rpc
2117        .call_failable(VmRpc::Save, ())
2118        .await
2119        .context("failed to save state")?;
2120
2121    // Serialize the ProtobufMessage to bytes for writing to disk.
2122    let saved_state_bytes = mesh::payload::encode(saved_state_msg);
2123
2124    // Fsync the memory backing file.
2125    let memory_file = fs_err::File::open(memory_file_path)?;
2126    memory_file
2127        .sync_all()
2128        .context("failed to fsync memory backing file")?;
2129
2130    // Build manifest.
2131    let manifest = openvmm_helpers::snapshot::SnapshotManifest {
2132        version: openvmm_helpers::snapshot::MANIFEST_VERSION,
2133        created_at: std::time::SystemTime::now().into(),
2134        openvmm_version: env!("CARGO_PKG_VERSION").to_string(),
2135        memory_size_bytes: opt.memory,
2136        vp_count: opt.processors,
2137        page_size: system_page_size(),
2138        architecture: GUEST_ARCH.to_string(),
2139    };
2140
2141    // Write snapshot directory.
2142    openvmm_helpers::snapshot::write_snapshot(
2143        dir,
2144        &manifest,
2145        &saved_state_bytes,
2146        memory_file_path,
2147    )?;
2148
2149    // VM stays paused. Do NOT resume.
2150    Ok(())
2151}
2152
/// Process entry logic.
///
/// In order: initializes tracing, attempts to run as a mesh worker host,
/// parses the command line, and then dispatches to one of the early-exit
/// modes (writing saved-state protobuf descriptors, relaying a console,
/// serving ttrpc/grpc) or to the interactive control loop.
fn do_main() -> anyhow::Result<()> {
    #[cfg(windows)]
    pal::windows::disable_hard_error_dialog();

    tracing_init::enable_tracing()?;

    // Try to run as a worker host.
    // On success the worker runs to completion and then exits the process (does
    // not return). Any worker host setup errors are returned and bubbled up.
    meshworker::run_vmm_mesh_host()?;

    let opt = Options::parse();
    // Descriptor-dump mode: write the protobuf descriptors and exit.
    if let Some(path) = &opt.write_saved_state_proto {
        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
            .write_to_path(path)
            .context("failed to write protobuf descriptors")?;
        return Ok(());
    }

    // Console relay mode. This moves fields out of `opt`, which is fine
    // because the branch always returns.
    if let Some(path) = opt.relay_console_path {
        let console_title = opt.relay_console_title.unwrap_or_default();
        return console_relay::relay_console(&path, console_title.as_str());
    }

    // RPC server mode; `--ttrpc` takes precedence over `--grpc` when choosing
    // the socket path and the transport.
    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
        return block_on(async {
            // Best-effort removal of whatever is at `path` before binding;
            // errors (e.g. nothing to remove) are ignored.
            // NOTE(review): unlike `cleanup_socket`, this does not check that
            // the existing file is a socket — confirm this is intended.
            let _ = std::fs::remove_file(path);
            let listener =
                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;

            let transport = if opt.ttrpc.is_some() {
                ttrpc::RpcTransport::Ttrpc
            } else {
                ttrpc::RpcTransport::Grpc
            };

            // This is a local launch
            let mut handle =
                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
                    listener,
                    transport,
                })
                .await?;

            tracing::info!(%transport, path = %path.display(), "listening");

            // Signal the parent process that the server is ready.
            pal::close_stdout().context("failed to close stdout")?;

            handle.join().await?;

            Ok(())
        });
    }

    // Interactive mode. The mesh is shut down whether or not `run_control`
    // succeeded, and the control loop's result is what gets returned.
    DefaultPool::run_with(async |driver| {
        let mesh = VmmMesh::new(&driver, opt.single_process)?;
        let result = run_control(&driver, &mesh, opt).await;
        mesh.shutdown().await;
        result
    })
}
2216
/// Parses a `u64` that may carry a radix prefix.
///
/// `0x`/`0X` selects hexadecimal, `0o`/`0O` octal and `0b`/`0B` binary;
/// anything else is parsed as decimal. On failure the integer parse error is
/// returned as its display string.
fn maybe_with_radix_u64(s: &str) -> Result<u64, String> {
    /// Recognized prefixes (lower- and upper-case spelling) with their radix.
    const PREFIXES: [(&str, &str, u32); 3] = [("0x", "0X", 16), ("0o", "0O", 8), ("0b", "0B", 2)];

    let (digits, radix) = PREFIXES
        .iter()
        .find_map(|&(lower, upper, radix)| {
            s.strip_prefix(lower)
                .or_else(|| s.strip_prefix(upper))
                .map(|rest| (rest, radix))
        })
        .unwrap_or((s, 10));

    u64::from_str_radix(digits, radix).map_err(|err| err.to_string())
}
2230
// Commands accepted by the interactive console.
//
// NOTE: the `///` comments on the variants and fields below are picked up by
// clap's derive and shown to the user as help text, so they are user-facing
// output rather than ordinary documentation. Maintainer notes therefore use
// plain `//` comments.
//
// `no_binary_name` is set because `CommandParser::parse` feeds clap the words
// of a typed line, which do not start with a program name.
#[derive(Parser)]
#[clap(
    name = "openvmm",
    disable_help_flag = true,
    disable_version_flag = true,
    no_binary_name = true,
    help_template("{subcommands}")
)]
enum InteractiveCommand {
    /// Restart the VM worker (experimental).
    ///
    /// This restarts the VM worker while preserving state.
    #[clap(visible_alias = "R")]
    Restart,

    /// Inject an NMI.
    #[clap(visible_alias = "n")]
    Nmi,

    /// Pause the VM.
    #[clap(visible_alias = "p")]
    Pause,

    /// Resume the VM.
    #[clap(visible_alias = "r")]
    Resume,

    /// Save a snapshot to a directory (requires --memory-backing-file).
    #[clap(visible_alias = "snap")]
    SaveSnapshot {
        /// Directory to write the snapshot to.
        dir: PathBuf,
    },

    /// Do a pulsed save restore (pause, save, reset, restore, resume) to the VM.
    #[clap(visible_alias = "psr")]
    PulseSaveRestore,

    /// Schedule a pulsed save restore (pause, save, reset, restore, resume) to the VM.
    #[clap(visible_alias = "spsr")]
    SchedulePulseSaveRestore {
        /// The interval between pulse save restore operations in seconds.
        /// None or 0 means any previous scheduled pulse save restores will be cleared.
        interval: Option<u64>,
    },

    /// Hot add a disk to the VTL0 guest.
    #[clap(visible_alias = "d")]
    AddDisk {
        #[clap(long = "ro")]
        read_only: bool,
        #[clap(long = "dvd")]
        is_dvd: bool,
        #[clap(long, default_value_t)]
        target: u8,
        #[clap(long, default_value_t)]
        path: u8,
        #[clap(long, default_value_t)]
        lun: u8,
        #[clap(long)]
        ram: Option<u64>,
        file_path: Option<PathBuf>,
    },

    /// Hot remove a disk from the VTL0 guest.
    #[clap(visible_alias = "D")]
    RmDisk {
        #[clap(long)]
        target: u8,
        #[clap(long)]
        path: u8,
        #[clap(long)]
        lun: u8,
    },

    /// Manage VTL2 settings (storage controllers, NICs exposed to VTL0).
    #[clap(subcommand)]
    Vtl2Settings(Vtl2SettingsCommand),

    /// Hot add an NVMe namespace to VTL2, and optionally to VTL0.
    AddNvmeNs {
        #[clap(long = "ro")]
        read_only: bool,
        /// The namespace ID.
        #[clap(long)]
        nsid: u32,
        /// Create a RAM-backed namespace of the specified size in bytes.
        #[clap(long)]
        ram: Option<u64>,
        /// Path to a file to use as the backing store.
        file_path: Option<PathBuf>,
        /// Also expose this namespace to VTL0 via VTL2 settings as a SCSI disk
        /// with the specified LUN number.
        #[clap(long)]
        vtl0_lun: Option<u32>,
    },

    /// Hot remove an NVMe namespace from VTL2.
    RmNvmeNs {
        /// The namespace ID to remove.
        #[clap(long)]
        nsid: u32,
        /// Also remove the VTL0 SCSI disk backed by this namespace.
        #[clap(long)]
        vtl0: bool,
    },

    /// Inspect program state.
    #[clap(visible_alias = "x")]
    Inspect {
        /// Enumerate state recursively.
        #[clap(short, long)]
        recursive: bool,
        /// The recursive depth limit.
        #[clap(short, long, requires("recursive"))]
        limit: Option<usize>,
        /// Target the paravisor.
        #[clap(short = 'v', long)]
        paravisor: bool,
        /// The element path to inspect.
        element: Option<String>,
        /// Update the path with a new value.
        #[clap(short, long, conflicts_with("recursive"))]
        update: Option<String>,
    },

    /// Restart the VNC worker.
    #[clap(visible_alias = "V")]
    RestartVnc,

    /// Start an hvsocket terminal window.
    #[clap(visible_alias = "v")]
    Hvsock {
        /// the terminal emulator to run (defaults to conhost.exe or xterm)
        #[clap(short, long)]
        term: Option<PathBuf>,
        /// the vsock port to connect to
        port: u32,
    },

    /// Quit the program.
    #[clap(visible_alias = "q")]
    Quit,

    /// Write input to the VM console.
    ///
    /// This will write each input parameter to the console's associated serial
    /// port, separated by spaces.
    #[clap(visible_alias = "i")]
    Input { data: Vec<String> },

    /// Switch to input mode.
    ///
    /// Once in input mode, Ctrl-Q returns to command mode.
    #[clap(visible_alias = "I")]
    InputMode,

    /// Reset the VM.
    Reset,

    /// Send a request to the VM to shut it down.
    Shutdown {
        /// Reboot the VM instead of powering it off.
        #[clap(long, short = 'r')]
        reboot: bool,
        // `-h` is claimed here for hibernate; presumably this relies on
        // `disable_help_flag = true` on the enum — confirm before re-enabling
        // the help flag.
        /// Hibernate the VM instead of powering it off.
        #[clap(long, short = 'h', conflicts_with = "reboot")]
        hibernate: bool,
        /// Tell the guest to force the power state transition.
        #[clap(long, short = 'f')]
        force: bool,
    },

    /// Clears the current halt condition, resuming the VPs if the VM is
    /// running.
    #[clap(visible_alias = "ch")]
    ClearHalt,

    /// Update the image in VTL2.
    ServiceVtl2 {
        /// Just restart the user-mode paravisor process, not the full
        /// firmware.
        #[clap(long, short = 'u')]
        user_mode_only: bool,
        /// The path to the new IGVM file. If missing, use the originally
        /// configured path.
        #[clap(long, conflicts_with("user_mode_only"))]
        igvm: Option<PathBuf>,
        // NOTE(review): the help text says the default is `true`, but this is
        // a plain `bool` flag; `default_missing_value` only matters when the
        // flag is present without a value, so an absent flag presumably still
        // yields `false`. Confirm the intended default.
        /// Enable keepalive when servicing VTL2 devices.
        /// Default is `true`.
        #[clap(long, short = 'n', default_missing_value = "true")]
        nvme_keepalive: bool,
        // NOTE(review): help text is word-for-word the same as for
        // `nvme_keepalive`; presumably this one is about MANA devices —
        // confirm and consider making the two descriptions distinct.
        /// Enable keepalive when servicing VTL2 devices.
        /// Default is `false`.
        #[clap(long)]
        mana_keepalive: bool,
    },

    /// Read guest memory
    ReadMemory {
        /// Guest physical address to start at.
        #[clap(value_parser=maybe_with_radix_u64)]
        gpa: u64,
        /// How many bytes to dump.
        #[clap(value_parser=maybe_with_radix_u64)]
        size: u64,
        /// File to save the data to. If omitted,
        /// the data will be presented as a hex dump.
        #[clap(long, short = 'f')]
        file: Option<PathBuf>,
    },

    /// Write guest memory
    WriteMemory {
        /// Guest physical address to start at
        #[clap(value_parser=maybe_with_radix_u64)]
        gpa: u64,
        /// Hex string encoding data, with no `0x` radix.
        /// If omitted, the source file must be specified.
        hex: Option<String>,
        /// File to write the data from.
        #[clap(long, short = 'f')]
        file: Option<PathBuf>,
    },

    /// Inject an artificial panic into OpenVMM
    Panic,

    /// Use KVP to interact with the guest.
    Kvp(kvp::KvpCommand),
}
2462
// NOTE: as with `InteractiveCommand`, the `///` comments in this enum are
// consumed by clap's derive as help text; maintainer notes use `//`.
/// Subcommands for managing VTL2 settings.
#[derive(clap::Subcommand)]
enum Vtl2SettingsCommand {
    /// Show the current VTL2 settings.
    Show,

    /// Add a SCSI disk to VTL0 backed by a VTL2 storage device.
    ///
    /// The backing device can be either a VTL2 NVMe namespace or a VTL2 SCSI disk.
    AddScsiDisk {
        /// The VTL0 SCSI controller instance ID (GUID). Defaults to the standard
        /// OpenVMM VTL0 SCSI instance.
        #[clap(long)]
        controller: Option<String>,
        /// The SCSI LUN to expose to VTL0.
        #[clap(long)]
        lun: u32,
        // Exactly one of the two backing options must be given: each one
        // conflicts with the other and is required when the other is absent.
        /// The backing VTL2 NVMe namespace ID.
        #[clap(
            long,
            conflicts_with = "backing_scsi_lun",
            required_unless_present = "backing_scsi_lun"
        )]
        backing_nvme_nsid: Option<u32>,
        /// The backing VTL2 SCSI LUN.
        #[clap(
            long,
            conflicts_with = "backing_nvme_nsid",
            required_unless_present = "backing_nvme_nsid"
        )]
        backing_scsi_lun: Option<u32>,
    },

    /// Remove a SCSI disk from VTL0.
    RmScsiDisk {
        /// The SCSI controller instance ID (GUID). Defaults to the standard
        /// OpenVMM VTL0 SCSI instance.
        #[clap(long)]
        controller: Option<String>,
        /// The SCSI LUN to remove.
        #[clap(long)]
        lun: u32,
    },
}
2507
2508struct CommandParser {
2509    app: clap::Command,
2510}
2511
2512impl CommandParser {
2513    fn new() -> Self {
2514        // Update the help template for each subcommand.
2515        let mut app = InteractiveCommand::command();
2516        for sc in app.get_subcommands_mut() {
2517            *sc = sc
2518                .clone()
2519                .help_template("{about-with-newline}\n{usage-heading}\n    {usage}\n\n{all-args}");
2520        }
2521        Self { app }
2522    }
2523
2524    fn parse(&mut self, line: &str) -> clap::error::Result<InteractiveCommand> {
2525        let args = shell_words::split(line)
2526            .map_err(|err| self.app.error(clap::error::ErrorKind::ValueValidation, err))?;
2527        let matches = self.app.try_get_matches_from_mut(args)?;
2528        InteractiveCommand::from_arg_matches(&matches).map_err(|err| err.format(&mut self.app))
2529    }
2530}
2531
2532fn new_hvsock_service_id(port: u32) -> Guid {
2533    // This GUID is an embedding of the AF_VSOCK port into an
2534    // AF_HYPERV service ID.
2535    Guid {
2536        data1: port,
2537        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2538    }
2539}
2540
2541async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> anyhow::Result<()> {
2542    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2543
2544    let mut vnc_worker = None;
2545    if opt.gfx || opt.vnc {
2546        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
2547            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;
2548
2549        let input_send = vm_config.input.sender();
2550        let framebuffer = resources
2551            .framebuffer_access
2552            .take()
2553            .expect("synth video enabled");
2554
2555        let vnc_host = mesh
2556            .make_host("vnc", None)
2557            .await
2558            .context("spawning vnc process failed")?;
2559
2560        vnc_worker = Some(
2561            vnc_host
2562                .launch_worker(
2563                    vnc_worker_defs::VNC_WORKER_TCP,
2564                    VncParameters {
2565                        listener,
2566                        framebuffer,
2567                        input_send,
2568                    },
2569                )
2570                .await?,
2571        )
2572    }
2573
2574    // spin up the debug worker
2575    let gdb_worker = if let Some(port) = opt.gdb {
2576        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2577            .with_context(|| format!("binding to gdb port {}", port))?;
2578
2579        let (req_tx, req_rx) = mesh::channel();
2580        vm_config.debugger_rpc = Some(req_rx);
2581
2582        let gdb_host = mesh
2583            .make_host("gdb", None)
2584            .await
2585            .context("spawning gdbstub process failed")?;
2586
2587        Some(
2588            gdb_host
2589                .launch_worker(
2590                    debug_worker_defs::DEBUGGER_WORKER,
2591                    debug_worker_defs::DebuggerParameters {
2592                        listener,
2593                        req_chan: req_tx,
2594                        vp_count: vm_config.processor_topology.proc_count,
2595                        target_arch: if cfg!(guest_arch = "x86_64") {
2596                            debug_worker_defs::TargetArch::X86_64
2597                        } else {
2598                            debug_worker_defs::TargetArch::Aarch64
2599                        },
2600                    },
2601                )
2602                .await
2603                .context("failed to launch gdbstub worker")?,
2604        )
2605    } else {
2606        None
2607    };
2608
2609    // spin up the VM
2610    let (vm_rpc, rpc_recv) = mesh::channel();
2611    let (notify_send, notify_recv) = mesh::channel();
2612    let mut vm_worker = {
2613        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2614
2615        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2616            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2617            (Some(fd), Some(state_msg))
2618        } else {
2619            let shared_memory = opt
2620                .memory_backing_file
2621                .as_ref()
2622                .map(|path| {
2623                    openvmm_helpers::shared_memory::open_memory_backing_file(path, opt.memory)
2624                })
2625                .transpose()?;
2626            (shared_memory, None)
2627        };
2628
2629        let params = VmWorkerParameters {
2630            hypervisor: match &opt.hypervisor {
2631                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2632                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2633            },
2634            cfg: vm_config,
2635            saved_state,
2636            shared_memory,
2637            rpc: rpc_recv,
2638            notify: notify_send,
2639        };
2640        vm_host
2641            .launch_worker(VM_WORKER, params)
2642            .await
2643            .context("failed to launch vm worker")?
2644    };
2645
2646    if opt.restore_snapshot.is_some() {
2647        tracing::info!("restoring VM from snapshot");
2648    }
2649
2650    if !opt.paused {
2651        vm_rpc.call(VmRpc::Resume, ()).await?;
2652    }
2653
2654    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2655        driver.clone(),
2656        DiagDialer {
2657            driver: driver.clone(),
2658            vm_rpc: vm_rpc.clone(),
2659            openhcl_vtl: if opt.vtl2 {
2660                DeviceVtl::Vtl2
2661            } else {
2662                DeviceVtl::Vtl0
2663            },
2664        },
2665    ));
2666
2667    let mut diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2668
2669    let (console_command_send, console_command_recv) = mesh::channel();
2670    let (inspect_completion_engine_send, inspect_completion_engine_recv) = mesh::channel();
2671
2672    let mut console_in = resources.console_in.take();
2673    thread::Builder::new()
2674        .name("stdio-thread".to_string())
2675        .spawn(move || {
2676            // install panic hook to restore cooked terminal (linux)
2677            #[cfg(unix)]
2678            if io::stderr().is_terminal() {
2679                term::revert_terminal_on_panic()
2680            }
2681
2682            let mut rl = rustyline::Editor::<
2683                interactive_console::OpenvmmRustylineEditor,
2684                rustyline::history::FileHistory,
2685            >::with_config(
2686                rustyline::Config::builder()
2687                    .completion_type(rustyline::CompletionType::List)
2688                    .build(),
2689            )
2690            .unwrap();
2691
2692            rl.set_helper(Some(interactive_console::OpenvmmRustylineEditor {
2693                openvmm_inspect_req: Arc::new(inspect_completion_engine_send),
2694            }));
2695
2696            let history_file = {
2697                const HISTORY_FILE: &str = ".openvmm_history";
2698
2699                // using a `None` to kick off the `.or()` chain in order to make
2700                // it a bit easier to visually inspect the fallback chain.
2701                let history_folder = None
2702                    .or_else(dirs::state_dir)
2703                    .or_else(dirs::data_local_dir)
2704                    .map(|path| path.join("openvmm"));
2705
2706                if let Some(history_folder) = history_folder {
2707                    if let Err(err) = std::fs::create_dir_all(&history_folder) {
2708                        tracing::warn!(
2709                            error = &err as &dyn std::error::Error,
2710                            "could not create directory: {}",
2711                            history_folder.display()
2712                        )
2713                    }
2714
2715                    Some(history_folder.join(HISTORY_FILE))
2716                } else {
2717                    None
2718                }
2719            };
2720
2721            if let Some(history_file) = &history_file {
2722                tracing::info!("restoring history from {}", history_file.display());
2723                if rl.load_history(history_file).is_err() {
2724                    tracing::info!("could not find existing {}", history_file.display());
2725                }
2726            }
2727
2728            // Enable Ctrl-Backspace to delete the current word.
2729            rl.bind_sequence(
2730                rustyline::KeyEvent::new('\x08', rustyline::Modifiers::CTRL),
2731                rustyline::Cmd::Kill(rustyline::Movement::BackwardWord(1, rustyline::Word::Emacs)),
2732            );
2733
2734            let mut parser = CommandParser::new();
2735
2736            let mut stdin = io::stdin();
2737            loop {
2738                // Raw console text until Ctrl-Q.
2739                term::set_raw_console(true).expect("failed to set raw console mode");
2740
2741                if let Some(input) = console_in.as_mut() {
2742                    let mut buf = [0; 32];
2743                    loop {
2744                        let n = stdin.read(&mut buf).unwrap();
2745                        let mut b = &buf[..n];
2746                        let stop = if let Some(ctrlq) = b.iter().position(|x| *x == 0x11) {
2747                            b = &b[..ctrlq];
2748                            true
2749                        } else {
2750                            false
2751                        };
2752                        block_on(input.as_mut().write_all(b)).expect("BUGBUG");
2753                        if stop {
2754                            break;
2755                        }
2756                    }
2757                }
2758
2759                term::set_raw_console(false).expect("failed to set raw console mode");
2760
2761                loop {
2762                    let line = rl.readline("openvmm> ");
2763                    if line.is_err() {
2764                        break;
2765                    }
2766                    let line = line.unwrap();
2767                    let trimmed = line.trim();
2768                    if trimmed.is_empty() {
2769                        continue;
2770                    }
2771                    if let Err(err) = rl.add_history_entry(&line) {
2772                        tracing::warn!(
2773                            err = &err as &dyn std::error::Error,
2774                            "error adding to .openvmm_history"
2775                        )
2776                    }
2777
2778                    match parser.parse(trimmed) {
2779                        Ok(cmd) => match cmd {
2780                            InteractiveCommand::Input { data } => {
2781                                let mut data = data.join(" ");
2782                                data.push('\n');
2783                                if let Some(input) = console_in.as_mut() {
2784                                    block_on(input.write_all(data.as_bytes())).expect("BUGBUG");
2785                                }
2786                            }
2787                            InteractiveCommand::InputMode => break,
2788                            cmd => {
2789                                // Send the command to the main thread for processing.
2790                                let (processing_done_send, processing_done_recv) =
2791                                    mesh::oneshot::<()>();
2792                                console_command_send.send((cmd, processing_done_send));
2793                                let _ = block_on(processing_done_recv);
2794                            }
2795                        },
2796                        Err(err) => {
2797                            err.print().unwrap();
2798                        }
2799                    }
2800
2801                    if let Some(history_file) = &history_file {
2802                        rl.append_history(history_file).unwrap();
2803                    }
2804                }
2805            }
2806        })
2807        .unwrap();
2808
2809    let mut state_change_task = None::<Task<Result<StateChange, RpcError>>>;
2810    let mut pulse_save_restore_interval: Option<Duration> = None;
2811    let mut pending_shutdown = None;
2812    let mut snapshot_saved = false;
2813
2814    enum StateChange {
2815        Pause(bool),
2816        Resume(bool),
2817        Reset(Result<(), RemoteError>),
2818        PulseSaveRestore(Result<(), PulseSaveRestoreError>),
2819        ServiceVtl2(anyhow::Result<Duration>),
2820    }
2821
2822    enum Event {
2823        Command((InteractiveCommand, mesh::OneshotSender<()>)),
2824        InspectRequestFromCompletionEngine(
2825            (InspectTarget, String, mesh::OneshotSender<inspect::Node>),
2826        ),
2827        Quit,
2828        Halt(vmm_core_defs::HaltReason),
2829        PulseSaveRestore,
2830        Worker(WorkerEvent),
2831        VncWorker(WorkerEvent),
2832        StateChange(Result<StateChange, RpcError>),
2833        ShutdownResult(Result<hyperv_ic_resources::shutdown::ShutdownResult, RpcError>),
2834    }
2835
2836    let mut console_command_recv = console_command_recv
2837        .map(Event::Command)
2838        .chain(futures::stream::repeat_with(|| Event::Quit));
2839
2840    let mut notify_recv = notify_recv.map(Event::Halt);
2841
2842    let mut inspect_completion_engine_recv =
2843        inspect_completion_engine_recv.map(Event::InspectRequestFromCompletionEngine);
2844
2845    let mut quit = false;
2846    loop {
2847        let event = {
2848            let pulse_save_restore = pin!(async {
2849                match pulse_save_restore_interval {
2850                    Some(wait) => {
2851                        PolledTimer::new(driver).sleep(wait).await;
2852                        Event::PulseSaveRestore
2853                    }
2854                    None => pending().await,
2855                }
2856            });
2857
2858            let vm = (&mut vm_worker).map(Event::Worker);
2859            let vnc = futures::stream::iter(vnc_worker.as_mut())
2860                .flatten()
2861                .map(Event::VncWorker);
2862            let change = futures::stream::iter(state_change_task.as_mut().map(|x| x.into_stream()))
2863                .flatten()
2864                .map(Event::StateChange);
2865            let shutdown = pin!(async {
2866                if let Some(s) = &mut pending_shutdown {
2867                    Event::ShutdownResult(s.await)
2868                } else {
2869                    pending().await
2870                }
2871            });
2872
2873            (
2874                &mut console_command_recv,
2875                &mut inspect_completion_engine_recv,
2876                &mut notify_recv,
2877                pulse_save_restore.into_stream(),
2878                vm,
2879                vnc,
2880                change,
2881                shutdown.into_stream(),
2882            )
2883                .merge()
2884                .next()
2885                .await
2886                .unwrap()
2887        };
2888
2889        let (cmd, _processing_done_send) = match event {
2890            Event::Command(message) => message,
2891            Event::InspectRequestFromCompletionEngine((vtl, path, res)) => {
2892                let mut inspection =
2893                    InspectionBuilder::new(&path)
2894                        .depth(Some(1))
2895                        .inspect(inspect_obj(
2896                            vtl,
2897                            mesh,
2898                            &vm_worker,
2899                            vnc_worker.as_ref(),
2900                            gdb_worker.as_ref(),
2901                            &mut diag_inspector,
2902                        ));
2903                let _ = CancelContext::new()
2904                    .with_timeout(Duration::from_secs(1))
2905                    .until_cancelled(inspection.resolve())
2906                    .await;
2907
2908                let node = inspection.results();
2909                res.send(node);
2910                continue;
2911            }
2912            Event::Quit => break,
2913            Event::Halt(reason) => {
2914                tracing::info!(?reason, "guest halted");
2915                continue;
2916            }
2917            Event::PulseSaveRestore => {
2918                vm_rpc.call(VmRpc::PulseSaveRestore, ()).await??;
2919                continue;
2920            }
2921            Event::Worker(event) => {
2922                match event {
2923                    WorkerEvent::Stopped => {
2924                        if quit {
2925                            tracing::info!("vm stopped");
2926                        } else {
2927                            tracing::error!("vm worker unexpectedly stopped");
2928                        }
2929                        break;
2930                    }
2931                    WorkerEvent::Failed(err) => {
2932                        tracing::error!(error = &err as &dyn std::error::Error, "vm worker failed");
2933                        break;
2934                    }
2935                    WorkerEvent::RestartFailed(err) => {
2936                        tracing::error!(
2937                            error = &err as &dyn std::error::Error,
2938                            "vm worker restart failed"
2939                        );
2940                    }
2941                    WorkerEvent::Started => {
2942                        tracing::info!("vm worker restarted");
2943                    }
2944                }
2945                continue;
2946            }
2947            Event::VncWorker(event) => {
2948                match event {
2949                    WorkerEvent::Stopped => tracing::error!("vnc unexpectedly stopped"),
2950                    WorkerEvent::Failed(err) => {
2951                        tracing::error!(
2952                            error = &err as &dyn std::error::Error,
2953                            "vnc worker failed"
2954                        );
2955                    }
2956                    WorkerEvent::RestartFailed(err) => {
2957                        tracing::error!(
2958                            error = &err as &dyn std::error::Error,
2959                            "vnc worker restart failed"
2960                        );
2961                    }
2962                    WorkerEvent::Started => {
2963                        tracing::info!("vnc worker restarted");
2964                    }
2965                }
2966                continue;
2967            }
2968            Event::StateChange(r) => {
2969                match r {
2970                    Ok(sc) => match sc {
2971                        StateChange::Pause(success) => {
2972                            if success {
2973                                tracing::info!("pause complete");
2974                            } else {
2975                                tracing::warn!("already paused");
2976                            }
2977                        }
2978                        StateChange::Resume(success) => {
2979                            if success {
2980                                tracing::info!("resumed complete");
2981                            } else {
2982                                tracing::warn!("already running");
2983                            }
2984                        }
2985                        StateChange::Reset(r) => match r {
2986                            Ok(()) => tracing::info!("reset complete"),
2987                            Err(err) => tracing::error!(
2988                                error = &err as &dyn std::error::Error,
2989                                "reset failed"
2990                            ),
2991                        },
2992                        StateChange::PulseSaveRestore(r) => match r {
2993                            Ok(()) => tracing::info!("pulse save/restore complete"),
2994                            Err(err) => tracing::error!(
2995                                error = &err as &dyn std::error::Error,
2996                                "pulse save/restore failed"
2997                            ),
2998                        },
2999                        StateChange::ServiceVtl2(r) => match r {
3000                            Ok(dur) => {
3001                                tracing::info!(
3002                                    duration = dur.as_millis() as i64,
3003                                    "vtl2 servicing complete"
3004                                )
3005                            }
3006                            Err(err) => tracing::error!(
3007                                error = err.as_ref() as &dyn std::error::Error,
3008                                "vtl2 servicing failed"
3009                            ),
3010                        },
3011                    },
3012                    Err(err) => {
3013                        tracing::error!(
3014                            error = &err as &dyn std::error::Error,
3015                            "communication failure during state change"
3016                        );
3017                    }
3018                }
3019                state_change_task = None;
3020                continue;
3021            }
3022            Event::ShutdownResult(r) => {
3023                match r {
3024                    Ok(r) => match r {
3025                        hyperv_ic_resources::shutdown::ShutdownResult::Ok => {
3026                            tracing::info!("shutdown initiated");
3027                        }
3028                        hyperv_ic_resources::shutdown::ShutdownResult::NotReady => {
3029                            tracing::error!("shutdown ic not ready");
3030                        }
3031                        hyperv_ic_resources::shutdown::ShutdownResult::AlreadyInProgress => {
3032                            tracing::error!("shutdown already in progress");
3033                        }
3034                        hyperv_ic_resources::shutdown::ShutdownResult::Failed(hr) => {
3035                            tracing::error!("shutdown failed with error code {hr:#x}");
3036                        }
3037                    },
3038                    Err(err) => {
3039                        tracing::error!(
3040                            error = &err as &dyn std::error::Error,
3041                            "communication failure during shutdown"
3042                        );
3043                    }
3044                }
3045                pending_shutdown = None;
3046                continue;
3047            }
3048        };
3049
3050        fn inspect_obj<'a>(
3051            target: InspectTarget,
3052            mesh: &'a VmmMesh,
3053            vm_worker: &'a WorkerHandle,
3054            vnc_worker: Option<&'a WorkerHandle>,
3055            gdb_worker: Option<&'a WorkerHandle>,
3056            diag_inspector: &'a mut DiagInspector,
3057        ) -> impl 'a + InspectMut {
3058            inspect::adhoc_mut(move |req| match target {
3059                InspectTarget::Host => {
3060                    let mut resp = req.respond();
3061                    resp.field("mesh", mesh)
3062                        .field("vm", vm_worker)
3063                        .field("vnc", vnc_worker)
3064                        .field("gdb", gdb_worker);
3065                }
3066                InspectTarget::Paravisor => {
3067                    diag_inspector.inspect_mut(req);
3068                }
3069            })
3070        }
3071
3072        fn state_change<U: 'static + Send>(
3073            driver: impl Spawn,
3074            vm_rpc: &mesh::Sender<VmRpc>,
3075            state_change_task: &mut Option<Task<Result<StateChange, RpcError>>>,
3076            f: impl FnOnce(Rpc<(), U>) -> VmRpc,
3077            g: impl FnOnce(U) -> StateChange + 'static + Send,
3078        ) {
3079            if state_change_task.is_some() {
3080                tracing::error!("state change already in progress");
3081            } else {
3082                let rpc = vm_rpc.call(f, ());
3083                *state_change_task =
3084                    Some(driver.spawn("state-change", async move { Ok(g(rpc.await?)) }));
3085            }
3086        }
3087
3088        match cmd {
3089            InteractiveCommand::Panic => {
3090                panic!("injected panic")
3091            }
3092            InteractiveCommand::Restart => {
3093                // create a new host process
3094                let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
3095
3096                vm_worker.restart(&vm_host);
3097            }
3098            InteractiveCommand::Pause => {
3099                state_change(
3100                    driver,
3101                    &vm_rpc,
3102                    &mut state_change_task,
3103                    VmRpc::Pause,
3104                    StateChange::Pause,
3105                );
3106            }
3107            InteractiveCommand::Resume => {
3108                if snapshot_saved {
3109                    eprintln!(
3110                        "error: cannot resume after snapshot save — resuming would corrupt the snapshot. Use 'shutdown' to exit."
3111                    );
3112                } else {
3113                    state_change(
3114                        driver,
3115                        &vm_rpc,
3116                        &mut state_change_task,
3117                        VmRpc::Resume,
3118                        StateChange::Resume,
3119                    );
3120                }
3121            }
3122            InteractiveCommand::Reset => {
3123                state_change(
3124                    driver,
3125                    &vm_rpc,
3126                    &mut state_change_task,
3127                    VmRpc::Reset,
3128                    StateChange::Reset,
3129                );
3130            }
3131            InteractiveCommand::SaveSnapshot { dir } => {
3132                match save_snapshot(&vm_rpc, &opt, &dir).await {
3133                    Ok(()) => {
3134                        snapshot_saved = true;
3135                        tracing::info!(
3136                            dir = %dir.display(),
3137                            "snapshot saved; VM is paused. \
3138                             Resume is blocked to prevent snapshot corruption. \
3139                             Use 'shutdown' to exit."
3140                        );
3141                    }
3142                    Err(err) => {
3143                        eprintln!("error: save-snapshot failed: {err:#}");
3144                    }
3145                }
3146            }
3147            InteractiveCommand::PulseSaveRestore => {
3148                state_change(
3149                    driver,
3150                    &vm_rpc,
3151                    &mut state_change_task,
3152                    VmRpc::PulseSaveRestore,
3153                    StateChange::PulseSaveRestore,
3154                );
3155            }
3156            InteractiveCommand::SchedulePulseSaveRestore { interval } => {
3157                pulse_save_restore_interval = match interval {
3158                    Some(seconds) if seconds != 0 => Some(Duration::from_secs(seconds)),
3159                    _ => {
3160                        // Treat None and 0 seconds as do not perform scheduled pulse save restores anymore.
3161                        None
3162                    }
3163                }
3164            }
3165            InteractiveCommand::Shutdown {
3166                reboot,
3167                hibernate,
3168                force,
3169            } => {
3170                if pending_shutdown.is_some() {
3171                    println!("shutdown already in progress");
3172                } else if let Some(ic) = &resources.shutdown_ic {
3173                    let params = hyperv_ic_resources::shutdown::ShutdownParams {
3174                        shutdown_type: if hibernate {
3175                            hyperv_ic_resources::shutdown::ShutdownType::Hibernate
3176                        } else if reboot {
3177                            hyperv_ic_resources::shutdown::ShutdownType::Reboot
3178                        } else {
3179                            hyperv_ic_resources::shutdown::ShutdownType::PowerOff
3180                        },
3181                        force,
3182                    };
3183                    pending_shutdown =
3184                        Some(ic.call(hyperv_ic_resources::shutdown::ShutdownRpc::Shutdown, params));
3185                } else {
3186                    println!("no shutdown ic configured");
3187                }
3188            }
3189            InteractiveCommand::Nmi => {
3190                let _ = vm_rpc.call(VmRpc::Nmi, 0).await;
3191            }
3192            InteractiveCommand::ClearHalt => {
3193                vm_rpc.call(VmRpc::ClearHalt, ()).await.ok();
3194            }
3195            InteractiveCommand::AddDisk {
3196                read_only,
3197                target,
3198                path,
3199                lun,
3200                ram,
3201                file_path,
3202                is_dvd,
3203            } => {
3204                let action = async {
3205                    let scsi = resources.scsi_rpc.as_ref().context("no scsi controller")?;
3206                    let disk_type = match ram {
3207                        None => {
3208                            let path = file_path.context("no filename passed")?;
3209                            open_disk_type(path.as_ref(), read_only)
3210                                .with_context(|| format!("failed to open {}", path.display()))?
3211                        }
3212                        Some(size) => {
3213                            Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer(
3214                                RamDiskLayerHandle {
3215                                    len: Some(size),
3216                                    sector_size: None,
3217                                },
3218                            ))
3219                        }
3220                    };
3221
3222                    let device = if is_dvd {
3223                        SimpleScsiDvdHandle {
3224                            media: Some(disk_type),
3225                            requests: None,
3226                        }
3227                        .into_resource()
3228                    } else {
3229                        SimpleScsiDiskHandle {
3230                            disk: disk_type,
3231                            read_only,
3232                            parameters: Default::default(),
3233                        }
3234                        .into_resource()
3235                    };
3236
3237                    let cfg = ScsiDeviceAndPath {
3238                        path: ScsiPath { path, target, lun },
3239                        device,
3240                    };
3241
3242                    scsi.call_failable(ScsiControllerRequest::AddDevice, cfg)
3243                        .await?;
3244
3245                    anyhow::Result::<_>::Ok(())
3246                };
3247
3248                if let Err(error) = action.await {
3249                    tracing::error!(error = error.as_error(), "error adding disk")
3250                }
3251            }
3252            InteractiveCommand::RmDisk { target, path, lun } => {
3253                let action = async {
3254                    let scsi = resources.scsi_rpc.as_ref().context("no scsi controller")?;
3255                    scsi.call_failable(
3256                        ScsiControllerRequest::RemoveDevice,
3257                        ScsiPath { target, path, lun },
3258                    )
3259                    .await?;
3260                    anyhow::Ok(())
3261                };
3262
3263                if let Err(error) = action.await {
3264                    tracing::error!(error = error.as_error(), "error removing disk")
3265                }
3266            }
3267            InteractiveCommand::Vtl2Settings(cmd) => {
3268                if resources.vtl2_settings.is_none() {
3269                    eprintln!("error: no VTL2 settings (not running with VTL2?)");
3270                    continue;
3271                }
3272                let action = async {
3273                    match cmd {
3274                        Vtl2SettingsCommand::Show => {
3275                            let settings = resources.vtl2_settings.as_ref().unwrap();
3276                            println!("{:#?}", settings);
3277                        }
3278                        Vtl2SettingsCommand::AddScsiDisk {
3279                            controller,
3280                            lun,
3281                            backing_nvme_nsid,
3282                            backing_scsi_lun,
3283                        } => {
3284                            // Determine the backing device type and path
3285                            let (device_type, device_path, sub_device_path) = match (
3286                                backing_nvme_nsid,
3287                                backing_scsi_lun,
3288                            ) {
3289                                (Some(nsid), None) => (
3290                                    vtl2_settings_proto::physical_device::DeviceType::Nvme,
3291                                    storage_builder::NVME_VTL2_INSTANCE_ID,
3292                                    nsid,
3293                                ),
3294                                (None, Some(scsi_lun)) => (
3295                                    vtl2_settings_proto::physical_device::DeviceType::Vscsi,
3296                                    storage_builder::SCSI_VTL2_INSTANCE_ID,
3297                                    scsi_lun,
3298                                ),
3299                                (Some(_), Some(_)) => {
3300                                    anyhow::bail!(
3301                                        "can't specify both --backing-nvme-nsid and --backing-scsi-lun"
3302                                    );
3303                                }
3304                                (None, None) => {
3305                                    anyhow::bail!(
3306                                        "must specify either --backing-nvme-nsid or --backing-scsi-lun"
3307                                    );
3308                                }
3309                            };
3310
3311                            // Default to the standard OpenVMM VTL0 SCSI instance
3312                            let controller_guid = controller
3313                                .map(|s| s.parse())
3314                                .transpose()
3315                                .context("invalid controller GUID")?
3316                                .unwrap_or(storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE);
3317
3318                            resources
3319                                .add_vtl0_scsi_disk(
3320                                    controller_guid,
3321                                    lun,
3322                                    device_type,
3323                                    device_path,
3324                                    sub_device_path,
3325                                )
3326                                .await?;
3327
3328                            let backing_desc = if backing_nvme_nsid.is_some() {
3329                                format!("nvme_nsid={}", sub_device_path)
3330                            } else {
3331                                format!("scsi_lun={}", sub_device_path)
3332                            };
3333                            println!(
3334                                "Added VTL0 SCSI disk: controller={}, lun={}, backing={}",
3335                                controller_guid, lun, backing_desc
3336                            );
3337                        }
3338                        Vtl2SettingsCommand::RmScsiDisk { controller, lun } => {
3339                            // Default to the standard OpenVMM VTL0 SCSI instance
3340                            let controller_guid = controller
3341                                .map(|s| s.parse())
3342                                .transpose()
3343                                .context("invalid controller GUID")?
3344                                .unwrap_or(storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE);
3345
3346                            resources
3347                                .remove_vtl0_scsi_disk(controller_guid, lun)
3348                                .await?;
3349
3350                            println!(
3351                                "Removed VTL0 SCSI disk: controller={}, lun={}",
3352                                controller_guid, lun
3353                            );
3354                        }
3355                    }
3356                    anyhow::Ok(())
3357                };
3358
3359                if let Err(error) = action.await {
3360                    eprintln!("error: {}", error);
3361                }
3362            }
3363            InteractiveCommand::AddNvmeNs {
3364                read_only,
3365                nsid,
3366                ram,
3367                file_path,
3368                vtl0_lun,
3369            } => {
3370                if resources.vtl2_settings.is_none() {
3371                    eprintln!("error: add-nvme-ns requires --vtl2 mode");
3372                    continue;
3373                }
3374                let action = async {
3375                    let nvme = resources
3376                        .nvme_vtl2_rpc
3377                        .as_ref()
3378                        .context("no vtl2 nvme controller")?;
3379                    let disk_type = match (ram, file_path) {
3380                        (None, Some(path)) => open_disk_type(path.as_ref(), read_only)
3381                            .with_context(|| format!("failed to open {}", path.display()))?,
3382                        (Some(size), None) => {
3383                            Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer(
3384                                RamDiskLayerHandle {
3385                                    len: Some(size),
3386                                    sector_size: None,
3387                                },
3388                            ))
3389                        }
3390                        (None, None) => {
3391                            anyhow::bail!("must specify either file path or --ram");
3392                        }
3393                        (Some(_), Some(_)) => {
3394                            anyhow::bail!("cannot specify both file path and --ram");
3395                        }
3396                    };
3397
3398                    let ns = NamespaceDefinition {
3399                        nsid,
3400                        read_only,
3401                        disk: disk_type,
3402                    };
3403
3404                    nvme.call_failable(NvmeControllerRequest::AddNamespace, ns)
3405                        .await?;
3406                    println!("Added namespace {}", nsid);
3407
3408                    // If --vtl0-lun was specified, add a SCSI disk to VTL0 backed by the NVMe namespace
3409                    if let Some(lun) = vtl0_lun {
3410                        resources
3411                            .add_vtl0_scsi_disk(
3412                                storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE,
3413                                lun,
3414                                vtl2_settings_proto::physical_device::DeviceType::Nvme,
3415                                storage_builder::NVME_VTL2_INSTANCE_ID,
3416                                nsid,
3417                            )
3418                            .await?;
3419                        println!("Exposed namespace {} to VTL0 as SCSI lun={}", nsid, lun);
3420                    }
3421
3422                    Ok(())
3423                };
3424
3425                if let Err(error) = action.await {
3426                    eprintln!("error adding nvme namespace: {}", error);
3427                }
3428            }
3429            InteractiveCommand::RmNvmeNs { nsid, vtl0 } => {
3430                if resources.vtl2_settings.is_none() {
3431                    eprintln!("error: rm-nvme-ns requires --vtl2 mode");
3432                    continue;
3433                }
3434                let action = async {
3435                    // If --vtl0 was specified, find and remove the SCSI disk backed by this namespace
3436                    if vtl0 {
3437                        let removed_lun = resources
3438                            .remove_vtl0_scsi_disk_by_nvme_nsid(
3439                                storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE,
3440                                storage_builder::NVME_VTL2_INSTANCE_ID,
3441                                nsid,
3442                            )
3443                            .await?;
3444                        if let Some(lun) = removed_lun {
3445                            println!("Removed VTL0 SCSI lun={}", lun);
3446                        } else {
3447                            println!("No VTL0 SCSI disk found backed by NVMe nsid={}", nsid);
3448                        }
3449                    }
3450
3451                    let nvme = resources
3452                        .nvme_vtl2_rpc
3453                        .as_ref()
3454                        .context("no vtl2 nvme controller")?;
3455                    nvme.call_failable(NvmeControllerRequest::RemoveNamespace, nsid)
3456                        .await?;
3457                    println!("Removed NVMe namespace {}", nsid);
3458                    anyhow::Ok(())
3459                };
3460
3461                if let Err(error) = action.await {
3462                    eprintln!("error removing nvme namespace: {}", error);
3463                }
3464            }
3465            InteractiveCommand::Inspect {
3466                recursive,
3467                limit,
3468                paravisor,
3469                element,
3470                update,
3471            } => {
3472                let obj = inspect_obj(
3473                    if paravisor {
3474                        InspectTarget::Paravisor
3475                    } else {
3476                        InspectTarget::Host
3477                    },
3478                    mesh,
3479                    &vm_worker,
3480                    vnc_worker.as_ref(),
3481                    gdb_worker.as_ref(),
3482                    &mut diag_inspector,
3483                );
3484
3485                if let Some(value) = update {
3486                    let Some(element) = element else {
3487                        anyhow::bail!("must provide element for update")
3488                    };
3489
3490                    let value = async {
3491                        let update = inspect::update(&element, &value, obj);
3492                        let value = CancelContext::new()
3493                            .with_timeout(Duration::from_secs(1))
3494                            .until_cancelled(update)
3495                            .await??;
3496                        anyhow::Ok(value)
3497                    }
3498                    .await;
3499                    match value {
3500                        Ok(node) => match &node.kind {
3501                            inspect::ValueKind::String(s) => println!("{s}"),
3502                            _ => println!("{:#}", node),
3503                        },
3504                        Err(err) => println!("error: {:#}", err),
3505                    }
3506                } else {
3507                    let element = element.unwrap_or_default();
3508                    let depth = if recursive { limit } else { Some(0) };
3509                    let node = async {
3510                        let mut inspection =
3511                            InspectionBuilder::new(&element).depth(depth).inspect(obj);
3512                        let _ = CancelContext::new()
3513                            .with_timeout(Duration::from_secs(1))
3514                            .until_cancelled(inspection.resolve())
3515                            .await;
3516                        inspection.results()
3517                    }
3518                    .await;
3519
3520                    println!("{:#}", node);
3521                }
3522            }
3523            InteractiveCommand::RestartVnc => {
3524                if let Some(vnc) = &mut vnc_worker {
3525                    let action = async {
3526                        let vnc_host = mesh
3527                            .make_host("vnc", None)
3528                            .await
3529                            .context("spawning vnc process failed")?;
3530
3531                        vnc.restart(&vnc_host);
3532                        anyhow::Result::<_>::Ok(())
3533                    };
3534
3535                    if let Err(error) = action.await {
3536                        eprintln!("error: {}", error);
3537                    }
3538                } else {
3539                    eprintln!("ERROR: no VNC server running");
3540                }
3541            }
3542            InteractiveCommand::Hvsock { term, port } => {
3543                let vm_rpc = &vm_rpc;
3544                let action = async || {
3545                    let service_id = new_hvsock_service_id(port);
3546                    let socket = vm_rpc
3547                        .call_failable(
3548                            VmRpc::ConnectHvsock,
3549                            (
3550                                CancelContext::new().with_timeout(Duration::from_secs(2)),
3551                                service_id,
3552                                DeviceVtl::Vtl0,
3553                            ),
3554                        )
3555                        .await?;
3556                    let socket = PolledSocket::new(driver, socket)?;
3557                    let mut console = console_relay::Console::new(
3558                        driver.clone(),
3559                        term.or_else(openvmm_terminal_app).as_deref(),
3560                        Some(ConsoleLaunchOptions {
3561                            window_title: Some(format!("HVSock{} [OpenVMM]", port)),
3562                        }),
3563                    )?;
3564                    driver
3565                        .spawn("console-relay", async move { console.relay(socket).await })
3566                        .detach();
3567                    anyhow::Result::<_>::Ok(())
3568                };
3569
3570                if let Err(error) = (action)().await {
3571                    eprintln!("error: {}", error);
3572                }
3573            }
3574            InteractiveCommand::ServiceVtl2 {
3575                user_mode_only,
3576                igvm,
3577                mana_keepalive,
3578                nvme_keepalive,
3579            } => {
3580                let paravisor_diag = paravisor_diag.clone();
3581                let vm_rpc = vm_rpc.clone();
3582                let igvm = igvm.or_else(|| opt.igvm.clone());
3583                let ged_rpc = resources.ged_rpc.clone();
3584                let r = async move {
3585                    let start;
3586                    if user_mode_only {
3587                        start = Instant::now();
3588                        paravisor_diag.restart().await?;
3589                    } else {
3590                        let path = igvm.context("no igvm file loaded")?;
3591                        let file = fs_err::File::open(path)?;
3592                        start = Instant::now();
3593                        openvmm_helpers::underhill::save_underhill(
3594                            &vm_rpc,
3595                            ged_rpc.as_ref().context("no GED")?,
3596                            GuestServicingFlags {
3597                                nvme_keepalive,
3598                                mana_keepalive,
3599                            },
3600                            file.into(),
3601                        )
3602                        .await?;
3603                        openvmm_helpers::underhill::restore_underhill(
3604                            &vm_rpc,
3605                            ged_rpc.as_ref().context("no GED")?,
3606                        )
3607                        .await?;
3608                    }
3609                    let end = Instant::now();
3610                    Ok(end - start)
3611                }
3612                .map(|r| Ok(StateChange::ServiceVtl2(r)));
3613                if state_change_task.is_some() {
3614                    tracing::error!("state change already in progress");
3615                } else {
3616                    state_change_task = Some(driver.spawn("state-change", r));
3617                }
3618            }
3619            InteractiveCommand::Quit => {
3620                tracing::info!("quitting");
3621                // Work around the detached SCSI task holding up worker stop.
3622                // TODO: Fix the underlying bug
3623                resources.scsi_rpc = None;
3624                resources.nvme_vtl2_rpc = None;
3625
3626                vm_worker.stop();
3627                quit = true;
3628            }
3629            InteractiveCommand::ReadMemory { gpa, size, file } => {
3630                let size = size as usize;
3631                let data = vm_rpc.call(VmRpc::ReadMemory, (gpa, size)).await?;
3632
3633                match data {
3634                    Ok(bytes) => {
3635                        if let Some(file) = file {
3636                            if let Err(err) = fs_err::write(file, bytes) {
3637                                eprintln!("error: {err:?}");
3638                            }
3639                        } else {
3640                            let width = 16;
3641                            let show_ascii = true;
3642
3643                            let mut dump = String::new();
3644                            for (i, chunk) in bytes.chunks(width).enumerate() {
3645                                let hex_part: Vec<String> =
3646                                    chunk.iter().map(|byte| format!("{:02x}", byte)).collect();
3647                                let hex_line = hex_part.join(" ");
3648
3649                                if show_ascii {
3650                                    let ascii_part: String = chunk
3651                                        .iter()
3652                                        .map(|&byte| {
3653                                            if byte.is_ascii_graphic() || byte == b' ' {
3654                                                byte as char
3655                                            } else {
3656                                                '.'
3657                                            }
3658                                        })
3659                                        .collect();
3660                                    dump.push_str(&format!(
3661                                        "{:04x}: {:<width$}  {}\n",
3662                                        i * width,
3663                                        hex_line,
3664                                        ascii_part,
3665                                        width = width * 3 - 1
3666                                    ));
3667                                } else {
3668                                    dump.push_str(&format!("{:04x}: {}\n", i * width, hex_line));
3669                                }
3670                            }
3671
3672                            println!("{dump}");
3673                        }
3674                    }
3675                    Err(err) => {
3676                        eprintln!("error: {err:?}");
3677                    }
3678                }
3679            }
3680            InteractiveCommand::WriteMemory { gpa, hex, file } => {
3681                if hex.is_some() == file.is_some() {
3682                    eprintln!("error: either path to the file or the hex string must be specified");
3683                    continue;
3684                }
3685
3686                let data = if let Some(file) = file {
3687                    let data = fs_err::read(file);
3688                    match data {
3689                        Ok(data) => data,
3690                        Err(err) => {
3691                            eprintln!("error: {err:?}");
3692                            continue;
3693                        }
3694                    }
3695                } else if let Some(hex) = hex {
3696                    if hex.len() & 1 != 0 {
3697                        eprintln!(
3698                            "error: expected even number of hex digits (2 hex digits per byte)"
3699                        );
3700                        continue;
3701                    }
3702                    let data: Result<Vec<u8>, String> = (0..hex.len())
3703                        .step_by(2)
3704                        .map(|i| {
3705                            u8::from_str_radix(&hex[i..i + 2], 16).map_err(|e| {
3706                                format!("invalid hex character at position {}: {}", i, e)
3707                            })
3708                        })
3709                        .collect();
3710
3711                    match data {
3712                        Ok(data) => data,
3713                        Err(err) => {
3714                            eprintln!("error: {err}");
3715                            continue;
3716                        }
3717                    }
3718                } else {
3719                    unreachable!();
3720                };
3721
3722                if data.is_empty() {
3723                    eprintln!("error: no data to write");
3724                    continue;
3725                }
3726
3727                if let Err(err) = vm_rpc.call(VmRpc::WriteMemory, (gpa, data)).await? {
3728                    eprintln!("error: {err:?}");
3729                }
3730            }
3731            InteractiveCommand::Kvp(command) => {
3732                let Some(kvp) = &resources.kvp_ic else {
3733                    eprintln!("error: no kvp ic configured");
3734                    continue;
3735                };
3736                if let Err(err) = kvp::handle_kvp(kvp, command).await {
3737                    eprintln!("error: {err:#}");
3738                }
3739            }
3740            InteractiveCommand::Input { .. } | InteractiveCommand::InputMode => unreachable!(),
3741        }
3742    }
3743
3744    vm_worker.stop();
3745    vm_worker.join().await?;
3746    Ok(())
3747}
3748
3749struct DiagDialer {
3750    driver: DefaultDriver,
3751    vm_rpc: mesh::Sender<VmRpc>,
3752    openhcl_vtl: DeviceVtl,
3753}
3754
3755impl mesh_rpc::client::Dial for DiagDialer {
3756    type Stream = PolledSocket<unix_socket::UnixStream>;
3757
3758    async fn dial(&mut self) -> io::Result<Self::Stream> {
3759        let service_id = new_hvsock_service_id(1);
3760        let socket = self
3761            .vm_rpc
3762            .call_failable(
3763                VmRpc::ConnectHvsock,
3764                (
3765                    CancelContext::new().with_timeout(Duration::from_secs(2)),
3766                    service_id,
3767                    self.openhcl_vtl,
3768                ),
3769            )
3770            .await
3771            .map_err(io::Error::other)?;
3772
3773        PolledSocket::new(&self.driver, socket)
3774    }
3775}
3776
3777/// An object that implements [`InspectMut`] by sending an inspect request over
3778/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
3779/// the response back into the inspect tree.
3780///
3781/// This also caches the TTRPC connection to the guest so that only the first
3782/// inspect request has to wait for the connection to be established.
3783pub struct DiagInspector(DiagInspectorInner);
3784
3785enum DiagInspectorInner {
3786    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
3787    Started {
3788        send: mesh::Sender<inspect::Deferred>,
3789        _task: Task<()>,
3790    },
3791    Invalid,
3792}
3793
3794impl DiagInspector {
3795    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
3796        Self(DiagInspectorInner::NotStarted(driver, diag_client))
3797    }
3798
3799    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
3800        loop {
3801            match self.0 {
3802                DiagInspectorInner::NotStarted { .. } => {
3803                    let DiagInspectorInner::NotStarted(driver, client) =
3804                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
3805                    else {
3806                        unreachable!()
3807                    };
3808                    let (send, recv) = mesh::channel();
3809                    let task = driver.clone().spawn("diag-inspect", async move {
3810                        Self::run(&client, recv).await
3811                    });
3812
3813                    self.0 = DiagInspectorInner::Started { send, _task: task };
3814                }
3815                DiagInspectorInner::Started { ref send, .. } => break send,
3816                DiagInspectorInner::Invalid => unreachable!(),
3817            }
3818        }
3819    }
3820
3821    async fn run(
3822        diag_client: &diag_client::DiagClient,
3823        mut recv: mesh::Receiver<inspect::Deferred>,
3824    ) {
3825        while let Some(deferred) = recv.next().await {
3826            let info = deferred.external_request();
3827            let result = match info.request_type {
3828                inspect::ExternalRequestType::Inspect { depth } => {
3829                    if depth == 0 {
3830                        Ok(inspect::Node::Unevaluated)
3831                    } else {
3832                        // TODO: Support taking timeouts from the command line
3833                        diag_client
3834                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
3835                            .await
3836                    }
3837                }
3838                inspect::ExternalRequestType::Update { value } => {
3839                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
3840                }
3841            };
3842            deferred.complete_external(
3843                result.unwrap_or_else(|err| {
3844                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
3845                }),
3846                inspect::SensitivityLevel::Unspecified,
3847            )
3848        }
3849    }
3850}
3851
3852impl InspectMut for DiagInspector {
3853    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
3854        self.start().send(req.defer());
3855    }
3856}
3857
/// Selects which side an inspect request is directed at.
enum InspectTarget {
    /// The host; used when the `paravisor` flag is not set.
    Host,
    /// The paravisor; used when the `paravisor` flag is set.
    Paravisor,
}
3862
3863mod interactive_console {
3864    use super::InteractiveCommand;
3865    use rustyline::Helper;
3866    use rustyline::Highlighter;
3867    use rustyline::Hinter;
3868    use rustyline::Validator;
3869
3870    #[derive(Helper, Highlighter, Hinter, Validator)]
3871    pub(crate) struct OpenvmmRustylineEditor {
3872        pub openvmm_inspect_req: std::sync::Arc<
3873            mesh::Sender<(
3874                super::InspectTarget,
3875                String,
3876                mesh::OneshotSender<inspect::Node>,
3877            )>,
3878        >,
3879    }
3880
3881    impl rustyline::completion::Completer for OpenvmmRustylineEditor {
3882        type Candidate = String;
3883
3884        fn complete(
3885            &self,
3886            line: &str,
3887            pos: usize,
3888            _ctx: &rustyline::Context<'_>,
3889        ) -> rustyline::Result<(usize, Vec<Self::Candidate>)> {
3890            let Ok(cmd) = shell_words::split(line) else {
3891                return Ok((0, Vec::with_capacity(0)));
3892            };
3893
3894            let completions = futures::executor::block_on(
3895                clap_dyn_complete::Complete {
3896                    cmd,
3897                    raw: Some(line.into()),
3898                    position: Some(pos),
3899                }
3900                .generate_completions::<InteractiveCommand>(None, self),
3901            );
3902
3903            let pos_from_end = {
3904                let line = line.chars().take(pos).collect::<String>();
3905
3906                let trailing_ws = line.len() - line.trim_end().len();
3907
3908                if trailing_ws > 0 {
3909                    line.len() - trailing_ws + 1 // +1 for the space
3910                } else {
3911                    let last_word = shell_words::split(&line)
3912                        .unwrap_or_default()
3913                        .last()
3914                        .cloned()
3915                        .unwrap_or_default();
3916
3917                    line.len() - last_word.len()
3918                }
3919            };
3920
3921            Ok((pos_from_end, completions))
3922        }
3923    }
3924
3925    impl clap_dyn_complete::CustomCompleterFactory for &OpenvmmRustylineEditor {
3926        type CustomCompleter = OpenvmmComplete;
3927        async fn build(&self, _ctx: &clap_dyn_complete::RootCtx<'_>) -> Self::CustomCompleter {
3928            OpenvmmComplete {
3929                openvmm_inspect_req: self.openvmm_inspect_req.clone(),
3930            }
3931        }
3932    }
3933
3934    pub struct OpenvmmComplete {
3935        openvmm_inspect_req: std::sync::Arc<
3936            mesh::Sender<(
3937                super::InspectTarget,
3938                String,
3939                mesh::OneshotSender<inspect::Node>,
3940            )>,
3941        >,
3942    }
3943
3944    impl clap_dyn_complete::CustomCompleter for OpenvmmComplete {
3945        async fn complete(
3946            &self,
3947            ctx: &clap_dyn_complete::RootCtx<'_>,
3948            subcommand_path: &[&str],
3949            arg_id: &str,
3950        ) -> Vec<String> {
3951            match (subcommand_path, arg_id) {
3952                (["openvmm", "inspect"], "element") => {
3953                    let on_error = vec!["failed/to/connect".into()];
3954
3955                    let (parent_path, to_complete) = (ctx.to_complete)
3956                        .rsplit_once('/')
3957                        .unwrap_or(("", ctx.to_complete));
3958
3959                    let node = {
3960                        let paravisor = {
3961                            let raw_arg = ctx
3962                                .matches
3963                                .subcommand()
3964                                .unwrap()
3965                                .1
3966                                .get_one::<String>("paravisor")
3967                                .map(|x| x.as_str())
3968                                .unwrap_or_default();
3969                            raw_arg == "true"
3970                        };
3971
3972                        let (tx, rx) = mesh::oneshot();
3973                        self.openvmm_inspect_req.send((
3974                            if paravisor {
3975                                super::InspectTarget::Paravisor
3976                            } else {
3977                                super::InspectTarget::Host
3978                            },
3979                            parent_path.to_owned(),
3980                            tx,
3981                        ));
3982                        let Ok(node) = rx.await else {
3983                            return on_error;
3984                        };
3985
3986                        node
3987                    };
3988
3989                    let mut completions = Vec::new();
3990
3991                    if let inspect::Node::Dir(dir) = node {
3992                        for entry in dir {
3993                            if entry.name.starts_with(to_complete) {
3994                                if parent_path.is_empty() {
3995                                    completions.push(format!("{}/", entry.name))
3996                                } else {
3997                                    completions.push(format!(
3998                                        "{}/{}{}",
3999                                        parent_path,
4000                                        entry.name,
4001                                        if matches!(entry.node, inspect::Node::Dir(..)) {
4002                                            "/"
4003                                        } else {
4004                                            ""
4005                                        }
4006                                    ))
4007                                }
4008                            }
4009                        }
4010                    } else {
4011                        return on_error;
4012                    }
4013
4014                    completions
4015                }
4016                _ => Vec::new(),
4017            }
4018        }
4019    }
4020}