// openvmm_entry/lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod pidfile;
15mod repl;
16mod serial_io;
17mod storage_builder;
18mod tracing_init;
19mod ttrpc;
20mod vm_controller;
21
22// `pub` so that the missing_docs warning fires for options without
23// documentation.
24pub use cli_args::Options;
25use console_relay::ConsoleLaunchOptions;
26
27use crate::cli_args::SecureBootTemplateCli;
28use anyhow::Context;
29use anyhow::bail;
30use chipset_resources::battery::HostBatteryUpdate;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use cxl_spec::test::CxlTestDeviceHandle;
42use disk_backend_resources::DelayDiskHandle;
43use disk_backend_resources::DiskLayerDescription;
44use disk_backend_resources::layer::DiskLayerHandle;
45use disk_backend_resources::layer::RamDiskLayerHandle;
46use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
47use disk_backend_resources::layer::SqliteDiskLayerHandle;
48use floppy_resources::FloppyDiskConfig;
49use framebuffer::FRAMEBUFFER_SIZE;
50use framebuffer::FramebufferAccess;
51use futures::AsyncReadExt;
52use futures::AsyncWrite;
53use futures::StreamExt;
54use futures::executor::block_on;
55use futures::io::AllowStdIo;
56use gdma_resources::GdmaDeviceHandle;
57use gdma_resources::VportDefinition;
58use guid::Guid;
59use input_core::MultiplexedInputHandle;
60use inspect::InspectMut;
61use io::Read;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
70use openvmm_defs::config::DeviceVtl;
71use openvmm_defs::config::EfiDiagnosticsLogLevelType;
72use openvmm_defs::config::HypervisorConfig;
73use openvmm_defs::config::LateMapVtl0MemoryPolicy;
74use openvmm_defs::config::LoadMode;
75use openvmm_defs::config::MemoryConfig;
76use openvmm_defs::config::NumaDistance;
77use openvmm_defs::config::NumaNode;
78use openvmm_defs::config::NumaTopology;
79use openvmm_defs::config::PcieDeviceConfig;
80use openvmm_defs::config::PcieMmioRangeConfig;
81use openvmm_defs::config::PcieRootComplexConfig;
82use openvmm_defs::config::PcieRootPortConfig;
83use openvmm_defs::config::PcieSwitchConfig;
84use openvmm_defs::config::ProcessorTopologyConfig;
85use openvmm_defs::config::RootComplexCxlConfig;
86use openvmm_defs::config::SerialInformation;
87use openvmm_defs::config::VirtioBus;
88use openvmm_defs::config::VmbusConfig;
89use openvmm_defs::config::VpAssignment;
90use openvmm_defs::config::VpciDeviceConfig;
91use openvmm_defs::config::Vtl2Config;
92use openvmm_defs::rpc::VmRpc;
93use openvmm_defs::worker::VM_WORKER;
94use openvmm_defs::worker::VmWorkerParameters;
95use openvmm_helpers::disk::OpenDiskOptions;
96use openvmm_helpers::disk::create_disk_type;
97use openvmm_helpers::disk::open_disk_type;
98use pal_async::DefaultDriver;
99use pal_async::DefaultPool;
100use pal_async::socket::PolledSocket;
101use pal_async::task::Spawn;
102use pal_async::task::Task;
103use serial_16550_resources::ComPort;
104use serial_core::resources::DisconnectedSerialBackendHandle;
105use sparse_mmap::alloc_shared_memory;
106use std::cell::RefCell;
107use std::collections::BTreeMap;
108use std::fmt::Write as _;
109use std::future::pending;
110use std::io;
111#[cfg(unix)]
112use std::io::IsTerminal;
113use std::io::Write;
114use std::net::TcpListener;
115use std::path::Path;
116use std::path::PathBuf;
117use std::sync::Arc;
118use std::thread;
119use std::time::Duration;
120use storvsp_resources::ScsiControllerRequest;
121use tpm_resources::TpmDeviceHandle;
122use tpm_resources::TpmRegisterLayout;
123use uidevices_resources::SynthKeyboardHandle;
124use uidevices_resources::SynthMouseHandle;
125use uidevices_resources::SynthVideoHandle;
126use video_core::SharedFramebufferHandle;
127use virtio_resources::VirtioPciDeviceHandle;
128use vm_manifest_builder::BaseChipsetType;
129use vm_manifest_builder::MachineArch;
130use vm_manifest_builder::VmChipsetResult;
131use vm_manifest_builder::VmManifestBuilder;
132use vm_resource::IntoResource;
133use vm_resource::Resource;
134use vm_resource::kind::DiskHandleKind;
135use vm_resource::kind::DiskLayerHandleKind;
136use vm_resource::kind::NetEndpointHandleKind;
137use vm_resource::kind::VirtioDeviceHandle;
138use vm_resource::kind::VmbusDeviceHandleKind;
139use vmbus_serial_resources::VmbusSerialDeviceHandle;
140use vmbus_serial_resources::VmbusSerialPort;
141use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
142use vmgs_resources::GuestStateEncryptionPolicy;
143use vmgs_resources::VmgsDisk;
144use vmgs_resources::VmgsFileHandle;
145use vmgs_resources::VmgsResource;
146use vmotherboard::ChipsetDeviceHandle;
147use vnc_worker_defs::VncParameters;
148
149pub fn openvmm_main() {
150    // Save the current state of the terminal so we can restore it back to
151    // normal before exiting.
152    #[cfg(unix)]
153    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
154
155    let mut pidfile_guard: Option<pidfile::Pidfile> = None;
156    let exit_code = match do_main(&mut pidfile_guard) {
157        Ok(_) => 0,
158        Err(err) => {
159            eprintln!("fatal error: {:?}", err);
160            1
161        }
162    };
163
164    // Restore the terminal to its initial state.
165    #[cfg(unix)]
166    if let Some(orig_termios) = orig_termios {
167        term::set_termios(orig_termios);
168    }
169
170    // Clean up the pidfile before terminating, since
171    // pal::process::terminate skips destructors.
172    drop(pidfile_guard);
173
174    // Terminate the process immediately without graceful shutdown of DLLs or
175    // C++ destructors or anything like that. This is all unnecessary and saves
176    // time on Windows.
177    //
178    // Do flush stdout, though, since there may be buffered data.
179    let _ = io::stdout().flush();
180    pal::process::terminate(exit_code);
181}
182
/// Host-side handles gathered while the VM configuration is being built.
///
/// `vm_config_from_command_line` returns one of these alongside the `Config`.
/// The value starts out as `VmResources::default()`, so every field is empty
/// until something populates it.
#[derive(Default)]
struct VmResources {
    /// Write half of the serial pair created for the port configured as
    /// `SerialConfigCli::Console`; `None` when no port claimed the console.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// NOTE(review): presumably host-side access to the guest framebuffer;
    /// where it is populated is not visible near this definition -- confirm.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` messages.
    /// NOTE(review): presumably targets the shutdown IC device -- confirm.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` messages.
    /// NOTE(review): presumably targets the KVP IC device -- confirm.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest` messages.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest` messages.
    /// NOTE(review): name suggests the VTL2 NVMe controller -- confirm.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for `GuestEmulationRequest` messages.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings, if any were produced.
    /// NOTE(review): producer is not visible near this definition -- confirm.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Receives dirty rectangles from the synthetic video device for the VNC worker.
    dirty_rect_recv: Option<mesh::Receiver<Vec<video_core::DirtyRect>>>,
    /// Windows only: the switch ports created (via `new_switch_port`) for
    /// each `opt.kernel_vmnic` entry.
    /// NOTE(review): presumably held here to keep the ports alive -- confirm.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
198
/// Records which serial port has claimed the interactive console.
///
/// `setup_serial` stores one of these the first time a port is configured as
/// `SerialConfigCli::Console`; a later port asking for the console fails with
/// a "console already set by ..." error naming `device`.
struct ConsoleState<'a> {
    /// Device name passed to `setup_serial` for the claiming port (for
    /// example `"ttyS0"` or `"hvc0"`); reported in the duplicate-console
    /// error and later copied into `console_str`.
    device: &'a str,
    /// Write half of the console's serial pair; moved into
    /// `VmResources::console_in` once all ports are set up.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
203
204/// Build a flat list of switches with their parent port assignments.
205///
206/// This function converts hierarchical CLI switch definitions into a flat list
207/// where each switch specifies its parent port directly.
208fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
209    all_switches
210        .iter()
211        .map(|switch_cli| PcieSwitchConfig {
212            name: switch_cli.name.clone(),
213            num_downstream_ports: switch_cli.num_downstream_ports,
214            parent_port: switch_cli.port_name.clone(),
215            hotplug: switch_cli.hotplug,
216            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
217        })
218        .collect()
219}
220
221async fn vm_config_from_command_line(
222    spawner: impl Spawn,
223    mesh: &VmmMesh,
224    opt: &Options,
225) -> anyhow::Result<(Config, VmResources)> {
226    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
227    // Ensure the serial driver stays alive with no tasks.
228    serial_driver.spawn("leak", pending::<()>()).detach();
229
230    let openhcl_vtl = if opt.vtl2 {
231        DeviceVtl::Vtl2
232    } else {
233        DeviceVtl::Vtl0
234    };
235
236    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
237    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
238        Ok(match cli_cfg {
239            SerialConfigCli::Console => {
240                if let Some(console_state) = console_state.borrow().as_ref() {
241                    bail!("console already set by {}", console_state.device);
242                }
243                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
244                let (serial_read, serial_write) = AsyncReadExt::split(serial);
245                *console_state.borrow_mut() = Some(ConsoleState {
246                    device,
247                    input: Box::new(serial_write),
248                });
249                thread::Builder::new()
250                    .name(name.to_owned())
251                    .spawn(move || {
252                        let _ = block_on(futures::io::copy(
253                            serial_read,
254                            &mut AllowStdIo::new(term::raw_stdout()),
255                        ));
256                    })
257                    .unwrap();
258                Some(config)
259            }
260            SerialConfigCli::Stderr => {
261                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
262                thread::Builder::new()
263                    .name(name.to_owned())
264                    .spawn(move || {
265                        let _ = block_on(futures::io::copy(
266                            serial,
267                            &mut AllowStdIo::new(term::raw_stderr()),
268                        ));
269                    })
270                    .unwrap();
271                Some(config)
272            }
273            SerialConfigCli::File(path) => {
274                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
275                let file = fs_err::File::create(path).context("failed to create file")?;
276
277                thread::Builder::new()
278                    .name(name.to_owned())
279                    .spawn(move || {
280                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
281                    })
282                    .unwrap();
283                Some(config)
284            }
285            SerialConfigCli::None => None,
286            SerialConfigCli::Pipe(path) => {
287                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
288            }
289            SerialConfigCli::Tcp(addr) => {
290                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
291            }
292            SerialConfigCli::NewConsole(app, window_title) => {
293                let path = console_relay::random_console_path();
294                let config =
295                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
296                let window_title =
297                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
298
299                console_relay::launch_console(
300                    app.or_else(openvmm_terminal_app).as_deref(),
301                    &path,
302                    ConsoleLaunchOptions {
303                        window_title: Some(window_title),
304                    },
305                )
306                .context("failed to launch console")?;
307
308                Some(config)
309            }
310        })
311    };
312
313    let mut vmbus_devices = Vec::new();
314
315    let serial0_cfg = setup_serial(
316        "com1",
317        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
318        if cfg!(guest_arch = "x86_64") {
319            "ttyS0"
320        } else {
321            "ttyAMA0"
322        },
323    )?;
324    let serial1_cfg = setup_serial(
325        "com2",
326        opt.com2.clone().unwrap_or(SerialConfigCli::None),
327        if cfg!(guest_arch = "x86_64") {
328            "ttyS1"
329        } else {
330            "ttyAMA1"
331        },
332    )?;
333    let serial2_cfg = setup_serial(
334        "com3",
335        opt.com3.clone().unwrap_or(SerialConfigCli::None),
336        if cfg!(guest_arch = "x86_64") {
337            "ttyS2"
338        } else {
339            "ttyAMA2"
340        },
341    )?;
342    let serial3_cfg = setup_serial(
343        "com4",
344        opt.com4.clone().unwrap_or(SerialConfigCli::None),
345        if cfg!(guest_arch = "x86_64") {
346            "ttyS3"
347        } else {
348            "ttyAMA3"
349        },
350    )?;
351    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
352        "vmbus_com1",
353        opt.vmbus_com1_serial
354            .clone()
355            .unwrap_or(SerialConfigCli::None),
356        "vmbus_com1",
357    )? {
358        vmbus_devices.push((
359            openhcl_vtl,
360            VmbusSerialDeviceHandle {
361                port: VmbusSerialPort::Com1,
362                backend: vmbus_com1_cfg,
363            }
364            .into_resource(),
365        ));
366        true
367    } else {
368        false
369    };
370    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
371        "vmbus_com2",
372        opt.vmbus_com2_serial
373            .clone()
374            .unwrap_or(SerialConfigCli::None),
375        "vmbus_com2",
376    )? {
377        vmbus_devices.push((
378            openhcl_vtl,
379            VmbusSerialDeviceHandle {
380                port: VmbusSerialPort::Com2,
381                backend: vmbus_com2_cfg,
382            }
383            .into_resource(),
384        ));
385        true
386    } else {
387        false
388    };
389    let debugcon_cfg = setup_serial(
390        "debugcon",
391        opt.debugcon
392            .clone()
393            .map(|cfg| cfg.serial)
394            .unwrap_or(SerialConfigCli::None),
395        "debugcon",
396    )?;
397
398    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
399        setup_serial("virtio-console", serial_cfg, "hvc0")?
400    } else {
401        None
402    };
403
404    let mut resources = VmResources::default();
405    let mut console_str = "";
406    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
407        resources.console_in = Some(input);
408        console_str = device;
409    }
410
411    if opt.shared_memory {
412        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
413    }
414    if opt.deprecated_prefetch {
415        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
416    }
417    if opt.deprecated_private_memory {
418        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
419    }
420    if opt.deprecated_thp {
421        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
422    }
423    if opt.deprecated_memory_backing_file.is_some() {
424        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
425    }
426
427    opt.validate_memory_options()?;
428
429    const MAX_PROCESSOR_COUNT: u32 = 1024;
430
431    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
432        bail!("invalid proc count: {}", opt.processors);
433    }
434
435    // Total SCSI channel count should not exceed the processor count
436    // (at most, one channel per VP).
437    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
438        bail!(
439            "invalid SCSI sub-channel count: requested {}, max {}",
440            opt.scsi_sub_channels,
441            MAX_PROCESSOR_COUNT - 1
442        );
443    }
444
445    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
446
447    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
448
449    // Register named controllers first, so that --disk on=<name>
450    // references can be resolved.
451    for ctrl in &opt.nvme_pci {
452        let transport = match &ctrl.transport {
453            cli_args::NvmeControllerTransport::Pcie(port) => {
454                storage_builder::NvmeControllerTransport::Pcie(port.clone())
455            }
456            cli_args::NvmeControllerTransport::Vpci(guid) => {
457                let guid = guid.unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
458                storage_builder::NvmeControllerTransport::Vpci(guid)
459            }
460        };
461        storage.add_nvme_controller(ctrl.id.clone(), ctrl.vtl, transport, None)?;
462    }
463
464    for ctrl in &opt.vmbus_scsi {
465        let instance_id = storage_builder::deterministic_guid(&ctrl.id);
466        storage.add_scsi_controller(ctrl.id.clone(), ctrl.vtl, instance_id, ctrl.sub_channels)?;
467    }
468
469    for ctrl in &opt.openhcl_controller {
470        let controller_type = match ctrl.controller_type {
471            cli_args::OpenhclControllerType::Scsi => storage_builder::OpenhclControllerType::Scsi,
472            cli_args::OpenhclControllerType::Nvme => storage_builder::OpenhclControllerType::Nvme,
473        };
474        let instance_id = ctrl
475            .guid
476            .unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
477        storage.add_openhcl_controller(ctrl.id.clone(), controller_type, instance_id)?;
478    }
479
480    for &cli_args::DiskCli {
481        vtl,
482        ref kind,
483        read_only,
484        is_dvd,
485        underhill,
486        ref pcie_port,
487        ref controller,
488        nsid,
489        lun,
490        ref relay,
491    } in &opt.disk
492    {
493        if controller.is_none() && underhill.is_none() && relay.is_none() {
494            tracing::warn!(
495                "--disk without `on` is deprecated; \
496                 use --vmbus-scsi and --disk on=<name> instead"
497            );
498        }
499
500        let relay_target = relay
501            .as_ref()
502            .map(|(name, loc)| storage_builder::RelayTarget {
503                controller: name.clone(),
504                location: *loc,
505            });
506
507        let target = if let Some(name) = controller {
508            if pcie_port.is_some() {
509                anyhow::bail!("`on` is incompatible with `pcie_port` on `--disk`");
510            }
511            storage_builder::DiskLocation::Named {
512                controller: name.clone(),
513                nsid,
514                lun,
515            }
516        } else if pcie_port.is_some() {
517            anyhow::bail!("`--disk` is incompatible with `pcie_port` without `controller`");
518        } else {
519            storage_builder::DiskLocation::Scsi(None)
520        };
521
522        storage
523            .add(
524                vtl,
525                underhill,
526                relay_target,
527                target,
528                kind,
529                is_dvd,
530                read_only,
531            )
532            .await?;
533    }
534
535    for &cli_args::IdeDiskCli {
536        ref kind,
537        read_only,
538        channel,
539        device,
540        is_dvd,
541    } in &opt.ide
542    {
543        storage
544            .add(
545                DeviceVtl::Vtl0,
546                None,
547                None,
548                storage_builder::DiskLocation::Ide(channel, device),
549                kind,
550                is_dvd,
551                read_only,
552            )
553            .await?;
554    }
555
556    if !opt.nvme.is_empty() {
557        tracing::warn!("--nvme is deprecated; use --nvme-pci and --disk on=<name> instead");
558
559        // Pre-register implicit PCIe controllers for unique port names.
560        let mut registered_ports = std::collections::BTreeSet::new();
561        for disk in &opt.nvme {
562            if let Some(port) = &disk.pcie_port {
563                if registered_ports.insert(port.clone()) {
564                    storage.add_nvme_controller(
565                        port.clone(),
566                        DeviceVtl::Vtl0,
567                        storage_builder::NvmeControllerTransport::Pcie(port.clone()),
568                        None,
569                    ).with_context(|| format!(
570                        "legacy --nvme flag conflicts with an explicit controller named '{port}'; \
571                         use --nvme-pci and --disk on=<name> instead"
572                    ))?;
573                }
574            }
575        }
576    }
577
578    for &cli_args::DiskCli {
579        vtl,
580        ref kind,
581        read_only,
582        is_dvd,
583        underhill,
584        ref pcie_port,
585        controller: _,
586        nsid: _,
587        lun: _,
588        relay: _,
589    } in &opt.nvme
590    {
591        let target = if let Some(port) = pcie_port {
592            storage_builder::DiskLocation::Named {
593                controller: port.clone(),
594                nsid: None,
595                lun: None,
596            }
597        } else {
598            storage_builder::DiskLocation::Nvme(None)
599        };
600        storage
601            .add(vtl, underhill, None, target, kind, is_dvd, read_only)
602            .await?;
603    }
604
605    for &cli_args::DiskCli {
606        vtl,
607        ref kind,
608        read_only,
609        is_dvd,
610        ref underhill,
611        ref pcie_port,
612        controller: _,
613        nsid: _,
614        lun: _,
615        relay: _,
616    } in &opt.virtio_blk
617    {
618        if underhill.is_some() {
619            anyhow::bail!("underhill not supported with virtio-blk");
620        }
621        storage
622            .add(
623                vtl,
624                None,
625                None,
626                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
627                kind,
628                is_dvd,
629                read_only,
630            )
631            .await?;
632    }
633
634    let mut floppy_disks = Vec::new();
635    for disk in &opt.floppy {
636        let &cli_args::FloppyDiskCli {
637            ref kind,
638            read_only,
639        } = disk;
640        floppy_disks.push(FloppyDiskConfig {
641            disk_type: disk_open(kind, read_only).await?,
642            read_only,
643        });
644    }
645
646    let mut vpci_mana_nics = [(); 3].map(|()| None);
647    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
648    let mut underhill_nics = Vec::new();
649    let mut vpci_devices = Vec::new();
650
651    let mut nic_index = 0;
652    for cli_cfg in &opt.net {
653        if cli_cfg.pcie_port.is_some() {
654            anyhow::bail!("`--net` does not support PCIe");
655        }
656        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
657        if cli_cfg.underhill {
658            if !opt.no_alias_map {
659                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
660            }
661            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
662                let vpci_instance_id = Guid::new_random();
663                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
664                    instance_id: vpci_instance_id.to_string(),
665                    subordinate_instance_id: None,
666                    max_sub_channels: None,
667                });
668                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
669            });
670            mana.1.vports.push(VportDefinition {
671                mac_address: vport.mac_address,
672                endpoint: vport.endpoint,
673            });
674        } else {
675            vmbus_devices.push(vport.into_netvsp_handle());
676        }
677    }
678
679    if opt.nic {
680        let nic_config = parse_endpoint(
681            &NicConfigCli {
682                vtl: DeviceVtl::Vtl0,
683                endpoint: EndpointConfigCli::Consomme {
684                    cidr: None,
685                    host_fwd: Vec::new(),
686                },
687                max_queues: None,
688                underhill: false,
689                pcie_port: None,
690            },
691            &mut nic_index,
692            &mut resources,
693        )?;
694        vmbus_devices.push(nic_config.into_netvsp_handle());
695    }
696
697    // Build initial PCIe devices list from CLI options. Storage devices
698    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
699    let mut pcie_devices = Vec::new();
700    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
701        tracing::info!(
702            port_name = %cli_cfg.port_name,
703            socket_addr = ?cli_cfg.socket_addr,
704            "instantiating PCIe remote device"
705        );
706
707        // Generate a deterministic instance ID based on index
708        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
709            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
710        let instance_id = Guid {
711            data1: index as u32,
712            ..PCIE_REMOTE_BASE_INSTANCE_ID
713        };
714
715        pcie_devices.push(PcieDeviceConfig {
716            port_name: cli_cfg.port_name.clone(),
717            resource: pcie_remote_resources::PcieRemoteHandle {
718                instance_id,
719                socket_addr: cli_cfg.socket_addr.clone(),
720                hu: cli_cfg.hu,
721                controller: cli_cfg.controller,
722            }
723            .into_resource(),
724        });
725    }
726
727    #[cfg(windows)]
728    let mut kernel_vmnics = Vec::new();
729    #[cfg(windows)]
730    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
731        // Pick a random MAC address.
732        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
733        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
734
735        // Pick a fixed instance ID based on the index.
736        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
737        let instance_id = Guid {
738            data1: index as u32,
739            ..BASE_INSTANCE_ID
740        };
741
742        let switch_id = if switch_id == "default" {
743            None
744        } else {
745            Some(switch_id.as_str())
746        };
747        let (port_id, port) = new_switch_port(switch_id)?;
748        resources.switch_ports.push(port);
749
750        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
751            instance_id,
752            mac_address: mac_address.into(),
753            switch_port_id: port_id,
754        });
755    }
756
757    for vport in &opt.mana {
758        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
759        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
760            (vtl, None) => {
761                &mut vpci_mana_nics[vtl]
762                    .get_or_insert_with(|| {
763                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
764                    })
765                    .1
766                    .vports
767            }
768            (0, Some(pcie_port)) => {
769                &mut pcie_mana_nics
770                    .entry(pcie_port)
771                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
772                    .vports
773            }
774            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
775        };
776        vport_array.push(VportDefinition {
777            mac_address: vport.mac_address,
778            endpoint: vport.endpoint,
779        });
780    }
781
782    vpci_devices.extend(
783        vpci_mana_nics
784            .into_iter()
785            .enumerate()
786            .filter_map(|(vtl, nic)| {
787                nic.map(|(instance_id, handle)| VpciDeviceConfig {
788                    vtl: match vtl {
789                        0 => DeviceVtl::Vtl0,
790                        1 => DeviceVtl::Vtl1,
791                        2 => DeviceVtl::Vtl2,
792                        _ => unreachable!(),
793                    },
794                    instance_id,
795                    resource: handle.into_resource(),
796                })
797            }),
798    );
799
800    pcie_devices.extend(
801        pcie_mana_nics
802            .into_iter()
803            .map(|(pcie_port, handle)| PcieDeviceConfig {
804                port_name: pcie_port,
805                resource: handle.into_resource(),
806            }),
807    );
808
809    for cxl_test in &opt.cxl_test {
810        pcie_devices.push(PcieDeviceConfig {
811            port_name: cxl_test.pcie_port.clone(),
812            resource: CxlTestDeviceHandle {
813                hdm_size_bytes: cxl_test.hdm_size,
814            }
815            .into_resource(),
816        });
817    }
818
819    #[cfg(guest_arch = "aarch64")]
820    let arch = MachineArch::Aarch64;
821    #[cfg(guest_arch = "x86_64")]
822    let arch = MachineArch::X86_64;
823
824    let mut pcie_root_complexes = Vec::new();
825    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
826        let ports: Vec<PcieRootPortConfig> = opt
827            .pcie_root_port
828            .iter()
829            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
830            .map(|port_cli| PcieRootPortConfig {
831                name: port_cli.name.clone(),
832                hotplug: port_cli.hotplug,
833                acs_capabilities_supported: port_cli.acs_capabilities_supported,
834                cxl: port_cli.cxl,
835            })
836            .collect();
837
838        const ONE_MB: u64 = 1024 * 1024;
839        // Keep all PCI windows 1MB-granular to match layout and downstream placement rules.
840        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
841        let high_mmio_size = rc_cli
842            .high_mmio
843            .checked_next_multiple_of(ONE_MB)
844            .context("high mmio rounding error")?;
845
846        // Count CXL-capable ports under the root bus. If the root bus has CXL root ports, it needs CHBCR.
847        let cxl_port_count = ports.iter().filter(|port| port.cxl).count() as u64;
848
849        let cxl = if cxl_port_count != 0 {
850            Some(RootComplexCxlConfig {
851                hdm_size: rc_cli.hdm,
852                hdm_window_restrictions: rc_cli.hdm_window_restrictions.bits(),
853            })
854        } else {
855            None
856        };
857        pcie_root_complexes.push(PcieRootComplexConfig {
858            index: i as u32,
859            name: rc_cli.name.clone(),
860            segment: rc_cli.segment,
861            start_bus: rc_cli.start_bus,
862            end_bus: rc_cli.end_bus,
863            low_mmio: PcieMmioRangeConfig::Dynamic {
864                size: low_mmio_size,
865            },
866            high_mmio: PcieMmioRangeConfig::Dynamic {
867                size: high_mmio_size,
868            },
869            cxl,
870            ports,
871            #[cfg(guest_arch = "aarch64")]
872            iommu: opt
873                .smmu
874                .iter()
875                .any(|s| s == &rc_cli.name)
876                .then_some(openvmm_defs::config::PcieIommuConfig::Smmu),
877            #[cfg(guest_arch = "x86_64")]
878            iommu: opt
879                .amd_iommu
880                .iter()
881                .any(|s| s == &rc_cli.name)
882                .then_some(openvmm_defs::config::PcieIommuConfig::AmdVi),
883        });
884    }
885
886    // Validate that all --smmu / --amd-iommu names refer to known root complexes.
887    #[cfg(guest_arch = "aarch64")]
888    for name in &opt.smmu {
889        anyhow::ensure!(
890            pcie_root_complexes.iter().any(|rc| rc.name == *name),
891            "--smmu refers to unknown root complex '{name}'"
892        );
893    }
894    #[cfg(guest_arch = "x86_64")]
895    for name in &opt.amd_iommu {
896        anyhow::ensure!(
897            pcie_root_complexes.iter().any(|rc| rc.name == *name),
898            "--amd-iommu refers to unknown root complex '{name}'"
899        );
900    }
901
902    let pcie_switches = build_switch_list(&opt.pcie_switch);
903
904    #[cfg(target_os = "linux")]
905    let vfio_pcie_devices: Vec<PcieDeviceConfig> = {
906        use std::collections::HashMap;
907        use vm_resource::IntoResource;
908
909        // Process --iommu flags: open /dev/iommu for each declared context.
910        let mut iommu_map: HashMap<String, std::fs::File> = HashMap::new();
911        for iommu_cli in &opt.iommu {
912            anyhow::ensure!(
913                !iommu_map.contains_key(&iommu_cli.id),
914                "duplicate --iommu id={}",
915                iommu_cli.id
916            );
917            let file = std::fs::OpenOptions::new()
918                .read(true)
919                .write(true)
920                .open("/dev/iommu")
921                .context("failed to open /dev/iommu (is iommufd available?)")?;
922            iommu_map.insert(iommu_cli.id.clone(), file);
923        }
924
925        opt.vfio
926            .iter()
927            .map(|cli_cfg| {
928                let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
929
930                if let Some(iommu_id) = &cli_cfg.iommu {
931                    // cdev + iommufd path
932                    let iommufd = iommu_map.get(iommu_id).with_context(|| {
933                        format!(
934                            "--vfio device {} references iommu={iommu_id}, \
935                             but no --iommu id={iommu_id} was specified",
936                            cli_cfg.pci_id
937                        )
938                    })?;
939                    // Clone the iommufd fd so the per-iommu manager can own it.
940                    // The first device for a given iommu ID uses the cloned fd
941                    // to create the IoasManager; subsequent devices reuse the
942                    // existing manager and the cloned fd is dropped.
943                    let iommufd = iommufd.try_clone().with_context(|| {
944                        format!("failed to dup iommufd fd for iommu={iommu_id}")
945                    })?;
946
947                    // Open the cdev device node.
948                    let vfio_dev_dir = sysfs_path.join("vfio-dev");
949                    let entry = std::fs::read_dir(&vfio_dev_dir)
950                        .with_context(|| {
951                            format!(
952                                "failed to read {}: is {} bound to vfio-pci?",
953                                vfio_dev_dir.display(),
954                                cli_cfg.pci_id
955                            )
956                        })?
957                        .next()
958                        .context("no vfio-dev entry found")?
959                        .context("failed to read vfio-dev entry")?;
960                    let dev_path = Path::new("/dev/vfio/devices").join(entry.file_name());
961                    let cdev = std::fs::OpenOptions::new()
962                        .read(true)
963                        .write(true)
964                        .open(&dev_path)
965                        .with_context(|| format!("failed to open {}", dev_path.display()))?;
966
967                    Ok(PcieDeviceConfig {
968                        port_name: cli_cfg.port_name.clone(),
969                        resource: vfio_assigned_device_resources::VfioCdevDeviceHandle {
970                            pci_id: cli_cfg.pci_id.clone(),
971                            cdev,
972                            iommufd,
973                            iommu_id: iommu_id.clone(),
974                        }
975                        .into_resource(),
976                    })
977                } else {
978                    // Legacy group/container path
979                    let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
980                        .with_context(|| {
981                            format!("failed to read IOMMU group for {}", cli_cfg.pci_id)
982                        })?;
983                    let group_id: u64 = iommu_group_link
984                        .file_name()
985                        .and_then(|s| s.to_str())
986                        .context("invalid iommu_group symlink")?
987                        .parse()
988                        .context("failed to parse IOMMU group ID")?;
989                    let group = std::fs::OpenOptions::new()
990                        .read(true)
991                        .write(true)
992                        .open(format!("/dev/vfio/{group_id}"))
993                        .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
994
995                    Ok(PcieDeviceConfig {
996                        port_name: cli_cfg.port_name.clone(),
997                        resource: vfio_assigned_device_resources::VfioDeviceHandle {
998                            pci_id: cli_cfg.pci_id.clone(),
999                            group,
1000                        }
1001                        .into_resource(),
1002                    })
1003                }
1004            })
1005            .collect::<anyhow::Result<Vec<_>>>()?
1006    };
1007
1008    #[cfg(windows)]
1009    let vpci_resources: Vec<_> = opt
1010        .device
1011        .iter()
1012        .map(|path| -> anyhow::Result<_> {
1013            Ok(virt_whp::device::DeviceHandle(
1014                whp::VpciResource::new(
1015                    None,
1016                    Default::default(),
1017                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
1018                )
1019                .with_context(|| format!("opening PCI device {}", path))?,
1020            ))
1021        })
1022        .collect::<Result<_, _>>()?;
1023
1024    // Create a vmbusproxy handle if needed by any devices.
1025    #[cfg(windows)]
1026    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
1027        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
1028    } else {
1029        None
1030    };
1031
1032    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc.vnc || opt.pcat {
1033        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
1034        let (fb, fba) =
1035            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
1036        resources.framebuffer_access = Some(fba);
1037        Some(fb)
1038    } else {
1039        None
1040    };
1041
1042    let load_mode;
1043    let with_hv;
1044
1045    let any_serial_configured = serial0_cfg.is_some()
1046        || serial1_cfg.is_some()
1047        || serial2_cfg.is_some()
1048        || serial3_cfg.is_some();
1049
1050    let has_com3 = serial2_cfg.is_some();
1051
1052    let mut chipset = VmManifestBuilder::new(
1053        if opt.igvm.is_some() {
1054            BaseChipsetType::HclHost
1055        } else if opt.pcat {
1056            BaseChipsetType::HypervGen1
1057        } else if opt.uefi {
1058            BaseChipsetType::HypervGen2Uefi
1059        } else if opt.hv {
1060            BaseChipsetType::HyperVGen2LinuxDirect
1061        } else {
1062            BaseChipsetType::UnenlightenedLinuxDirect
1063        },
1064        arch,
1065    );
1066
1067    if framebuffer.is_some() {
1068        chipset = chipset.with_framebuffer();
1069    }
1070    if opt.guest_watchdog {
1071        chipset = chipset.with_guest_watchdog();
1072    }
1073    if any_serial_configured {
1074        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
1075    }
1076    if opt.battery {
1077        let (tx, rx) = mesh::channel();
1078        tx.send(HostBatteryUpdate::default_present());
1079        chipset = chipset.with_battery(rx);
1080    }
1081    if opt.no_vmbus {
1082        chipset = chipset.without_vmbus();
1083    }
1084    if let Some(cfg) = &opt.debugcon {
1085        chipset = chipset.with_debugcon(
1086            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
1087            cfg.port,
1088        );
1089    }
1090
1091    let custom_uefi_vars = {
1092        use firmware_uefi_custom_vars::CustomVars;
1093
1094        // load base vars from specified template, or use an empty set of base
1095        // vars if none was specified.
1096        let base_vars = match opt.secure_boot_template {
1097            Some(template) => match (arch, template) {
1098                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1099                    hyperv_secure_boot_templates::x64::microsoft_windows()
1100                }
1101                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1102                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1103                }
1104                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1105                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1106                }
1107                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1108                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1109                }
1110            },
1111            None => CustomVars::default(),
1112        };
1113
1114        // TODO: fallback to VMGS read if no command line flag was given
1115
1116        let custom_uefi_json_data = match &opt.custom_uefi_json {
1117            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1118            None => None,
1119        };
1120
1121        // obtain the final custom uefi vars by applying the delta onto the base vars
1122        match custom_uefi_json_data {
1123            Some(data) => {
1124                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1125                base_vars.apply_delta(delta)?
1126            }
1127            None => base_vars,
1128        }
1129    };
1130
1131    let efi_diagnostics_log_level = match opt.efi_diagnostics_log_level.unwrap_or_default() {
1132        EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1133        EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1134        EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1135    };
1136
1137    if opt.uefi {
1138        let log_level = match efi_diagnostics_log_level {
1139            EfiDiagnosticsLogLevelType::Default => {
1140                firmware_uefi_resources::LogLevel::make_default()
1141            }
1142            EfiDiagnosticsLogLevelType::Info => firmware_uefi_resources::LogLevel::make_info(),
1143            EfiDiagnosticsLogLevelType::Full => firmware_uefi_resources::LogLevel::make_full(),
1144        };
1145        let nvram_storage = if opt.vmgs.is_some() {
1146            VmgsFileHandle::new(vmgs_format::FileId::BIOS_NVRAM, true).into_resource()
1147        } else {
1148            EphemeralNonVolatileStoreHandle.into_resource()
1149        };
1150        chipset = chipset.with_uefi(vm_manifest_builder::UefiManifest::new(
1151            arch,
1152            custom_uefi_vars.clone(),
1153            opt.secure_boot,
1154            log_level,
1155            nvram_storage,
1156            None,
1157        ));
1158    }
1159
1160    // TODO: load from VMGS file if it exists
1161    let bios_guid = Guid::new_random();
1162
1163    let layout_config = chipset.layout_config();
1164    let VmChipsetResult {
1165        chipset,
1166        mut chipset_devices,
1167        pci_chipset_devices,
1168        isa_dma_controller,
1169        capabilities,
1170    } = chipset
1171        .build()
1172        .context("failed to build chipset configuration")?;
1173
1174    if opt.restore_snapshot.is_some() {
1175        // Snapshot restore: skip firmware loading entirely. Device state and
1176        // memory come from the snapshot directory.
1177        load_mode = LoadMode::None;
1178        with_hv = true;
1179    } else if let Some(path) = &opt.igvm {
1180        let file = fs_err::File::open(path)
1181            .context("failed to open igvm file")?
1182            .into();
1183        let cmdline = opt.cmdline.join(" ");
1184        with_hv = true;
1185
1186        load_mode = LoadMode::Igvm {
1187            file,
1188            cmdline,
1189            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1190            com_serial: has_com3.then(|| SerialInformation {
1191                io_port: ComPort::Com3.io_port(),
1192                irq: ComPort::Com3.irq().into(),
1193            }),
1194        };
1195    } else if opt.pcat {
1196        // Emit a nice error early instead of complaining about missing firmware.
1197        if arch != MachineArch::X86_64 {
1198            anyhow::bail!("pcat not supported on this architecture");
1199        }
1200        with_hv = true;
1201
1202        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1203        load_mode = LoadMode::Pcat {
1204            firmware,
1205            boot_order: opt
1206                .pcat_boot_order
1207                .map(|x| x.0)
1208                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1209        };
1210    } else if opt.uefi {
1211        use openvmm_defs::config::UefiConsoleMode;
1212
1213        with_hv = true;
1214
1215        let firmware = fs_err::File::open(
1216            (opt.uefi_firmware.0)
1217                .as_ref()
1218                .context("must provide uefi firmware when booting with uefi")?,
1219        )
1220        .context("failed to open uefi firmware")?;
1221
1222        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1223        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1224        load_mode = LoadMode::Uefi {
1225            firmware: firmware.into(),
1226            enable_debugging: opt.uefi_debug,
1227            enable_memory_protections: opt.uefi_enable_memory_protections,
1228            disable_frontpage: opt.disable_frontpage,
1229            enable_tpm: opt.tpm,
1230            enable_battery: opt.battery,
1231            enable_serial: any_serial_configured,
1232            enable_vpci_boot: false,
1233            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1234                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1235                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1236                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1237                UefiConsoleModeCli::None => UefiConsoleMode::None,
1238            }),
1239            default_boot_always_attempt: opt.default_boot_always_attempt,
1240            bios_guid,
1241            enable_vmbus: !opt.no_vmbus,
1242        };
1243    } else {
1244        // Linux Direct
1245        let mut cmdline = "panic=-1 debug".to_string();
1246
1247        with_hv = opt.hv;
1248        if with_hv && opt.pcie_root_complex.is_empty() {
1249            cmdline += " pci=off";
1250        }
1251
1252        if !console_str.is_empty() {
1253            let _ = write!(&mut cmdline, " console={}", console_str);
1254        }
1255
1256        if opt.gfx {
1257            cmdline += " console=tty";
1258        }
1259        for extra in &opt.cmdline {
1260            let _ = write!(&mut cmdline, " {}", extra);
1261        }
1262
1263        let kernel = fs_err::File::open(
1264            (opt.kernel.0)
1265                .as_ref()
1266                .context("must provide kernel when booting with linux direct")?,
1267        )
1268        .context("failed to open kernel")?;
1269        let initrd = (opt.initrd.0)
1270            .as_ref()
1271            .map(fs_err::File::open)
1272            .transpose()
1273            .context("failed to open initrd")?;
1274
1275        let custom_dsdt = match &opt.custom_dsdt {
1276            Some(path) => {
1277                let mut v = Vec::new();
1278                fs_err::File::open(path)
1279                    .context("failed to open custom dsdt")?
1280                    .read_to_end(&mut v)
1281                    .context("failed to read custom dsdt")?;
1282                Some(v)
1283            }
1284            None => None,
1285        };
1286
1287        load_mode = LoadMode::Linux {
1288            kernel: kernel.into(),
1289            initrd: initrd.map(Into::into),
1290            cmdline,
1291            custom_dsdt,
1292            enable_serial: any_serial_configured,
1293            boot_mode: if opt.device_tree {
1294                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1295            } else {
1296                openvmm_defs::config::LinuxDirectBootMode::Acpi
1297            },
1298        };
1299    }
1300
1301    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1302        let disk = VmgsDisk {
1303            disk: disk_open(kind, false)
1304                .await
1305                .context("failed to open vmgs disk")?,
1306            encryption_policy: if opt.test_gsp_by_id {
1307                GuestStateEncryptionPolicy::GspById(true)
1308            } else {
1309                GuestStateEncryptionPolicy::None(true)
1310            },
1311        };
1312        match provision {
1313            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1314            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1315            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1316        }
1317    } else {
1318        VmgsResource::Ephemeral
1319    });
1320
1321    if with_get && with_hv {
1322        let has_vtl0_nvme = storage.has_vtl0_nvme();
1323        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1324            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1325            fixed: Some(Default::default()),
1326            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1327                storage_controllers: storage.build_openhcl_settings(opt.vmbus_redirect),
1328                nic_devices: underhill_nics,
1329            }),
1330            namespace_settings: Vec::default(),
1331        };
1332
1333        // Cache the VTL2 settings for later modification via the interactive console.
1334        resources.vtl2_settings = Some(vtl2_settings.clone());
1335
1336        let (send, guest_request_recv) = mesh::channel();
1337        resources.ged_rpc = Some(send);
1338
1339        let vmgs = vmgs.take().unwrap();
1340
1341        vmbus_devices.extend([
1342            (
1343                openhcl_vtl,
1344                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1345            ),
1346            (
1347                openhcl_vtl,
1348                get_resources::ged::GuestEmulationDeviceHandle {
1349                    firmware: if opt.pcat {
1350                        get_resources::ged::GuestFirmwareConfig::Pcat {
1351                            boot_order: opt
1352                                .pcat_boot_order
1353                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1354                                .map(|x| match x {
1355                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1356                                        get_resources::ged::PcatBootDevice::Floppy
1357                                    }
1358                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1359                                        get_resources::ged::PcatBootDevice::HardDrive
1360                                    }
1361                                    openvmm_defs::config::PcatBootDevice::Optical => {
1362                                        get_resources::ged::PcatBootDevice::Optical
1363                                    }
1364                                    openvmm_defs::config::PcatBootDevice::Network => {
1365                                        get_resources::ged::PcatBootDevice::Network
1366                                    }
1367                                }),
1368                        }
1369                    } else {
1370                        use get_resources::ged::UefiConsoleMode;
1371
1372                        get_resources::ged::GuestFirmwareConfig::Uefi {
1373                            enable_vpci_boot: has_vtl0_nvme,
1374                            firmware_debug: opt.uefi_debug,
1375                            disable_frontpage: opt.disable_frontpage,
1376                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1377                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1378                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1379                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1380                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1381                            },
1382                            default_boot_always_attempt: opt.default_boot_always_attempt,
1383                        }
1384                    },
1385                    com1: with_vmbus_com1_serial,
1386                    com2: with_vmbus_com2_serial,
1387                    serial_tx_only: opt.serial_tx_only,
1388                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1389                    vmbus_redirection: opt.vmbus_redirect,
1390                    vmgs,
1391                    framebuffer: opt
1392                        .vtl2_gfx
1393                        .then(|| SharedFramebufferHandle.into_resource()),
1394                    guest_request_recv,
1395                    enable_tpm: opt.tpm,
1396                    firmware_event_send: None,
1397                    secure_boot_enabled: opt.secure_boot,
1398                    secure_boot_template: match opt.secure_boot_template {
1399                        Some(SecureBootTemplateCli::Windows) => {
1400                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1401                        },
1402                        Some(SecureBootTemplateCli::UefiCa) => {
1403                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1404                        }
1405                        None => {
1406                            get_resources::ged::GuestSecureBootTemplateType::None
1407                        },
1408                    },
1409                    enable_battery: opt.battery,
1410                    no_persistent_secrets: true,
1411                    igvm_attest_test_config: None,
1412                    test_gsp_by_id: opt.test_gsp_by_id,
1413                    efi_diagnostics_log_level: {
1414                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1415                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1416                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1417                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1418                        }
1419                    },
1420                    hv_sint_enabled: false,
1421                }
1422                .into_resource(),
1423            ),
1424        ]);
1425    }
1426
1427    if opt.tpm && !opt.vtl2 {
1428        let register_layout = if cfg!(guest_arch = "x86_64") {
1429            TpmRegisterLayout::IoPort
1430        } else {
1431            TpmRegisterLayout::Mmio
1432        };
1433
1434        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1435            (
1436                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1437                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1438            )
1439        } else {
1440            (
1441                EphemeralNonVolatileStoreHandle.into_resource(),
1442                EphemeralNonVolatileStoreHandle.into_resource(),
1443            )
1444        };
1445
1446        chipset_devices.push(ChipsetDeviceHandle {
1447            name: "tpm".to_string(),
1448            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1449                device: TpmDeviceHandle {
1450                    ppi_store,
1451                    nvram_store,
1452                    nvram_size: None,
1453                    refresh_tpm_seeds: false,
1454                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1455                    register_layout,
1456                    guest_secret_key: None,
1457                    logger: None,
1458                    is_confidential_vm: false,
1459                    bios_guid,
1460                }
1461                .into_resource(),
1462                worker_host: mesh.make_host("tpm", None).await?,
1463            }
1464            .into_resource(),
1465        });
1466    }
1467
1468    let vga_firmware = if opt.pcat {
1469        Some(openvmm_pcat_locator::find_svga_bios(
1470            opt.vga_firmware.as_deref(),
1471        )?)
1472    } else {
1473        None
1474    };
1475
1476    if opt.gfx {
1477        // Channel for the video device to report dirty rectangles to the VNC worker.
1478        let (dirt_send, dirt_recv) = mesh::channel();
1479        resources.dirty_rect_recv = Some(dirt_recv);
1480
1481        vmbus_devices.extend([
1482            (
1483                DeviceVtl::Vtl0,
1484                SynthVideoHandle {
1485                    framebuffer: SharedFramebufferHandle.into_resource(),
1486                    dirt_send: Some(dirt_send),
1487                }
1488                .into_resource(),
1489            ),
1490            (
1491                DeviceVtl::Vtl0,
1492                SynthKeyboardHandle {
1493                    source: MultiplexedInputHandle {
1494                        // Save 0 for PS/2
1495                        elevation: 1,
1496                    }
1497                    .into_resource(),
1498                }
1499                .into_resource(),
1500            ),
1501            (
1502                DeviceVtl::Vtl0,
1503                SynthMouseHandle {
1504                    source: MultiplexedInputHandle {
1505                        // Save 0 for PS/2
1506                        elevation: 1,
1507                    }
1508                    .into_resource(),
1509                }
1510                .into_resource(),
1511            ),
1512        ]);
1513    }
1514
1515    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1516        if let Some(path) = path {
1517            cleanup_socket(path.as_ref());
1518            let listener = unix_socket::UnixListener::bind(path)
1519                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1520            Ok(Some(listener))
1521        } else {
1522            Ok(None)
1523        }
1524    };
1525
1526    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1527    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1528
1529    if let Some(path) = &opt.openhcl_dump_path {
1530        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1531        task.detach();
1532        vmbus_devices.push((openhcl_vtl, resource));
1533    }
1534
1535    #[cfg(guest_arch = "aarch64")]
1536    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1537        openvmm_defs::config::Aarch64TopologyConfig {
1538            // TODO: allow this to be configured from the command line
1539            gic_config: None,
1540            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1541            gic_msi: match opt.gic_msi {
1542                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1543                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1544                cli_args::GicMsiCli::V2m => {
1545                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1546                }
1547            },
1548        },
1549    );
1550    #[cfg(guest_arch = "x86_64")]
1551    let topology_arch =
1552        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1553            apic_id_offset: opt.apic_id_offset,
1554            x2apic: opt.x2apic,
1555        });
1556
1557    let with_isolation = if let Some(isolation) = &opt.isolation {
1558        // TODO: For now, isolation is only supported with VTL2.
1559        if !opt.vtl2 {
1560            anyhow::bail!("isolation is only currently supported with vtl2");
1561        }
1562
1563        // TODO: Alias map support is not yet implement with isolation.
1564        if !opt.no_alias_map {
1565            anyhow::bail!("alias map not supported with isolation");
1566        }
1567
1568        match isolation {
1569            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1570        }
1571    } else {
1572        None
1573    };
1574
1575    if with_hv && !opt.no_vmbus {
1576        let (shutdown_send, shutdown_recv) = mesh::channel();
1577        resources.shutdown_ic = Some(shutdown_send);
1578        let (kvp_send, kvp_recv) = mesh::channel();
1579        resources.kvp_ic = Some(kvp_send);
1580        vmbus_devices.extend(
1581            [
1582                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1583                    recv: shutdown_recv,
1584                }
1585                .into_resource(),
1586                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1587                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1588            ]
1589            .map(|r| (DeviceVtl::Vtl0, r)),
1590        );
1591    }
1592
1593    if let Some(hive_path) = &opt.imc {
1594        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1595        vmbus_devices.push((
1596            DeviceVtl::Vtl0,
1597            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1598        ));
1599    }
1600
1601    let mut virtio_devices = Vec::new();
1602    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1603        let bus = match bus {
1604            VirtioBusCli::Auto => {
1605                // Use VPCI when possible (currently only on Windows and macOS due
1606                // to KVM backend limitations).
1607                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1608                    None
1609                } else {
1610                    Some(VirtioBus::Pci)
1611                }
1612            }
1613            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1614            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1615            VirtioBusCli::Vpci => None,
1616        };
1617        if let Some(bus) = bus {
1618            virtio_devices.push((bus, resource));
1619        } else {
1620            vpci_devices.push(VpciDeviceConfig {
1621                vtl: DeviceVtl::Vtl0,
1622                instance_id: Guid::new_random(),
1623                resource: VirtioPciDeviceHandle(resource).into_resource(),
1624            });
1625        }
1626    };
1627
1628    for cli_cfg in &opt.virtio_net {
1629        if cli_cfg.underhill {
1630            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1631        }
1632        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1633        let resource = virtio_resources::net::VirtioNetHandle {
1634            max_queues: vport.max_queues,
1635            mac_address: vport.mac_address,
1636            endpoint: vport.endpoint,
1637        }
1638        .into_resource();
1639        if let Some(pcie_port) = &cli_cfg.pcie_port {
1640            pcie_devices.push(PcieDeviceConfig {
1641                port_name: pcie_port.clone(),
1642                resource: VirtioPciDeviceHandle(resource).into_resource(),
1643            });
1644        } else {
1645            add_virtio_device(VirtioBusCli::Auto, resource);
1646        }
1647    }
1648
1649    for args in &opt.virtio_fs {
1650        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1651            tag: args.tag.clone(),
1652            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1653                root_path: args.path.clone(),
1654                mount_options: args.options.clone(),
1655            },
1656        }
1657        .into_resource();
1658        if let Some(pcie_port) = &args.pcie_port {
1659            pcie_devices.push(PcieDeviceConfig {
1660                port_name: pcie_port.clone(),
1661                resource: VirtioPciDeviceHandle(resource).into_resource(),
1662            });
1663        } else {
1664            add_virtio_device(opt.virtio_fs_bus, resource);
1665        }
1666    }
1667
1668    for args in &opt.virtio_fs_shmem {
1669        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1670            tag: args.tag.clone(),
1671            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1672                root_path: args.path.clone(),
1673            },
1674        }
1675        .into_resource();
1676        if let Some(pcie_port) = &args.pcie_port {
1677            pcie_devices.push(PcieDeviceConfig {
1678                port_name: pcie_port.clone(),
1679                resource: VirtioPciDeviceHandle(resource).into_resource(),
1680            });
1681        } else {
1682            add_virtio_device(opt.virtio_fs_bus, resource);
1683        }
1684    }
1685
1686    for args in &opt.virtio_9p {
1687        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1688            tag: args.tag.clone(),
1689            root_path: args.path.clone(),
1690            debug: opt.virtio_9p_debug,
1691        }
1692        .into_resource();
1693        if let Some(pcie_port) = &args.pcie_port {
1694            pcie_devices.push(PcieDeviceConfig {
1695                port_name: pcie_port.clone(),
1696                resource: VirtioPciDeviceHandle(resource).into_resource(),
1697            });
1698        } else {
1699            add_virtio_device(VirtioBusCli::Auto, resource);
1700        }
1701    }
1702
1703    if let Some(pmem_args) = &opt.virtio_pmem {
1704        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1705            path: pmem_args.path.clone(),
1706        }
1707        .into_resource();
1708        if let Some(pcie_port) = &pmem_args.pcie_port {
1709            pcie_devices.push(PcieDeviceConfig {
1710                port_name: pcie_port.clone(),
1711                resource: VirtioPciDeviceHandle(resource).into_resource(),
1712            });
1713        } else {
1714            add_virtio_device(VirtioBusCli::Auto, resource);
1715        }
1716    }
1717
1718    if opt.virtio_rng {
1719        let resource: Resource<VirtioDeviceHandle> =
1720            virtio_resources::rng::VirtioRngHandle.into_resource();
1721        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1722            pcie_devices.push(PcieDeviceConfig {
1723                port_name: pcie_port.clone(),
1724                resource: VirtioPciDeviceHandle(resource).into_resource(),
1725            });
1726        } else {
1727            add_virtio_device(opt.virtio_rng_bus, resource);
1728        }
1729    }
1730
1731    if let Some(backend) = virtio_console_backend {
1732        let resource: Resource<VirtioDeviceHandle> =
1733            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1734        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1735            pcie_devices.push(PcieDeviceConfig {
1736                port_name: pcie_port.clone(),
1737                resource: VirtioPciDeviceHandle(resource).into_resource(),
1738            });
1739        } else {
1740            add_virtio_device(VirtioBusCli::Auto, resource);
1741        }
1742    }
1743
1744    // Handle --vhost-user arguments.
1745    #[cfg(target_os = "linux")]
1746    for vhost_cli in &opt.vhost_user {
1747        let stream =
1748            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1749                format!(
1750                    "failed to connect to vhost-user socket: {}",
1751                    vhost_cli.socket_path
1752                )
1753            })?;
1754
1755        use crate::cli_args::VhostUserDeviceTypeCli;
1756        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1757            VhostUserDeviceTypeCli::Fs {
1758                ref tag,
1759                num_queues,
1760                queue_size,
1761            } => virtio_resources::vhost_user::VhostUserFsHandle {
1762                socket: stream.into(),
1763                tag: tag.clone(),
1764                num_queues,
1765                queue_size,
1766            }
1767            .into_resource(),
1768            VhostUserDeviceTypeCli::Blk {
1769                num_queues,
1770                queue_size,
1771            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1772                socket: stream.into(),
1773                num_queues,
1774                queue_size,
1775            }
1776            .into_resource(),
1777            VhostUserDeviceTypeCli::Other {
1778                device_id,
1779                ref queue_sizes,
1780            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1781                socket: stream.into(),
1782                device_id,
1783                queue_sizes: queue_sizes.clone(),
1784            }
1785            .into_resource(),
1786        };
1787        if let Some(pcie_port) = &vhost_cli.pcie_port {
1788            pcie_devices.push(PcieDeviceConfig {
1789                port_name: pcie_port.clone(),
1790                resource: VirtioPciDeviceHandle(resource).into_resource(),
1791            });
1792        } else {
1793            add_virtio_device(VirtioBusCli::Auto, resource);
1794        }
1795    }
1796
1797    if let Some(vsock_path) = &opt.virtio_vsock_path {
1798        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1799        add_virtio_device(
1800            VirtioBusCli::Auto,
1801            virtio_resources::vsock::VirtioVsockHandle {
1802                // The guest CID does not matter since the UDS relay does not use it. It just needs
1803                // to be some non-reserved value for the guest to use.
1804                guest_cid: 0x3,
1805                base_path: vsock_path.clone(),
1806                listener,
1807            }
1808            .into_resource(),
1809        );
1810    }
1811
1812    let mut cfg = Config {
1813        chipset,
1814        load_mode,
1815        floppy_disks,
1816        pcie_root_complexes,
1817        #[cfg(target_os = "linux")]
1818        pcie_devices: {
1819            let mut devs = pcie_devices;
1820            devs.extend(vfio_pcie_devices);
1821            devs
1822        },
1823        #[cfg(not(target_os = "linux"))]
1824        pcie_devices,
1825        pcie_switches,
1826        vpci_devices,
1827        ide_disks: Vec::new(),
1828        numa: {
1829            if let Some(ref nodes) = opt.numa {
1830                // --numa mode: each --numa flag defines a node.
1831                NumaTopology {
1832                    nodes: nodes
1833                        .iter()
1834                        .map(|n| NumaNode {
1835                            mem: Some(MemoryConfig {
1836                                mem_size: n.memory.mem_size,
1837                                prefetch_memory: n.memory.prefetch,
1838                                private_memory: n.memory.shared == Some(false),
1839                                transparent_hugepages: n.memory.transparent_hugepages,
1840                                hugepages: n.memory.hugepages,
1841                                hugepage_size: n.memory.hugepage_size,
1842                                host_numa_node: n.host_numa_node,
1843                            }),
1844                            vps: match &n.vps {
1845                                Some(vps) => VpAssignment::Explicit(vps.clone()),
1846                                None => VpAssignment::FromTopology,
1847                            },
1848                        })
1849                        .collect(),
1850                    distances: opt
1851                        .numa_distance
1852                        .as_deref()
1853                        .unwrap_or(&[])
1854                        .iter()
1855                        .map(|d| NumaDistance {
1856                            src: d.src,
1857                            dst: d.dst,
1858                            distance: d.distance,
1859                        })
1860                        .collect(),
1861                }
1862            } else {
1863                // Single-node default from --memory.
1864                NumaTopology {
1865                    nodes: vec![NumaNode {
1866                        mem: Some(MemoryConfig {
1867                            mem_size: opt.memory_size(),
1868                            prefetch_memory: opt.prefetch_memory(),
1869                            private_memory: opt.private_memory(),
1870                            transparent_hugepages: opt.transparent_hugepages(),
1871                            hugepages: opt.memory.hugepages,
1872                            hugepage_size: opt.memory.hugepage_size,
1873                            host_numa_node: None,
1874                        }),
1875                        vps: VpAssignment::FromTopology,
1876                    }],
1877                    distances: vec![],
1878                }
1879            }
1880        },
1881        processor_topology: ProcessorTopologyConfig {
1882            proc_count: opt.processors,
1883            vps_per_socket: opt.vps_per_socket,
1884            enable_smt: match opt.smt {
1885                cli_args::SmtConfigCli::Auto => None,
1886                cli_args::SmtConfigCli::Force => Some(true),
1887                cli_args::SmtConfigCli::Off => Some(false),
1888            },
1889            arch: Some(topology_arch),
1890        },
1891        hypervisor: HypervisorConfig {
1892            with_hv,
1893            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1894                vtl0_alias_map: !opt.no_alias_map,
1895                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1896                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1897                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1898                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1899                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1900                        Some(LateMapVtl0MemoryPolicy::InjectException)
1901                    }
1902                },
1903            }),
1904            with_isolation,
1905        },
1906        #[cfg(windows)]
1907        kernel_vmnics,
1908        input: mesh::Receiver::new(),
1909        framebuffer,
1910        vga_firmware,
1911        vtl2_gfx: opt.vtl2_gfx,
1912        virtio_devices,
1913        vmbus: (with_hv && !opt.no_vmbus).then_some(VmbusConfig {
1914            vsock_listener: vtl0_vsock_listener,
1915            vsock_path: opt.vmbus_vsock_path.clone(),
1916            vtl2_redirect: opt.vmbus_redirect,
1917            vmbus_max_version: opt.vmbus_max_version,
1918            #[cfg(windows)]
1919            vmbusproxy_handle,
1920        }),
1921        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1922            vsock_listener: vtl2_vsock_listener,
1923            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1924            ..Default::default()
1925        }),
1926        vmbus_devices,
1927        chipset_devices,
1928        pci_chipset_devices,
1929        isa_dma_controller,
1930        chipset_capabilities: capabilities,
1931        layout: layout_config,
1932        #[cfg(windows)]
1933        vpci_resources,
1934        vmgs,
1935        secure_boot_enabled: opt.secure_boot,
1936        custom_uefi_vars,
1937        firmware_event_send: None,
1938        debugger_rpc: None,
1939        rtc_delta_milliseconds: 0,
1940        automatic_guest_reset: !opt.halt_on_reset,
1941        efi_diagnostics_log_level: {
1942            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1943                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1944                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1945                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1946            }
1947        },
1948    };
1949
1950    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1951    Ok((cfg, resources))
1952}
1953
/// Returns the terminal application to use for externally launched console
/// windows, if one is configured through the environment.
///
/// `OPENVMM_TERM` takes precedence; `HVLITE_TERM` is consulted only when
/// `OPENVMM_TERM` is not set. Returns `None` when neither variable is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    let configured = match std::env::var_os("OPENVMM_TERM") {
        Some(term) => term,
        None => std::env::var_os("HVLITE_TERM")?,
    };
    Some(PathBuf::from(configured))
}
1960
// Removes `path`, but only once it has been confirmed to be a Unix socket.
//
// This is best effort: if the file type cannot be determined the path is left
// untouched, and any error from the removal itself is discarded.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let is_socket = {
        use std::os::unix::fs::FileTypeExt;
        match path.metadata() {
            Ok(meta) => meta.file_type().is_socket(),
            Err(_) => false,
        }
    };

    if !is_socket {
        return;
    }
    let _ = std::fs::remove_file(path);
}
1974
/// Creates a new kernel switch port on the requested switch.
///
/// `switch_id` is parsed as the switch identifier; when it is `None`,
/// `vmswitch::hcn::DEFAULT_SWITCH` is used instead. The port is given a
/// freshly generated random GUID.
///
/// Returns the port identifier in its `openvmm_defs` form together with the
/// kernel port object.
///
/// # Errors
///
/// Fails if `switch_id` does not parse, if the switch cannot be opened, or if
/// the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: Option<&str>,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: match switch_id {
            None => vmswitch::hcn::DEFAULT_SWITCH,
            Some(raw) => raw.parse().context("invalid switch id")?,
        },
        port: Guid::new_random(),
    };

    // Opening the network is only used as an existence check for the switch;
    // the resulting handle is discarded right away.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    let config_id = openvmm_defs::config::SwitchPortId {
        switch: kernel_id.switch,
        port: kernel_id.port,
    };
    Ok((config_id, switch_port))
}
2000
2001fn parse_endpoint(
2002    cli_cfg: &NicConfigCli,
2003    index: &mut usize,
2004    resources: &mut VmResources,
2005) -> anyhow::Result<NicConfig> {
2006    let _ = resources;
2007    let endpoint = match &cli_cfg.endpoint {
2008        EndpointConfigCli::Consomme { cidr, host_fwd } => {
2009            let ports = host_fwd
2010                .iter()
2011                .map(|fwd| {
2012                    use net_backend_resources::consomme::HostPortProtocol;
2013                    net_backend_resources::consomme::HostPortConfig {
2014                        protocol: match fwd.protocol {
2015                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
2016                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
2017                        },
2018                        host_address: fwd
2019                            .host_address
2020                            .map(net_backend_resources::consomme::HostIpAddress::from),
2021                        host_port: net_backend_resources::consomme::HostPort::Fixed(fwd.host_port),
2022                        guest_port: fwd.guest_port,
2023                    }
2024                })
2025                .collect();
2026            net_backend_resources::consomme::ConsommeHandle {
2027                cidr: cidr.clone(),
2028                ports,
2029            }
2030            .into_resource()
2031        }
2032        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
2033        EndpointConfigCli::Dio { id } => {
2034            #[cfg(windows)]
2035            {
2036                let (port_id, port) = new_switch_port(id.as_deref())?;
2037                resources.switch_ports.push(port);
2038                net_backend_resources::dio::WindowsDirectIoHandle {
2039                    switch_port_id: net_backend_resources::dio::SwitchPortId {
2040                        switch: port_id.switch,
2041                        port: port_id.port,
2042                    },
2043                }
2044                .into_resource()
2045            }
2046
2047            #[cfg(not(windows))]
2048            {
2049                let _ = id;
2050                bail!("cannot use dio on non-windows platforms")
2051            }
2052        }
2053        EndpointConfigCli::Tap { name } => {
2054            #[cfg(target_os = "linux")]
2055            {
2056                let fd = net_tap::tap::open_tap(name)
2057                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
2058                net_backend_resources::tap::TapHandle { fd }.into_resource()
2059            }
2060
2061            #[cfg(not(target_os = "linux"))]
2062            {
2063                let _ = name;
2064                bail!("TAP backend is only supported on Linux")
2065            }
2066        }
2067    };
2068
2069    // Pick a random MAC address.
2070    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
2071    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
2072
2073    // Pick a fixed instance ID based on the index.
2074    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
2075    let instance_id = Guid {
2076        data1: *index as u32,
2077        ..BASE_INSTANCE_ID
2078    };
2079    *index += 1;
2080
2081    Ok(NicConfig {
2082        vtl: cli_cfg.vtl,
2083        instance_id,
2084        endpoint,
2085        mac_address: mac_address.into(),
2086        max_queues: cli_cfg.max_queues,
2087        pcie_port: cli_cfg.pcie_port.clone(),
2088    })
2089}
2090
/// A resolved NIC description, as produced by `parse_endpoint` from one NIC
/// command-line entry.
#[derive(Debug)]
struct NicConfig {
    /// Copied from the command-line entry's `vtl`; `into_netvsp_handle`
    /// returns it alongside the device resource.
    vtl: DeviceVtl,
    /// Built by `parse_endpoint` from a fixed base GUID with the NIC index
    /// stored in `data1`.
    instance_id: Guid,
    /// `00:15:5D` prefix followed by three random bytes, chosen by
    /// `parse_endpoint`.
    mac_address: MacAddress,
    /// Network backend resource selected from the command-line endpoint kind.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Copied from the command-line entry's `max_queues`.
    max_queues: Option<u16>,
    /// Copied from the command-line entry's `pcie_port`; not used by
    /// `into_netvsp_handle`.
    pcie_port: Option<String>,
}
2100
2101impl NicConfig {
2102    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
2103        (
2104            self.vtl,
2105            netvsp_resources::NetvspHandle {
2106                instance_id: self.instance_id,
2107                mac_address: self.mac_address,
2108                endpoint: self.endpoint,
2109                max_queues: self.max_queues,
2110            }
2111            .into_resource(),
2112        )
2113    }
2114}
2115
/// One entry collected by `disk_open_inner` while expanding a disk
/// command-line description: either a layer or a complete disk.
enum LayerOrDisk {
    /// A disk layer description, used as-is when `disk_open` assembles a
    /// layered disk.
    Layer(DiskLayerDescription),
    /// A complete disk resource. `disk_open` returns it directly when it is
    /// the only entry, and otherwise wraps it in a `DiskLayerHandle` so it can
    /// take part in a layered disk.
    Disk(Resource<DiskHandleKind>),
}
2120
2121async fn disk_open(
2122    disk_cli: &DiskCliKind,
2123    read_only: bool,
2124) -> anyhow::Result<Resource<DiskHandleKind>> {
2125    let mut layers = Vec::new();
2126    disk_open_inner(disk_cli, read_only, &mut layers).await?;
2127    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
2128        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
2129            unreachable!()
2130        };
2131        Ok(disk)
2132    } else {
2133        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
2134            layers: layers
2135                .into_iter()
2136                .map(|layer| match layer {
2137                    LayerOrDisk::Layer(layer) => layer,
2138                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
2139                        layer: DiskLayerHandle(disk).into_resource(),
2140                        read_cache: false,
2141                        write_through: false,
2142                    },
2143                })
2144                .collect(),
2145        }))
2146    }
2147}
2148
2149fn disk_open_inner<'a>(
2150    disk_cli: &'a DiskCliKind,
2151    read_only: bool,
2152    layers: &'a mut Vec<LayerOrDisk>,
2153) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
2154    Box::pin(async move {
2155        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
2156            LayerOrDisk::Layer(layer.into_resource().into())
2157        }
2158        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
2159            LayerOrDisk::Disk(disk.into_resource())
2160        }
2161        match disk_cli {
2162            &DiskCliKind::Memory(len) => {
2163                layers.push(layer(RamDiskLayerHandle {
2164                    len: Some(len),
2165                    sector_size: None,
2166                }));
2167            }
2168            DiskCliKind::File {
2169                path,
2170                create_with_len,
2171                direct,
2172            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
2173                create_disk_type(
2174                    path,
2175                    *size,
2176                    OpenDiskOptions {
2177                        read_only: false,
2178                        direct: *direct,
2179                    },
2180                )
2181                .with_context(|| format!("failed to create {}", path.display()))?
2182            } else {
2183                open_disk_type(
2184                    path,
2185                    OpenDiskOptions {
2186                        read_only,
2187                        direct: *direct,
2188                    },
2189                )
2190                .await
2191                .with_context(|| format!("failed to open {}", path.display()))?
2192            })),
2193            DiskCliKind::Blob { kind, url } => {
2194                layers.push(disk(disk_backend_resources::BlobDiskHandle {
2195                    url: url.to_owned(),
2196                    format: match kind {
2197                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
2198                        cli_args::BlobKind::Vhd1 => {
2199                            disk_backend_resources::BlobDiskFormat::FixedVhd1
2200                        }
2201                    },
2202                }))
2203            }
2204            DiskCliKind::MemoryDiff(inner) => {
2205                layers.push(layer(RamDiskLayerHandle {
2206                    len: None,
2207                    sector_size: None,
2208                }));
2209                disk_open_inner(inner, true, layers).await?;
2210            }
2211            DiskCliKind::PersistentReservationsWrapper(inner) => {
2212                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
2213                    disk_open(inner, read_only).await?,
2214                )))
2215            }
2216            DiskCliKind::DelayDiskWrapper {
2217                delay_ms,
2218                disk: inner,
2219            } => layers.push(disk(DelayDiskHandle {
2220                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
2221                disk: disk_open(inner, read_only).await?,
2222            })),
2223            DiskCliKind::Crypt {
2224                disk: inner,
2225                cipher,
2226                key_file,
2227            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
2228                disk: disk_open(inner, read_only).await?,
2229                cipher: match cipher {
2230                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
2231                },
2232                key: fs_err::read(key_file).context("failed to read key file")?,
2233            })),
2234            DiskCliKind::Sqlite {
2235                path,
2236                create_with_len,
2237            } => {
2238                // FUTURE: this code should be responsible for opening
2239                // file-handle(s) itself, and passing them into sqlite via a custom
2240                // vfs. For now though - simply check if the file exists or not, and
2241                // perform early validation of filesystem-level create options.
2242                match (create_with_len.is_some(), path.exists()) {
2243                    (true, true) => anyhow::bail!(
2244                        "cannot create new sqlite disk at {} - file already exists",
2245                        path.display()
2246                    ),
2247                    (false, false) => anyhow::bail!(
2248                        "cannot open sqlite disk at {} - file not found",
2249                        path.display()
2250                    ),
2251                    _ => {}
2252                }
2253
2254                layers.push(layer(SqliteDiskLayerHandle {
2255                    dbhd_path: path.display().to_string(),
2256                    format_dbhd: create_with_len.map(|len| {
2257                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2258                            logically_read_only: false,
2259                            len: Some(len),
2260                        }
2261                    }),
2262                }));
2263            }
2264            DiskCliKind::SqliteDiff { path, create, disk } => {
2265                // FUTURE: this code should be responsible for opening
2266                // file-handle(s) itself, and passing them into sqlite via a custom
2267                // vfs. For now though - simply check if the file exists or not, and
2268                // perform early validation of filesystem-level create options.
2269                match (create, path.exists()) {
2270                    (true, true) => anyhow::bail!(
2271                        "cannot create new sqlite disk at {} - file already exists",
2272                        path.display()
2273                    ),
2274                    (false, false) => anyhow::bail!(
2275                        "cannot open sqlite disk at {} - file not found",
2276                        path.display()
2277                    ),
2278                    _ => {}
2279                }
2280
2281                layers.push(layer(SqliteDiskLayerHandle {
2282                    dbhd_path: path.display().to_string(),
2283                    format_dbhd: create.then_some(
2284                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2285                            logically_read_only: false,
2286                            len: None,
2287                        },
2288                    ),
2289                }));
2290                disk_open_inner(disk, true, layers).await?;
2291            }
2292            DiskCliKind::AutoCacheSqlite {
2293                cache_path,
2294                key,
2295                disk,
2296            } => {
2297                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2298                    read_cache: true,
2299                    write_through: false,
2300                    layer: SqliteAutoCacheDiskLayerHandle {
2301                        cache_path: cache_path.clone(),
2302                        cache_key: key.clone(),
2303                    }
2304                    .into_resource(),
2305                }));
2306                disk_open_inner(disk, read_only, layers).await?;
2307            }
2308        }
2309        Ok(())
2310    })
2311}
2312
2313/// Get the system page size.
2314pub(crate) fn system_page_size() -> u32 {
2315    sparse_mmap::SparseMapping::page_size() as u32
2316}
2317
/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when the cfg is `x86_64`, and `"aarch64"` for
/// every other setting.
#[cfg(guest_arch = "x86_64")]
pub(crate) const GUEST_ARCH: &str = "x86_64";
/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when the cfg is `x86_64`, and `"aarch64"` for
/// every other setting.
#[cfg(not(guest_arch = "x86_64"))]
pub(crate) const GUEST_ARCH: &str = "aarch64";
2324
2325/// Open a snapshot directory and validate it against the current VM config.
2326/// Returns the shared memory fd (from memory.bin) and the saved device state.
2327fn prepare_snapshot_restore(
2328    snapshot_dir: &Path,
2329    opt: &Options,
2330) -> anyhow::Result<(
2331    openvmm_defs::worker::SharedMemoryFd,
2332    mesh::payload::message::ProtobufMessage,
2333)> {
2334    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2335
2336    // Validate manifest against current VM config.
2337    openvmm_helpers::snapshot::validate_manifest(
2338        &manifest,
2339        GUEST_ARCH,
2340        opt.memory_size(),
2341        opt.processors,
2342        system_page_size(),
2343    )?;
2344
2345    // Open memory.bin (existing file, no create, no resize).
2346    let memory_file = fs_err::OpenOptions::new()
2347        .read(true)
2348        .write(true)
2349        .open(snapshot_dir.join("memory.bin"))?;
2350
2351    // Validate file size matches expected memory size.
2352    let file_size = memory_file.metadata()?.len();
2353    if file_size != manifest.memory_size_bytes {
2354        anyhow::bail!(
2355            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2356            manifest.memory_size_bytes,
2357        );
2358    }
2359
2360    let shared_memory_fd =
2361        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2362
2363    // Reconstruct ProtobufMessage from the saved state bytes.
2364    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2365    // back to ProtobufMessage.
2366    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2367        .context("failed to decode saved state from snapshot")?;
2368
2369    Ok((shared_memory_fd, state_msg))
2370}
2371
2372fn do_main(pidfile_guard: &mut Option<pidfile::Pidfile>) -> anyhow::Result<()> {
2373    #[cfg(windows)]
2374    pal::windows::disable_hard_error_dialog();
2375
2376    tracing_init::enable_tracing()?;
2377
2378    // Try to run as a worker host.
2379    // On success the worker runs to completion and then exits the process (does
2380    // not return). Any worker host setup errors are return and bubbled up.
2381    meshworker::run_vmm_mesh_host()?;
2382
2383    let opt = cli_args::parse_options();
2384    if let Some(path) = &opt.write_saved_state_proto {
2385        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2386            .write_to_path(path)
2387            .context("failed to write protobuf descriptors")?;
2388        return Ok(());
2389    }
2390
2391    if let Some(ref path) = opt.pidfile {
2392        *pidfile_guard = Some(pidfile::Pidfile::new(path).context("failed to create pidfile")?);
2393    }
2394
2395    if let Some(path) = opt.relay_console_path {
2396        let console_title = opt.relay_console_title.unwrap_or_default();
2397        return console_relay::relay_console(&path, console_title.as_str());
2398    }
2399
2400    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2401    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2402        return block_on(async {
2403            let _ = std::fs::remove_file(path);
2404            let listener =
2405                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2406
2407            let transport = if opt.ttrpc.is_some() {
2408                ttrpc::RpcTransport::Ttrpc
2409            } else {
2410                ttrpc::RpcTransport::Grpc
2411            };
2412
2413            // This is a local launch
2414            let mut handle =
2415                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2416                    listener,
2417                    transport,
2418                })
2419                .await?;
2420
2421            tracing::info!(%transport, path = %path.display(), "listening");
2422
2423            // Signal the the parent process that the server is ready.
2424            pal::close_stdout().context("failed to close stdout")?;
2425
2426            handle.join().await?;
2427
2428            Ok(())
2429        });
2430    }
2431
2432    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2433}
2434
2435fn new_hvsock_service_id(port: u32) -> Guid {
2436    // This GUID is an embedding of the AF_VSOCK port into an
2437    // AF_HYPERV service ID.
2438    Guid {
2439        data1: port,
2440        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2441    }
2442}
2443
2444async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2445    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2446    let result = run_control_inner(driver, &mut mesh, opt).await;
2447    // If setup failed before the mesh was handed to the controller, shut it
2448    // down so the child host process exits cleanly without noisy logs.
2449    if let Some(mesh) = mesh {
2450        mesh.shutdown().await;
2451    }
2452    result
2453}
2454
2455async fn run_control_inner(
2456    driver: &DefaultDriver,
2457    mesh_slot: &mut Option<VmmMesh>,
2458    opt: Options,
2459) -> anyhow::Result<()> {
2460    let mesh = mesh_slot.as_ref().unwrap();
2461    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2462
2463    let mut vnc_worker = None;
2464    if opt.gfx || opt.vnc.vnc {
2465        // Parse the listen address. Try as a full SocketAddr (host:port) first;
2466        // fall back to a bare IP, using the configured port.
2467        let addr: std::net::SocketAddr = if let Ok(sa) =
2468            opt.vnc.vnc_listen.parse::<std::net::SocketAddr>()
2469        {
2470            sa
2471        } else {
2472            let ip: std::net::IpAddr = opt.vnc.vnc_listen.parse().with_context(|| {
2473                format!(
2474                    "invalid VNC listen address: {} (expected IP address or socket address like [::1]:5900)",
2475                    opt.vnc.vnc_listen
2476                )
2477            })?;
2478            std::net::SocketAddr::new(ip, opt.vnc.vnc_port)
2479        };
2480
2481        let socket = socket2::Socket::new(
2482            if addr.is_ipv6() {
2483                socket2::Domain::IPV6
2484            } else {
2485                socket2::Domain::IPV4
2486            },
2487            socket2::Type::STREAM,
2488            None,
2489        )
2490        .with_context(|| format!("creating VNC socket for {}", addr))?;
2491
2492        if addr.is_ipv6() {
2493            if let Err(e) = socket.set_only_v6(false) {
2494                tracing::warn!(
2495                    error = %e,
2496                    "failed to enable dual-stack on IPv6 VNC socket, IPv4 clients may not be able to connect"
2497                );
2498            }
2499        }
2500        socket.set_reuse_address(true)?;
2501        socket
2502            .bind(&addr.into())
2503            .with_context(|| format!("binding VNC socket to {}", addr))?;
2504        socket
2505            .listen(128)
2506            .with_context(|| format!("listening on VNC socket {}", addr))?;
2507        let listener: TcpListener = socket.into();
2508
2509        if !addr.ip().is_loopback() {
2510            tracing::warn!(
2511                address = %addr,
2512                "VNC server listening on non-localhost address without authentication"
2513            );
2514        }
2515
2516        let input_send = vm_config.input.sender();
2517        let framebuffer = resources
2518            .framebuffer_access
2519            .take()
2520            .expect("synth video enabled");
2521
2522        let vnc_host = mesh
2523            .make_host("vnc", None)
2524            .await
2525            .context("spawning vnc process failed")?;
2526
2527        vnc_worker = Some(
2528            vnc_host
2529                .launch_worker(
2530                    vnc_worker_defs::VNC_WORKER_TCP,
2531                    VncParameters {
2532                        listener,
2533                        framebuffer,
2534                        input_send,
2535                        dirty_recv: resources.dirty_rect_recv.take(),
2536                        max_clients: opt.vnc.vnc_max_clients,
2537                        evict_oldest: opt.vnc.vnc_evict_oldest,
2538                    },
2539                )
2540                .await?,
2541        )
2542    }
2543
2544    // spin up the debug worker
2545    let gdb_worker = if let Some(port) = opt.gdb {
2546        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2547            .with_context(|| format!("binding to gdb port {}", port))?;
2548
2549        let (req_tx, req_rx) = mesh::channel();
2550        vm_config.debugger_rpc = Some(req_rx);
2551
2552        let gdb_host = mesh
2553            .make_host("gdb", None)
2554            .await
2555            .context("spawning gdbstub process failed")?;
2556
2557        Some(
2558            gdb_host
2559                .launch_worker(
2560                    debug_worker_defs::DEBUGGER_WORKER,
2561                    debug_worker_defs::DebuggerParameters {
2562                        listener,
2563                        req_chan: req_tx,
2564                        vp_count: vm_config.processor_topology.proc_count,
2565                        target_arch: if cfg!(guest_arch = "x86_64") {
2566                            debug_worker_defs::TargetArch::X86_64
2567                        } else {
2568                            debug_worker_defs::TargetArch::Aarch64
2569                        },
2570                    },
2571                )
2572                .await
2573                .context("failed to launch gdbstub worker")?,
2574        )
2575    } else {
2576        None
2577    };
2578
2579    // spin up the VM
2580    let (vm_rpc, rpc_recv) = mesh::channel();
2581    let (notify_send, notify_recv) = mesh::channel();
2582    let vm_worker = {
2583        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2584
2585        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2586            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2587            (Some(fd), Some(state_msg))
2588        } else {
2589            let shared_memory = opt
2590                .memory_backing_file()
2591                .map(|path| {
2592                    openvmm_helpers::shared_memory::open_memory_backing_file(
2593                        path,
2594                        opt.memory_size(),
2595                    )
2596                })
2597                .transpose()?;
2598            (shared_memory, None)
2599        };
2600
2601        let params = VmWorkerParameters {
2602            hypervisor: match &opt.hypervisor {
2603                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2604                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2605            },
2606            cfg: vm_config,
2607            saved_state,
2608            shared_memory,
2609            rpc: rpc_recv,
2610            notify: notify_send,
2611        };
2612        vm_host
2613            .launch_worker(VM_WORKER, params)
2614            .await
2615            .context("failed to launch vm worker")?
2616    };
2617
2618    if opt.restore_snapshot.is_some() {
2619        tracing::info!("restoring VM from snapshot");
2620    }
2621
2622    if !opt.paused {
2623        vm_rpc.call(VmRpc::Resume, ()).await?;
2624    }
2625
2626    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2627        driver.clone(),
2628        DiagDialer {
2629            driver: driver.clone(),
2630            vm_rpc: vm_rpc.clone(),
2631            openhcl_vtl: if opt.vtl2 {
2632                DeviceVtl::Vtl2
2633            } else {
2634                DeviceVtl::Vtl0
2635            },
2636        },
2637    ));
2638
2639    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2640
2641    // Create channels between the REPL and VmController.
2642    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2643    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2644
2645    let has_vtl2 = resources.vtl2_settings.is_some();
2646
2647    // Build the VmController with exclusive resources.
2648    let controller = vm_controller::VmController {
2649        mesh: mesh_slot.take().unwrap(),
2650        vm_worker,
2651        vnc_worker,
2652        gdb_worker,
2653        diag_inspector: Some(diag_inspector),
2654        vtl2_settings: resources.vtl2_settings,
2655        ged_rpc: resources.ged_rpc.clone(),
2656        vm_rpc: vm_rpc.clone(),
2657        paravisor_diag: Some(paravisor_diag),
2658        igvm_path: opt.igvm.clone(),
2659        memory_backing_file: opt.memory_backing_file().cloned(),
2660        memory: opt.memory_size(),
2661        processors: opt.processors,
2662        log_file: opt.log_file.clone(),
2663    };
2664
2665    // Spawn the VmController as a task.
2666    let controller_task = driver.spawn(
2667        "vm-controller",
2668        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2669    );
2670
2671    // Run the REPL with shareable resources.
2672    let repl_result = repl::run_repl(
2673        driver,
2674        repl::ReplResources {
2675            vm_rpc,
2676            vm_controller: vm_controller_send,
2677            vm_controller_events: vm_controller_event_recv,
2678            scsi_rpc: resources.scsi_rpc,
2679            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2680            shutdown_ic: resources.shutdown_ic,
2681            kvp_ic: resources.kvp_ic,
2682            console_in: resources.console_in,
2683            has_vtl2,
2684        },
2685    )
2686    .await;
2687
2688    // Wait for the controller task to finish (it stops the VM worker and
2689    // shuts down the mesh).
2690    controller_task.await;
2691
2692    repl_result
2693}
2694
/// Dialer handed to the diagnostics client (`diag_client::DiagClient`) for
/// establishing connections to the guest.
///
/// Each dial sends a `VmRpc::ConnectHvsock` request over `vm_rpc` and wraps
/// the resulting socket for use with `driver`.
struct DiagDialer {
    /// Driver used to wrap the connected socket in a `PolledSocket`.
    driver: DefaultDriver,
    /// Channel used to send RPC requests to the VM worker.
    vm_rpc: mesh::Sender<VmRpc>,
    /// VTL passed along with the hvsock connect request.
    openhcl_vtl: DeviceVtl,
}
2700
2701impl mesh_rpc::client::Dial for DiagDialer {
2702    type Stream = PolledSocket<unix_socket::UnixStream>;
2703
2704    async fn dial(&mut self) -> io::Result<Self::Stream> {
2705        let service_id = new_hvsock_service_id(1);
2706        let socket = self
2707            .vm_rpc
2708            .call_failable(
2709                VmRpc::ConnectHvsock,
2710                (
2711                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2712                    service_id,
2713                    self.openhcl_vtl,
2714                ),
2715            )
2716            .await
2717            .map_err(io::Error::other)?;
2718
2719        PolledSocket::new(&self.driver, socket)
2720    }
2721}
2722
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
///
/// The background task that services requests is spawned lazily, on the first
/// inspect request, rather than at construction time.
pub(crate) struct DiagInspector(DiagInspectorInner);
2730
/// Lifecycle state of a [`DiagInspector`].
enum DiagInspectorInner {
    /// No background task has been spawned yet. Holds the driver and the
    /// diagnostics client that will be used to spawn it.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// The background task is running.
    Started {
        /// Channel used to forward deferred inspect requests to the task.
        send: mesh::Sender<inspect::Deferred>,
        /// Handle to the spawned task; held but never read.
        // NOTE(review): presumably kept so the task lives as long as the
        // inspector -- confirm the drop semantics of `Task`.
        _task: Task<()>,
    },
    /// Transient placeholder used while moving the contents out of
    /// `NotStarted`; never expected to be observed.
    Invalid,
}
2739
2740impl DiagInspector {
2741    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2742        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2743    }
2744
2745    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2746        loop {
2747            match self.0 {
2748                DiagInspectorInner::NotStarted { .. } => {
2749                    let DiagInspectorInner::NotStarted(driver, client) =
2750                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2751                    else {
2752                        unreachable!()
2753                    };
2754                    let (send, recv) = mesh::channel();
2755                    let task = driver.clone().spawn("diag-inspect", async move {
2756                        Self::run(&client, recv).await
2757                    });
2758
2759                    self.0 = DiagInspectorInner::Started { send, _task: task };
2760                }
2761                DiagInspectorInner::Started { ref send, .. } => break send,
2762                DiagInspectorInner::Invalid => unreachable!(),
2763            }
2764        }
2765    }
2766
2767    async fn run(
2768        diag_client: &diag_client::DiagClient,
2769        mut recv: mesh::Receiver<inspect::Deferred>,
2770    ) {
2771        while let Some(deferred) = recv.next().await {
2772            let info = deferred.external_request();
2773            let result = match info.request_type {
2774                inspect::ExternalRequestType::Inspect { depth } => {
2775                    if depth == 0 {
2776                        Ok(inspect::Node::Unevaluated)
2777                    } else {
2778                        // TODO: Support taking timeouts from the command line
2779                        diag_client
2780                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2781                            .await
2782                    }
2783                }
2784                inspect::ExternalRequestType::Update { value } => {
2785                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2786                }
2787            };
2788            deferred.complete_external(
2789                result.unwrap_or_else(|err| {
2790                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2791                }),
2792                inspect::SensitivityLevel::Unspecified,
2793            )
2794        }
2795    }
2796}
2797
2798impl InspectMut for DiagInspector {
2799    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2800        self.start().send(req.defer());
2801    }
2802}