Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod pidfile;
15mod repl;
16mod serial_io;
17mod storage_builder;
18mod tracing_init;
19mod ttrpc;
20mod vm_controller;
21
22// `pub` so that the missing_docs warning fires for options without
23// documentation.
24pub use cli_args::Options;
25use console_relay::ConsoleLaunchOptions;
26
27use crate::cli_args::SecureBootTemplateCli;
28use anyhow::Context;
29use anyhow::bail;
30use chipset_resources::battery::HostBatteryUpdate;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::GuestPowerAction;
35use cli_args::NicConfigCli;
36use cli_args::ProvisionVmgs;
37use cli_args::SerialConfigCli;
38use cli_args::UefiConsoleModeCli;
39use cli_args::VirtioBusCli;
40use cli_args::VmgsCli;
41use crash_dump::spawn_dump_handler;
42use cxl_spec::test::CxlTestDeviceHandle;
43use disk_backend_resources::DelayDiskHandle;
44use disk_backend_resources::DiskLayerDescription;
45use disk_backend_resources::layer::DiskLayerHandle;
46use disk_backend_resources::layer::RamDiskLayerHandle;
47use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
48use disk_backend_resources::layer::SqliteDiskLayerHandle;
49use floppy_resources::FloppyDiskConfig;
50use framebuffer::FRAMEBUFFER_SIZE;
51use framebuffer::FramebufferAccess;
52use futures::AsyncReadExt;
53use futures::AsyncWrite;
54use futures::StreamExt;
55use futures::executor::block_on;
56use futures::io::AllowStdIo;
57use gdma_resources::GdmaDeviceHandle;
58use gdma_resources::VportDefinition;
59use guid::Guid;
60use input_core::MultiplexedInputHandle;
61use inspect::InspectMut;
62use io::Read;
63use mesh::CancelContext;
64use mesh::CellUpdater;
65use mesh::rpc::RpcSend;
66use meshworker::VmmMesh;
67use net_backend_resources::mac_address::MacAddress;
68use nvme_resources::NvmeControllerRequest;
69use openvmm_defs::config::Config;
70use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
71use openvmm_defs::config::DeviceVtl;
72use openvmm_defs::config::EfiDiagnosticsLogLevelType;
73use openvmm_defs::config::HypervisorConfig;
74use openvmm_defs::config::LateMapVtl0MemoryPolicy;
75use openvmm_defs::config::LoadMode;
76use openvmm_defs::config::MemoryConfig;
77use openvmm_defs::config::NumaDistance;
78use openvmm_defs::config::NumaNode;
79use openvmm_defs::config::NumaTopology;
80use openvmm_defs::config::PcieDeviceConfig;
81use openvmm_defs::config::PcieMmioRangeConfig;
82use openvmm_defs::config::PcieRootComplexConfig;
83use openvmm_defs::config::PcieRootPortConfig;
84use openvmm_defs::config::PcieSwitchConfig;
85use openvmm_defs::config::ProcessorTopologyConfig;
86use openvmm_defs::config::RootComplexCxlConfig;
87use openvmm_defs::config::SerialInformation;
88use openvmm_defs::config::VirtioBus;
89use openvmm_defs::config::VmbusConfig;
90use openvmm_defs::config::VpAssignment;
91use openvmm_defs::config::VpciDeviceConfig;
92use openvmm_defs::config::Vtl2Config;
93use openvmm_defs::rpc::VmRpc;
94use openvmm_defs::worker::VM_WORKER;
95use openvmm_defs::worker::VmWorkerParameters;
96use openvmm_helpers::disk::OpenDiskOptions;
97use openvmm_helpers::disk::create_disk_type;
98use openvmm_helpers::disk::open_disk_type;
99use pal_async::DefaultDriver;
100use pal_async::DefaultPool;
101use pal_async::socket::PolledSocket;
102use pal_async::task::Spawn;
103use pal_async::task::Task;
104use serial_16550_resources::ComPort;
105use serial_core::resources::DisconnectedSerialBackendHandle;
106use sparse_mmap::alloc_shared_memory;
107use std::cell::RefCell;
108use std::collections::BTreeMap;
109use std::fmt::Write as _;
110use std::future::pending;
111use std::io;
112#[cfg(unix)]
113use std::io::IsTerminal;
114use std::io::Write;
115use std::net::TcpListener;
116use std::path::Path;
117use std::path::PathBuf;
118use std::sync::Arc;
119use std::thread;
120use std::time::Duration;
121use storvsp_resources::ScsiControllerRequest;
122use tpm_resources::TpmDeviceHandle;
123use tpm_resources::TpmRegisterLayout;
124use uidevices_resources::SynthKeyboardHandle;
125use uidevices_resources::SynthMouseHandle;
126use uidevices_resources::SynthVideoHandle;
127use video_core::SharedFramebufferHandle;
128use virtio_resources::VirtioPciDeviceHandle;
129use vm_manifest_builder::BaseChipsetType;
130use vm_manifest_builder::MachineArch;
131use vm_manifest_builder::VmChipsetResult;
132use vm_manifest_builder::VmManifestBuilder;
133use vm_resource::IntoResource;
134use vm_resource::Resource;
135use vm_resource::kind::DiskHandleKind;
136use vm_resource::kind::DiskLayerHandleKind;
137use vm_resource::kind::NetEndpointHandleKind;
138use vm_resource::kind::VirtioDeviceHandle;
139use vm_resource::kind::VmbusDeviceHandleKind;
140use vmbus_serial_resources::VmbusSerialDeviceHandle;
141use vmbus_serial_resources::VmbusSerialPort;
142use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
143use vmgs_resources::GuestStateEncryptionPolicy;
144use vmgs_resources::VmgsDisk;
145use vmgs_resources::VmgsFileHandle;
146use vmgs_resources::VmgsResource;
147use vmotherboard::ChipsetDeviceHandle;
148use vnc_worker_defs::VncParameters;
149
150pub fn openvmm_main() {
151    // Save the current state of the terminal so we can restore it back to
152    // normal before exiting.
153    #[cfg(unix)]
154    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
155
156    let mut pidfile_guard: Option<pidfile::Pidfile> = None;
157    let exit_code = match do_main(&mut pidfile_guard) {
158        Ok(code) => code,
159        Err(err) => {
160            eprintln!("fatal error: {:?}", err);
161            1
162        }
163    };
164
165    // Restore the terminal to its initial state.
166    #[cfg(unix)]
167    if let Some(orig_termios) = orig_termios {
168        term::set_termios(orig_termios);
169    }
170
171    // Clean up the pidfile before terminating, since
172    // pal::process::terminate skips destructors.
173    drop(pidfile_guard);
174
175    // Terminate the process immediately without graceful shutdown of DLLs or
176    // C++ destructors or anything like that. This is all unnecessary and saves
177    // time on Windows.
178    //
179    // Do flush stdout, though, since there may be buffered data.
180    let _ = io::stdout().flush();
181    pal::process::terminate(exit_code);
182}
183
184#[derive(Default)]
185struct VmResources {
186    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
187    framebuffer_access: Option<FramebufferAccess>,
188    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
189    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
190    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
191    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
192    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
193    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
194    /// Receives dirty rectangles from the synthetic video device for the VNC worker.
195    dirty_rect_recv: Option<mesh::Receiver<Vec<video_core::DirtyRect>>>,
196    #[cfg(windows)]
197    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
198}
199
200struct ConsoleState<'a> {
201    device: &'a str,
202    input: Box<dyn AsyncWrite + Unpin + Send>,
203}
204
205/// Build a flat list of switches with their parent port assignments.
206///
207/// This function converts hierarchical CLI switch definitions into a flat list
208/// where each switch specifies its parent port directly.
209fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
210    all_switches
211        .iter()
212        .map(|switch_cli| PcieSwitchConfig {
213            name: switch_cli.name.clone(),
214            num_downstream_ports: switch_cli.num_downstream_ports,
215            parent_port: switch_cli.port_name.clone(),
216            hotplug: switch_cli.hotplug,
217            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
218        })
219        .collect()
220}
221
222async fn vm_config_from_command_line(
223    spawner: impl Spawn,
224    mesh: &VmmMesh,
225    opt: &Options,
226) -> anyhow::Result<(Config, VmResources)> {
227    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
228    // Ensure the serial driver stays alive with no tasks.
229    serial_driver.spawn("leak", pending::<()>()).detach();
230
231    let openhcl_vtl = if opt.vtl2 {
232        DeviceVtl::Vtl2
233    } else {
234        DeviceVtl::Vtl0
235    };
236
237    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
238    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
239        Ok(match cli_cfg {
240            SerialConfigCli::Console => {
241                if let Some(console_state) = console_state.borrow().as_ref() {
242                    bail!("console already set by {}", console_state.device);
243                }
244                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
245                let (serial_read, serial_write) = AsyncReadExt::split(serial);
246                *console_state.borrow_mut() = Some(ConsoleState {
247                    device,
248                    input: Box::new(serial_write),
249                });
250                thread::Builder::new()
251                    .name(name.to_owned())
252                    .spawn(move || {
253                        let _ = block_on(futures::io::copy(
254                            serial_read,
255                            &mut AllowStdIo::new(term::raw_stdout()),
256                        ));
257                    })
258                    .unwrap();
259                Some(config)
260            }
261            SerialConfigCli::Stderr => {
262                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
263                thread::Builder::new()
264                    .name(name.to_owned())
265                    .spawn(move || {
266                        let _ = block_on(futures::io::copy(
267                            serial,
268                            &mut AllowStdIo::new(term::raw_stderr()),
269                        ));
270                    })
271                    .unwrap();
272                Some(config)
273            }
274            SerialConfigCli::File(path) => {
275                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
276                let file = fs_err::File::create(path).context("failed to create file")?;
277
278                thread::Builder::new()
279                    .name(name.to_owned())
280                    .spawn(move || {
281                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
282                    })
283                    .unwrap();
284                Some(config)
285            }
286            SerialConfigCli::None => None,
287            SerialConfigCli::Pipe(path) => {
288                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
289            }
290            SerialConfigCli::Tcp(addr) => {
291                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
292            }
293            SerialConfigCli::NewConsole(app, window_title) => {
294                let path = console_relay::random_console_path();
295                let config =
296                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
297                let window_title =
298                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
299
300                console_relay::launch_console(
301                    app.or_else(openvmm_terminal_app).as_deref(),
302                    &path,
303                    ConsoleLaunchOptions {
304                        window_title: Some(window_title),
305                    },
306                )
307                .context("failed to launch console")?;
308
309                Some(config)
310            }
311        })
312    };
313
314    let mut vmbus_devices = Vec::new();
315
316    let serial0_cfg = setup_serial(
317        "com1",
318        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
319        if cfg!(guest_arch = "x86_64") {
320            "ttyS0"
321        } else {
322            "ttyAMA0"
323        },
324    )?;
325    let serial1_cfg = setup_serial(
326        "com2",
327        opt.com2.clone().unwrap_or(SerialConfigCli::None),
328        if cfg!(guest_arch = "x86_64") {
329            "ttyS1"
330        } else {
331            "ttyAMA1"
332        },
333    )?;
334    let serial2_cfg = setup_serial(
335        "com3",
336        opt.com3.clone().unwrap_or(SerialConfigCli::None),
337        if cfg!(guest_arch = "x86_64") {
338            "ttyS2"
339        } else {
340            "ttyAMA2"
341        },
342    )?;
343    let serial3_cfg = setup_serial(
344        "com4",
345        opt.com4.clone().unwrap_or(SerialConfigCli::None),
346        if cfg!(guest_arch = "x86_64") {
347            "ttyS3"
348        } else {
349            "ttyAMA3"
350        },
351    )?;
352    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
353        "vmbus_com1",
354        opt.vmbus_com1_serial
355            .clone()
356            .unwrap_or(SerialConfigCli::None),
357        "vmbus_com1",
358    )? {
359        vmbus_devices.push((
360            openhcl_vtl,
361            VmbusSerialDeviceHandle {
362                port: VmbusSerialPort::Com1,
363                backend: vmbus_com1_cfg,
364            }
365            .into_resource(),
366        ));
367        true
368    } else {
369        false
370    };
371    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
372        "vmbus_com2",
373        opt.vmbus_com2_serial
374            .clone()
375            .unwrap_or(SerialConfigCli::None),
376        "vmbus_com2",
377    )? {
378        vmbus_devices.push((
379            openhcl_vtl,
380            VmbusSerialDeviceHandle {
381                port: VmbusSerialPort::Com2,
382                backend: vmbus_com2_cfg,
383            }
384            .into_resource(),
385        ));
386        true
387    } else {
388        false
389    };
390    let debugcon_cfg = setup_serial(
391        "debugcon",
392        opt.debugcon
393            .clone()
394            .map(|cfg| cfg.serial)
395            .unwrap_or(SerialConfigCli::None),
396        "debugcon",
397    )?;
398
399    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
400        setup_serial("virtio-console", serial_cfg, "hvc0")?
401    } else {
402        None
403    };
404
405    let mut resources = VmResources::default();
406    let mut console_str = "";
407    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
408        resources.console_in = Some(input);
409        console_str = device;
410    }
411
412    if opt.shared_memory {
413        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
414    }
415    if opt.deprecated_prefetch {
416        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
417    }
418    if opt.deprecated_private_memory {
419        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
420    }
421    if opt.deprecated_thp {
422        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
423    }
424    if opt.deprecated_memory_backing_file.is_some() {
425        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
426    }
427
428    opt.validate_memory_options()?;
429
430    const MAX_PROCESSOR_COUNT: u32 = 1024;
431
432    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
433        bail!("invalid proc count: {}", opt.processors);
434    }
435
436    // Total SCSI channel count should not exceed the processor count
437    // (at most, one channel per VP).
438    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
439        bail!(
440            "invalid SCSI sub-channel count: requested {}, max {}",
441            opt.scsi_sub_channels,
442            MAX_PROCESSOR_COUNT - 1
443        );
444    }
445
446    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
447
448    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
449
450    // Register named controllers first, so that --disk on=<name>
451    // references can be resolved.
452    for ctrl in &opt.nvme_pci {
453        let transport = match &ctrl.transport {
454            cli_args::NvmeControllerTransport::Pcie(port) => {
455                storage_builder::NvmeControllerTransport::Pcie(port.clone())
456            }
457            cli_args::NvmeControllerTransport::Vpci(guid) => {
458                let guid = guid.unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
459                storage_builder::NvmeControllerTransport::Vpci(guid)
460            }
461        };
462        storage.add_nvme_controller(ctrl.id.clone(), ctrl.vtl, transport, None)?;
463    }
464
465    for ctrl in &opt.vmbus_scsi {
466        let instance_id = storage_builder::deterministic_guid(&ctrl.id);
467        storage.add_scsi_controller(ctrl.id.clone(), ctrl.vtl, instance_id, ctrl.sub_channels)?;
468    }
469
470    for ctrl in &opt.openhcl_controller {
471        let controller_type = match ctrl.controller_type {
472            cli_args::OpenhclControllerType::Scsi => storage_builder::OpenhclControllerType::Scsi,
473            cli_args::OpenhclControllerType::Nvme => storage_builder::OpenhclControllerType::Nvme,
474        };
475        let instance_id = ctrl
476            .guid
477            .unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
478        storage.add_openhcl_controller(ctrl.id.clone(), controller_type, instance_id)?;
479    }
480
481    for &cli_args::DiskCli {
482        vtl,
483        ref kind,
484        read_only,
485        is_dvd,
486        underhill,
487        ref pcie_port,
488        ref controller,
489        nsid,
490        lun,
491        ref relay,
492    } in &opt.disk
493    {
494        if controller.is_none() && underhill.is_none() && relay.is_none() {
495            tracing::warn!(
496                "--disk without `on` is deprecated; \
497                 use --vmbus-scsi and --disk on=<name> instead"
498            );
499        }
500
501        let relay_target = relay
502            .as_ref()
503            .map(|(name, loc)| storage_builder::RelayTarget {
504                controller: name.clone(),
505                location: *loc,
506            });
507
508        let target = if let Some(name) = controller {
509            if pcie_port.is_some() {
510                anyhow::bail!("`on` is incompatible with `pcie_port` on `--disk`");
511            }
512            storage_builder::DiskLocation::Named {
513                controller: name.clone(),
514                nsid,
515                lun,
516            }
517        } else if pcie_port.is_some() {
518            anyhow::bail!("`--disk` is incompatible with `pcie_port` without `controller`");
519        } else {
520            storage_builder::DiskLocation::Scsi(None)
521        };
522
523        storage
524            .add(
525                vtl,
526                underhill,
527                relay_target,
528                target,
529                kind,
530                is_dvd,
531                read_only,
532            )
533            .await?;
534    }
535
536    for &cli_args::IdeDiskCli {
537        ref kind,
538        read_only,
539        channel,
540        device,
541        is_dvd,
542    } in &opt.ide
543    {
544        storage
545            .add(
546                DeviceVtl::Vtl0,
547                None,
548                None,
549                storage_builder::DiskLocation::Ide(channel, device),
550                kind,
551                is_dvd,
552                read_only,
553            )
554            .await?;
555    }
556
557    if !opt.nvme.is_empty() {
558        tracing::warn!("--nvme is deprecated; use --nvme-pci and --disk on=<name> instead");
559
560        // Pre-register implicit PCIe controllers for unique port names.
561        let mut registered_ports = std::collections::BTreeSet::new();
562        for disk in &opt.nvme {
563            if let Some(port) = &disk.pcie_port {
564                if registered_ports.insert(port.clone()) {
565                    storage.add_nvme_controller(
566                        port.clone(),
567                        DeviceVtl::Vtl0,
568                        storage_builder::NvmeControllerTransport::Pcie(port.clone()),
569                        None,
570                    ).with_context(|| format!(
571                        "legacy --nvme flag conflicts with an explicit controller named '{port}'; \
572                         use --nvme-pci and --disk on=<name> instead"
573                    ))?;
574                }
575            }
576        }
577    }
578
579    for &cli_args::DiskCli {
580        vtl,
581        ref kind,
582        read_only,
583        is_dvd,
584        underhill,
585        ref pcie_port,
586        controller: _,
587        nsid: _,
588        lun: _,
589        relay: _,
590    } in &opt.nvme
591    {
592        let target = if let Some(port) = pcie_port {
593            storage_builder::DiskLocation::Named {
594                controller: port.clone(),
595                nsid: None,
596                lun: None,
597            }
598        } else {
599            storage_builder::DiskLocation::Nvme(None)
600        };
601        storage
602            .add(vtl, underhill, None, target, kind, is_dvd, read_only)
603            .await?;
604    }
605
606    for &cli_args::DiskCli {
607        vtl,
608        ref kind,
609        read_only,
610        is_dvd,
611        ref underhill,
612        ref pcie_port,
613        controller: _,
614        nsid: _,
615        lun: _,
616        relay: _,
617    } in &opt.virtio_blk
618    {
619        if underhill.is_some() {
620            anyhow::bail!("underhill not supported with virtio-blk");
621        }
622        storage
623            .add(
624                vtl,
625                None,
626                None,
627                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
628                kind,
629                is_dvd,
630                read_only,
631            )
632            .await?;
633    }
634
635    let mut floppy_disks = Vec::new();
636    for disk in &opt.floppy {
637        let &cli_args::FloppyDiskCli {
638            ref kind,
639            read_only,
640        } = disk;
641        floppy_disks.push(FloppyDiskConfig {
642            disk_type: disk_open(kind, read_only).await?,
643            read_only,
644        });
645    }
646
647    let mut vpci_mana_nics = [(); 3].map(|()| None);
648    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
649    let mut underhill_nics = Vec::new();
650    let mut vpci_devices = Vec::new();
651
652    let mut nic_index = 0;
653    for cli_cfg in &opt.net {
654        if cli_cfg.pcie_port.is_some() {
655            anyhow::bail!("`--net` does not support PCIe");
656        }
657        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
658        if cli_cfg.underhill {
659            if !opt.no_alias_map {
660                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
661            }
662            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
663                let vpci_instance_id = Guid::new_random();
664                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
665                    instance_id: vpci_instance_id.to_string(),
666                    subordinate_instance_id: None,
667                    max_sub_channels: None,
668                });
669                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
670            });
671            mana.1.vports.push(VportDefinition {
672                mac_address: vport.mac_address,
673                endpoint: vport.endpoint,
674            });
675        } else {
676            vmbus_devices.push(vport.into_netvsp_handle());
677        }
678    }
679
680    if opt.nic {
681        let nic_config = parse_endpoint(
682            &NicConfigCli {
683                vtl: DeviceVtl::Vtl0,
684                endpoint: EndpointConfigCli::Consomme {
685                    cidr: None,
686                    host_fwd: Vec::new(),
687                },
688                max_queues: None,
689                underhill: false,
690                pcie_port: None,
691            },
692            &mut nic_index,
693            &mut resources,
694        )?;
695        vmbus_devices.push(nic_config.into_netvsp_handle());
696    }
697
698    // Build initial PCIe devices list from CLI options. Storage devices
699    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
700    let mut pcie_devices = Vec::new();
701    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
702        tracing::info!(
703            port_name = %cli_cfg.port_name,
704            socket_addr = ?cli_cfg.socket_addr,
705            "instantiating PCIe remote device"
706        );
707
708        // Generate a deterministic instance ID based on index
709        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
710            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
711        let instance_id = Guid {
712            data1: index as u32,
713            ..PCIE_REMOTE_BASE_INSTANCE_ID
714        };
715
716        pcie_devices.push(PcieDeviceConfig {
717            port_name: cli_cfg.port_name.clone(),
718            resource: pcie_remote_resources::PcieRemoteHandle {
719                instance_id,
720                socket_addr: cli_cfg.socket_addr.clone(),
721                hu: cli_cfg.hu,
722                controller: cli_cfg.controller,
723            }
724            .into_resource(),
725        });
726    }
727
728    #[cfg(windows)]
729    let mut kernel_vmnics = Vec::new();
730    #[cfg(windows)]
731    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
732        // Pick a random MAC address.
733        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
734        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
735
736        // Pick a fixed instance ID based on the index.
737        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
738        let instance_id = Guid {
739            data1: index as u32,
740            ..BASE_INSTANCE_ID
741        };
742
743        let switch_id = if switch_id == "default" {
744            None
745        } else {
746            Some(switch_id.as_str())
747        };
748        let (port_id, port) = new_switch_port(switch_id)?;
749        resources.switch_ports.push(port);
750
751        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
752            instance_id,
753            mac_address: mac_address.into(),
754            switch_port_id: port_id,
755        });
756    }
757
758    for vport in &opt.mana {
759        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
760        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
761            (vtl, None) => {
762                &mut vpci_mana_nics[vtl]
763                    .get_or_insert_with(|| {
764                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
765                    })
766                    .1
767                    .vports
768            }
769            (0, Some(pcie_port)) => {
770                &mut pcie_mana_nics
771                    .entry(pcie_port)
772                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
773                    .vports
774            }
775            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
776        };
777        vport_array.push(VportDefinition {
778            mac_address: vport.mac_address,
779            endpoint: vport.endpoint,
780        });
781    }
782
783    vpci_devices.extend(
784        vpci_mana_nics
785            .into_iter()
786            .enumerate()
787            .filter_map(|(vtl, nic)| {
788                nic.map(|(instance_id, handle)| VpciDeviceConfig {
789                    vtl: match vtl {
790                        0 => DeviceVtl::Vtl0,
791                        1 => DeviceVtl::Vtl1,
792                        2 => DeviceVtl::Vtl2,
793                        _ => unreachable!(),
794                    },
795                    instance_id,
796                    resource: handle.into_resource(),
797                    vnode: None,
798                })
799            }),
800    );
801
802    pcie_devices.extend(
803        pcie_mana_nics
804            .into_iter()
805            .map(|(pcie_port, handle)| PcieDeviceConfig {
806                port_name: pcie_port,
807                resource: handle.into_resource(),
808            }),
809    );
810
811    for cxl_test in &opt.cxl_test {
812        pcie_devices.push(PcieDeviceConfig {
813            port_name: cxl_test.pcie_port.clone(),
814            resource: CxlTestDeviceHandle {
815                hdm_size_bytes: cxl_test.hdm_size,
816            }
817            .into_resource(),
818        });
819    }
820
821    #[cfg(guest_arch = "aarch64")]
822    let arch = MachineArch::Aarch64;
823    #[cfg(guest_arch = "x86_64")]
824    let arch = MachineArch::X86_64;
825
826    let mut pcie_root_complexes = Vec::new();
827    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
828        let ports: Vec<PcieRootPortConfig> = opt
829            .pcie_root_port
830            .iter()
831            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
832            .map(|port_cli| PcieRootPortConfig {
833                name: port_cli.name.clone(),
834                hotplug: port_cli.hotplug,
835                acs_capabilities_supported: port_cli.acs_capabilities_supported,
836                cxl: port_cli.cxl,
837            })
838            .collect();
839
840        const ONE_MB: u64 = 1024 * 1024;
841        // Keep all PCI windows 1MB-granular to match layout and downstream placement rules.
842        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
843        let high_mmio_size = rc_cli
844            .high_mmio
845            .checked_next_multiple_of(ONE_MB)
846            .context("high mmio rounding error")?;
847
848        // Count CXL-capable ports under the root bus. If the root bus has CXL root ports, it needs CHBCR.
849        let cxl_port_count = ports.iter().filter(|port| port.cxl).count() as u64;
850
851        let cxl = if cxl_port_count != 0 {
852            Some(RootComplexCxlConfig {
853                hdm_size: rc_cli.hdm,
854                hdm_window_restrictions: rc_cli.hdm_window_restrictions.bits(),
855            })
856        } else {
857            None
858        };
859        pcie_root_complexes.push(PcieRootComplexConfig {
860            index: i as u32,
861            name: rc_cli.name.clone(),
862            segment: rc_cli.segment,
863            start_bus: rc_cli.start_bus,
864            end_bus: rc_cli.end_bus,
865            low_mmio: PcieMmioRangeConfig::Dynamic {
866                size: low_mmio_size,
867            },
868            high_mmio: PcieMmioRangeConfig::Dynamic {
869                size: high_mmio_size,
870            },
871            cxl,
872            ports,
873            #[cfg(guest_arch = "aarch64")]
874            iommu: opt
875                .smmu
876                .iter()
877                .any(|s| s == &rc_cli.name)
878                .then_some(openvmm_defs::config::PcieIommuConfig::Smmu),
879            #[cfg(guest_arch = "x86_64")]
880            iommu: opt
881                .amd_iommu
882                .iter()
883                .any(|s| s == &rc_cli.name)
884                .then_some(openvmm_defs::config::PcieIommuConfig::AmdVi),
885            vnode: rc_cli.vnode,
886        });
887    }
888
889    // Validate that all --smmu / --amd-iommu names refer to known root complexes.
890    #[cfg(guest_arch = "aarch64")]
891    for name in &opt.smmu {
892        anyhow::ensure!(
893            pcie_root_complexes.iter().any(|rc| rc.name == *name),
894            "--smmu refers to unknown root complex '{name}'"
895        );
896    }
897    #[cfg(guest_arch = "x86_64")]
898    for name in &opt.amd_iommu {
899        anyhow::ensure!(
900            pcie_root_complexes.iter().any(|rc| rc.name == *name),
901            "--amd-iommu refers to unknown root complex '{name}'"
902        );
903    }
904
905    let pcie_switches = build_switch_list(&opt.pcie_switch);
906
907    #[cfg(target_os = "linux")]
908    let vfio_pcie_devices: Vec<PcieDeviceConfig> = {
909        use std::collections::HashMap;
910        use vm_resource::IntoResource;
911
912        // Process --iommu flags: open /dev/iommu for each declared context.
913        let mut iommu_map: HashMap<String, std::fs::File> = HashMap::new();
914        for iommu_cli in &opt.iommu {
915            anyhow::ensure!(
916                !iommu_map.contains_key(&iommu_cli.id),
917                "duplicate --iommu id={}",
918                iommu_cli.id
919            );
920            let file = std::fs::OpenOptions::new()
921                .read(true)
922                .write(true)
923                .open("/dev/iommu")
924                .context("failed to open /dev/iommu (is iommufd available?)")?;
925            iommu_map.insert(iommu_cli.id.clone(), file);
926        }
927
928        opt.vfio
929            .iter()
930            .map(|cli_cfg| {
931                let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
932
933                if let Some(iommu_id) = &cli_cfg.iommu {
934                    // cdev + iommufd path
935                    let iommufd = iommu_map.get(iommu_id).with_context(|| {
936                        format!(
937                            "--vfio device {} references iommu={iommu_id}, \
938                             but no --iommu id={iommu_id} was specified",
939                            cli_cfg.pci_id
940                        )
941                    })?;
942                    // Clone the iommufd fd so the per-iommu manager can own it.
943                    // The first device for a given iommu ID uses the cloned fd
944                    // to create the IoasManager; subsequent devices reuse the
945                    // existing manager and the cloned fd is dropped.
946                    let iommufd = iommufd.try_clone().with_context(|| {
947                        format!("failed to dup iommufd fd for iommu={iommu_id}")
948                    })?;
949
950                    // Open the cdev device node.
951                    let vfio_dev_dir = sysfs_path.join("vfio-dev");
952                    let entry = std::fs::read_dir(&vfio_dev_dir)
953                        .with_context(|| {
954                            format!(
955                                "failed to read {}: is {} bound to vfio-pci?",
956                                vfio_dev_dir.display(),
957                                cli_cfg.pci_id
958                            )
959                        })?
960                        .next()
961                        .context("no vfio-dev entry found")?
962                        .context("failed to read vfio-dev entry")?;
963                    let dev_path = Path::new("/dev/vfio/devices").join(entry.file_name());
964                    let cdev = std::fs::OpenOptions::new()
965                        .read(true)
966                        .write(true)
967                        .open(&dev_path)
968                        .with_context(|| format!("failed to open {}", dev_path.display()))?;
969
970                    Ok(PcieDeviceConfig {
971                        port_name: cli_cfg.port_name.clone(),
972                        resource: vfio_assigned_device_resources::VfioCdevDeviceHandle {
973                            pci_id: cli_cfg.pci_id.clone(),
974                            cdev,
975                            iommufd,
976                            iommu_id: iommu_id.clone(),
977                        }
978                        .into_resource(),
979                    })
980                } else {
981                    // Legacy group/container path
982                    let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
983                        .with_context(|| {
984                            format!("failed to read IOMMU group for {}", cli_cfg.pci_id)
985                        })?;
986                    let group_id: u64 = iommu_group_link
987                        .file_name()
988                        .and_then(|s| s.to_str())
989                        .context("invalid iommu_group symlink")?
990                        .parse()
991                        .context("failed to parse IOMMU group ID")?;
992                    let group = std::fs::OpenOptions::new()
993                        .read(true)
994                        .write(true)
995                        .open(format!("/dev/vfio/{group_id}"))
996                        .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
997
998                    Ok(PcieDeviceConfig {
999                        port_name: cli_cfg.port_name.clone(),
1000                        resource: vfio_assigned_device_resources::VfioDeviceHandle {
1001                            pci_id: cli_cfg.pci_id.clone(),
1002                            group,
1003                        }
1004                        .into_resource(),
1005                    })
1006                }
1007            })
1008            .collect::<anyhow::Result<Vec<_>>>()?
1009    };
1010
1011    #[cfg(windows)]
1012    let vpci_resources: Vec<_> = opt
1013        .device
1014        .iter()
1015        .map(|path| -> anyhow::Result<_> {
1016            Ok(virt_whp::device::DeviceHandle(
1017                whp::VpciResource::new(
1018                    None,
1019                    Default::default(),
1020                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
1021                )
1022                .with_context(|| format!("opening PCI device {}", path))?,
1023            ))
1024        })
1025        .collect::<Result<_, _>>()?;
1026
1027    // Create a vmbusproxy handle if needed by any devices.
1028    #[cfg(windows)]
1029    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
1030        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
1031    } else {
1032        None
1033    };
1034
1035    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc.vnc || opt.pcat {
1036        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
1037        let (fb, fba) =
1038            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
1039        resources.framebuffer_access = Some(fba);
1040        Some(fb)
1041    } else {
1042        None
1043    };
1044
1045    let load_mode;
1046    let with_hv;
1047
1048    let any_serial_configured = serial0_cfg.is_some()
1049        || serial1_cfg.is_some()
1050        || serial2_cfg.is_some()
1051        || serial3_cfg.is_some();
1052
1053    let has_com3 = serial2_cfg.is_some();
1054
1055    let mut chipset = VmManifestBuilder::new(
1056        if opt.igvm.is_some() {
1057            BaseChipsetType::HclHost
1058        } else if opt.pcat {
1059            BaseChipsetType::HypervGen1
1060        } else if opt.uefi {
1061            BaseChipsetType::HypervGen2Uefi
1062        } else if opt.hv {
1063            BaseChipsetType::HyperVGen2LinuxDirect
1064        } else {
1065            BaseChipsetType::UnenlightenedLinuxDirect
1066        },
1067        arch,
1068    );
1069
1070    if framebuffer.is_some() {
1071        chipset = chipset.with_framebuffer();
1072    }
1073    if opt.guest_watchdog {
1074        chipset = chipset.with_guest_watchdog();
1075    }
1076    if any_serial_configured {
1077        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
1078    }
1079    if opt.battery {
1080        let (tx, rx) = mesh::channel();
1081        tx.send(HostBatteryUpdate::default_present());
1082        chipset = chipset.with_battery(rx);
1083    }
1084    if opt.no_vmbus {
1085        chipset = chipset.without_vmbus();
1086    }
1087    if let Some(cfg) = &opt.debugcon {
1088        chipset = chipset.with_debugcon(
1089            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
1090            cfg.port,
1091        );
1092    }
1093
1094    let custom_uefi_vars = {
1095        use firmware_uefi_custom_vars::CustomVars;
1096
1097        // load base vars from specified template, or use an empty set of base
1098        // vars if none was specified.
1099        let base_vars = match opt.secure_boot_template {
1100            Some(template) => match (arch, template) {
1101                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1102                    hyperv_secure_boot_templates::x64::microsoft_windows()
1103                }
1104                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1105                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1106                }
1107                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1108                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1109                }
1110                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1111                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1112                }
1113            },
1114            None => CustomVars::default(),
1115        };
1116
1117        // TODO: fallback to VMGS read if no command line flag was given
1118
1119        let custom_uefi_json_data = match &opt.custom_uefi_json {
1120            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1121            None => None,
1122        };
1123
1124        // obtain the final custom uefi vars by applying the delta onto the base vars
1125        match custom_uefi_json_data {
1126            Some(data) => {
1127                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1128                base_vars.apply_delta(delta)?
1129            }
1130            None => base_vars,
1131        }
1132    };
1133
1134    let efi_diagnostics_log_level = match opt.efi_diagnostics_log_level.unwrap_or_default() {
1135        EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1136        EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1137        EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1138    };
1139
1140    if opt.uefi {
1141        let log_level = match efi_diagnostics_log_level {
1142            EfiDiagnosticsLogLevelType::Default => {
1143                firmware_uefi_resources::LogLevel::make_default()
1144            }
1145            EfiDiagnosticsLogLevelType::Info => firmware_uefi_resources::LogLevel::make_info(),
1146            EfiDiagnosticsLogLevelType::Full => firmware_uefi_resources::LogLevel::make_full(),
1147        };
1148        let nvram_storage = if opt.vmgs.is_some() {
1149            VmgsFileHandle::new(vmgs_format::FileId::BIOS_NVRAM, true).into_resource()
1150        } else {
1151            EphemeralNonVolatileStoreHandle.into_resource()
1152        };
1153        chipset = chipset.with_uefi(vm_manifest_builder::UefiManifest::new(
1154            arch,
1155            custom_uefi_vars.clone(),
1156            opt.secure_boot,
1157            log_level,
1158            nvram_storage,
1159            None,
1160        ));
1161    }
1162
1163    // TODO: load from VMGS file if it exists
1164    let bios_guid = Guid::new_random();
1165
1166    let layout_config = chipset.layout_config();
1167    let VmChipsetResult {
1168        chipset,
1169        mut chipset_devices,
1170        pci_chipset_devices,
1171        isa_dma_controller,
1172        capabilities,
1173    } = chipset
1174        .build()
1175        .context("failed to build chipset configuration")?;
1176
1177    if opt.restore_snapshot.is_some() {
1178        // Snapshot restore: skip firmware loading entirely. Device state and
1179        // memory come from the snapshot directory.
1180        load_mode = LoadMode::None;
1181        with_hv = true;
1182    } else if let Some(path) = &opt.igvm {
1183        let file = fs_err::File::open(path)
1184            .context("failed to open igvm file")?
1185            .into();
1186        let cmdline = opt.cmdline.join(" ");
1187        with_hv = true;
1188
1189        load_mode = LoadMode::Igvm {
1190            file,
1191            cmdline,
1192            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1193            com_serial: has_com3.then(|| SerialInformation {
1194                io_port: ComPort::Com3.io_port(),
1195                irq: ComPort::Com3.irq().into(),
1196            }),
1197        };
1198    } else if opt.pcat {
1199        // Emit a nice error early instead of complaining about missing firmware.
1200        if arch != MachineArch::X86_64 {
1201            anyhow::bail!("pcat not supported on this architecture");
1202        }
1203        with_hv = true;
1204
1205        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1206        load_mode = LoadMode::Pcat {
1207            firmware,
1208            boot_order: opt
1209                .pcat_boot_order
1210                .map(|x| x.0)
1211                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1212        };
1213    } else if opt.uefi {
1214        use openvmm_defs::config::UefiConsoleMode;
1215
1216        with_hv = true;
1217
1218        let firmware = fs_err::File::open(
1219            (opt.uefi_firmware.0)
1220                .as_ref()
1221                .context("must provide uefi firmware when booting with uefi")?,
1222        )
1223        .context("failed to open uefi firmware")?;
1224
1225        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1226        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1227        load_mode = LoadMode::Uefi {
1228            firmware: firmware.into(),
1229            enable_debugging: opt.uefi_debug,
1230            enable_memory_protections: opt.uefi_enable_memory_protections,
1231            disable_frontpage: opt.disable_frontpage,
1232            enable_tpm: opt.tpm,
1233            enable_battery: opt.battery,
1234            enable_serial: any_serial_configured,
1235            enable_vpci_boot: false,
1236            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1237                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1238                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1239                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1240                UefiConsoleModeCli::None => UefiConsoleMode::None,
1241            }),
1242            default_boot_always_attempt: opt.default_boot_always_attempt,
1243            bios_guid,
1244            enable_vmbus: !opt.no_vmbus,
1245        };
1246    } else {
1247        // Linux Direct
1248        let mut cmdline = "panic=-1 debug".to_string();
1249
1250        with_hv = opt.hv;
1251        if with_hv && opt.pcie_root_complex.is_empty() {
1252            cmdline += " pci=off";
1253        }
1254
1255        if !console_str.is_empty() {
1256            let _ = write!(&mut cmdline, " console={}", console_str);
1257        }
1258
1259        if opt.gfx {
1260            cmdline += " console=tty";
1261        }
1262        for extra in &opt.cmdline {
1263            let _ = write!(&mut cmdline, " {}", extra);
1264        }
1265
1266        let kernel = fs_err::File::open(
1267            (opt.kernel.0)
1268                .as_ref()
1269                .context("must provide kernel when booting with linux direct")?,
1270        )
1271        .context("failed to open kernel")?;
1272        let initrd = (opt.initrd.0)
1273            .as_ref()
1274            .map(fs_err::File::open)
1275            .transpose()
1276            .context("failed to open initrd")?;
1277
1278        let custom_dsdt = match &opt.custom_dsdt {
1279            Some(path) => {
1280                let mut v = Vec::new();
1281                fs_err::File::open(path)
1282                    .context("failed to open custom dsdt")?
1283                    .read_to_end(&mut v)
1284                    .context("failed to read custom dsdt")?;
1285                Some(v)
1286            }
1287            None => None,
1288        };
1289
1290        load_mode = LoadMode::Linux {
1291            kernel: kernel.into(),
1292            initrd: initrd.map(Into::into),
1293            cmdline,
1294            custom_dsdt,
1295            enable_serial: any_serial_configured,
1296            boot_mode: if opt.device_tree {
1297                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1298            } else {
1299                openvmm_defs::config::LinuxDirectBootMode::Acpi
1300            },
1301        };
1302    }
1303
1304    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1305        let disk = VmgsDisk {
1306            disk: disk_open(kind, false)
1307                .await
1308                .context("failed to open vmgs disk")?,
1309            encryption_policy: if opt.test_gsp_by_id {
1310                GuestStateEncryptionPolicy::GspById(true)
1311            } else {
1312                GuestStateEncryptionPolicy::None(true)
1313            },
1314        };
1315        match provision {
1316            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1317            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1318            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1319        }
1320    } else {
1321        VmgsResource::Ephemeral
1322    });
1323
1324    if with_get && with_hv {
1325        let has_vtl0_nvme = storage.has_vtl0_nvme();
1326        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1327            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1328            fixed: Some(Default::default()),
1329            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1330                storage_controllers: storage.build_openhcl_settings(opt.vmbus_redirect),
1331                nic_devices: underhill_nics,
1332            }),
1333            namespace_settings: Vec::default(),
1334        };
1335
1336        // Cache the VTL2 settings for later modification via the interactive console.
1337        resources.vtl2_settings = Some(vtl2_settings.clone());
1338
1339        let (send, guest_request_recv) = mesh::channel();
1340        resources.ged_rpc = Some(send);
1341
1342        let vmgs = vmgs.take().unwrap();
1343
1344        vmbus_devices.extend([
1345            (
1346                openhcl_vtl,
1347                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1348            ),
1349            (
1350                openhcl_vtl,
1351                get_resources::ged::GuestEmulationDeviceHandle {
1352                    firmware: if opt.pcat {
1353                        get_resources::ged::GuestFirmwareConfig::Pcat {
1354                            boot_order: opt
1355                                .pcat_boot_order
1356                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1357                                .map(|x| match x {
1358                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1359                                        get_resources::ged::PcatBootDevice::Floppy
1360                                    }
1361                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1362                                        get_resources::ged::PcatBootDevice::HardDrive
1363                                    }
1364                                    openvmm_defs::config::PcatBootDevice::Optical => {
1365                                        get_resources::ged::PcatBootDevice::Optical
1366                                    }
1367                                    openvmm_defs::config::PcatBootDevice::Network => {
1368                                        get_resources::ged::PcatBootDevice::Network
1369                                    }
1370                                }),
1371                        }
1372                    } else {
1373                        use get_resources::ged::UefiConsoleMode;
1374
1375                        get_resources::ged::GuestFirmwareConfig::Uefi {
1376                            enable_vpci_boot: has_vtl0_nvme,
1377                            firmware_debug: opt.uefi_debug,
1378                            disable_frontpage: opt.disable_frontpage,
1379                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1380                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1381                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1382                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1383                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1384                            },
1385                            default_boot_always_attempt: opt.default_boot_always_attempt,
1386                        }
1387                    },
1388                    com1: with_vmbus_com1_serial,
1389                    com2: with_vmbus_com2_serial,
1390                    serial_tx_only: opt.serial_tx_only,
1391                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1392                    vmbus_redirection: opt.vmbus_redirect,
1393                    vmgs,
1394                    framebuffer: opt
1395                        .vtl2_gfx
1396                        .then(|| SharedFramebufferHandle.into_resource()),
1397                    guest_request_recv,
1398                    enable_tpm: opt.tpm,
1399                    firmware_event_send: None,
1400                    secure_boot_enabled: opt.secure_boot,
1401                    secure_boot_template: match opt.secure_boot_template {
1402                        Some(SecureBootTemplateCli::Windows) => {
1403                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1404                        },
1405                        Some(SecureBootTemplateCli::UefiCa) => {
1406                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1407                        }
1408                        None => {
1409                            get_resources::ged::GuestSecureBootTemplateType::None
1410                        },
1411                    },
1412                    enable_battery: opt.battery,
1413                    no_persistent_secrets: true,
1414                    igvm_attest_test_config: None,
1415                    test_gsp_by_id: opt.test_gsp_by_id,
1416                    efi_diagnostics_log_level: {
1417                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1418                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1419                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1420                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1421                        }
1422                    },
1423                    hv_sint_enabled: false,
1424                }
1425                .into_resource(),
1426            ),
1427        ]);
1428    }
1429
1430    if opt.tpm && !opt.vtl2 {
1431        let register_layout = if cfg!(guest_arch = "x86_64") {
1432            TpmRegisterLayout::IoPort
1433        } else {
1434            TpmRegisterLayout::Mmio
1435        };
1436
1437        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1438            (
1439                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1440                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1441            )
1442        } else {
1443            (
1444                EphemeralNonVolatileStoreHandle.into_resource(),
1445                EphemeralNonVolatileStoreHandle.into_resource(),
1446            )
1447        };
1448
1449        chipset_devices.push(ChipsetDeviceHandle {
1450            name: "tpm".to_string(),
1451            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1452                device: TpmDeviceHandle {
1453                    ppi_store,
1454                    nvram_store,
1455                    nvram_size: None,
1456                    refresh_tpm_seeds: false,
1457                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1458                    register_layout,
1459                    guest_secret_key: None,
1460                    logger: None,
1461                    is_confidential_vm: false,
1462                    bios_guid,
1463                }
1464                .into_resource(),
1465                worker_host: mesh.make_host("tpm", None).await?,
1466            }
1467            .into_resource(),
1468        });
1469    }
1470
1471    let vga_firmware = if opt.pcat {
1472        Some(openvmm_pcat_locator::find_svga_bios(
1473            opt.vga_firmware.as_deref(),
1474        )?)
1475    } else {
1476        None
1477    };
1478
1479    if opt.gfx {
1480        // Channel for the video device to report dirty rectangles to the VNC worker.
1481        let (dirt_send, dirt_recv) = mesh::channel();
1482        resources.dirty_rect_recv = Some(dirt_recv);
1483
1484        vmbus_devices.extend([
1485            (
1486                DeviceVtl::Vtl0,
1487                SynthVideoHandle {
1488                    framebuffer: SharedFramebufferHandle.into_resource(),
1489                    dirt_send: Some(dirt_send),
1490                }
1491                .into_resource(),
1492            ),
1493            (
1494                DeviceVtl::Vtl0,
1495                SynthKeyboardHandle {
1496                    source: MultiplexedInputHandle {
1497                        // Save 0 for PS/2
1498                        elevation: 1,
1499                    }
1500                    .into_resource(),
1501                }
1502                .into_resource(),
1503            ),
1504            (
1505                DeviceVtl::Vtl0,
1506                SynthMouseHandle {
1507                    source: MultiplexedInputHandle {
1508                        // Save 0 for PS/2
1509                        elevation: 1,
1510                    }
1511                    .into_resource(),
1512                }
1513                .into_resource(),
1514            ),
1515        ]);
1516    }
1517
1518    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1519        if let Some(path) = path {
1520            cleanup_socket(path.as_ref());
1521            let listener = unix_socket::UnixListener::bind(path)
1522                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1523            Ok(Some(listener))
1524        } else {
1525            Ok(None)
1526        }
1527    };
1528
1529    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1530    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1531
1532    if let Some(path) = &opt.openhcl_dump_path {
1533        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1534        task.detach();
1535        vmbus_devices.push((openhcl_vtl, resource));
1536    }
1537
1538    #[cfg(guest_arch = "aarch64")]
1539    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1540        openvmm_defs::config::Aarch64TopologyConfig {
1541            // TODO: allow this to be configured from the command line
1542            gic_config: None,
1543            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1544            gic_msi: match opt.gic_msi {
1545                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1546                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1547                cli_args::GicMsiCli::V2m => {
1548                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1549                }
1550            },
1551        },
1552    );
1553    #[cfg(guest_arch = "x86_64")]
1554    let topology_arch =
1555        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1556            apic_id_offset: opt.apic_id_offset,
1557            x2apic: opt.x2apic,
1558        });
1559
1560    let with_isolation = if let Some(isolation) = &opt.isolation {
1561        // TODO: For now, isolation is only supported with VTL2.
1562        if !opt.vtl2 {
1563            anyhow::bail!("isolation is only currently supported with vtl2");
1564        }
1565
1566        // TODO: Alias map support is not yet implemented with isolation.
1567        if !opt.no_alias_map {
1568            anyhow::bail!("alias map not supported with isolation");
1569        }
1570
1571        match isolation {
1572            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1573        }
1574    } else {
1575        None
1576    };
1577
1578    if with_hv && !opt.no_vmbus {
1579        let (shutdown_send, shutdown_recv) = mesh::channel();
1580        resources.shutdown_ic = Some(shutdown_send);
1581        let (kvp_send, kvp_recv) = mesh::channel();
1582        resources.kvp_ic = Some(kvp_send);
1583        vmbus_devices.extend(
1584            [
1585                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1586                    recv: shutdown_recv,
1587                }
1588                .into_resource(),
1589                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1590                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1591            ]
1592            .map(|r| (DeviceVtl::Vtl0, r)),
1593        );
1594    }
1595
1596    if let Some(hive_path) = &opt.imc {
1597        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1598        vmbus_devices.push((
1599            DeviceVtl::Vtl0,
1600            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1601        ));
1602    }
1603
1604    let mut virtio_devices = Vec::new();
1605    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1606        let bus = match bus {
1607            VirtioBusCli::Auto => {
1608                // Use VPCI when possible (currently only on Windows and macOS due
1609                // to KVM backend limitations).
1610                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1611                    None
1612                } else {
1613                    Some(VirtioBus::Pci)
1614                }
1615            }
1616            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1617            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1618            VirtioBusCli::Vpci => None,
1619        };
1620        if let Some(bus) = bus {
1621            virtio_devices.push((bus, resource));
1622        } else {
1623            vpci_devices.push(VpciDeviceConfig {
1624                vtl: DeviceVtl::Vtl0,
1625                instance_id: Guid::new_random(),
1626                resource: VirtioPciDeviceHandle(resource).into_resource(),
1627                vnode: None,
1628            });
1629        }
1630    };
1631
1632    for cli_cfg in &opt.virtio_net {
1633        if cli_cfg.underhill {
1634            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1635        }
1636        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1637        let resource = virtio_resources::net::VirtioNetHandle {
1638            max_queues: vport.max_queues,
1639            mac_address: vport.mac_address,
1640            endpoint: vport.endpoint,
1641        }
1642        .into_resource();
1643        if let Some(pcie_port) = &cli_cfg.pcie_port {
1644            pcie_devices.push(PcieDeviceConfig {
1645                port_name: pcie_port.clone(),
1646                resource: VirtioPciDeviceHandle(resource).into_resource(),
1647            });
1648        } else {
1649            add_virtio_device(VirtioBusCli::Auto, resource);
1650        }
1651    }
1652
1653    for args in &opt.virtio_fs {
1654        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1655            tag: args.tag.clone(),
1656            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1657                root_path: args.path.clone(),
1658                mount_options: args.options.clone(),
1659            },
1660        }
1661        .into_resource();
1662        if let Some(pcie_port) = &args.pcie_port {
1663            pcie_devices.push(PcieDeviceConfig {
1664                port_name: pcie_port.clone(),
1665                resource: VirtioPciDeviceHandle(resource).into_resource(),
1666            });
1667        } else {
1668            add_virtio_device(opt.virtio_fs_bus, resource);
1669        }
1670    }
1671
1672    for args in &opt.virtio_fs_shmem {
1673        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1674            tag: args.tag.clone(),
1675            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1676                root_path: args.path.clone(),
1677            },
1678        }
1679        .into_resource();
1680        if let Some(pcie_port) = &args.pcie_port {
1681            pcie_devices.push(PcieDeviceConfig {
1682                port_name: pcie_port.clone(),
1683                resource: VirtioPciDeviceHandle(resource).into_resource(),
1684            });
1685        } else {
1686            add_virtio_device(opt.virtio_fs_bus, resource);
1687        }
1688    }
1689
1690    for args in &opt.virtio_9p {
1691        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1692            tag: args.tag.clone(),
1693            root_path: args.path.clone(),
1694            debug: opt.virtio_9p_debug,
1695        }
1696        .into_resource();
1697        if let Some(pcie_port) = &args.pcie_port {
1698            pcie_devices.push(PcieDeviceConfig {
1699                port_name: pcie_port.clone(),
1700                resource: VirtioPciDeviceHandle(resource).into_resource(),
1701            });
1702        } else {
1703            add_virtio_device(VirtioBusCli::Auto, resource);
1704        }
1705    }
1706
1707    if let Some(pmem_args) = &opt.virtio_pmem {
1708        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1709            path: pmem_args.path.clone(),
1710        }
1711        .into_resource();
1712        if let Some(pcie_port) = &pmem_args.pcie_port {
1713            pcie_devices.push(PcieDeviceConfig {
1714                port_name: pcie_port.clone(),
1715                resource: VirtioPciDeviceHandle(resource).into_resource(),
1716            });
1717        } else {
1718            add_virtio_device(VirtioBusCli::Auto, resource);
1719        }
1720    }
1721
1722    if opt.virtio_rng {
1723        let resource: Resource<VirtioDeviceHandle> =
1724            virtio_resources::rng::VirtioRngHandle.into_resource();
1725        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1726            pcie_devices.push(PcieDeviceConfig {
1727                port_name: pcie_port.clone(),
1728                resource: VirtioPciDeviceHandle(resource).into_resource(),
1729            });
1730        } else {
1731            add_virtio_device(opt.virtio_rng_bus, resource);
1732        }
1733    }
1734
1735    if let Some(backend) = virtio_console_backend {
1736        let resource: Resource<VirtioDeviceHandle> =
1737            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1738        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1739            pcie_devices.push(PcieDeviceConfig {
1740                port_name: pcie_port.clone(),
1741                resource: VirtioPciDeviceHandle(resource).into_resource(),
1742            });
1743        } else {
1744            add_virtio_device(VirtioBusCli::Auto, resource);
1745        }
1746    }
1747
1748    // Handle --vhost-user arguments.
1749    #[cfg(target_os = "linux")]
1750    for vhost_cli in &opt.vhost_user {
1751        let stream =
1752            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1753                format!(
1754                    "failed to connect to vhost-user socket: {}",
1755                    vhost_cli.socket_path
1756                )
1757            })?;
1758
1759        use crate::cli_args::VhostUserDeviceTypeCli;
1760        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1761            VhostUserDeviceTypeCli::Fs {
1762                ref tag,
1763                num_queues,
1764                queue_size,
1765            } => virtio_resources::vhost_user::VhostUserFsHandle {
1766                socket: stream.into(),
1767                tag: tag.clone(),
1768                num_queues,
1769                queue_size,
1770            }
1771            .into_resource(),
1772            VhostUserDeviceTypeCli::Blk {
1773                num_queues,
1774                queue_size,
1775            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1776                socket: stream.into(),
1777                num_queues,
1778                queue_size,
1779            }
1780            .into_resource(),
1781            VhostUserDeviceTypeCli::Other {
1782                device_id,
1783                ref queue_sizes,
1784            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1785                socket: stream.into(),
1786                device_id,
1787                queue_sizes: queue_sizes.clone(),
1788            }
1789            .into_resource(),
1790        };
1791        if let Some(pcie_port) = &vhost_cli.pcie_port {
1792            pcie_devices.push(PcieDeviceConfig {
1793                port_name: pcie_port.clone(),
1794                resource: VirtioPciDeviceHandle(resource).into_resource(),
1795            });
1796        } else {
1797            add_virtio_device(VirtioBusCli::Auto, resource);
1798        }
1799    }
1800
1801    if let Some(vsock_path) = &opt.virtio_vsock_path {
1802        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1803        add_virtio_device(
1804            VirtioBusCli::Auto,
1805            virtio_resources::vsock::VirtioVsockHandle {
1806                // The guest CID does not matter since the UDS relay does not use it. It just needs
1807                // to be some non-reserved value for the guest to use.
1808                guest_cid: 0x3,
1809                base_path: vsock_path.clone(),
1810                listener,
1811            }
1812            .into_resource(),
1813        );
1814    }
1815
1816    let mut cfg = Config {
1817        chipset,
1818        load_mode,
1819        floppy_disks,
1820        pcie_root_complexes,
1821        #[cfg(target_os = "linux")]
1822        pcie_devices: {
1823            let mut devs = pcie_devices;
1824            devs.extend(vfio_pcie_devices);
1825            devs
1826        },
1827        #[cfg(not(target_os = "linux"))]
1828        pcie_devices,
1829        pcie_switches,
1830        vpci_devices,
1831        ide_disks: Vec::new(),
1832        numa: {
1833            if let Some(ref nodes) = opt.numa {
1834                // --numa mode: each --numa flag defines a node.
1835                NumaTopology {
1836                    nodes: nodes
1837                        .iter()
1838                        .map(|n| NumaNode {
1839                            mem: Some(MemoryConfig {
1840                                mem_size: n.memory.mem_size,
1841                                prefetch_memory: n.memory.prefetch,
1842                                private_memory: n.memory.shared == Some(false),
1843                                transparent_hugepages: n.memory.transparent_hugepages,
1844                                hugepages: n.memory.hugepages,
1845                                hugepage_size: n.memory.hugepage_size,
1846                                host_numa_node: n.host_numa_node,
1847                            }),
1848                            vps: match &n.vps {
1849                                Some(vps) => VpAssignment::Explicit(vps.clone()),
1850                                None => VpAssignment::FromTopology,
1851                            },
1852                        })
1853                        .collect(),
1854                    distances: opt
1855                        .numa_distance
1856                        .as_deref()
1857                        .unwrap_or(&[])
1858                        .iter()
1859                        .map(|d| NumaDistance {
1860                            src: d.src,
1861                            dst: d.dst,
1862                            distance: d.distance,
1863                        })
1864                        .collect(),
1865                }
1866            } else {
1867                // Single-node default from --memory.
1868                NumaTopology {
1869                    nodes: vec![NumaNode {
1870                        mem: Some(MemoryConfig {
1871                            mem_size: opt.memory_size(),
1872                            prefetch_memory: opt.prefetch_memory(),
1873                            private_memory: opt.private_memory(),
1874                            transparent_hugepages: opt.transparent_hugepages(),
1875                            hugepages: opt.memory.hugepages,
1876                            hugepage_size: opt.memory.hugepage_size,
1877                            host_numa_node: None,
1878                        }),
1879                        vps: VpAssignment::FromTopology,
1880                    }],
1881                    distances: vec![],
1882                }
1883            }
1884        },
1885        processor_topology: ProcessorTopologyConfig {
1886            proc_count: opt.processors,
1887            vps_per_socket: opt.vps_per_socket,
1888            enable_smt: match opt.smt {
1889                cli_args::SmtConfigCli::Auto => None,
1890                cli_args::SmtConfigCli::Force => Some(true),
1891                cli_args::SmtConfigCli::Off => Some(false),
1892            },
1893            arch: Some(topology_arch),
1894        },
1895        hypervisor: HypervisorConfig {
1896            with_hv,
1897            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1898                vtl0_alias_map: !opt.no_alias_map,
1899                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1900                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1901                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1902                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1903                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1904                        Some(LateMapVtl0MemoryPolicy::InjectException)
1905                    }
1906                },
1907            }),
1908            with_isolation,
1909        },
1910        #[cfg(windows)]
1911        kernel_vmnics,
1912        input: mesh::Receiver::new(),
1913        framebuffer,
1914        vga_firmware,
1915        vtl2_gfx: opt.vtl2_gfx,
1916        virtio_devices,
1917        vmbus: (with_hv && !opt.no_vmbus).then_some(VmbusConfig {
1918            vsock_listener: vtl0_vsock_listener,
1919            vsock_path: opt.vmbus_vsock_path.clone(),
1920            vtl2_redirect: opt.vmbus_redirect,
1921            vmbus_max_version: opt.vmbus_max_version,
1922            #[cfg(windows)]
1923            vmbusproxy_handle,
1924        }),
1925        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1926            vsock_listener: vtl2_vsock_listener,
1927            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1928            ..Default::default()
1929        }),
1930        vmbus_devices,
1931        chipset_devices,
1932        pci_chipset_devices,
1933        isa_dma_controller,
1934        chipset_capabilities: capabilities,
1935        layout: layout_config,
1936        #[cfg(windows)]
1937        vpci_resources,
1938        vmgs,
1939        secure_boot_enabled: opt.secure_boot,
1940        custom_uefi_vars,
1941        firmware_event_send: None,
1942        debugger_rpc: None,
1943        rtc_delta_milliseconds: 0,
1944        // Only let the partition auto-reset when the reset action is `reset`.
1945        // For `halt` or `exit`, the guest reset must surface as a halt event so
1946        // the controller can hold the VM or exit instead of rebooting in place.
1947        automatic_guest_reset: matches!(opt.guest_reset_action, GuestPowerAction::Reset),
1948        efi_diagnostics_log_level: {
1949            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1950                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1951                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1952                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1953            }
1954        },
1955    };
1956
1957    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1958    Ok((cfg, resources))
1959}
1960
/// Returns the terminal program to use for externally launched console
/// windows.
///
/// The `OPENVMM_TERM` environment variable is consulted first; when it is not
/// set, `HVLITE_TERM` is used instead. Returns `None` when neither variable is
/// set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    let configured = match std::env::var_os("OPENVMM_TERM") {
        Some(value) => Some(value),
        // Fall back to the older variable name.
        None => std::env::var_os("HVLITE_TERM"),
    };
    configured.map(PathBuf::from)
}
1967
// Best-effort removal of `path`, performed only once it has been confirmed to
// be a Unix socket. A failure to query the file type counts as "not a socket",
// and any error from the removal itself is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let confirmed_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let confirmed_socket = {
        use std::os::unix::fs::FileTypeExt;
        match path.metadata() {
            Ok(meta) => meta.file_type().is_socket(),
            Err(_) => false,
        }
    };

    if !confirmed_socket {
        return;
    }

    // Deliberately ignore the result: this is cleanup, not a required step.
    let _ = std::fs::remove_file(path);
}
1981
/// Creates a new kernel switch port with a random port ID.
///
/// `switch_id` selects the switch to attach to; when it is `None`,
/// `vmswitch::hcn::DEFAULT_SWITCH` is used. Returns the port identity in the
/// `openvmm_defs` representation together with the created port object.
///
/// Fails if `switch_id` does not parse, if the switch's network cannot be
/// opened, or if the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: Option<&str>,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_port_id = vmswitch::kernel::SwitchPortId {
        switch: if let Some(requested) = switch_id {
            requested.parse().context("invalid switch id")?
        } else {
            vmswitch::hcn::DEFAULT_SWITCH
        },
        port: Guid::new_random(),
    };

    // Only the success of the open matters here; the returned value is
    // discarded immediately.
    let _ = vmswitch::hcn::Network::open(&kernel_port_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_port_id.switch))?;

    let switch_port = vmswitch::kernel::SwitchPort::new(&kernel_port_id)
        .context("failed to create switch port")?;

    Ok((
        openvmm_defs::config::SwitchPortId {
            switch: kernel_port_id.switch,
            port: kernel_port_id.port,
        },
        switch_port,
    ))
}
2007
2008fn parse_endpoint(
2009    cli_cfg: &NicConfigCli,
2010    index: &mut usize,
2011    resources: &mut VmResources,
2012) -> anyhow::Result<NicConfig> {
2013    let _ = resources;
2014    let endpoint = match &cli_cfg.endpoint {
2015        EndpointConfigCli::Consomme { cidr, host_fwd } => {
2016            let ports = host_fwd
2017                .iter()
2018                .map(|fwd| {
2019                    use net_backend_resources::consomme::HostPortProtocol;
2020                    net_backend_resources::consomme::HostPortConfig {
2021                        protocol: match fwd.protocol {
2022                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
2023                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
2024                        },
2025                        host_address: fwd
2026                            .host_address
2027                            .map(net_backend_resources::consomme::HostIpAddress::from),
2028                        host_port: net_backend_resources::consomme::HostPort::Fixed(fwd.host_port),
2029                        guest_port: fwd.guest_port,
2030                    }
2031                })
2032                .collect();
2033            net_backend_resources::consomme::ConsommeHandle {
2034                cidr: cidr.clone(),
2035                ports,
2036            }
2037            .into_resource()
2038        }
2039        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
2040        EndpointConfigCli::Dio { id } => {
2041            #[cfg(windows)]
2042            {
2043                let (port_id, port) = new_switch_port(id.as_deref())?;
2044                resources.switch_ports.push(port);
2045                net_backend_resources::dio::WindowsDirectIoHandle {
2046                    switch_port_id: net_backend_resources::dio::SwitchPortId {
2047                        switch: port_id.switch,
2048                        port: port_id.port,
2049                    },
2050                }
2051                .into_resource()
2052            }
2053
2054            #[cfg(not(windows))]
2055            {
2056                let _ = id;
2057                bail!("cannot use dio on non-windows platforms")
2058            }
2059        }
2060        EndpointConfigCli::Tap { name } => {
2061            #[cfg(target_os = "linux")]
2062            {
2063                let fd = net_tap::tap::open_tap(name)
2064                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
2065                net_backend_resources::tap::TapHandle { fd }.into_resource()
2066            }
2067
2068            #[cfg(not(target_os = "linux"))]
2069            {
2070                let _ = name;
2071                bail!("TAP backend is only supported on Linux")
2072            }
2073        }
2074    };
2075
2076    // Pick a random MAC address.
2077    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
2078    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
2079
2080    // Pick a fixed instance ID based on the index.
2081    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
2082    let instance_id = Guid {
2083        data1: *index as u32,
2084        ..BASE_INSTANCE_ID
2085    };
2086    *index += 1;
2087
2088    Ok(NicConfig {
2089        vtl: cli_cfg.vtl,
2090        instance_id,
2091        endpoint,
2092        mac_address: mac_address.into(),
2093        max_queues: cli_cfg.max_queues,
2094        pcie_port: cli_cfg.pcie_port.clone(),
2095    })
2096}
2097
/// A fully resolved NIC description, as produced by `parse_endpoint` from a
/// command-line NIC configuration.
#[derive(Debug)]
struct NicConfig {
    /// Device VTL for the NIC, copied from the command-line configuration.
    vtl: DeviceVtl,
    /// Instance ID for the NIC; `parse_endpoint` derives it from a fixed base
    /// GUID and the NIC's index.
    instance_id: Guid,
    /// MAC address for the NIC; `parse_endpoint` generates it with a fixed
    /// `00:15:5D` prefix and three random bytes.
    mac_address: MacAddress,
    /// Resource describing the network backend endpoint.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional queue limit, copied from the command-line configuration.
    max_queues: Option<u16>,
    /// Optional PCIe port name, copied from the command-line configuration.
    pcie_port: Option<String>,
}
2107
2108impl NicConfig {
2109    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
2110        (
2111            self.vtl,
2112            netvsp_resources::NetvspHandle {
2113                instance_id: self.instance_id,
2114                mac_address: self.mac_address,
2115                endpoint: self.endpoint,
2116                max_queues: self.max_queues,
2117            }
2118            .into_resource(),
2119        )
2120    }
2121}
2122
/// One entry produced while expanding a disk description: either a layer to
/// be stacked in a layered disk, or an already complete disk resource.
enum LayerOrDisk {
    /// A layer description, used as-is when building a layered disk.
    Layer(DiskLayerDescription),
    /// A complete disk resource. `disk_open` returns it directly when it is
    /// the only entry, and otherwise wraps it as a layer.
    Disk(Resource<DiskHandleKind>),
}
2127
2128async fn disk_open(
2129    disk_cli: &DiskCliKind,
2130    read_only: bool,
2131) -> anyhow::Result<Resource<DiskHandleKind>> {
2132    let mut layers = Vec::new();
2133    disk_open_inner(disk_cli, read_only, &mut layers).await?;
2134    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
2135        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
2136            unreachable!()
2137        };
2138        Ok(disk)
2139    } else {
2140        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
2141            layers: layers
2142                .into_iter()
2143                .map(|layer| match layer {
2144                    LayerOrDisk::Layer(layer) => layer,
2145                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
2146                        layer: DiskLayerHandle(disk).into_resource(),
2147                        read_cache: false,
2148                        write_through: false,
2149                    },
2150                })
2151                .collect(),
2152        }))
2153    }
2154}
2155
2156fn disk_open_inner<'a>(
2157    disk_cli: &'a DiskCliKind,
2158    read_only: bool,
2159    layers: &'a mut Vec<LayerOrDisk>,
2160) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
2161    Box::pin(async move {
2162        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
2163            LayerOrDisk::Layer(layer.into_resource().into())
2164        }
2165        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
2166            LayerOrDisk::Disk(disk.into_resource())
2167        }
2168        match disk_cli {
2169            &DiskCliKind::Memory(len) => {
2170                layers.push(layer(RamDiskLayerHandle {
2171                    len: Some(len),
2172                    sector_size: None,
2173                }));
2174            }
2175            DiskCliKind::File {
2176                path,
2177                create_with_len,
2178                direct,
2179            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
2180                create_disk_type(
2181                    path,
2182                    *size,
2183                    OpenDiskOptions {
2184                        read_only: false,
2185                        direct: *direct,
2186                    },
2187                )
2188                .with_context(|| format!("failed to create {}", path.display()))?
2189            } else {
2190                open_disk_type(
2191                    path,
2192                    OpenDiskOptions {
2193                        read_only,
2194                        direct: *direct,
2195                    },
2196                )
2197                .await
2198                .with_context(|| format!("failed to open {}", path.display()))?
2199            })),
2200            DiskCliKind::Blob { kind, url } => {
2201                layers.push(disk(disk_backend_resources::BlobDiskHandle {
2202                    url: url.to_owned(),
2203                    format: match kind {
2204                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
2205                        cli_args::BlobKind::Vhd1 => {
2206                            disk_backend_resources::BlobDiskFormat::FixedVhd1
2207                        }
2208                    },
2209                }))
2210            }
2211            DiskCliKind::MemoryDiff(inner) => {
2212                layers.push(layer(RamDiskLayerHandle {
2213                    len: None,
2214                    sector_size: None,
2215                }));
2216                disk_open_inner(inner, true, layers).await?;
2217            }
2218            DiskCliKind::PersistentReservationsWrapper(inner) => {
2219                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
2220                    disk_open(inner, read_only).await?,
2221                )))
2222            }
2223            DiskCliKind::DelayDiskWrapper {
2224                delay_ms,
2225                disk: inner,
2226            } => layers.push(disk(DelayDiskHandle {
2227                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
2228                disk: disk_open(inner, read_only).await?,
2229            })),
2230            DiskCliKind::Crypt {
2231                disk: inner,
2232                cipher,
2233                key_file,
2234            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
2235                disk: disk_open(inner, read_only).await?,
2236                cipher: match cipher {
2237                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
2238                },
2239                key: fs_err::read(key_file).context("failed to read key file")?,
2240            })),
2241            DiskCliKind::Sqlite {
2242                path,
2243                create_with_len,
2244            } => {
2245                // FUTURE: this code should be responsible for opening
2246                // file-handle(s) itself, and passing them into sqlite via a custom
2247                // vfs. For now though - simply check if the file exists or not, and
2248                // perform early validation of filesystem-level create options.
2249                match (create_with_len.is_some(), path.exists()) {
2250                    (true, true) => anyhow::bail!(
2251                        "cannot create new sqlite disk at {} - file already exists",
2252                        path.display()
2253                    ),
2254                    (false, false) => anyhow::bail!(
2255                        "cannot open sqlite disk at {} - file not found",
2256                        path.display()
2257                    ),
2258                    _ => {}
2259                }
2260
2261                layers.push(layer(SqliteDiskLayerHandle {
2262                    dbhd_path: path.display().to_string(),
2263                    format_dbhd: create_with_len.map(|len| {
2264                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2265                            logically_read_only: false,
2266                            len: Some(len),
2267                        }
2268                    }),
2269                }));
2270            }
2271            DiskCliKind::SqliteDiff { path, create, disk } => {
2272                // FUTURE: this code should be responsible for opening
2273                // file-handle(s) itself, and passing them into sqlite via a custom
2274                // vfs. For now though - simply check if the file exists or not, and
2275                // perform early validation of filesystem-level create options.
2276                match (create, path.exists()) {
2277                    (true, true) => anyhow::bail!(
2278                        "cannot create new sqlite disk at {} - file already exists",
2279                        path.display()
2280                    ),
2281                    (false, false) => anyhow::bail!(
2282                        "cannot open sqlite disk at {} - file not found",
2283                        path.display()
2284                    ),
2285                    _ => {}
2286                }
2287
2288                layers.push(layer(SqliteDiskLayerHandle {
2289                    dbhd_path: path.display().to_string(),
2290                    format_dbhd: create.then_some(
2291                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2292                            logically_read_only: false,
2293                            len: None,
2294                        },
2295                    ),
2296                }));
2297                disk_open_inner(disk, true, layers).await?;
2298            }
2299            DiskCliKind::AutoCacheSqlite {
2300                cache_path,
2301                key,
2302                disk,
2303            } => {
2304                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2305                    read_cache: true,
2306                    write_through: false,
2307                    layer: SqliteAutoCacheDiskLayerHandle {
2308                        cache_path: cache_path.clone(),
2309                        cache_key: key.clone(),
2310                    }
2311                    .into_resource(),
2312                }));
2313                disk_open_inner(disk, read_only, layers).await?;
2314            }
2315        }
2316        Ok(())
2317    })
2318}
2319
2320/// Get the system page size.
2321pub(crate) fn system_page_size() -> u32 {
2322    sparse_mmap::SparseMapping::page_size() as u32
2323}
2324
/// Name of the guest architecture this binary was built for, selected at
/// compile time from the `guest_arch` cfg: `"x86_64"` when
/// `guest_arch = "x86_64"`, and `"aarch64"` otherwise.
pub(crate) const GUEST_ARCH: &str = if cfg!(not(guest_arch = "x86_64")) {
    "aarch64"
} else {
    "x86_64"
};
2331
2332/// Open a snapshot directory and validate it against the current VM config.
2333/// Returns the shared memory fd (from memory.bin) and the saved device state.
2334fn prepare_snapshot_restore(
2335    snapshot_dir: &Path,
2336    opt: &Options,
2337) -> anyhow::Result<(
2338    openvmm_defs::worker::SharedMemoryFd,
2339    mesh::payload::message::ProtobufMessage,
2340)> {
2341    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2342
2343    // Validate manifest against current VM config.
2344    openvmm_helpers::snapshot::validate_manifest(
2345        &manifest,
2346        GUEST_ARCH,
2347        opt.memory_size(),
2348        opt.processors,
2349        system_page_size(),
2350    )?;
2351
2352    // Open memory.bin (existing file, no create, no resize).
2353    let memory_file = fs_err::OpenOptions::new()
2354        .read(true)
2355        .write(true)
2356        .open(snapshot_dir.join("memory.bin"))?;
2357
2358    // Validate file size matches expected memory size.
2359    let file_size = memory_file.metadata()?.len();
2360    if file_size != manifest.memory_size_bytes {
2361        anyhow::bail!(
2362            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2363            manifest.memory_size_bytes,
2364        );
2365    }
2366
2367    let shared_memory_fd =
2368        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2369
2370    // Reconstruct ProtobufMessage from the saved state bytes.
2371    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2372    // back to ProtobufMessage.
2373    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2374        .context("failed to decode saved state from snapshot")?;
2375
2376    Ok((shared_memory_fd, state_msg))
2377}
2378
2379fn do_main(pidfile_guard: &mut Option<pidfile::Pidfile>) -> anyhow::Result<i32> {
2380    #[cfg(windows)]
2381    pal::windows::disable_hard_error_dialog();
2382
2383    tracing_init::enable_tracing()?;
2384
2385    // Try to run as a worker host.
2386    // On success the worker runs to completion and then exits the process (does
2387    // not return). Any worker host setup errors are return and bubbled up.
2388    meshworker::run_vmm_mesh_host()?;
2389
2390    let opt = cli_args::parse_options();
2391    if let Some(path) = &opt.write_saved_state_proto {
2392        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2393            .write_to_path(path)
2394            .context("failed to write protobuf descriptors")?;
2395        return Ok(0);
2396    }
2397
2398    if let Some(ref path) = opt.pidfile {
2399        *pidfile_guard = Some(pidfile::Pidfile::new(path).context("failed to create pidfile")?);
2400    }
2401
2402    if let Some(path) = opt.relay_console_path {
2403        let console_title = opt.relay_console_title.unwrap_or_default();
2404        return console_relay::relay_console(&path, console_title.as_str()).map(|()| 0);
2405    }
2406
2407    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2408    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2409        return block_on(async {
2410            let _ = std::fs::remove_file(path);
2411            let listener =
2412                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2413
2414            let transport = if opt.ttrpc.is_some() {
2415                ttrpc::RpcTransport::Ttrpc
2416            } else {
2417                ttrpc::RpcTransport::Grpc
2418            };
2419
2420            // This is a local launch
2421            let mut handle =
2422                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2423                    listener,
2424                    transport,
2425                })
2426                .await?;
2427
2428            tracing::info!(%transport, path = %path.display(), "listening");
2429
2430            // Signal the the parent process that the server is ready.
2431            pal::close_stdout().context("failed to close stdout")?;
2432
2433            handle.join().await?;
2434
2435            Ok(0)
2436        });
2437    }
2438
2439    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2440}
2441
2442fn new_hvsock_service_id(port: u32) -> Guid {
2443    // This GUID is an embedding of the AF_VSOCK port into an
2444    // AF_HYPERV service ID.
2445    Guid {
2446        data1: port,
2447        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2448    }
2449}
2450
2451async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<i32> {
2452    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2453    let result = run_control_inner(driver, &mut mesh, opt).await;
2454    // If setup failed before the mesh was handed to the controller, shut it
2455    // down so the child host process exits cleanly without noisy logs.
2456    if let Some(mesh) = mesh {
2457        mesh.shutdown().await;
2458    }
2459    result
2460}
2461
2462async fn run_control_inner(
2463    driver: &DefaultDriver,
2464    mesh_slot: &mut Option<VmmMesh>,
2465    opt: Options,
2466) -> anyhow::Result<i32> {
2467    let mesh = mesh_slot.as_ref().unwrap();
2468    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2469
2470    let mut vnc_worker = None;
2471    if opt.gfx || opt.vnc.vnc {
2472        // Parse the listen address. Try as a full SocketAddr (host:port) first;
2473        // fall back to a bare IP, using the configured port.
2474        let addr: std::net::SocketAddr = if let Ok(sa) =
2475            opt.vnc.vnc_listen.parse::<std::net::SocketAddr>()
2476        {
2477            sa
2478        } else {
2479            let ip: std::net::IpAddr = opt.vnc.vnc_listen.parse().with_context(|| {
2480                format!(
2481                    "invalid VNC listen address: {} (expected IP address or socket address like [::1]:5900)",
2482                    opt.vnc.vnc_listen
2483                )
2484            })?;
2485            std::net::SocketAddr::new(ip, opt.vnc.vnc_port)
2486        };
2487
2488        let socket = socket2::Socket::new(
2489            if addr.is_ipv6() {
2490                socket2::Domain::IPV6
2491            } else {
2492                socket2::Domain::IPV4
2493            },
2494            socket2::Type::STREAM,
2495            None,
2496        )
2497        .with_context(|| format!("creating VNC socket for {}", addr))?;
2498
2499        if addr.is_ipv6() {
2500            if let Err(e) = socket.set_only_v6(false) {
2501                tracing::warn!(
2502                    error = %e,
2503                    "failed to enable dual-stack on IPv6 VNC socket, IPv4 clients may not be able to connect"
2504                );
2505            }
2506        }
2507        socket.set_reuse_address(true)?;
2508        socket
2509            .bind(&addr.into())
2510            .with_context(|| format!("binding VNC socket to {}", addr))?;
2511        socket
2512            .listen(128)
2513            .with_context(|| format!("listening on VNC socket {}", addr))?;
2514        let listener: TcpListener = socket.into();
2515
2516        if !addr.ip().is_loopback() {
2517            tracing::warn!(
2518                address = %addr,
2519                "VNC server listening on non-localhost address without authentication"
2520            );
2521        }
2522
2523        let input_send = vm_config.input.sender();
2524        let framebuffer = resources
2525            .framebuffer_access
2526            .take()
2527            .expect("synth video enabled");
2528
2529        let vnc_host = mesh
2530            .make_host("vnc", None)
2531            .await
2532            .context("spawning vnc process failed")?;
2533
2534        vnc_worker = Some(
2535            vnc_host
2536                .launch_worker(
2537                    vnc_worker_defs::VNC_WORKER_TCP,
2538                    VncParameters {
2539                        listener,
2540                        framebuffer,
2541                        input_send,
2542                        dirty_recv: resources.dirty_rect_recv.take(),
2543                        max_clients: opt.vnc.vnc_max_clients,
2544                        evict_oldest: opt.vnc.vnc_evict_oldest,
2545                    },
2546                )
2547                .await?,
2548        )
2549    }
2550
2551    // spin up the debug worker
2552    let gdb_worker = if let Some(port) = opt.gdb {
2553        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2554            .with_context(|| format!("binding to gdb port {}", port))?;
2555
2556        let (req_tx, req_rx) = mesh::channel();
2557        vm_config.debugger_rpc = Some(req_rx);
2558
2559        let gdb_host = mesh
2560            .make_host("gdb", None)
2561            .await
2562            .context("spawning gdbstub process failed")?;
2563
2564        Some(
2565            gdb_host
2566                .launch_worker(
2567                    debug_worker_defs::DEBUGGER_WORKER,
2568                    debug_worker_defs::DebuggerParameters {
2569                        listener,
2570                        req_chan: req_tx,
2571                        vp_count: vm_config.processor_topology.proc_count,
2572                        target_arch: if cfg!(guest_arch = "x86_64") {
2573                            debug_worker_defs::TargetArch::X86_64
2574                        } else {
2575                            debug_worker_defs::TargetArch::Aarch64
2576                        },
2577                    },
2578                )
2579                .await
2580                .context("failed to launch gdbstub worker")?,
2581        )
2582    } else {
2583        None
2584    };
2585
2586    // spin up the VM
2587    let (vm_rpc, rpc_recv) = mesh::channel();
2588    let (notify_send, notify_recv) = mesh::channel();
2589    let vm_worker = {
2590        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2591
2592        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2593            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2594            (Some(fd), Some(state_msg))
2595        } else {
2596            let shared_memory = opt
2597                .memory_backing_file()
2598                .map(|path| {
2599                    openvmm_helpers::shared_memory::open_memory_backing_file(
2600                        path,
2601                        opt.memory_size(),
2602                    )
2603                })
2604                .transpose()?;
2605            (shared_memory, None)
2606        };
2607
2608        let params = VmWorkerParameters {
2609            hypervisor: match &opt.hypervisor {
2610                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2611                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2612            },
2613            cfg: vm_config,
2614            saved_state,
2615            shared_memory,
2616            rpc: rpc_recv,
2617            notify: notify_send,
2618        };
2619        vm_host
2620            .launch_worker(VM_WORKER, params)
2621            .await
2622            .context("failed to launch vm worker")?
2623    };
2624
2625    if opt.restore_snapshot.is_some() {
2626        tracing::info!("restoring VM from snapshot");
2627    }
2628
2629    if !opt.paused {
2630        vm_rpc.call(VmRpc::Resume, ()).await?;
2631    }
2632
2633    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2634        driver.clone(),
2635        DiagDialer {
2636            driver: driver.clone(),
2637            vm_rpc: vm_rpc.clone(),
2638            openhcl_vtl: if opt.vtl2 {
2639                DeviceVtl::Vtl2
2640            } else {
2641                DeviceVtl::Vtl0
2642            },
2643        },
2644    ));
2645
2646    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2647
2648    // Create channels between the REPL and VmController.
2649    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2650    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2651
2652    let has_vtl2 = resources.vtl2_settings.is_some();
2653
2654    // Build the VmController with exclusive resources.
2655    let controller = vm_controller::VmController {
2656        mesh: mesh_slot.take().unwrap(),
2657        vm_worker,
2658        vnc_worker,
2659        gdb_worker,
2660        diag_inspector: Some(diag_inspector),
2661        vtl2_settings: resources.vtl2_settings,
2662        ged_rpc: resources.ged_rpc.clone(),
2663        vm_rpc: vm_rpc.clone(),
2664        paravisor_diag: Some(paravisor_diag),
2665        igvm_path: opt.igvm.clone(),
2666        memory_backing_file: opt.memory_backing_file().cloned(),
2667        memory: opt.memory_size(),
2668        processors: opt.processors,
2669        log_file: opt.log_file.clone(),
2670        guest_power_actions: vm_controller::GuestPowerActions {
2671            shutdown: opt.guest_shutdown_action,
2672            reset: opt.guest_reset_action,
2673            crash: opt.guest_crash_action,
2674            watchdog: opt.guest_watchdog_action,
2675        },
2676    };
2677
2678    // Spawn the VmController as a task.
2679    let controller_task = driver.spawn(
2680        "vm-controller",
2681        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2682    );
2683
2684    // Run the REPL with shareable resources.
2685    let repl_result = repl::run_repl(
2686        driver,
2687        repl::ReplResources {
2688            vm_rpc,
2689            vm_controller: vm_controller_send,
2690            vm_controller_events: vm_controller_event_recv,
2691            scsi_rpc: resources.scsi_rpc,
2692            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2693            shutdown_ic: resources.shutdown_ic,
2694            kvp_ic: resources.kvp_ic,
2695            console_in: resources.console_in,
2696            has_vtl2,
2697        },
2698    )
2699    .await;
2700
2701    // Wait for the controller task to finish (it stops the VM worker and
2702    // shuts down the mesh).
2703    controller_task.await;
2704
2705    // run_repl returns the exit status: the code the guest drove via an opt-in
2706    // exit (VmControllerEvent::ExitRequested), or 0 when the VM stopped normally.
2707    repl_result
2708}
2709
2710struct DiagDialer {
2711    driver: DefaultDriver,
2712    vm_rpc: mesh::Sender<VmRpc>,
2713    openhcl_vtl: DeviceVtl,
2714}
2715
2716impl mesh_rpc::client::Dial for DiagDialer {
2717    type Stream = PolledSocket<unix_socket::UnixStream>;
2718
2719    async fn dial(&mut self) -> io::Result<Self::Stream> {
2720        let service_id = new_hvsock_service_id(1);
2721        let socket = self
2722            .vm_rpc
2723            .call_failable(
2724                VmRpc::ConnectHvsock,
2725                (
2726                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2727                    service_id,
2728                    self.openhcl_vtl,
2729                ),
2730            )
2731            .await
2732            .map_err(io::Error::other)?;
2733
2734        PolledSocket::new(&self.driver, socket)
2735    }
2736}
2737
2738/// An object that implements [`InspectMut`] by sending an inspect request over
2739/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
2740/// the response back into the inspect tree.
2741///
2742/// This also caches the TTRPC connection to the guest so that only the first
2743/// inspect request has to wait for the connection to be established.
2744pub(crate) struct DiagInspector(DiagInspectorInner);
2745
2746enum DiagInspectorInner {
2747    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
2748    Started {
2749        send: mesh::Sender<inspect::Deferred>,
2750        _task: Task<()>,
2751    },
2752    Invalid,
2753}
2754
2755impl DiagInspector {
2756    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2757        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2758    }
2759
2760    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2761        loop {
2762            match self.0 {
2763                DiagInspectorInner::NotStarted { .. } => {
2764                    let DiagInspectorInner::NotStarted(driver, client) =
2765                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2766                    else {
2767                        unreachable!()
2768                    };
2769                    let (send, recv) = mesh::channel();
2770                    let task = driver.clone().spawn("diag-inspect", async move {
2771                        Self::run(&client, recv).await
2772                    });
2773
2774                    self.0 = DiagInspectorInner::Started { send, _task: task };
2775                }
2776                DiagInspectorInner::Started { ref send, .. } => break send,
2777                DiagInspectorInner::Invalid => unreachable!(),
2778            }
2779        }
2780    }
2781
2782    async fn run(
2783        diag_client: &diag_client::DiagClient,
2784        mut recv: mesh::Receiver<inspect::Deferred>,
2785    ) {
2786        while let Some(deferred) = recv.next().await {
2787            let info = deferred.external_request();
2788            let result = match info.request_type {
2789                inspect::ExternalRequestType::Inspect { depth } => {
2790                    if depth == 0 {
2791                        Ok(inspect::Node::Unevaluated)
2792                    } else {
2793                        // TODO: Support taking timeouts from the command line
2794                        diag_client
2795                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2796                            .await
2797                    }
2798                }
2799                inspect::ExternalRequestType::Update { value } => {
2800                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2801                }
2802            };
2803            deferred.complete_external(
2804                result.unwrap_or_else(|err| {
2805                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2806                }),
2807                inspect::SensitivityLevel::Unspecified,
2808            )
2809        }
2810    }
2811}
2812
2813impl InspectMut for DiagInspector {
2814    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2815        self.start().send(req.defer());
2816    }
2817}