Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use cli_args::DiskCliKind;
31use cli_args::EfiDiagnosticsLogLevelCli;
32use cli_args::EndpointConfigCli;
33use cli_args::NicConfigCli;
34use cli_args::ProvisionVmgs;
35use cli_args::SerialConfigCli;
36use cli_args::UefiConsoleModeCli;
37use cli_args::VirtioBusCli;
38use cli_args::VmgsCli;
39use crash_dump::spawn_dump_handler;
40use cxl_spec::test::CxlTestDeviceHandle;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::StreamExt;
53use futures::executor::block_on;
54use futures::io::AllowStdIo;
55use gdma_resources::GdmaDeviceHandle;
56use gdma_resources::VportDefinition;
57use guid::Guid;
58use input_core::MultiplexedInputHandle;
59use inspect::InspectMut;
60use io::Read;
61use mesh::CancelContext;
62use mesh::CellUpdater;
63use mesh::rpc::RpcSend;
64use meshworker::VmmMesh;
65use net_backend_resources::mac_address::MacAddress;
66use nvme_resources::NvmeControllerRequest;
67use openvmm_defs::config::Config;
68use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
69use openvmm_defs::config::DeviceVtl;
70use openvmm_defs::config::EfiDiagnosticsLogLevelType;
71use openvmm_defs::config::HypervisorConfig;
72use openvmm_defs::config::LateMapVtl0MemoryPolicy;
73use openvmm_defs::config::LoadMode;
74use openvmm_defs::config::MemoryConfig;
75use openvmm_defs::config::PcieDeviceConfig;
76use openvmm_defs::config::PcieMmioRangeConfig;
77use openvmm_defs::config::PcieRootComplexConfig;
78use openvmm_defs::config::PcieRootPortConfig;
79use openvmm_defs::config::PcieSwitchConfig;
80use openvmm_defs::config::ProcessorTopologyConfig;
81use openvmm_defs::config::RootComplexCxlConfig;
82use openvmm_defs::config::SerialInformation;
83use openvmm_defs::config::VirtioBus;
84use openvmm_defs::config::VmbusConfig;
85use openvmm_defs::config::VpciDeviceConfig;
86use openvmm_defs::config::Vtl2Config;
87use openvmm_defs::rpc::VmRpc;
88use openvmm_defs::worker::VM_WORKER;
89use openvmm_defs::worker::VmWorkerParameters;
90use openvmm_helpers::disk::OpenDiskOptions;
91use openvmm_helpers::disk::create_disk_type;
92use openvmm_helpers::disk::open_disk_type;
93use pal_async::DefaultDriver;
94use pal_async::DefaultPool;
95use pal_async::socket::PolledSocket;
96use pal_async::task::Spawn;
97use pal_async::task::Task;
98use serial_16550_resources::ComPort;
99use serial_core::resources::DisconnectedSerialBackendHandle;
100use sparse_mmap::alloc_shared_memory;
101use std::cell::RefCell;
102use std::collections::BTreeMap;
103use std::fmt::Write as _;
104use std::future::pending;
105use std::io;
106#[cfg(unix)]
107use std::io::IsTerminal;
108use std::io::Write;
109use std::net::TcpListener;
110use std::path::Path;
111use std::path::PathBuf;
112use std::sync::Arc;
113use std::thread;
114use std::time::Duration;
115use storvsp_resources::ScsiControllerRequest;
116use tpm_resources::TpmDeviceHandle;
117use tpm_resources::TpmRegisterLayout;
118use uidevices_resources::SynthKeyboardHandle;
119use uidevices_resources::SynthMouseHandle;
120use uidevices_resources::SynthVideoHandle;
121use video_core::SharedFramebufferHandle;
122use virtio_resources::VirtioPciDeviceHandle;
123use vm_manifest_builder::BaseChipsetType;
124use vm_manifest_builder::MachineArch;
125use vm_manifest_builder::VmChipsetResult;
126use vm_manifest_builder::VmManifestBuilder;
127use vm_resource::IntoResource;
128use vm_resource::Resource;
129use vm_resource::kind::DiskHandleKind;
130use vm_resource::kind::DiskLayerHandleKind;
131use vm_resource::kind::NetEndpointHandleKind;
132use vm_resource::kind::VirtioDeviceHandle;
133use vm_resource::kind::VmbusDeviceHandleKind;
134use vmbus_serial_resources::VmbusSerialDeviceHandle;
135use vmbus_serial_resources::VmbusSerialPort;
136use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
137use vmgs_resources::GuestStateEncryptionPolicy;
138use vmgs_resources::VmgsDisk;
139use vmgs_resources::VmgsFileHandle;
140use vmgs_resources::VmgsResource;
141use vmotherboard::ChipsetDeviceHandle;
142use vnc_worker_defs::VncParameters;
143
144/// RAII guard that removes the pidfile when dropped. Ensures the pidfile is
145/// cleaned up even if [`do_main`] panics.
146struct PidfileGuard(Option<PathBuf>);
147
148impl Drop for PidfileGuard {
149    fn drop(&mut self) {
150        if let Some(path) = &self.0 {
151            let _ = fs_err::remove_file(path);
152        }
153    }
154}
155
156pub fn openvmm_main() {
157    // Save the current state of the terminal so we can restore it back to
158    // normal before exiting.
159    #[cfg(unix)]
160    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
161
162    let mut pidfile_guard = PidfileGuard(None);
163    let exit_code = match do_main(&mut pidfile_guard.0) {
164        Ok(_) => 0,
165        Err(err) => {
166            eprintln!("fatal error: {:?}", err);
167            1
168        }
169    };
170
171    // Restore the terminal to its initial state.
172    #[cfg(unix)]
173    if let Some(orig_termios) = orig_termios {
174        term::set_termios(orig_termios);
175    }
176
177    // Clean up the pidfile before terminating, since pal::process::terminate
178    // skips destructors.
179    drop(pidfile_guard);
180
181    // Terminate the process immediately without graceful shutdown of DLLs or
182    // C++ destructors or anything like that. This is all unnecessary and saves
183    // time on Windows.
184    //
185    // Do flush stdout, though, since there may be buffered data.
186    let _ = io::stdout().flush();
187    pal::process::terminate(exit_code);
188}
189
/// Side-band handles produced while building the VM configuration.
///
/// `vm_config_from_command_line` creates a default (all-empty) instance, fills
/// in fields as devices are configured, and returns it alongside the `Config`.
#[derive(Default)]
struct VmResources {
    /// Write half of the serial port bound to the interactive console, set
    /// when a port is configured with `SerialConfigCli::Console`.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Framebuffer access object, if any.
    /// NOTE(review): where this is populated is not visible in this part of the file.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` messages (presumably to the shutdown IC — confirm).
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` messages (presumably to the KVP IC — confirm).
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest` messages.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest` messages (presumably for the
    /// VTL2-assigned NVMe controller — confirm).
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for `GuestEmulationRequest` messages.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings document, if one was built.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Receives dirty rectangles from the synthetic video device for the VNC worker.
    dirty_rect_recv: Option<mesh::Receiver<Vec<video_core::DirtyRect>>>,
    /// Switch ports returned by `new_switch_port`, one per `opt.kernel_vmnic`
    /// entry (presumably held here to keep the ports alive — confirm).
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
205
/// Records which serial device has claimed the process console.
///
/// The `setup_serial` closure in `vm_config_from_command_line` stores one of
/// these the first time a port is configured as `SerialConfigCli::Console`,
/// and fails with "console already set by ..." if a second port asks for it.
struct ConsoleState<'a> {
    /// Device name supplied by the caller of `setup_serial` (e.g. `"ttyS0"` or
    /// `"hvc0"`); reported in the duplicate-console error message.
    device: &'a str,
    /// Write half of the console's serial connection; later moved into
    /// `VmResources::console_in`.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
210
211/// Build a flat list of switches with their parent port assignments.
212///
213/// This function converts hierarchical CLI switch definitions into a flat list
214/// where each switch specifies its parent port directly.
215fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
216    all_switches
217        .iter()
218        .map(|switch_cli| PcieSwitchConfig {
219            name: switch_cli.name.clone(),
220            num_downstream_ports: switch_cli.num_downstream_ports,
221            parent_port: switch_cli.port_name.clone(),
222            hotplug: switch_cli.hotplug,
223            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
224        })
225        .collect()
226}
227
228async fn vm_config_from_command_line(
229    spawner: impl Spawn,
230    mesh: &VmmMesh,
231    opt: &Options,
232) -> anyhow::Result<(Config, VmResources)> {
233    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
234    // Ensure the serial driver stays alive with no tasks.
235    serial_driver.spawn("leak", pending::<()>()).detach();
236
237    let openhcl_vtl = if opt.vtl2 {
238        DeviceVtl::Vtl2
239    } else {
240        DeviceVtl::Vtl0
241    };
242
243    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
244    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
245        Ok(match cli_cfg {
246            SerialConfigCli::Console => {
247                if let Some(console_state) = console_state.borrow().as_ref() {
248                    bail!("console already set by {}", console_state.device);
249                }
250                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
251                let (serial_read, serial_write) = AsyncReadExt::split(serial);
252                *console_state.borrow_mut() = Some(ConsoleState {
253                    device,
254                    input: Box::new(serial_write),
255                });
256                thread::Builder::new()
257                    .name(name.to_owned())
258                    .spawn(move || {
259                        let _ = block_on(futures::io::copy(
260                            serial_read,
261                            &mut AllowStdIo::new(term::raw_stdout()),
262                        ));
263                    })
264                    .unwrap();
265                Some(config)
266            }
267            SerialConfigCli::Stderr => {
268                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
269                thread::Builder::new()
270                    .name(name.to_owned())
271                    .spawn(move || {
272                        let _ = block_on(futures::io::copy(
273                            serial,
274                            &mut AllowStdIo::new(term::raw_stderr()),
275                        ));
276                    })
277                    .unwrap();
278                Some(config)
279            }
280            SerialConfigCli::File(path) => {
281                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
282                let file = fs_err::File::create(path).context("failed to create file")?;
283
284                thread::Builder::new()
285                    .name(name.to_owned())
286                    .spawn(move || {
287                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
288                    })
289                    .unwrap();
290                Some(config)
291            }
292            SerialConfigCli::None => None,
293            SerialConfigCli::Pipe(path) => {
294                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
295            }
296            SerialConfigCli::Tcp(addr) => {
297                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
298            }
299            SerialConfigCli::NewConsole(app, window_title) => {
300                let path = console_relay::random_console_path();
301                let config =
302                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
303                let window_title =
304                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
305
306                console_relay::launch_console(
307                    app.or_else(openvmm_terminal_app).as_deref(),
308                    &path,
309                    ConsoleLaunchOptions {
310                        window_title: Some(window_title),
311                    },
312                )
313                .context("failed to launch console")?;
314
315                Some(config)
316            }
317        })
318    };
319
320    let mut vmbus_devices = Vec::new();
321
322    let serial0_cfg = setup_serial(
323        "com1",
324        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
325        if cfg!(guest_arch = "x86_64") {
326            "ttyS0"
327        } else {
328            "ttyAMA0"
329        },
330    )?;
331    let serial1_cfg = setup_serial(
332        "com2",
333        opt.com2.clone().unwrap_or(SerialConfigCli::None),
334        if cfg!(guest_arch = "x86_64") {
335            "ttyS1"
336        } else {
337            "ttyAMA1"
338        },
339    )?;
340    let serial2_cfg = setup_serial(
341        "com3",
342        opt.com3.clone().unwrap_or(SerialConfigCli::None),
343        if cfg!(guest_arch = "x86_64") {
344            "ttyS2"
345        } else {
346            "ttyAMA2"
347        },
348    )?;
349    let serial3_cfg = setup_serial(
350        "com4",
351        opt.com4.clone().unwrap_or(SerialConfigCli::None),
352        if cfg!(guest_arch = "x86_64") {
353            "ttyS3"
354        } else {
355            "ttyAMA3"
356        },
357    )?;
358    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
359        "vmbus_com1",
360        opt.vmbus_com1_serial
361            .clone()
362            .unwrap_or(SerialConfigCli::None),
363        "vmbus_com1",
364    )? {
365        vmbus_devices.push((
366            openhcl_vtl,
367            VmbusSerialDeviceHandle {
368                port: VmbusSerialPort::Com1,
369                backend: vmbus_com1_cfg,
370            }
371            .into_resource(),
372        ));
373        true
374    } else {
375        false
376    };
377    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
378        "vmbus_com2",
379        opt.vmbus_com2_serial
380            .clone()
381            .unwrap_or(SerialConfigCli::None),
382        "vmbus_com2",
383    )? {
384        vmbus_devices.push((
385            openhcl_vtl,
386            VmbusSerialDeviceHandle {
387                port: VmbusSerialPort::Com2,
388                backend: vmbus_com2_cfg,
389            }
390            .into_resource(),
391        ));
392        true
393    } else {
394        false
395    };
396    let debugcon_cfg = setup_serial(
397        "debugcon",
398        opt.debugcon
399            .clone()
400            .map(|cfg| cfg.serial)
401            .unwrap_or(SerialConfigCli::None),
402        "debugcon",
403    )?;
404
405    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
406        setup_serial("virtio-console", serial_cfg, "hvc0")?
407    } else {
408        None
409    };
410
411    let mut resources = VmResources::default();
412    let mut console_str = "";
413    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
414        resources.console_in = Some(input);
415        console_str = device;
416    }
417
418    if opt.shared_memory {
419        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
420    }
421    if opt.deprecated_prefetch {
422        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
423    }
424    if opt.deprecated_private_memory {
425        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
426    }
427    if opt.deprecated_thp {
428        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
429    }
430    if opt.deprecated_memory_backing_file.is_some() {
431        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
432    }
433
434    opt.validate_memory_options()?;
435
436    const MAX_PROCESSOR_COUNT: u32 = 1024;
437
438    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
439        bail!("invalid proc count: {}", opt.processors);
440    }
441
442    // Total SCSI channel count should not exceed the processor count
443    // (at most, one channel per VP).
444    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
445        bail!(
446            "invalid SCSI sub-channel count: requested {}, max {}",
447            opt.scsi_sub_channels,
448            MAX_PROCESSOR_COUNT - 1
449        );
450    }
451
452    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
453
454    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
455
456    // Register named controllers first, so that --disk on=<name>
457    // references can be resolved.
458    for ctrl in &opt.nvme_pci {
459        let transport = match &ctrl.transport {
460            cli_args::NvmeControllerTransport::Pcie(port) => {
461                storage_builder::NvmeControllerTransport::Pcie(port.clone())
462            }
463            cli_args::NvmeControllerTransport::Vpci(guid) => {
464                let guid = guid.unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
465                storage_builder::NvmeControllerTransport::Vpci(guid)
466            }
467        };
468        storage.add_nvme_controller(ctrl.id.clone(), ctrl.vtl, transport, None)?;
469    }
470
471    for ctrl in &opt.vmbus_scsi {
472        let instance_id = storage_builder::deterministic_guid(&ctrl.id);
473        storage.add_scsi_controller(ctrl.id.clone(), ctrl.vtl, instance_id, ctrl.sub_channels)?;
474    }
475
476    for ctrl in &opt.openhcl_controller {
477        let controller_type = match ctrl.controller_type {
478            cli_args::OpenhclControllerType::Scsi => storage_builder::OpenhclControllerType::Scsi,
479            cli_args::OpenhclControllerType::Nvme => storage_builder::OpenhclControllerType::Nvme,
480        };
481        let instance_id = ctrl
482            .guid
483            .unwrap_or_else(|| storage_builder::deterministic_guid(&ctrl.id));
484        storage.add_openhcl_controller(ctrl.id.clone(), controller_type, instance_id)?;
485    }
486
487    for &cli_args::DiskCli {
488        vtl,
489        ref kind,
490        read_only,
491        is_dvd,
492        underhill,
493        ref pcie_port,
494        ref controller,
495        nsid,
496        lun,
497        ref relay,
498    } in &opt.disk
499    {
500        if controller.is_none() && underhill.is_none() && relay.is_none() {
501            tracing::warn!(
502                "--disk without `on` is deprecated; \
503                 use --vmbus-scsi and --disk on=<name> instead"
504            );
505        }
506
507        let relay_target = relay
508            .as_ref()
509            .map(|(name, loc)| storage_builder::RelayTarget {
510                controller: name.clone(),
511                location: *loc,
512            });
513
514        let target = if let Some(name) = controller {
515            if pcie_port.is_some() {
516                anyhow::bail!("`on` is incompatible with `pcie_port` on `--disk`");
517            }
518            storage_builder::DiskLocation::Named {
519                controller: name.clone(),
520                nsid,
521                lun,
522            }
523        } else if pcie_port.is_some() {
524            anyhow::bail!("`--disk` is incompatible with `pcie_port` without `controller`");
525        } else {
526            storage_builder::DiskLocation::Scsi(None)
527        };
528
529        storage
530            .add(
531                vtl,
532                underhill,
533                relay_target,
534                target,
535                kind,
536                is_dvd,
537                read_only,
538            )
539            .await?;
540    }
541
542    for &cli_args::IdeDiskCli {
543        ref kind,
544        read_only,
545        channel,
546        device,
547        is_dvd,
548    } in &opt.ide
549    {
550        storage
551            .add(
552                DeviceVtl::Vtl0,
553                None,
554                None,
555                storage_builder::DiskLocation::Ide(channel, device),
556                kind,
557                is_dvd,
558                read_only,
559            )
560            .await?;
561    }
562
563    if !opt.nvme.is_empty() {
564        tracing::warn!("--nvme is deprecated; use --nvme-pci and --disk on=<name> instead");
565
566        // Pre-register implicit PCIe controllers for unique port names.
567        let mut registered_ports = std::collections::BTreeSet::new();
568        for disk in &opt.nvme {
569            if let Some(port) = &disk.pcie_port {
570                if registered_ports.insert(port.clone()) {
571                    storage.add_nvme_controller(
572                        port.clone(),
573                        DeviceVtl::Vtl0,
574                        storage_builder::NvmeControllerTransport::Pcie(port.clone()),
575                        None,
576                    ).with_context(|| format!(
577                        "legacy --nvme flag conflicts with an explicit controller named '{port}'; \
578                         use --nvme-pci and --disk on=<name> instead"
579                    ))?;
580                }
581            }
582        }
583    }
584
585    for &cli_args::DiskCli {
586        vtl,
587        ref kind,
588        read_only,
589        is_dvd,
590        underhill,
591        ref pcie_port,
592        controller: _,
593        nsid: _,
594        lun: _,
595        relay: _,
596    } in &opt.nvme
597    {
598        let target = if let Some(port) = pcie_port {
599            storage_builder::DiskLocation::Named {
600                controller: port.clone(),
601                nsid: None,
602                lun: None,
603            }
604        } else {
605            storage_builder::DiskLocation::Nvme(None)
606        };
607        storage
608            .add(vtl, underhill, None, target, kind, is_dvd, read_only)
609            .await?;
610    }
611
612    for &cli_args::DiskCli {
613        vtl,
614        ref kind,
615        read_only,
616        is_dvd,
617        ref underhill,
618        ref pcie_port,
619        controller: _,
620        nsid: _,
621        lun: _,
622        relay: _,
623    } in &opt.virtio_blk
624    {
625        if underhill.is_some() {
626            anyhow::bail!("underhill not supported with virtio-blk");
627        }
628        storage
629            .add(
630                vtl,
631                None,
632                None,
633                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
634                kind,
635                is_dvd,
636                read_only,
637            )
638            .await?;
639    }
640
641    let mut floppy_disks = Vec::new();
642    for disk in &opt.floppy {
643        let &cli_args::FloppyDiskCli {
644            ref kind,
645            read_only,
646        } = disk;
647        floppy_disks.push(FloppyDiskConfig {
648            disk_type: disk_open(kind, read_only).await?,
649            read_only,
650        });
651    }
652
653    let mut vpci_mana_nics = [(); 3].map(|()| None);
654    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
655    let mut underhill_nics = Vec::new();
656    let mut vpci_devices = Vec::new();
657
658    let mut nic_index = 0;
659    for cli_cfg in &opt.net {
660        if cli_cfg.pcie_port.is_some() {
661            anyhow::bail!("`--net` does not support PCIe");
662        }
663        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
664        if cli_cfg.underhill {
665            if !opt.no_alias_map {
666                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
667            }
668            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
669                let vpci_instance_id = Guid::new_random();
670                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
671                    instance_id: vpci_instance_id.to_string(),
672                    subordinate_instance_id: None,
673                    max_sub_channels: None,
674                });
675                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
676            });
677            mana.1.vports.push(VportDefinition {
678                mac_address: vport.mac_address,
679                endpoint: vport.endpoint,
680            });
681        } else {
682            vmbus_devices.push(vport.into_netvsp_handle());
683        }
684    }
685
686    if opt.nic {
687        let nic_config = parse_endpoint(
688            &NicConfigCli {
689                vtl: DeviceVtl::Vtl0,
690                endpoint: EndpointConfigCli::Consomme {
691                    cidr: None,
692                    host_fwd: Vec::new(),
693                },
694                max_queues: None,
695                underhill: false,
696                pcie_port: None,
697            },
698            &mut nic_index,
699            &mut resources,
700        )?;
701        vmbus_devices.push(nic_config.into_netvsp_handle());
702    }
703
704    // Build initial PCIe devices list from CLI options. Storage devices
705    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
706    let mut pcie_devices = Vec::new();
707    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
708        tracing::info!(
709            port_name = %cli_cfg.port_name,
710            socket_addr = ?cli_cfg.socket_addr,
711            "instantiating PCIe remote device"
712        );
713
714        // Generate a deterministic instance ID based on index
715        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
716            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
717        let instance_id = Guid {
718            data1: index as u32,
719            ..PCIE_REMOTE_BASE_INSTANCE_ID
720        };
721
722        pcie_devices.push(PcieDeviceConfig {
723            port_name: cli_cfg.port_name.clone(),
724            resource: pcie_remote_resources::PcieRemoteHandle {
725                instance_id,
726                socket_addr: cli_cfg.socket_addr.clone(),
727                hu: cli_cfg.hu,
728                controller: cli_cfg.controller,
729            }
730            .into_resource(),
731        });
732    }
733
734    #[cfg(windows)]
735    let mut kernel_vmnics = Vec::new();
736    #[cfg(windows)]
737    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
738        // Pick a random MAC address.
739        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
740        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
741
742        // Pick a fixed instance ID based on the index.
743        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
744        let instance_id = Guid {
745            data1: index as u32,
746            ..BASE_INSTANCE_ID
747        };
748
749        let switch_id = if switch_id == "default" {
750            None
751        } else {
752            Some(switch_id.as_str())
753        };
754        let (port_id, port) = new_switch_port(switch_id)?;
755        resources.switch_ports.push(port);
756
757        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
758            instance_id,
759            mac_address: mac_address.into(),
760            switch_port_id: port_id,
761        });
762    }
763
764    for vport in &opt.mana {
765        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
766        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
767            (vtl, None) => {
768                &mut vpci_mana_nics[vtl]
769                    .get_or_insert_with(|| {
770                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
771                    })
772                    .1
773                    .vports
774            }
775            (0, Some(pcie_port)) => {
776                &mut pcie_mana_nics
777                    .entry(pcie_port)
778                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
779                    .vports
780            }
781            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
782        };
783        vport_array.push(VportDefinition {
784            mac_address: vport.mac_address,
785            endpoint: vport.endpoint,
786        });
787    }
788
789    vpci_devices.extend(
790        vpci_mana_nics
791            .into_iter()
792            .enumerate()
793            .filter_map(|(vtl, nic)| {
794                nic.map(|(instance_id, handle)| VpciDeviceConfig {
795                    vtl: match vtl {
796                        0 => DeviceVtl::Vtl0,
797                        1 => DeviceVtl::Vtl1,
798                        2 => DeviceVtl::Vtl2,
799                        _ => unreachable!(),
800                    },
801                    instance_id,
802                    resource: handle.into_resource(),
803                })
804            }),
805    );
806
807    pcie_devices.extend(
808        pcie_mana_nics
809            .into_iter()
810            .map(|(pcie_port, handle)| PcieDeviceConfig {
811                port_name: pcie_port,
812                resource: handle.into_resource(),
813            }),
814    );
815
816    for cxl_test in &opt.cxl_test {
817        pcie_devices.push(PcieDeviceConfig {
818            port_name: cxl_test.pcie_port.clone(),
819            resource: CxlTestDeviceHandle {
820                hdm_size_bytes: cxl_test.hdm_size,
821            }
822            .into_resource(),
823        });
824    }
825
826    #[cfg(guest_arch = "aarch64")]
827    let arch = MachineArch::Aarch64;
828    #[cfg(guest_arch = "x86_64")]
829    let arch = MachineArch::X86_64;
830
831    let mut pcie_root_complexes = Vec::new();
832    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
833        let ports: Vec<PcieRootPortConfig> = opt
834            .pcie_root_port
835            .iter()
836            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
837            .map(|port_cli| PcieRootPortConfig {
838                name: port_cli.name.clone(),
839                hotplug: port_cli.hotplug,
840                acs_capabilities_supported: port_cli.acs_capabilities_supported,
841                cxl: port_cli.cxl,
842            })
843            .collect();
844
845        const ONE_MB: u64 = 1024 * 1024;
846        // Keep all PCI windows 1MB-granular to match layout and downstream placement rules.
847        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
848        let high_mmio_size = rc_cli
849            .high_mmio
850            .checked_next_multiple_of(ONE_MB)
851            .context("high mmio rounding error")?;
852
853        // Count CXL-capable ports under the root bus. If the root bus has CXL root ports, it needs CHBCR.
854        let cxl_port_count = ports.iter().filter(|port| port.cxl).count() as u64;
855
856        let cxl = if cxl_port_count != 0 {
857            Some(RootComplexCxlConfig {
858                hdm_size: rc_cli.hdm,
859                hdm_window_restrictions: rc_cli.hdm_window_restrictions.bits(),
860            })
861        } else {
862            None
863        };
864        pcie_root_complexes.push(PcieRootComplexConfig {
865            index: i as u32,
866            name: rc_cli.name.clone(),
867            segment: rc_cli.segment,
868            start_bus: rc_cli.start_bus,
869            end_bus: rc_cli.end_bus,
870            low_mmio: PcieMmioRangeConfig::Dynamic {
871                size: low_mmio_size,
872            },
873            high_mmio: PcieMmioRangeConfig::Dynamic {
874                size: high_mmio_size,
875            },
876            cxl,
877            ports,
878            #[cfg(guest_arch = "aarch64")]
879            iommu: opt
880                .smmu
881                .iter()
882                .any(|s| s == &rc_cli.name)
883                .then_some(openvmm_defs::config::PcieIommuConfig::Smmu),
884            #[cfg(guest_arch = "x86_64")]
885            iommu: opt
886                .amd_iommu
887                .iter()
888                .any(|s| s == &rc_cli.name)
889                .then_some(openvmm_defs::config::PcieIommuConfig::AmdVi),
890        });
891    }
892
893    // Validate that all --smmu / --amd-iommu names refer to known root complexes.
894    #[cfg(guest_arch = "aarch64")]
895    for name in &opt.smmu {
896        anyhow::ensure!(
897            pcie_root_complexes.iter().any(|rc| rc.name == *name),
898            "--smmu refers to unknown root complex '{name}'"
899        );
900    }
901    #[cfg(guest_arch = "x86_64")]
902    for name in &opt.amd_iommu {
903        anyhow::ensure!(
904            pcie_root_complexes.iter().any(|rc| rc.name == *name),
905            "--amd-iommu refers to unknown root complex '{name}'"
906        );
907    }
908
909    let pcie_switches = build_switch_list(&opt.pcie_switch);
910
911    #[cfg(target_os = "linux")]
912    let vfio_pcie_devices: Vec<PcieDeviceConfig> = {
913        use std::collections::HashMap;
914        use vm_resource::IntoResource;
915
916        // Process --iommu flags: open /dev/iommu for each declared context.
917        let mut iommu_map: HashMap<String, std::fs::File> = HashMap::new();
918        for iommu_cli in &opt.iommu {
919            anyhow::ensure!(
920                !iommu_map.contains_key(&iommu_cli.id),
921                "duplicate --iommu id={}",
922                iommu_cli.id
923            );
924            let file = std::fs::OpenOptions::new()
925                .read(true)
926                .write(true)
927                .open("/dev/iommu")
928                .context("failed to open /dev/iommu (is iommufd available?)")?;
929            iommu_map.insert(iommu_cli.id.clone(), file);
930        }
931
932        opt.vfio
933            .iter()
934            .map(|cli_cfg| {
935                let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
936
937                if let Some(iommu_id) = &cli_cfg.iommu {
938                    // cdev + iommufd path
939                    let iommufd = iommu_map.get(iommu_id).with_context(|| {
940                        format!(
941                            "--vfio device {} references iommu={iommu_id}, \
942                             but no --iommu id={iommu_id} was specified",
943                            cli_cfg.pci_id
944                        )
945                    })?;
946                    // Clone the iommufd fd so the per-iommu manager can own it.
947                    // The first device for a given iommu ID uses the cloned fd
948                    // to create the IoasManager; subsequent devices reuse the
949                    // existing manager and the cloned fd is dropped.
950                    let iommufd = iommufd.try_clone().with_context(|| {
951                        format!("failed to dup iommufd fd for iommu={iommu_id}")
952                    })?;
953
954                    // Open the cdev device node.
955                    let vfio_dev_dir = sysfs_path.join("vfio-dev");
956                    let entry = std::fs::read_dir(&vfio_dev_dir)
957                        .with_context(|| {
958                            format!(
959                                "failed to read {}: is {} bound to vfio-pci?",
960                                vfio_dev_dir.display(),
961                                cli_cfg.pci_id
962                            )
963                        })?
964                        .next()
965                        .context("no vfio-dev entry found")?
966                        .context("failed to read vfio-dev entry")?;
967                    let dev_path = Path::new("/dev/vfio/devices").join(entry.file_name());
968                    let cdev = std::fs::OpenOptions::new()
969                        .read(true)
970                        .write(true)
971                        .open(&dev_path)
972                        .with_context(|| format!("failed to open {}", dev_path.display()))?;
973
974                    Ok(PcieDeviceConfig {
975                        port_name: cli_cfg.port_name.clone(),
976                        resource: vfio_assigned_device_resources::VfioCdevDeviceHandle {
977                            pci_id: cli_cfg.pci_id.clone(),
978                            cdev,
979                            iommufd,
980                            iommu_id: iommu_id.clone(),
981                        }
982                        .into_resource(),
983                    })
984                } else {
985                    // Legacy group/container path
986                    let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
987                        .with_context(|| {
988                            format!("failed to read IOMMU group for {}", cli_cfg.pci_id)
989                        })?;
990                    let group_id: u64 = iommu_group_link
991                        .file_name()
992                        .and_then(|s| s.to_str())
993                        .context("invalid iommu_group symlink")?
994                        .parse()
995                        .context("failed to parse IOMMU group ID")?;
996                    let group = std::fs::OpenOptions::new()
997                        .read(true)
998                        .write(true)
999                        .open(format!("/dev/vfio/{group_id}"))
1000                        .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
1001
1002                    Ok(PcieDeviceConfig {
1003                        port_name: cli_cfg.port_name.clone(),
1004                        resource: vfio_assigned_device_resources::VfioDeviceHandle {
1005                            pci_id: cli_cfg.pci_id.clone(),
1006                            group,
1007                        }
1008                        .into_resource(),
1009                    })
1010                }
1011            })
1012            .collect::<anyhow::Result<Vec<_>>>()?
1013    };
1014
1015    #[cfg(windows)]
1016    let vpci_resources: Vec<_> = opt
1017        .device
1018        .iter()
1019        .map(|path| -> anyhow::Result<_> {
1020            Ok(virt_whp::device::DeviceHandle(
1021                whp::VpciResource::new(
1022                    None,
1023                    Default::default(),
1024                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
1025                )
1026                .with_context(|| format!("opening PCI device {}", path))?,
1027            ))
1028        })
1029        .collect::<Result<_, _>>()?;
1030
1031    // Create a vmbusproxy handle if needed by any devices.
1032    #[cfg(windows)]
1033    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
1034        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
1035    } else {
1036        None
1037    };
1038
1039    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc.vnc || opt.pcat {
1040        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
1041        let (fb, fba) =
1042            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
1043        resources.framebuffer_access = Some(fba);
1044        Some(fb)
1045    } else {
1046        None
1047    };
1048
1049    let load_mode;
1050    let with_hv;
1051
1052    let any_serial_configured = serial0_cfg.is_some()
1053        || serial1_cfg.is_some()
1054        || serial2_cfg.is_some()
1055        || serial3_cfg.is_some();
1056
1057    let has_com3 = serial2_cfg.is_some();
1058
1059    let mut chipset = VmManifestBuilder::new(
1060        if opt.igvm.is_some() {
1061            BaseChipsetType::HclHost
1062        } else if opt.pcat {
1063            BaseChipsetType::HypervGen1
1064        } else if opt.uefi {
1065            BaseChipsetType::HypervGen2Uefi
1066        } else if opt.hv {
1067            BaseChipsetType::HyperVGen2LinuxDirect
1068        } else {
1069            BaseChipsetType::UnenlightenedLinuxDirect
1070        },
1071        arch,
1072    );
1073
1074    if framebuffer.is_some() {
1075        chipset = chipset.with_framebuffer();
1076    }
1077    if opt.guest_watchdog {
1078        chipset = chipset.with_guest_watchdog();
1079    }
1080    if any_serial_configured {
1081        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
1082    }
1083    if opt.battery {
1084        let (tx, rx) = mesh::channel();
1085        tx.send(HostBatteryUpdate::default_present());
1086        chipset = chipset.with_battery(rx);
1087    }
1088    if opt.no_vmbus {
1089        chipset = chipset.without_vmbus();
1090    }
1091    if let Some(cfg) = &opt.debugcon {
1092        chipset = chipset.with_debugcon(
1093            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
1094            cfg.port,
1095        );
1096    }
1097
1098    let custom_uefi_vars = {
1099        use firmware_uefi_custom_vars::CustomVars;
1100
1101        // load base vars from specified template, or use an empty set of base
1102        // vars if none was specified.
1103        let base_vars = match opt.secure_boot_template {
1104            Some(template) => match (arch, template) {
1105                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1106                    hyperv_secure_boot_templates::x64::microsoft_windows()
1107                }
1108                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1109                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1110                }
1111                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1112                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1113                }
1114                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1115                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1116                }
1117            },
1118            None => CustomVars::default(),
1119        };
1120
1121        // TODO: fall back to VMGS read if no command line flag was given
1122
1123        let custom_uefi_json_data = match &opt.custom_uefi_json {
1124            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1125            None => None,
1126        };
1127
1128        // obtain the final custom uefi vars by applying the delta onto the base vars
1129        match custom_uefi_json_data {
1130            Some(data) => {
1131                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1132                base_vars.apply_delta(delta)?
1133            }
1134            None => base_vars,
1135        }
1136    };
1137
1138    let efi_diagnostics_log_level = match opt.efi_diagnostics_log_level.unwrap_or_default() {
1139        EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1140        EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1141        EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1142    };
1143
1144    if opt.uefi {
1145        let log_level = match efi_diagnostics_log_level {
1146            EfiDiagnosticsLogLevelType::Default => {
1147                firmware_uefi_resources::LogLevel::make_default()
1148            }
1149            EfiDiagnosticsLogLevelType::Info => firmware_uefi_resources::LogLevel::make_info(),
1150            EfiDiagnosticsLogLevelType::Full => firmware_uefi_resources::LogLevel::make_full(),
1151        };
1152        let nvram_storage = if opt.vmgs.is_some() {
1153            VmgsFileHandle::new(vmgs_format::FileId::BIOS_NVRAM, true).into_resource()
1154        } else {
1155            EphemeralNonVolatileStoreHandle.into_resource()
1156        };
1157        chipset = chipset.with_uefi(vm_manifest_builder::UefiManifest::new(
1158            arch,
1159            custom_uefi_vars.clone(),
1160            opt.secure_boot,
1161            log_level,
1162            nvram_storage,
1163            None,
1164        ));
1165    }
1166
1167    // TODO: load from VMGS file if it exists
1168    let bios_guid = Guid::new_random();
1169
1170    let layout_config = chipset.layout_config();
1171    let VmChipsetResult {
1172        chipset,
1173        mut chipset_devices,
1174        pci_chipset_devices,
1175        isa_dma_controller,
1176        capabilities,
1177    } = chipset
1178        .build()
1179        .context("failed to build chipset configuration")?;
1180
1181    if opt.restore_snapshot.is_some() {
1182        // Snapshot restore: skip firmware loading entirely. Device state and
1183        // memory come from the snapshot directory.
1184        load_mode = LoadMode::None;
1185        with_hv = true;
1186    } else if let Some(path) = &opt.igvm {
1187        let file = fs_err::File::open(path)
1188            .context("failed to open igvm file")?
1189            .into();
1190        let cmdline = opt.cmdline.join(" ");
1191        with_hv = true;
1192
1193        load_mode = LoadMode::Igvm {
1194            file,
1195            cmdline,
1196            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1197            com_serial: has_com3.then(|| SerialInformation {
1198                io_port: ComPort::Com3.io_port(),
1199                irq: ComPort::Com3.irq().into(),
1200            }),
1201        };
1202    } else if opt.pcat {
1203        // Emit a nice error early instead of complaining about missing firmware.
1204        if arch != MachineArch::X86_64 {
1205            anyhow::bail!("pcat not supported on this architecture");
1206        }
1207        with_hv = true;
1208
1209        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1210        load_mode = LoadMode::Pcat {
1211            firmware,
1212            boot_order: opt
1213                .pcat_boot_order
1214                .map(|x| x.0)
1215                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1216        };
1217    } else if opt.uefi {
1218        use openvmm_defs::config::UefiConsoleMode;
1219
1220        with_hv = true;
1221
1222        let firmware = fs_err::File::open(
1223            (opt.uefi_firmware.0)
1224                .as_ref()
1225                .context("must provide uefi firmware when booting with uefi")?,
1226        )
1227        .context("failed to open uefi firmware")?;
1228
1229        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1230        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1231        load_mode = LoadMode::Uefi {
1232            firmware: firmware.into(),
1233            enable_debugging: opt.uefi_debug,
1234            enable_memory_protections: opt.uefi_enable_memory_protections,
1235            disable_frontpage: opt.disable_frontpage,
1236            enable_tpm: opt.tpm,
1237            enable_battery: opt.battery,
1238            enable_serial: any_serial_configured,
1239            enable_vpci_boot: false,
1240            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1241                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1242                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1243                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1244                UefiConsoleModeCli::None => UefiConsoleMode::None,
1245            }),
1246            default_boot_always_attempt: opt.default_boot_always_attempt,
1247            bios_guid,
1248            enable_vmbus: !opt.no_vmbus,
1249        };
1250    } else {
1251        // Linux Direct
1252        let mut cmdline = "panic=-1 debug".to_string();
1253
1254        with_hv = opt.hv;
1255        if with_hv && opt.pcie_root_complex.is_empty() {
1256            cmdline += " pci=off";
1257        }
1258
1259        if !console_str.is_empty() {
1260            let _ = write!(&mut cmdline, " console={}", console_str);
1261        }
1262
1263        if opt.gfx {
1264            cmdline += " console=tty";
1265        }
1266        for extra in &opt.cmdline {
1267            let _ = write!(&mut cmdline, " {}", extra);
1268        }
1269
1270        let kernel = fs_err::File::open(
1271            (opt.kernel.0)
1272                .as_ref()
1273                .context("must provide kernel when booting with linux direct")?,
1274        )
1275        .context("failed to open kernel")?;
1276        let initrd = (opt.initrd.0)
1277            .as_ref()
1278            .map(fs_err::File::open)
1279            .transpose()
1280            .context("failed to open initrd")?;
1281
1282        let custom_dsdt = match &opt.custom_dsdt {
1283            Some(path) => {
1284                let mut v = Vec::new();
1285                fs_err::File::open(path)
1286                    .context("failed to open custom dsdt")?
1287                    .read_to_end(&mut v)
1288                    .context("failed to read custom dsdt")?;
1289                Some(v)
1290            }
1291            None => None,
1292        };
1293
1294        load_mode = LoadMode::Linux {
1295            kernel: kernel.into(),
1296            initrd: initrd.map(Into::into),
1297            cmdline,
1298            custom_dsdt,
1299            enable_serial: any_serial_configured,
1300            boot_mode: if opt.device_tree {
1301                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1302            } else {
1303                openvmm_defs::config::LinuxDirectBootMode::Acpi
1304            },
1305        };
1306    }
1307
1308    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1309        let disk = VmgsDisk {
1310            disk: disk_open(kind, false)
1311                .await
1312                .context("failed to open vmgs disk")?,
1313            encryption_policy: if opt.test_gsp_by_id {
1314                GuestStateEncryptionPolicy::GspById(true)
1315            } else {
1316                GuestStateEncryptionPolicy::None(true)
1317            },
1318        };
1319        match provision {
1320            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1321            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1322            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1323        }
1324    } else {
1325        VmgsResource::Ephemeral
1326    });
1327
1328    if with_get && with_hv {
1329        let has_vtl0_nvme = storage.has_vtl0_nvme();
1330        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1331            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1332            fixed: Some(Default::default()),
1333            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1334                storage_controllers: storage.build_openhcl_settings(opt.vmbus_redirect),
1335                nic_devices: underhill_nics,
1336            }),
1337            namespace_settings: Vec::default(),
1338        };
1339
1340        // Cache the VTL2 settings for later modification via the interactive console.
1341        resources.vtl2_settings = Some(vtl2_settings.clone());
1342
1343        let (send, guest_request_recv) = mesh::channel();
1344        resources.ged_rpc = Some(send);
1345
1346        let vmgs = vmgs.take().unwrap();
1347
1348        vmbus_devices.extend([
1349            (
1350                openhcl_vtl,
1351                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1352            ),
1353            (
1354                openhcl_vtl,
1355                get_resources::ged::GuestEmulationDeviceHandle {
1356                    firmware: if opt.pcat {
1357                        get_resources::ged::GuestFirmwareConfig::Pcat {
1358                            boot_order: opt
1359                                .pcat_boot_order
1360                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1361                                .map(|x| match x {
1362                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1363                                        get_resources::ged::PcatBootDevice::Floppy
1364                                    }
1365                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1366                                        get_resources::ged::PcatBootDevice::HardDrive
1367                                    }
1368                                    openvmm_defs::config::PcatBootDevice::Optical => {
1369                                        get_resources::ged::PcatBootDevice::Optical
1370                                    }
1371                                    openvmm_defs::config::PcatBootDevice::Network => {
1372                                        get_resources::ged::PcatBootDevice::Network
1373                                    }
1374                                }),
1375                        }
1376                    } else {
1377                        use get_resources::ged::UefiConsoleMode;
1378
1379                        get_resources::ged::GuestFirmwareConfig::Uefi {
1380                            enable_vpci_boot: has_vtl0_nvme,
1381                            firmware_debug: opt.uefi_debug,
1382                            disable_frontpage: opt.disable_frontpage,
1383                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1384                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1385                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1386                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1387                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1388                            },
1389                            default_boot_always_attempt: opt.default_boot_always_attempt,
1390                        }
1391                    },
1392                    com1: with_vmbus_com1_serial,
1393                    com2: with_vmbus_com2_serial,
1394                    serial_tx_only: opt.serial_tx_only,
1395                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1396                    vmbus_redirection: opt.vmbus_redirect,
1397                    vmgs,
1398                    framebuffer: opt
1399                        .vtl2_gfx
1400                        .then(|| SharedFramebufferHandle.into_resource()),
1401                    guest_request_recv,
1402                    enable_tpm: opt.tpm,
1403                    firmware_event_send: None,
1404                    secure_boot_enabled: opt.secure_boot,
1405                    secure_boot_template: match opt.secure_boot_template {
1406                        Some(SecureBootTemplateCli::Windows) => {
1407                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1408                        },
1409                        Some(SecureBootTemplateCli::UefiCa) => {
1410                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1411                        }
1412                        None => {
1413                            get_resources::ged::GuestSecureBootTemplateType::None
1414                        },
1415                    },
1416                    enable_battery: opt.battery,
1417                    no_persistent_secrets: true,
1418                    igvm_attest_test_config: None,
1419                    test_gsp_by_id: opt.test_gsp_by_id,
1420                    efi_diagnostics_log_level: {
1421                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1422                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1423                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1424                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1425                        }
1426                    },
1427                    hv_sint_enabled: false,
1428                }
1429                .into_resource(),
1430            ),
1431        ]);
1432    }
1433
1434    if opt.tpm && !opt.vtl2 {
1435        let register_layout = if cfg!(guest_arch = "x86_64") {
1436            TpmRegisterLayout::IoPort
1437        } else {
1438            TpmRegisterLayout::Mmio
1439        };
1440
1441        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1442            (
1443                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1444                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1445            )
1446        } else {
1447            (
1448                EphemeralNonVolatileStoreHandle.into_resource(),
1449                EphemeralNonVolatileStoreHandle.into_resource(),
1450            )
1451        };
1452
1453        chipset_devices.push(ChipsetDeviceHandle {
1454            name: "tpm".to_string(),
1455            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1456                device: TpmDeviceHandle {
1457                    ppi_store,
1458                    nvram_store,
1459                    nvram_size: None,
1460                    refresh_tpm_seeds: false,
1461                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1462                    register_layout,
1463                    guest_secret_key: None,
1464                    logger: None,
1465                    is_confidential_vm: false,
1466                    bios_guid,
1467                }
1468                .into_resource(),
1469                worker_host: mesh.make_host("tpm", None).await?,
1470            }
1471            .into_resource(),
1472        });
1473    }
1474
1475    let vga_firmware = if opt.pcat {
1476        Some(openvmm_pcat_locator::find_svga_bios(
1477            opt.vga_firmware.as_deref(),
1478        )?)
1479    } else {
1480        None
1481    };
1482
1483    if opt.gfx {
1484        // Channel for the video device to report dirty rectangles to the VNC worker.
1485        let (dirt_send, dirt_recv) = mesh::channel();
1486        resources.dirty_rect_recv = Some(dirt_recv);
1487
1488        vmbus_devices.extend([
1489            (
1490                DeviceVtl::Vtl0,
1491                SynthVideoHandle {
1492                    framebuffer: SharedFramebufferHandle.into_resource(),
1493                    dirt_send: Some(dirt_send),
1494                }
1495                .into_resource(),
1496            ),
1497            (
1498                DeviceVtl::Vtl0,
1499                SynthKeyboardHandle {
1500                    source: MultiplexedInputHandle {
1501                        // Save 0 for PS/2
1502                        elevation: 1,
1503                    }
1504                    .into_resource(),
1505                }
1506                .into_resource(),
1507            ),
1508            (
1509                DeviceVtl::Vtl0,
1510                SynthMouseHandle {
1511                    source: MultiplexedInputHandle {
1512                        // Save 0 for PS/2
1513                        elevation: 1,
1514                    }
1515                    .into_resource(),
1516                }
1517                .into_resource(),
1518            ),
1519        ]);
1520    }
1521
1522    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1523        if let Some(path) = path {
1524            cleanup_socket(path.as_ref());
1525            let listener = unix_socket::UnixListener::bind(path)
1526                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1527            Ok(Some(listener))
1528        } else {
1529            Ok(None)
1530        }
1531    };
1532
1533    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1534    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1535
1536    if let Some(path) = &opt.openhcl_dump_path {
1537        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1538        task.detach();
1539        vmbus_devices.push((openhcl_vtl, resource));
1540    }
1541
1542    #[cfg(guest_arch = "aarch64")]
1543    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1544        openvmm_defs::config::Aarch64TopologyConfig {
1545            // TODO: allow this to be configured from the command line
1546            gic_config: None,
1547            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1548            gic_msi: match opt.gic_msi {
1549                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1550                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1551                cli_args::GicMsiCli::V2m => {
1552                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1553                }
1554            },
1555        },
1556    );
1557    #[cfg(guest_arch = "x86_64")]
1558    let topology_arch =
1559        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1560            apic_id_offset: opt.apic_id_offset,
1561            x2apic: opt.x2apic,
1562        });
1563
1564    let with_isolation = if let Some(isolation) = &opt.isolation {
1565        // TODO: For now, isolation is only supported with VTL2.
1566        if !opt.vtl2 {
1567            anyhow::bail!("isolation is only currently supported with vtl2");
1568        }
1569
1570        // TODO: Alias map support is not yet implemented with isolation.
1571        if !opt.no_alias_map {
1572            anyhow::bail!("alias map not supported with isolation");
1573        }
1574
1575        match isolation {
1576            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1577        }
1578    } else {
1579        None
1580    };
1581
1582    if with_hv && !opt.no_vmbus {
1583        let (shutdown_send, shutdown_recv) = mesh::channel();
1584        resources.shutdown_ic = Some(shutdown_send);
1585        let (kvp_send, kvp_recv) = mesh::channel();
1586        resources.kvp_ic = Some(kvp_send);
1587        vmbus_devices.extend(
1588            [
1589                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1590                    recv: shutdown_recv,
1591                }
1592                .into_resource(),
1593                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1594                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1595            ]
1596            .map(|r| (DeviceVtl::Vtl0, r)),
1597        );
1598    }
1599
1600    if let Some(hive_path) = &opt.imc {
1601        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1602        vmbus_devices.push((
1603            DeviceVtl::Vtl0,
1604            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1605        ));
1606    }
1607
1608    let mut virtio_devices = Vec::new();
1609    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1610        let bus = match bus {
1611            VirtioBusCli::Auto => {
1612                // Use VPCI when possible (currently only on Windows and macOS due
1613                // to KVM backend limitations).
1614                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1615                    None
1616                } else {
1617                    Some(VirtioBus::Pci)
1618                }
1619            }
1620            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1621            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1622            VirtioBusCli::Vpci => None,
1623        };
1624        if let Some(bus) = bus {
1625            virtio_devices.push((bus, resource));
1626        } else {
1627            vpci_devices.push(VpciDeviceConfig {
1628                vtl: DeviceVtl::Vtl0,
1629                instance_id: Guid::new_random(),
1630                resource: VirtioPciDeviceHandle(resource).into_resource(),
1631            });
1632        }
1633    };
1634
1635    for cli_cfg in &opt.virtio_net {
1636        if cli_cfg.underhill {
1637            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1638        }
1639        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1640        let resource = virtio_resources::net::VirtioNetHandle {
1641            max_queues: vport.max_queues,
1642            mac_address: vport.mac_address,
1643            endpoint: vport.endpoint,
1644        }
1645        .into_resource();
1646        if let Some(pcie_port) = &cli_cfg.pcie_port {
1647            pcie_devices.push(PcieDeviceConfig {
1648                port_name: pcie_port.clone(),
1649                resource: VirtioPciDeviceHandle(resource).into_resource(),
1650            });
1651        } else {
1652            add_virtio_device(VirtioBusCli::Auto, resource);
1653        }
1654    }
1655
1656    for args in &opt.virtio_fs {
1657        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1658            tag: args.tag.clone(),
1659            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1660                root_path: args.path.clone(),
1661                mount_options: args.options.clone(),
1662            },
1663        }
1664        .into_resource();
1665        if let Some(pcie_port) = &args.pcie_port {
1666            pcie_devices.push(PcieDeviceConfig {
1667                port_name: pcie_port.clone(),
1668                resource: VirtioPciDeviceHandle(resource).into_resource(),
1669            });
1670        } else {
1671            add_virtio_device(opt.virtio_fs_bus, resource);
1672        }
1673    }
1674
1675    for args in &opt.virtio_fs_shmem {
1676        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1677            tag: args.tag.clone(),
1678            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1679                root_path: args.path.clone(),
1680            },
1681        }
1682        .into_resource();
1683        if let Some(pcie_port) = &args.pcie_port {
1684            pcie_devices.push(PcieDeviceConfig {
1685                port_name: pcie_port.clone(),
1686                resource: VirtioPciDeviceHandle(resource).into_resource(),
1687            });
1688        } else {
1689            add_virtio_device(opt.virtio_fs_bus, resource);
1690        }
1691    }
1692
1693    for args in &opt.virtio_9p {
1694        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1695            tag: args.tag.clone(),
1696            root_path: args.path.clone(),
1697            debug: opt.virtio_9p_debug,
1698        }
1699        .into_resource();
1700        if let Some(pcie_port) = &args.pcie_port {
1701            pcie_devices.push(PcieDeviceConfig {
1702                port_name: pcie_port.clone(),
1703                resource: VirtioPciDeviceHandle(resource).into_resource(),
1704            });
1705        } else {
1706            add_virtio_device(VirtioBusCli::Auto, resource);
1707        }
1708    }
1709
1710    if let Some(pmem_args) = &opt.virtio_pmem {
1711        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1712            path: pmem_args.path.clone(),
1713        }
1714        .into_resource();
1715        if let Some(pcie_port) = &pmem_args.pcie_port {
1716            pcie_devices.push(PcieDeviceConfig {
1717                port_name: pcie_port.clone(),
1718                resource: VirtioPciDeviceHandle(resource).into_resource(),
1719            });
1720        } else {
1721            add_virtio_device(VirtioBusCli::Auto, resource);
1722        }
1723    }
1724
1725    if opt.virtio_rng {
1726        let resource: Resource<VirtioDeviceHandle> =
1727            virtio_resources::rng::VirtioRngHandle.into_resource();
1728        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1729            pcie_devices.push(PcieDeviceConfig {
1730                port_name: pcie_port.clone(),
1731                resource: VirtioPciDeviceHandle(resource).into_resource(),
1732            });
1733        } else {
1734            add_virtio_device(opt.virtio_rng_bus, resource);
1735        }
1736    }
1737
1738    if let Some(backend) = virtio_console_backend {
1739        let resource: Resource<VirtioDeviceHandle> =
1740            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1741        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1742            pcie_devices.push(PcieDeviceConfig {
1743                port_name: pcie_port.clone(),
1744                resource: VirtioPciDeviceHandle(resource).into_resource(),
1745            });
1746        } else {
1747            add_virtio_device(VirtioBusCli::Auto, resource);
1748        }
1749    }
1750
1751    // Handle --vhost-user arguments.
1752    #[cfg(target_os = "linux")]
1753    for vhost_cli in &opt.vhost_user {
1754        let stream =
1755            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1756                format!(
1757                    "failed to connect to vhost-user socket: {}",
1758                    vhost_cli.socket_path
1759                )
1760            })?;
1761
1762        use crate::cli_args::VhostUserDeviceTypeCli;
1763        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1764            VhostUserDeviceTypeCli::Fs {
1765                ref tag,
1766                num_queues,
1767                queue_size,
1768            } => virtio_resources::vhost_user::VhostUserFsHandle {
1769                socket: stream.into(),
1770                tag: tag.clone(),
1771                num_queues,
1772                queue_size,
1773            }
1774            .into_resource(),
1775            VhostUserDeviceTypeCli::Blk {
1776                num_queues,
1777                queue_size,
1778            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1779                socket: stream.into(),
1780                num_queues,
1781                queue_size,
1782            }
1783            .into_resource(),
1784            VhostUserDeviceTypeCli::Other {
1785                device_id,
1786                ref queue_sizes,
1787            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1788                socket: stream.into(),
1789                device_id,
1790                queue_sizes: queue_sizes.clone(),
1791            }
1792            .into_resource(),
1793        };
1794        if let Some(pcie_port) = &vhost_cli.pcie_port {
1795            pcie_devices.push(PcieDeviceConfig {
1796                port_name: pcie_port.clone(),
1797                resource: VirtioPciDeviceHandle(resource).into_resource(),
1798            });
1799        } else {
1800            add_virtio_device(VirtioBusCli::Auto, resource);
1801        }
1802    }
1803
1804    if let Some(vsock_path) = &opt.virtio_vsock_path {
1805        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1806        add_virtio_device(
1807            VirtioBusCli::Auto,
1808            virtio_resources::vsock::VirtioVsockHandle {
1809                // The guest CID does not matter since the UDS relay does not use it. It just needs
1810                // to be some non-reserved value for the guest to use.
1811                guest_cid: 0x3,
1812                base_path: vsock_path.clone(),
1813                listener,
1814            }
1815            .into_resource(),
1816        );
1817    }
1818
1819    let mut cfg = Config {
1820        chipset,
1821        load_mode,
1822        floppy_disks,
1823        pcie_root_complexes,
1824        #[cfg(target_os = "linux")]
1825        pcie_devices: {
1826            let mut devs = pcie_devices;
1827            devs.extend(vfio_pcie_devices);
1828            devs
1829        },
1830        #[cfg(not(target_os = "linux"))]
1831        pcie_devices,
1832        pcie_switches,
1833        vpci_devices,
1834        ide_disks: Vec::new(),
1835        memory: MemoryConfig {
1836            mem_size: if let Some(ref sizes) = opt.numa_memory {
1837                sizes
1838                    .iter()
1839                    .try_fold(0u64, |acc, &s| acc.checked_add(s))
1840                    .context("numa memory sizes overflow")?
1841            } else {
1842                opt.memory_size()
1843            },
1844            prefetch_memory: opt.prefetch_memory(),
1845            private_memory: opt.private_memory(),
1846            transparent_hugepages: opt.transparent_hugepages(),
1847            hugepages: opt.memory.hugepages,
1848            hugepage_size: opt.memory.hugepage_size,
1849            numa_mem_sizes: opt.numa_memory.clone(),
1850        },
1851        processor_topology: ProcessorTopologyConfig {
1852            proc_count: opt.processors,
1853            vps_per_socket: opt.vps_per_socket,
1854            enable_smt: match opt.smt {
1855                cli_args::SmtConfigCli::Auto => None,
1856                cli_args::SmtConfigCli::Force => Some(true),
1857                cli_args::SmtConfigCli::Off => Some(false),
1858            },
1859            arch: Some(topology_arch),
1860        },
1861        hypervisor: HypervisorConfig {
1862            with_hv,
1863            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1864                vtl0_alias_map: !opt.no_alias_map,
1865                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1866                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1867                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1868                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1869                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1870                        Some(LateMapVtl0MemoryPolicy::InjectException)
1871                    }
1872                },
1873            }),
1874            with_isolation,
1875        },
1876        #[cfg(windows)]
1877        kernel_vmnics,
1878        input: mesh::Receiver::new(),
1879        framebuffer,
1880        vga_firmware,
1881        vtl2_gfx: opt.vtl2_gfx,
1882        virtio_devices,
1883        vmbus: (with_hv && !opt.no_vmbus).then_some(VmbusConfig {
1884            vsock_listener: vtl0_vsock_listener,
1885            vsock_path: opt.vmbus_vsock_path.clone(),
1886            vtl2_redirect: opt.vmbus_redirect,
1887            vmbus_max_version: opt.vmbus_max_version,
1888            #[cfg(windows)]
1889            vmbusproxy_handle,
1890        }),
1891        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1892            vsock_listener: vtl2_vsock_listener,
1893            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1894            ..Default::default()
1895        }),
1896        vmbus_devices,
1897        chipset_devices,
1898        pci_chipset_devices,
1899        isa_dma_controller,
1900        chipset_capabilities: capabilities,
1901        layout: layout_config,
1902        #[cfg(windows)]
1903        vpci_resources,
1904        vmgs,
1905        secure_boot_enabled: opt.secure_boot,
1906        custom_uefi_vars,
1907        firmware_event_send: None,
1908        debugger_rpc: None,
1909        rtc_delta_milliseconds: 0,
1910        automatic_guest_reset: !opt.halt_on_reset,
1911        efi_diagnostics_log_level: {
1912            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1913                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1914                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1915                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1916            }
1917        },
1918    };
1919
1920    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1921    Ok((cfg, resources))
1922}
1923
/// Returns the terminal application used for externally launched console
/// windows, if one is configured in the environment.
///
/// `OPENVMM_TERM` is consulted first; `HVLITE_TERM` is only used when
/// `OPENVMM_TERM` is not set. `None` is returned when neither is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    // Listed in priority order: the first variable that is set wins.
    ["OPENVMM_TERM", "HVLITE_TERM"]
        .into_iter()
        .find_map(|name| std::env::var_os(name))
        .map(PathBuf::from)
}
1930
/// Best-effort removal of `path`, performed only when `path` is confirmed to
/// be a Unix socket.
///
/// Anything that cannot be confirmed as a socket (including a path whose
/// type cannot be queried) is left untouched. A failure to remove the socket
/// is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let is_socket = {
        use std::os::unix::fs::FileTypeExt;
        match path.metadata() {
            Ok(meta) => meta.file_type().is_socket(),
            // If the metadata cannot be read, treat the path as "not a socket".
            Err(_) => false,
        }
    };

    if !is_socket {
        return;
    }

    // The removal result is deliberately discarded: this is cleanup only.
    let _ = std::fs::remove_file(path);
}
1944
/// Creates a kernel switch port on the requested switch (Windows only).
///
/// `switch_id`, when provided, is parsed into a switch identifier; when it is
/// `None`, `vmswitch::hcn::DEFAULT_SWITCH` is used. The port half of the ID
/// is a freshly generated random GUID.
///
/// Returns the port ID expressed as an `openvmm_defs` `SwitchPortId` together
/// with the created `SwitchPort`.
///
/// # Errors
///
/// Fails if `switch_id` does not parse, if the switch cannot be opened, or if
/// the switch port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: Option<&str>,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let switch = match switch_id {
        None => vmswitch::hcn::DEFAULT_SWITCH,
        Some(text) => text.parse().context("invalid switch id")?,
    };
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch,
        port: Guid::new_random(),
    };

    // The opened network is discarded immediately; only the success or
    // failure of opening it is used here.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    let config_id = openvmm_defs::config::SwitchPortId {
        switch: kernel_id.switch,
        port: kernel_id.port,
    };
    Ok((config_id, switch_port))
}
1970
1971fn parse_endpoint(
1972    cli_cfg: &NicConfigCli,
1973    index: &mut usize,
1974    resources: &mut VmResources,
1975) -> anyhow::Result<NicConfig> {
1976    let _ = resources;
1977    let endpoint = match &cli_cfg.endpoint {
1978        EndpointConfigCli::Consomme { cidr, host_fwd } => {
1979            let ports = host_fwd
1980                .iter()
1981                .map(|fwd| {
1982                    use net_backend_resources::consomme::HostPortProtocol;
1983                    net_backend_resources::consomme::HostPortConfig {
1984                        protocol: match fwd.protocol {
1985                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
1986                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
1987                        },
1988                        host_address: fwd
1989                            .host_address
1990                            .map(net_backend_resources::consomme::HostIpAddress::from),
1991                        host_port: net_backend_resources::consomme::HostPort::Fixed(fwd.host_port),
1992                        guest_port: fwd.guest_port,
1993                    }
1994                })
1995                .collect();
1996            net_backend_resources::consomme::ConsommeHandle {
1997                cidr: cidr.clone(),
1998                ports,
1999            }
2000            .into_resource()
2001        }
2002        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
2003        EndpointConfigCli::Dio { id } => {
2004            #[cfg(windows)]
2005            {
2006                let (port_id, port) = new_switch_port(id.as_deref())?;
2007                resources.switch_ports.push(port);
2008                net_backend_resources::dio::WindowsDirectIoHandle {
2009                    switch_port_id: net_backend_resources::dio::SwitchPortId {
2010                        switch: port_id.switch,
2011                        port: port_id.port,
2012                    },
2013                }
2014                .into_resource()
2015            }
2016
2017            #[cfg(not(windows))]
2018            {
2019                let _ = id;
2020                bail!("cannot use dio on non-windows platforms")
2021            }
2022        }
2023        EndpointConfigCli::Tap { name } => {
2024            #[cfg(target_os = "linux")]
2025            {
2026                let fd = net_tap::tap::open_tap(name)
2027                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
2028                net_backend_resources::tap::TapHandle { fd }.into_resource()
2029            }
2030
2031            #[cfg(not(target_os = "linux"))]
2032            {
2033                let _ = name;
2034                bail!("TAP backend is only supported on Linux")
2035            }
2036        }
2037    };
2038
2039    // Pick a random MAC address.
2040    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
2041    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
2042
2043    // Pick a fixed instance ID based on the index.
2044    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
2045    let instance_id = Guid {
2046        data1: *index as u32,
2047        ..BASE_INSTANCE_ID
2048    };
2049    *index += 1;
2050
2051    Ok(NicConfig {
2052        vtl: cli_cfg.vtl,
2053        instance_id,
2054        endpoint,
2055        mac_address: mac_address.into(),
2056        max_queues: cli_cfg.max_queues,
2057        pcie_port: cli_cfg.pcie_port.clone(),
2058    })
2059}
2060
2061#[derive(Debug)]
2062struct NicConfig {
2063    vtl: DeviceVtl,
2064    instance_id: Guid,
2065    mac_address: MacAddress,
2066    endpoint: Resource<NetEndpointHandleKind>,
2067    max_queues: Option<u16>,
2068    pcie_port: Option<String>,
2069}
2070
2071impl NicConfig {
2072    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
2073        (
2074            self.vtl,
2075            netvsp_resources::NetvspHandle {
2076                instance_id: self.instance_id,
2077                mac_address: self.mac_address,
2078                endpoint: self.endpoint,
2079                max_queues: self.max_queues,
2080            }
2081            .into_resource(),
2082        )
2083    }
2084}
2085
2086enum LayerOrDisk {
2087    Layer(DiskLayerDescription),
2088    Disk(Resource<DiskHandleKind>),
2089}
2090
2091async fn disk_open(
2092    disk_cli: &DiskCliKind,
2093    read_only: bool,
2094) -> anyhow::Result<Resource<DiskHandleKind>> {
2095    let mut layers = Vec::new();
2096    disk_open_inner(disk_cli, read_only, &mut layers).await?;
2097    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
2098        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
2099            unreachable!()
2100        };
2101        Ok(disk)
2102    } else {
2103        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
2104            layers: layers
2105                .into_iter()
2106                .map(|layer| match layer {
2107                    LayerOrDisk::Layer(layer) => layer,
2108                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
2109                        layer: DiskLayerHandle(disk).into_resource(),
2110                        read_cache: false,
2111                        write_through: false,
2112                    },
2113                })
2114                .collect(),
2115        }))
2116    }
2117}
2118
2119fn disk_open_inner<'a>(
2120    disk_cli: &'a DiskCliKind,
2121    read_only: bool,
2122    layers: &'a mut Vec<LayerOrDisk>,
2123) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
2124    Box::pin(async move {
2125        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
2126            LayerOrDisk::Layer(layer.into_resource().into())
2127        }
2128        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
2129            LayerOrDisk::Disk(disk.into_resource())
2130        }
2131        match disk_cli {
2132            &DiskCliKind::Memory(len) => {
2133                layers.push(layer(RamDiskLayerHandle {
2134                    len: Some(len),
2135                    sector_size: None,
2136                }));
2137            }
2138            DiskCliKind::File {
2139                path,
2140                create_with_len,
2141                direct,
2142            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
2143                create_disk_type(
2144                    path,
2145                    *size,
2146                    OpenDiskOptions {
2147                        read_only: false,
2148                        direct: *direct,
2149                    },
2150                )
2151                .with_context(|| format!("failed to create {}", path.display()))?
2152            } else {
2153                open_disk_type(
2154                    path,
2155                    OpenDiskOptions {
2156                        read_only,
2157                        direct: *direct,
2158                    },
2159                )
2160                .await
2161                .with_context(|| format!("failed to open {}", path.display()))?
2162            })),
2163            DiskCliKind::Blob { kind, url } => {
2164                layers.push(disk(disk_backend_resources::BlobDiskHandle {
2165                    url: url.to_owned(),
2166                    format: match kind {
2167                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
2168                        cli_args::BlobKind::Vhd1 => {
2169                            disk_backend_resources::BlobDiskFormat::FixedVhd1
2170                        }
2171                    },
2172                }))
2173            }
2174            DiskCliKind::MemoryDiff(inner) => {
2175                layers.push(layer(RamDiskLayerHandle {
2176                    len: None,
2177                    sector_size: None,
2178                }));
2179                disk_open_inner(inner, true, layers).await?;
2180            }
2181            DiskCliKind::PersistentReservationsWrapper(inner) => {
2182                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
2183                    disk_open(inner, read_only).await?,
2184                )))
2185            }
2186            DiskCliKind::DelayDiskWrapper {
2187                delay_ms,
2188                disk: inner,
2189            } => layers.push(disk(DelayDiskHandle {
2190                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
2191                disk: disk_open(inner, read_only).await?,
2192            })),
2193            DiskCliKind::Crypt {
2194                disk: inner,
2195                cipher,
2196                key_file,
2197            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
2198                disk: disk_open(inner, read_only).await?,
2199                cipher: match cipher {
2200                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
2201                },
2202                key: fs_err::read(key_file).context("failed to read key file")?,
2203            })),
2204            DiskCliKind::Sqlite {
2205                path,
2206                create_with_len,
2207            } => {
2208                // FUTURE: this code should be responsible for opening
2209                // file-handle(s) itself, and passing them into sqlite via a custom
2210                // vfs. For now though - simply check if the file exists or not, and
2211                // perform early validation of filesystem-level create options.
2212                match (create_with_len.is_some(), path.exists()) {
2213                    (true, true) => anyhow::bail!(
2214                        "cannot create new sqlite disk at {} - file already exists",
2215                        path.display()
2216                    ),
2217                    (false, false) => anyhow::bail!(
2218                        "cannot open sqlite disk at {} - file not found",
2219                        path.display()
2220                    ),
2221                    _ => {}
2222                }
2223
2224                layers.push(layer(SqliteDiskLayerHandle {
2225                    dbhd_path: path.display().to_string(),
2226                    format_dbhd: create_with_len.map(|len| {
2227                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2228                            logically_read_only: false,
2229                            len: Some(len),
2230                        }
2231                    }),
2232                }));
2233            }
2234            DiskCliKind::SqliteDiff { path, create, disk } => {
2235                // FUTURE: this code should be responsible for opening
2236                // file-handle(s) itself, and passing them into sqlite via a custom
2237                // vfs. For now though - simply check if the file exists or not, and
2238                // perform early validation of filesystem-level create options.
2239                match (create, path.exists()) {
2240                    (true, true) => anyhow::bail!(
2241                        "cannot create new sqlite disk at {} - file already exists",
2242                        path.display()
2243                    ),
2244                    (false, false) => anyhow::bail!(
2245                        "cannot open sqlite disk at {} - file not found",
2246                        path.display()
2247                    ),
2248                    _ => {}
2249                }
2250
2251                layers.push(layer(SqliteDiskLayerHandle {
2252                    dbhd_path: path.display().to_string(),
2253                    format_dbhd: create.then_some(
2254                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2255                            logically_read_only: false,
2256                            len: None,
2257                        },
2258                    ),
2259                }));
2260                disk_open_inner(disk, true, layers).await?;
2261            }
2262            DiskCliKind::AutoCacheSqlite {
2263                cache_path,
2264                key,
2265                disk,
2266            } => {
2267                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2268                    read_cache: true,
2269                    write_through: false,
2270                    layer: SqliteAutoCacheDiskLayerHandle {
2271                        cache_path: cache_path.clone(),
2272                        cache_key: key.clone(),
2273                    }
2274                    .into_resource(),
2275                }));
2276                disk_open_inner(disk, read_only, layers).await?;
2277            }
2278        }
2279        Ok(())
2280    })
2281}
2282
2283/// Get the system page size.
2284pub(crate) fn system_page_size() -> u32 {
2285    sparse_mmap::SparseMapping::page_size() as u32
2286}
2287
/// Name of the guest architecture this binary was built for.
///
/// This is `"x86_64"` when the compile-time `guest_arch` cfg is `"x86_64"`,
/// and `"aarch64"` for any other value.
pub(crate) const GUEST_ARCH: &str = match cfg!(guest_arch = "x86_64") {
    true => "x86_64",
    false => "aarch64",
};
2294
2295/// Open a snapshot directory and validate it against the current VM config.
2296/// Returns the shared memory fd (from memory.bin) and the saved device state.
2297fn prepare_snapshot_restore(
2298    snapshot_dir: &Path,
2299    opt: &Options,
2300) -> anyhow::Result<(
2301    openvmm_defs::worker::SharedMemoryFd,
2302    mesh::payload::message::ProtobufMessage,
2303)> {
2304    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2305
2306    // Validate manifest against current VM config.
2307    openvmm_helpers::snapshot::validate_manifest(
2308        &manifest,
2309        GUEST_ARCH,
2310        opt.memory_size(),
2311        opt.processors,
2312        system_page_size(),
2313    )?;
2314
2315    // Open memory.bin (existing file, no create, no resize).
2316    let memory_file = fs_err::OpenOptions::new()
2317        .read(true)
2318        .write(true)
2319        .open(snapshot_dir.join("memory.bin"))?;
2320
2321    // Validate file size matches expected memory size.
2322    let file_size = memory_file.metadata()?.len();
2323    if file_size != manifest.memory_size_bytes {
2324        anyhow::bail!(
2325            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2326            manifest.memory_size_bytes,
2327        );
2328    }
2329
2330    let shared_memory_fd =
2331        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2332
2333    // Reconstruct ProtobufMessage from the saved state bytes.
2334    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2335    // back to ProtobufMessage.
2336    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2337        .context("failed to decode saved state from snapshot")?;
2338
2339    Ok((shared_memory_fd, state_msg))
2340}
2341
2342fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
2343    #[cfg(windows)]
2344    pal::windows::disable_hard_error_dialog();
2345
2346    tracing_init::enable_tracing()?;
2347
2348    // Try to run as a worker host.
2349    // On success the worker runs to completion and then exits the process (does
2350    // not return). Any worker host setup errors are return and bubbled up.
2351    meshworker::run_vmm_mesh_host()?;
2352
2353    let opt = cli_args::parse_options();
2354    if let Some(path) = &opt.write_saved_state_proto {
2355        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2356            .write_to_path(path)
2357            .context("failed to write protobuf descriptors")?;
2358        return Ok(());
2359    }
2360
2361    if let Some(ref path) = opt.pidfile {
2362        std::fs::write(path, format!("{}\n", std::process::id()))
2363            .context("failed to write pidfile")?;
2364        *pidfile_path = Some(path.clone());
2365    }
2366
2367    if let Some(path) = opt.relay_console_path {
2368        let console_title = opt.relay_console_title.unwrap_or_default();
2369        return console_relay::relay_console(&path, console_title.as_str());
2370    }
2371
2372    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2373    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2374        return block_on(async {
2375            let _ = std::fs::remove_file(path);
2376            let listener =
2377                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2378
2379            let transport = if opt.ttrpc.is_some() {
2380                ttrpc::RpcTransport::Ttrpc
2381            } else {
2382                ttrpc::RpcTransport::Grpc
2383            };
2384
2385            // This is a local launch
2386            let mut handle =
2387                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2388                    listener,
2389                    transport,
2390                })
2391                .await?;
2392
2393            tracing::info!(%transport, path = %path.display(), "listening");
2394
2395            // Signal the the parent process that the server is ready.
2396            pal::close_stdout().context("failed to close stdout")?;
2397
2398            handle.join().await?;
2399
2400            Ok(())
2401        });
2402    }
2403
2404    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2405}
2406
2407fn new_hvsock_service_id(port: u32) -> Guid {
2408    // This GUID is an embedding of the AF_VSOCK port into an
2409    // AF_HYPERV service ID.
2410    Guid {
2411        data1: port,
2412        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2413    }
2414}
2415
2416async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2417    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2418    let result = run_control_inner(driver, &mut mesh, opt).await;
2419    // If setup failed before the mesh was handed to the controller, shut it
2420    // down so the child host process exits cleanly without noisy logs.
2421    if let Some(mesh) = mesh {
2422        mesh.shutdown().await;
2423    }
2424    result
2425}
2426
2427async fn run_control_inner(
2428    driver: &DefaultDriver,
2429    mesh_slot: &mut Option<VmmMesh>,
2430    opt: Options,
2431) -> anyhow::Result<()> {
2432    let mesh = mesh_slot.as_ref().unwrap();
2433    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2434
2435    let mut vnc_worker = None;
2436    if opt.gfx || opt.vnc.vnc {
2437        // Parse the listen address. Try as a full SocketAddr (host:port) first;
2438        // fall back to a bare IP, using the configured port.
2439        let addr: std::net::SocketAddr = if let Ok(sa) =
2440            opt.vnc.vnc_listen.parse::<std::net::SocketAddr>()
2441        {
2442            sa
2443        } else {
2444            let ip: std::net::IpAddr = opt.vnc.vnc_listen.parse().with_context(|| {
2445                format!(
2446                    "invalid VNC listen address: {} (expected IP address or socket address like [::1]:5900)",
2447                    opt.vnc.vnc_listen
2448                )
2449            })?;
2450            std::net::SocketAddr::new(ip, opt.vnc.vnc_port)
2451        };
2452
2453        let socket = socket2::Socket::new(
2454            if addr.is_ipv6() {
2455                socket2::Domain::IPV6
2456            } else {
2457                socket2::Domain::IPV4
2458            },
2459            socket2::Type::STREAM,
2460            None,
2461        )
2462        .with_context(|| format!("creating VNC socket for {}", addr))?;
2463
2464        if addr.is_ipv6() {
2465            if let Err(e) = socket.set_only_v6(false) {
2466                tracing::warn!(
2467                    error = %e,
2468                    "failed to enable dual-stack on IPv6 VNC socket, IPv4 clients may not be able to connect"
2469                );
2470            }
2471        }
2472        socket.set_reuse_address(true)?;
2473        socket
2474            .bind(&addr.into())
2475            .with_context(|| format!("binding VNC socket to {}", addr))?;
2476        socket
2477            .listen(128)
2478            .with_context(|| format!("listening on VNC socket {}", addr))?;
2479        let listener: TcpListener = socket.into();
2480
2481        if !addr.ip().is_loopback() {
2482            tracing::warn!(
2483                address = %addr,
2484                "VNC server listening on non-localhost address without authentication"
2485            );
2486        }
2487
2488        let input_send = vm_config.input.sender();
2489        let framebuffer = resources
2490            .framebuffer_access
2491            .take()
2492            .expect("synth video enabled");
2493
2494        let vnc_host = mesh
2495            .make_host("vnc", None)
2496            .await
2497            .context("spawning vnc process failed")?;
2498
2499        vnc_worker = Some(
2500            vnc_host
2501                .launch_worker(
2502                    vnc_worker_defs::VNC_WORKER_TCP,
2503                    VncParameters {
2504                        listener,
2505                        framebuffer,
2506                        input_send,
2507                        dirty_recv: resources.dirty_rect_recv.take(),
2508                        max_clients: opt.vnc.vnc_max_clients,
2509                        evict_oldest: opt.vnc.vnc_evict_oldest,
2510                    },
2511                )
2512                .await?,
2513        )
2514    }
2515
2516    // spin up the debug worker
2517    let gdb_worker = if let Some(port) = opt.gdb {
2518        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2519            .with_context(|| format!("binding to gdb port {}", port))?;
2520
2521        let (req_tx, req_rx) = mesh::channel();
2522        vm_config.debugger_rpc = Some(req_rx);
2523
2524        let gdb_host = mesh
2525            .make_host("gdb", None)
2526            .await
2527            .context("spawning gdbstub process failed")?;
2528
2529        Some(
2530            gdb_host
2531                .launch_worker(
2532                    debug_worker_defs::DEBUGGER_WORKER,
2533                    debug_worker_defs::DebuggerParameters {
2534                        listener,
2535                        req_chan: req_tx,
2536                        vp_count: vm_config.processor_topology.proc_count,
2537                        target_arch: if cfg!(guest_arch = "x86_64") {
2538                            debug_worker_defs::TargetArch::X86_64
2539                        } else {
2540                            debug_worker_defs::TargetArch::Aarch64
2541                        },
2542                    },
2543                )
2544                .await
2545                .context("failed to launch gdbstub worker")?,
2546        )
2547    } else {
2548        None
2549    };
2550
2551    // spin up the VM
2552    let (vm_rpc, rpc_recv) = mesh::channel();
2553    let (notify_send, notify_recv) = mesh::channel();
2554    let vm_worker = {
2555        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2556
2557        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2558            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2559            (Some(fd), Some(state_msg))
2560        } else {
2561            let shared_memory = opt
2562                .memory_backing_file()
2563                .map(|path| {
2564                    openvmm_helpers::shared_memory::open_memory_backing_file(
2565                        path,
2566                        opt.memory_size(),
2567                    )
2568                })
2569                .transpose()?;
2570            (shared_memory, None)
2571        };
2572
2573        let params = VmWorkerParameters {
2574            hypervisor: match &opt.hypervisor {
2575                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2576                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2577            },
2578            cfg: vm_config,
2579            saved_state,
2580            shared_memory,
2581            rpc: rpc_recv,
2582            notify: notify_send,
2583        };
2584        vm_host
2585            .launch_worker(VM_WORKER, params)
2586            .await
2587            .context("failed to launch vm worker")?
2588    };
2589
2590    if opt.restore_snapshot.is_some() {
2591        tracing::info!("restoring VM from snapshot");
2592    }
2593
2594    if !opt.paused {
2595        vm_rpc.call(VmRpc::Resume, ()).await?;
2596    }
2597
2598    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2599        driver.clone(),
2600        DiagDialer {
2601            driver: driver.clone(),
2602            vm_rpc: vm_rpc.clone(),
2603            openhcl_vtl: if opt.vtl2 {
2604                DeviceVtl::Vtl2
2605            } else {
2606                DeviceVtl::Vtl0
2607            },
2608        },
2609    ));
2610
2611    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2612
2613    // Create channels between the REPL and VmController.
2614    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2615    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2616
2617    let has_vtl2 = resources.vtl2_settings.is_some();
2618
2619    // Build the VmController with exclusive resources.
2620    let controller = vm_controller::VmController {
2621        mesh: mesh_slot.take().unwrap(),
2622        vm_worker,
2623        vnc_worker,
2624        gdb_worker,
2625        diag_inspector: Some(diag_inspector),
2626        vtl2_settings: resources.vtl2_settings,
2627        ged_rpc: resources.ged_rpc.clone(),
2628        vm_rpc: vm_rpc.clone(),
2629        paravisor_diag: Some(paravisor_diag),
2630        igvm_path: opt.igvm.clone(),
2631        memory_backing_file: opt.memory_backing_file().cloned(),
2632        memory: opt.memory_size(),
2633        processors: opt.processors,
2634        log_file: opt.log_file.clone(),
2635    };
2636
2637    // Spawn the VmController as a task.
2638    let controller_task = driver.spawn(
2639        "vm-controller",
2640        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2641    );
2642
2643    // Run the REPL with shareable resources.
2644    let repl_result = repl::run_repl(
2645        driver,
2646        repl::ReplResources {
2647            vm_rpc,
2648            vm_controller: vm_controller_send,
2649            vm_controller_events: vm_controller_event_recv,
2650            scsi_rpc: resources.scsi_rpc,
2651            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2652            shutdown_ic: resources.shutdown_ic,
2653            kvp_ic: resources.kvp_ic,
2654            console_in: resources.console_in,
2655            has_vtl2,
2656        },
2657    )
2658    .await;
2659
2660    // Wait for the controller task to finish (it stops the VM worker and
2661    // shuts down the mesh).
2662    controller_task.await;
2663
2664    repl_result
2665}
2666
2667struct DiagDialer {
2668    driver: DefaultDriver,
2669    vm_rpc: mesh::Sender<VmRpc>,
2670    openhcl_vtl: DeviceVtl,
2671}
2672
2673impl mesh_rpc::client::Dial for DiagDialer {
2674    type Stream = PolledSocket<unix_socket::UnixStream>;
2675
2676    async fn dial(&mut self) -> io::Result<Self::Stream> {
2677        let service_id = new_hvsock_service_id(1);
2678        let socket = self
2679            .vm_rpc
2680            .call_failable(
2681                VmRpc::ConnectHvsock,
2682                (
2683                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2684                    service_id,
2685                    self.openhcl_vtl,
2686                ),
2687            )
2688            .await
2689            .map_err(io::Error::other)?;
2690
2691        PolledSocket::new(&self.driver, socket)
2692    }
2693}
2694
2695/// An object that implements [`InspectMut`] by sending an inspect request over
2696/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
2697/// the response back into the inspect tree.
2698///
2699/// This also caches the TTRPC connection to the guest so that only the first
2700/// inspect request has to wait for the connection to be established.
2701pub(crate) struct DiagInspector(DiagInspectorInner);
2702
2703enum DiagInspectorInner {
2704    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
2705    Started {
2706        send: mesh::Sender<inspect::Deferred>,
2707        _task: Task<()>,
2708    },
2709    Invalid,
2710}
2711
2712impl DiagInspector {
2713    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2714        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2715    }
2716
2717    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2718        loop {
2719            match self.0 {
2720                DiagInspectorInner::NotStarted { .. } => {
2721                    let DiagInspectorInner::NotStarted(driver, client) =
2722                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2723                    else {
2724                        unreachable!()
2725                    };
2726                    let (send, recv) = mesh::channel();
2727                    let task = driver.clone().spawn("diag-inspect", async move {
2728                        Self::run(&client, recv).await
2729                    });
2730
2731                    self.0 = DiagInspectorInner::Started { send, _task: task };
2732                }
2733                DiagInspectorInner::Started { ref send, .. } => break send,
2734                DiagInspectorInner::Invalid => unreachable!(),
2735            }
2736        }
2737    }
2738
2739    async fn run(
2740        diag_client: &diag_client::DiagClient,
2741        mut recv: mesh::Receiver<inspect::Deferred>,
2742    ) {
2743        while let Some(deferred) = recv.next().await {
2744            let info = deferred.external_request();
2745            let result = match info.request_type {
2746                inspect::ExternalRequestType::Inspect { depth } => {
2747                    if depth == 0 {
2748                        Ok(inspect::Node::Unevaluated)
2749                    } else {
2750                        // TODO: Support taking timeouts from the command line
2751                        diag_client
2752                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2753                            .await
2754                    }
2755                }
2756                inspect::ExternalRequestType::Update { value } => {
2757                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2758                }
2759            };
2760            deferred.complete_external(
2761                result.unwrap_or_else(|err| {
2762                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2763                }),
2764                inspect::SensitivityLevel::Unspecified,
2765            )
2766        }
2767    }
2768}
2769
2770impl InspectMut for DiagInspector {
2771    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2772        self.start().send(req.defer());
2773    }
2774}