Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use cxl_spec::test::CxlTestDeviceHandle;
42use disk_backend_resources::DelayDiskHandle;
43use disk_backend_resources::DiskLayerDescription;
44use disk_backend_resources::layer::DiskLayerHandle;
45use disk_backend_resources::layer::RamDiskLayerHandle;
46use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
47use disk_backend_resources::layer::SqliteDiskLayerHandle;
48use floppy_resources::FloppyDiskConfig;
49use framebuffer::FRAMEBUFFER_SIZE;
50use framebuffer::FramebufferAccess;
51use futures::AsyncReadExt;
52use futures::AsyncWrite;
53use futures::StreamExt;
54use futures::executor::block_on;
55use futures::io::AllowStdIo;
56use gdma_resources::GdmaDeviceHandle;
57use gdma_resources::VportDefinition;
58use guid::Guid;
59use input_core::MultiplexedInputHandle;
60use inspect::InspectMut;
61use io::Read;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
70use openvmm_defs::config::DeviceVtl;
71use openvmm_defs::config::EfiDiagnosticsLogLevelType;
72use openvmm_defs::config::HypervisorConfig;
73use openvmm_defs::config::LateMapVtl0MemoryPolicy;
74use openvmm_defs::config::LoadMode;
75use openvmm_defs::config::MemoryConfig;
76use openvmm_defs::config::PcieDeviceConfig;
77use openvmm_defs::config::PcieMmioRangeConfig;
78use openvmm_defs::config::PcieRootComplexConfig;
79use openvmm_defs::config::PcieRootPortConfig;
80use openvmm_defs::config::PcieSwitchConfig;
81use openvmm_defs::config::ProcessorTopologyConfig;
82use openvmm_defs::config::RootComplexCxlConfig;
83use openvmm_defs::config::SerialInformation;
84use openvmm_defs::config::VirtioBus;
85use openvmm_defs::config::VmbusConfig;
86use openvmm_defs::config::VpciDeviceConfig;
87use openvmm_defs::config::Vtl2Config;
88use openvmm_defs::rpc::VmRpc;
89use openvmm_defs::worker::VM_WORKER;
90use openvmm_defs::worker::VmWorkerParameters;
91use openvmm_helpers::disk::OpenDiskOptions;
92use openvmm_helpers::disk::create_disk_type;
93use openvmm_helpers::disk::open_disk_type;
94use pal_async::DefaultDriver;
95use pal_async::DefaultPool;
96use pal_async::socket::PolledSocket;
97use pal_async::task::Spawn;
98use pal_async::task::Task;
99use serial_16550_resources::ComPort;
100use serial_core::resources::DisconnectedSerialBackendHandle;
101use sparse_mmap::alloc_shared_memory;
102use std::cell::RefCell;
103use std::collections::BTreeMap;
104use std::fmt::Write as _;
105use std::future::pending;
106use std::io;
107#[cfg(unix)]
108use std::io::IsTerminal;
109use std::io::Write;
110use std::net::TcpListener;
111use std::path::Path;
112use std::path::PathBuf;
113use std::sync::Arc;
114use std::thread;
115use std::time::Duration;
116use storvsp_resources::ScsiControllerRequest;
117use tpm_resources::TpmDeviceHandle;
118use tpm_resources::TpmRegisterLayout;
119use uidevices_resources::SynthKeyboardHandle;
120use uidevices_resources::SynthMouseHandle;
121use uidevices_resources::SynthVideoHandle;
122use video_core::SharedFramebufferHandle;
123use virtio_resources::VirtioPciDeviceHandle;
124use vm_manifest_builder::BaseChipsetType;
125use vm_manifest_builder::MachineArch;
126use vm_manifest_builder::VmChipsetResult;
127use vm_manifest_builder::VmManifestBuilder;
128use vm_resource::IntoResource;
129use vm_resource::Resource;
130use vm_resource::kind::DiskHandleKind;
131use vm_resource::kind::DiskLayerHandleKind;
132use vm_resource::kind::NetEndpointHandleKind;
133use vm_resource::kind::VirtioDeviceHandle;
134use vm_resource::kind::VmbusDeviceHandleKind;
135use vmbus_serial_resources::VmbusSerialDeviceHandle;
136use vmbus_serial_resources::VmbusSerialPort;
137use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
138use vmgs_resources::GuestStateEncryptionPolicy;
139use vmgs_resources::VmgsDisk;
140use vmgs_resources::VmgsFileHandle;
141use vmgs_resources::VmgsResource;
142use vmotherboard::ChipsetDeviceHandle;
143use vnc_worker_defs::VncParameters;
144
145/// RAII guard that removes the pidfile when dropped. Ensures the pidfile is
146/// cleaned up even if [`do_main`] panics.
147struct PidfileGuard(Option<PathBuf>);
148
149impl Drop for PidfileGuard {
150    fn drop(&mut self) {
151        if let Some(path) = &self.0 {
152            let _ = fs_err::remove_file(path);
153        }
154    }
155}
156
157pub fn openvmm_main() {
158    // Save the current state of the terminal so we can restore it back to
159    // normal before exiting.
160    #[cfg(unix)]
161    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
162
163    let mut pidfile_guard = PidfileGuard(None);
164    let exit_code = match do_main(&mut pidfile_guard.0) {
165        Ok(_) => 0,
166        Err(err) => {
167            eprintln!("fatal error: {:?}", err);
168            1
169        }
170    };
171
172    // Restore the terminal to its initial state.
173    #[cfg(unix)]
174    if let Some(orig_termios) = orig_termios {
175        term::set_termios(orig_termios);
176    }
177
178    // Clean up the pidfile before terminating, since pal::process::terminate
179    // skips destructors.
180    drop(pidfile_guard);
181
182    // Terminate the process immediately without graceful shutdown of DLLs or
183    // C++ destructors or anything like that. This is all unnecessary and saves
184    // time on Windows.
185    //
186    // Do flush stdout, though, since there may be buffered data.
187    let _ = io::stdout().flush();
188    pal::process::terminate(exit_code);
189}
190
/// Side-channel resources produced while translating the command line into a
/// VM [`Config`].
///
/// `vm_config_from_command_line` returns one of these next to the config. All
/// fields start out empty (`Default`) and are filled in only for the features
/// that were requested.
#[derive(Default)]
struct VmResources {
    /// Write half of the serial connection that was selected as the
    /// interactive console, if any.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Framebuffer access handle.
    /// NOTE(review): not populated in the portion of the file reviewed here;
    /// confirm where it is set and consumed.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` requests (presumably to the shutdown
    /// integration component; confirm at the creation site).
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` requests (presumably to the KVP integration
    /// component; confirm at the creation site).
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest` messages.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest` messages; the name suggests the
    /// controller offered to VTL2 (TODO confirm).
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for `GuestEmulationRequest` messages.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings document, when one was built.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Receives dirty rectangles from the synthetic video device for the VNC worker.
    dirty_rect_recv: Option<mesh::Receiver<Vec<video_core::DirtyRect>>>,
    /// Switch ports created for each `kernel_vmnic` option (Windows only).
    /// NOTE(review): presumably held here so the ports outlive configuration;
    /// confirm.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
206
/// Records which serial device has claimed the interactive console.
///
/// The `setup_serial` closure stores one of these the first time a port is
/// configured as `SerialConfigCli::Console` and fails if a second port asks
/// for the console afterwards.
struct ConsoleState<'a> {
    /// Device name supplied for the port that owns the console (for example
    /// `ttyS0` or `hvc0`); reported in the "console already set" error.
    device: &'a str,
    /// Write half of the console's serial connection.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
211
212/// Build a flat list of switches with their parent port assignments.
213///
214/// This function converts hierarchical CLI switch definitions into a flat list
215/// where each switch specifies its parent port directly.
216fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
217    all_switches
218        .iter()
219        .map(|switch_cli| PcieSwitchConfig {
220            name: switch_cli.name.clone(),
221            num_downstream_ports: switch_cli.num_downstream_ports,
222            parent_port: switch_cli.port_name.clone(),
223            hotplug: switch_cli.hotplug,
224            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
225        })
226        .collect()
227}
228
229async fn vm_config_from_command_line(
230    spawner: impl Spawn,
231    mesh: &VmmMesh,
232    opt: &Options,
233) -> anyhow::Result<(Config, VmResources)> {
234    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
235    // Ensure the serial driver stays alive with no tasks.
236    serial_driver.spawn("leak", pending::<()>()).detach();
237
238    let openhcl_vtl = if opt.vtl2 {
239        DeviceVtl::Vtl2
240    } else {
241        DeviceVtl::Vtl0
242    };
243
244    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
245    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
246        Ok(match cli_cfg {
247            SerialConfigCli::Console => {
248                if let Some(console_state) = console_state.borrow().as_ref() {
249                    bail!("console already set by {}", console_state.device);
250                }
251                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
252                let (serial_read, serial_write) = AsyncReadExt::split(serial);
253                *console_state.borrow_mut() = Some(ConsoleState {
254                    device,
255                    input: Box::new(serial_write),
256                });
257                thread::Builder::new()
258                    .name(name.to_owned())
259                    .spawn(move || {
260                        let _ = block_on(futures::io::copy(
261                            serial_read,
262                            &mut AllowStdIo::new(term::raw_stdout()),
263                        ));
264                    })
265                    .unwrap();
266                Some(config)
267            }
268            SerialConfigCli::Stderr => {
269                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
270                thread::Builder::new()
271                    .name(name.to_owned())
272                    .spawn(move || {
273                        let _ = block_on(futures::io::copy(
274                            serial,
275                            &mut AllowStdIo::new(term::raw_stderr()),
276                        ));
277                    })
278                    .unwrap();
279                Some(config)
280            }
281            SerialConfigCli::File(path) => {
282                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
283                let file = fs_err::File::create(path).context("failed to create file")?;
284
285                thread::Builder::new()
286                    .name(name.to_owned())
287                    .spawn(move || {
288                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
289                    })
290                    .unwrap();
291                Some(config)
292            }
293            SerialConfigCli::None => None,
294            SerialConfigCli::Pipe(path) => {
295                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
296            }
297            SerialConfigCli::Tcp(addr) => {
298                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
299            }
300            SerialConfigCli::NewConsole(app, window_title) => {
301                let path = console_relay::random_console_path();
302                let config =
303                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
304                let window_title =
305                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
306
307                console_relay::launch_console(
308                    app.or_else(openvmm_terminal_app).as_deref(),
309                    &path,
310                    ConsoleLaunchOptions {
311                        window_title: Some(window_title),
312                    },
313                )
314                .context("failed to launch console")?;
315
316                Some(config)
317            }
318        })
319    };
320
321    let mut vmbus_devices = Vec::new();
322
323    let serial0_cfg = setup_serial(
324        "com1",
325        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
326        if cfg!(guest_arch = "x86_64") {
327            "ttyS0"
328        } else {
329            "ttyAMA0"
330        },
331    )?;
332    let serial1_cfg = setup_serial(
333        "com2",
334        opt.com2.clone().unwrap_or(SerialConfigCli::None),
335        if cfg!(guest_arch = "x86_64") {
336            "ttyS1"
337        } else {
338            "ttyAMA1"
339        },
340    )?;
341    let serial2_cfg = setup_serial(
342        "com3",
343        opt.com3.clone().unwrap_or(SerialConfigCli::None),
344        if cfg!(guest_arch = "x86_64") {
345            "ttyS2"
346        } else {
347            "ttyAMA2"
348        },
349    )?;
350    let serial3_cfg = setup_serial(
351        "com4",
352        opt.com4.clone().unwrap_or(SerialConfigCli::None),
353        if cfg!(guest_arch = "x86_64") {
354            "ttyS3"
355        } else {
356            "ttyAMA3"
357        },
358    )?;
359    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
360        "vmbus_com1",
361        opt.vmbus_com1_serial
362            .clone()
363            .unwrap_or(SerialConfigCli::None),
364        "vmbus_com1",
365    )? {
366        vmbus_devices.push((
367            openhcl_vtl,
368            VmbusSerialDeviceHandle {
369                port: VmbusSerialPort::Com1,
370                backend: vmbus_com1_cfg,
371            }
372            .into_resource(),
373        ));
374        true
375    } else {
376        false
377    };
378    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
379        "vmbus_com2",
380        opt.vmbus_com2_serial
381            .clone()
382            .unwrap_or(SerialConfigCli::None),
383        "vmbus_com2",
384    )? {
385        vmbus_devices.push((
386            openhcl_vtl,
387            VmbusSerialDeviceHandle {
388                port: VmbusSerialPort::Com2,
389                backend: vmbus_com2_cfg,
390            }
391            .into_resource(),
392        ));
393        true
394    } else {
395        false
396    };
397    let debugcon_cfg = setup_serial(
398        "debugcon",
399        opt.debugcon
400            .clone()
401            .map(|cfg| cfg.serial)
402            .unwrap_or(SerialConfigCli::None),
403        "debugcon",
404    )?;
405
406    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
407        setup_serial("virtio-console", serial_cfg, "hvc0")?
408    } else {
409        None
410    };
411
412    let mut resources = VmResources::default();
413    let mut console_str = "";
414    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
415        resources.console_in = Some(input);
416        console_str = device;
417    }
418
419    if opt.shared_memory {
420        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
421    }
422    if opt.deprecated_prefetch {
423        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
424    }
425    if opt.deprecated_private_memory {
426        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
427    }
428    if opt.deprecated_thp {
429        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
430    }
431    if opt.deprecated_memory_backing_file.is_some() {
432        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
433    }
434
435    opt.validate_memory_options()?;
436
437    const MAX_PROCESSOR_COUNT: u32 = 1024;
438
439    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
440        bail!("invalid proc count: {}", opt.processors);
441    }
442
443    // Total SCSI channel count should not exceed the processor count
444    // (at most, one channel per VP).
445    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
446        bail!(
447            "invalid SCSI sub-channel count: requested {}, max {}",
448            opt.scsi_sub_channels,
449            MAX_PROCESSOR_COUNT - 1
450        );
451    }
452
453    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
454
455    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
456    for &cli_args::DiskCli {
457        vtl,
458        ref kind,
459        read_only,
460        is_dvd,
461        underhill,
462        ref pcie_port,
463    } in &opt.disk
464    {
465        if pcie_port.is_some() {
466            anyhow::bail!("`--disk` is incompatible with PCIe");
467        }
468
469        storage
470            .add(
471                vtl,
472                underhill,
473                storage_builder::DiskLocation::Scsi(None),
474                kind,
475                is_dvd,
476                read_only,
477            )
478            .await?;
479    }
480
481    for &cli_args::IdeDiskCli {
482        ref kind,
483        read_only,
484        channel,
485        device,
486        is_dvd,
487    } in &opt.ide
488    {
489        storage
490            .add(
491                DeviceVtl::Vtl0,
492                None,
493                storage_builder::DiskLocation::Ide(channel, device),
494                kind,
495                is_dvd,
496                read_only,
497            )
498            .await?;
499    }
500
501    for &cli_args::DiskCli {
502        vtl,
503        ref kind,
504        read_only,
505        is_dvd,
506        underhill,
507        ref pcie_port,
508    } in &opt.nvme
509    {
510        storage
511            .add(
512                vtl,
513                underhill,
514                storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
515                kind,
516                is_dvd,
517                read_only,
518            )
519            .await?;
520    }
521
522    for &cli_args::DiskCli {
523        vtl,
524        ref kind,
525        read_only,
526        is_dvd,
527        ref underhill,
528        ref pcie_port,
529    } in &opt.virtio_blk
530    {
531        if underhill.is_some() {
532            anyhow::bail!("underhill not supported with virtio-blk");
533        }
534        storage
535            .add(
536                vtl,
537                None,
538                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
539                kind,
540                is_dvd,
541                read_only,
542            )
543            .await?;
544    }
545
546    let mut floppy_disks = Vec::new();
547    for disk in &opt.floppy {
548        let &cli_args::FloppyDiskCli {
549            ref kind,
550            read_only,
551        } = disk;
552        floppy_disks.push(FloppyDiskConfig {
553            disk_type: disk_open(kind, read_only).await?,
554            read_only,
555        });
556    }
557
558    let mut vpci_mana_nics = [(); 3].map(|()| None);
559    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
560    let mut underhill_nics = Vec::new();
561    let mut vpci_devices = Vec::new();
562
563    let mut nic_index = 0;
564    for cli_cfg in &opt.net {
565        if cli_cfg.pcie_port.is_some() {
566            anyhow::bail!("`--net` does not support PCIe");
567        }
568        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
569        if cli_cfg.underhill {
570            if !opt.no_alias_map {
571                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
572            }
573            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
574                let vpci_instance_id = Guid::new_random();
575                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
576                    instance_id: vpci_instance_id.to_string(),
577                    subordinate_instance_id: None,
578                    max_sub_channels: None,
579                });
580                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
581            });
582            mana.1.vports.push(VportDefinition {
583                mac_address: vport.mac_address,
584                endpoint: vport.endpoint,
585            });
586        } else {
587            vmbus_devices.push(vport.into_netvsp_handle());
588        }
589    }
590
591    if opt.nic {
592        let nic_config = parse_endpoint(
593            &NicConfigCli {
594                vtl: DeviceVtl::Vtl0,
595                endpoint: EndpointConfigCli::Consomme {
596                    cidr: None,
597                    host_fwd: Vec::new(),
598                },
599                max_queues: None,
600                underhill: false,
601                pcie_port: None,
602            },
603            &mut nic_index,
604            &mut resources,
605        )?;
606        vmbus_devices.push(nic_config.into_netvsp_handle());
607    }
608
609    // Build initial PCIe devices list from CLI options. Storage devices
610    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
611    let mut pcie_devices = Vec::new();
612    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
613        tracing::info!(
614            port_name = %cli_cfg.port_name,
615            socket_addr = ?cli_cfg.socket_addr,
616            "instantiating PCIe remote device"
617        );
618
619        // Generate a deterministic instance ID based on index
620        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
621            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
622        let instance_id = Guid {
623            data1: index as u32,
624            ..PCIE_REMOTE_BASE_INSTANCE_ID
625        };
626
627        pcie_devices.push(PcieDeviceConfig {
628            port_name: cli_cfg.port_name.clone(),
629            resource: pcie_remote_resources::PcieRemoteHandle {
630                instance_id,
631                socket_addr: cli_cfg.socket_addr.clone(),
632                hu: cli_cfg.hu,
633                controller: cli_cfg.controller,
634            }
635            .into_resource(),
636        });
637    }
638
639    #[cfg(windows)]
640    let mut kernel_vmnics = Vec::new();
641    #[cfg(windows)]
642    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
643        // Pick a random MAC address.
644        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
645        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
646
647        // Pick a fixed instance ID based on the index.
648        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
649        let instance_id = Guid {
650            data1: index as u32,
651            ..BASE_INSTANCE_ID
652        };
653
654        let switch_id = if switch_id == "default" {
655            DEFAULT_SWITCH
656        } else {
657            switch_id
658        };
659        let (port_id, port) = new_switch_port(switch_id)?;
660        resources.switch_ports.push(port);
661
662        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
663            instance_id,
664            mac_address: mac_address.into(),
665            switch_port_id: port_id,
666        });
667    }
668
669    for vport in &opt.mana {
670        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
671        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
672            (vtl, None) => {
673                &mut vpci_mana_nics[vtl]
674                    .get_or_insert_with(|| {
675                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
676                    })
677                    .1
678                    .vports
679            }
680            (0, Some(pcie_port)) => {
681                &mut pcie_mana_nics
682                    .entry(pcie_port)
683                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
684                    .vports
685            }
686            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
687        };
688        vport_array.push(VportDefinition {
689            mac_address: vport.mac_address,
690            endpoint: vport.endpoint,
691        });
692    }
693
694    vpci_devices.extend(
695        vpci_mana_nics
696            .into_iter()
697            .enumerate()
698            .filter_map(|(vtl, nic)| {
699                nic.map(|(instance_id, handle)| VpciDeviceConfig {
700                    vtl: match vtl {
701                        0 => DeviceVtl::Vtl0,
702                        1 => DeviceVtl::Vtl1,
703                        2 => DeviceVtl::Vtl2,
704                        _ => unreachable!(),
705                    },
706                    instance_id,
707                    resource: handle.into_resource(),
708                })
709            }),
710    );
711
712    pcie_devices.extend(
713        pcie_mana_nics
714            .into_iter()
715            .map(|(pcie_port, handle)| PcieDeviceConfig {
716                port_name: pcie_port,
717                resource: handle.into_resource(),
718            }),
719    );
720
721    for cxl_test in &opt.cxl_test {
722        pcie_devices.push(PcieDeviceConfig {
723            port_name: cxl_test.pcie_port.clone(),
724            resource: CxlTestDeviceHandle {
725                hdm_size_bytes: cxl_test.hdm_size,
726            }
727            .into_resource(),
728        });
729    }
730
731    #[cfg(guest_arch = "aarch64")]
732    let arch = MachineArch::Aarch64;
733    #[cfg(guest_arch = "x86_64")]
734    let arch = MachineArch::X86_64;
735
736    let mut pcie_root_complexes = Vec::new();
737    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
738        let ports: Vec<PcieRootPortConfig> = opt
739            .pcie_root_port
740            .iter()
741            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
742            .map(|port_cli| PcieRootPortConfig {
743                name: port_cli.name.clone(),
744                hotplug: port_cli.hotplug,
745                acs_capabilities_supported: port_cli.acs_capabilities_supported,
746                cxl: port_cli.cxl,
747            })
748            .collect();
749
750        const ONE_MB: u64 = 1024 * 1024;
751        // Keep all PCI windows 1MB-granular to match layout and downstream placement rules.
752        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
753        let high_mmio_size = rc_cli
754            .high_mmio
755            .checked_next_multiple_of(ONE_MB)
756            .context("high mmio rounding error")?;
757
758        // Count CXL-capable ports under the root bus. If the root bus has CXL root ports, it needs CHBCR.
759        let cxl_port_count = ports.iter().filter(|port| port.cxl).count() as u64;
760
761        let cxl = if cxl_port_count != 0 {
762            Some(RootComplexCxlConfig {
763                hdm_size: rc_cli.hdm,
764                hdm_window_restrictions: rc_cli.hdm_window_restrictions.bits(),
765            })
766        } else {
767            None
768        };
769        pcie_root_complexes.push(PcieRootComplexConfig {
770            index: i as u32,
771            name: rc_cli.name.clone(),
772            segment: rc_cli.segment,
773            start_bus: rc_cli.start_bus,
774            end_bus: rc_cli.end_bus,
775            low_mmio: PcieMmioRangeConfig::Dynamic {
776                size: low_mmio_size,
777            },
778            high_mmio: PcieMmioRangeConfig::Dynamic {
779                size: high_mmio_size,
780            },
781            cxl,
782            ports,
783        });
784    }
785
786    let pcie_switches = build_switch_list(&opt.pcie_switch);
787
788    #[cfg(target_os = "linux")]
789    let vfio_pcie_devices: Vec<PcieDeviceConfig> = {
790        use std::collections::HashMap;
791        use vm_resource::IntoResource;
792
793        // Process --iommu flags: open /dev/iommu for each declared context.
794        let mut iommu_map: HashMap<String, std::fs::File> = HashMap::new();
795        for iommu_cli in &opt.iommu {
796            anyhow::ensure!(
797                !iommu_map.contains_key(&iommu_cli.id),
798                "duplicate --iommu id={}",
799                iommu_cli.id
800            );
801            let file = std::fs::OpenOptions::new()
802                .read(true)
803                .write(true)
804                .open("/dev/iommu")
805                .context("failed to open /dev/iommu (is iommufd available?)")?;
806            iommu_map.insert(iommu_cli.id.clone(), file);
807        }
808
809        opt.vfio
810            .iter()
811            .map(|cli_cfg| {
812                let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
813
814                if let Some(iommu_id) = &cli_cfg.iommu {
815                    // cdev + iommufd path
816                    let iommufd = iommu_map.get(iommu_id).with_context(|| {
817                        format!(
818                            "--vfio device {} references iommu={iommu_id}, \
819                             but no --iommu id={iommu_id} was specified",
820                            cli_cfg.pci_id
821                        )
822                    })?;
823                    // Clone the iommufd fd so the per-iommu manager can own it.
824                    // The first device for a given iommu ID uses the cloned fd
825                    // to create the IoasManager; subsequent devices reuse the
826                    // existing manager and the cloned fd is dropped.
827                    let iommufd = iommufd.try_clone().with_context(|| {
828                        format!("failed to dup iommufd fd for iommu={iommu_id}")
829                    })?;
830
831                    // Open the cdev device node.
832                    let vfio_dev_dir = sysfs_path.join("vfio-dev");
833                    let entry = std::fs::read_dir(&vfio_dev_dir)
834                        .with_context(|| {
835                            format!(
836                                "failed to read {}: is {} bound to vfio-pci?",
837                                vfio_dev_dir.display(),
838                                cli_cfg.pci_id
839                            )
840                        })?
841                        .next()
842                        .context("no vfio-dev entry found")?
843                        .context("failed to read vfio-dev entry")?;
844                    let dev_path = Path::new("/dev/vfio/devices").join(entry.file_name());
845                    let cdev = std::fs::OpenOptions::new()
846                        .read(true)
847                        .write(true)
848                        .open(&dev_path)
849                        .with_context(|| format!("failed to open {}", dev_path.display()))?;
850
851                    Ok(PcieDeviceConfig {
852                        port_name: cli_cfg.port_name.clone(),
853                        resource: vfio_assigned_device_resources::VfioCdevDeviceHandle {
854                            pci_id: cli_cfg.pci_id.clone(),
855                            cdev,
856                            iommufd,
857                            iommu_id: iommu_id.clone(),
858                        }
859                        .into_resource(),
860                    })
861                } else {
862                    // Legacy group/container path
863                    let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
864                        .with_context(|| {
865                            format!("failed to read IOMMU group for {}", cli_cfg.pci_id)
866                        })?;
867                    let group_id: u64 = iommu_group_link
868                        .file_name()
869                        .and_then(|s| s.to_str())
870                        .context("invalid iommu_group symlink")?
871                        .parse()
872                        .context("failed to parse IOMMU group ID")?;
873                    let group = std::fs::OpenOptions::new()
874                        .read(true)
875                        .write(true)
876                        .open(format!("/dev/vfio/{group_id}"))
877                        .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
878
879                    Ok(PcieDeviceConfig {
880                        port_name: cli_cfg.port_name.clone(),
881                        resource: vfio_assigned_device_resources::VfioDeviceHandle {
882                            pci_id: cli_cfg.pci_id.clone(),
883                            group,
884                        }
885                        .into_resource(),
886                    })
887                }
888            })
889            .collect::<anyhow::Result<Vec<_>>>()?
890    };
891
892    #[cfg(windows)]
893    let vpci_resources: Vec<_> = opt
894        .device
895        .iter()
896        .map(|path| -> anyhow::Result<_> {
897            Ok(virt_whp::device::DeviceHandle(
898                whp::VpciResource::new(
899                    None,
900                    Default::default(),
901                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
902                )
903                .with_context(|| format!("opening PCI device {}", path))?,
904            ))
905        })
906        .collect::<Result<_, _>>()?;
907
908    // Create a vmbusproxy handle if needed by any devices.
909    #[cfg(windows)]
910    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
911        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
912    } else {
913        None
914    };
915
916    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc.vnc || opt.pcat {
917        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
918        let (fb, fba) =
919            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
920        resources.framebuffer_access = Some(fba);
921        Some(fb)
922    } else {
923        None
924    };
925
926    let load_mode;
927    let with_hv;
928
929    let any_serial_configured = serial0_cfg.is_some()
930        || serial1_cfg.is_some()
931        || serial2_cfg.is_some()
932        || serial3_cfg.is_some();
933
934    let has_com3 = serial2_cfg.is_some();
935
936    let mut chipset = VmManifestBuilder::new(
937        if opt.igvm.is_some() {
938            BaseChipsetType::HclHost
939        } else if opt.pcat {
940            BaseChipsetType::HypervGen1
941        } else if opt.uefi {
942            BaseChipsetType::HypervGen2Uefi
943        } else if opt.hv {
944            BaseChipsetType::HyperVGen2LinuxDirect
945        } else {
946            BaseChipsetType::UnenlightenedLinuxDirect
947        },
948        arch,
949    );
950
951    if framebuffer.is_some() {
952        chipset = chipset.with_framebuffer();
953    }
954    if opt.guest_watchdog {
955        chipset = chipset.with_guest_watchdog();
956    }
957    if any_serial_configured {
958        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
959    }
960    if opt.battery {
961        let (tx, rx) = mesh::channel();
962        tx.send(HostBatteryUpdate::default_present());
963        chipset = chipset.with_battery(rx);
964    }
965    if opt.no_vmbus {
966        chipset = chipset.without_vmbus();
967    }
968    if let Some(cfg) = &opt.debugcon {
969        chipset = chipset.with_debugcon(
970            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
971            cfg.port,
972        );
973    }
974
975    let custom_uefi_vars = {
976        use firmware_uefi_custom_vars::CustomVars;
977
978        // load base vars from specified template, or use an empty set of base
979        // vars if none was specified.
980        let base_vars = match opt.secure_boot_template {
981            Some(template) => match (arch, template) {
982                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
983                    hyperv_secure_boot_templates::x64::microsoft_windows()
984                }
985                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
986                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
987                }
988                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
989                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
990                }
991                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
992                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
993                }
994            },
995            None => CustomVars::default(),
996        };
997
998        // TODO: fall back to VMGS read if no command line flag was given
999
1000        let custom_uefi_json_data = match &opt.custom_uefi_json {
1001            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1002            None => None,
1003        };
1004
1005        // obtain the final custom uefi vars by applying the delta onto the base vars
1006        match custom_uefi_json_data {
1007            Some(data) => {
1008                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1009                base_vars.apply_delta(delta)?
1010            }
1011            None => base_vars,
1012        }
1013    };
1014
1015    let efi_diagnostics_log_level = match opt.efi_diagnostics_log_level.unwrap_or_default() {
1016        EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1017        EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1018        EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1019    };
1020
1021    if opt.uefi {
1022        let log_level = match efi_diagnostics_log_level {
1023            EfiDiagnosticsLogLevelType::Default => {
1024                firmware_uefi_resources::LogLevel::make_default()
1025            }
1026            EfiDiagnosticsLogLevelType::Info => firmware_uefi_resources::LogLevel::make_info(),
1027            EfiDiagnosticsLogLevelType::Full => firmware_uefi_resources::LogLevel::make_full(),
1028        };
1029        let nvram_storage = if opt.vmgs.is_some() {
1030            VmgsFileHandle::new(vmgs_format::FileId::BIOS_NVRAM, true).into_resource()
1031        } else {
1032            EphemeralNonVolatileStoreHandle.into_resource()
1033        };
1034        chipset = chipset.with_uefi(vm_manifest_builder::UefiManifest::new(
1035            arch,
1036            custom_uefi_vars.clone(),
1037            opt.secure_boot,
1038            log_level,
1039            nvram_storage,
1040            None,
1041        ));
1042    }
1043
1044    // TODO: load from VMGS file if it exists
1045    let bios_guid = Guid::new_random();
1046
1047    let layout_config = chipset.layout_config();
1048    let VmChipsetResult {
1049        chipset,
1050        mut chipset_devices,
1051        pci_chipset_devices,
1052        isa_dma_controller,
1053        capabilities,
1054    } = chipset
1055        .build()
1056        .context("failed to build chipset configuration")?;
1057
1058    if opt.restore_snapshot.is_some() {
1059        // Snapshot restore: skip firmware loading entirely. Device state and
1060        // memory come from the snapshot directory.
1061        load_mode = LoadMode::None;
1062        with_hv = true;
1063    } else if let Some(path) = &opt.igvm {
1064        let file = fs_err::File::open(path)
1065            .context("failed to open igvm file")?
1066            .into();
1067        let cmdline = opt.cmdline.join(" ");
1068        with_hv = true;
1069
1070        load_mode = LoadMode::Igvm {
1071            file,
1072            cmdline,
1073            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1074            com_serial: has_com3.then(|| SerialInformation {
1075                io_port: ComPort::Com3.io_port(),
1076                irq: ComPort::Com3.irq().into(),
1077            }),
1078        };
1079    } else if opt.pcat {
1080        // Emit a nice error early instead of complaining about missing firmware.
1081        if arch != MachineArch::X86_64 {
1082            anyhow::bail!("pcat not supported on this architecture");
1083        }
1084        with_hv = true;
1085
1086        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1087        load_mode = LoadMode::Pcat {
1088            firmware,
1089            boot_order: opt
1090                .pcat_boot_order
1091                .map(|x| x.0)
1092                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1093        };
1094    } else if opt.uefi {
1095        use openvmm_defs::config::UefiConsoleMode;
1096
1097        with_hv = true;
1098
1099        let firmware = fs_err::File::open(
1100            (opt.uefi_firmware.0)
1101                .as_ref()
1102                .context("must provide uefi firmware when booting with uefi")?,
1103        )
1104        .context("failed to open uefi firmware")?;
1105
1106        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1107        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1108        load_mode = LoadMode::Uefi {
1109            firmware: firmware.into(),
1110            enable_debugging: opt.uefi_debug,
1111            enable_memory_protections: opt.uefi_enable_memory_protections,
1112            disable_frontpage: opt.disable_frontpage,
1113            enable_tpm: opt.tpm,
1114            enable_battery: opt.battery,
1115            enable_serial: any_serial_configured,
1116            enable_vpci_boot: false,
1117            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1118                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1119                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1120                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1121                UefiConsoleModeCli::None => UefiConsoleMode::None,
1122            }),
1123            default_boot_always_attempt: opt.default_boot_always_attempt,
1124            bios_guid,
1125            enable_vmbus: !opt.no_vmbus,
1126        };
1127    } else {
1128        // Linux Direct
1129        let mut cmdline = "panic=-1 debug".to_string();
1130
1131        with_hv = opt.hv;
1132        if with_hv && opt.pcie_root_complex.is_empty() {
1133            cmdline += " pci=off";
1134        }
1135
1136        if !console_str.is_empty() {
1137            let _ = write!(&mut cmdline, " console={}", console_str);
1138        }
1139
1140        if opt.gfx {
1141            cmdline += " console=tty";
1142        }
1143        for extra in &opt.cmdline {
1144            let _ = write!(&mut cmdline, " {}", extra);
1145        }
1146
1147        let kernel = fs_err::File::open(
1148            (opt.kernel.0)
1149                .as_ref()
1150                .context("must provide kernel when booting with linux direct")?,
1151        )
1152        .context("failed to open kernel")?;
1153        let initrd = (opt.initrd.0)
1154            .as_ref()
1155            .map(fs_err::File::open)
1156            .transpose()
1157            .context("failed to open initrd")?;
1158
1159        let custom_dsdt = match &opt.custom_dsdt {
1160            Some(path) => {
1161                let mut v = Vec::new();
1162                fs_err::File::open(path)
1163                    .context("failed to open custom dsdt")?
1164                    .read_to_end(&mut v)
1165                    .context("failed to read custom dsdt")?;
1166                Some(v)
1167            }
1168            None => None,
1169        };
1170
1171        load_mode = LoadMode::Linux {
1172            kernel: kernel.into(),
1173            initrd: initrd.map(Into::into),
1174            cmdline,
1175            custom_dsdt,
1176            enable_serial: any_serial_configured,
1177            boot_mode: if opt.device_tree {
1178                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1179            } else {
1180                openvmm_defs::config::LinuxDirectBootMode::Acpi
1181            },
1182        };
1183    }
1184
1185    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1186        let disk = VmgsDisk {
1187            disk: disk_open(kind, false)
1188                .await
1189                .context("failed to open vmgs disk")?,
1190            encryption_policy: if opt.test_gsp_by_id {
1191                GuestStateEncryptionPolicy::GspById(true)
1192            } else {
1193                GuestStateEncryptionPolicy::None(true)
1194            },
1195        };
1196        match provision {
1197            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1198            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1199            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1200        }
1201    } else {
1202        VmgsResource::Ephemeral
1203    });
1204
1205    if with_get && with_hv {
1206        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1207            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1208            fixed: Some(Default::default()),
1209            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1210                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1211                nic_devices: underhill_nics,
1212            }),
1213            namespace_settings: Vec::default(),
1214        };
1215
1216        // Cache the VTL2 settings for later modification via the interactive console.
1217        resources.vtl2_settings = Some(vtl2_settings.clone());
1218
1219        let (send, guest_request_recv) = mesh::channel();
1220        resources.ged_rpc = Some(send);
1221
1222        let vmgs = vmgs.take().unwrap();
1223
1224        vmbus_devices.extend([
1225            (
1226                openhcl_vtl,
1227                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1228            ),
1229            (
1230                openhcl_vtl,
1231                get_resources::ged::GuestEmulationDeviceHandle {
1232                    firmware: if opt.pcat {
1233                        get_resources::ged::GuestFirmwareConfig::Pcat {
1234                            boot_order: opt
1235                                .pcat_boot_order
1236                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1237                                .map(|x| match x {
1238                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1239                                        get_resources::ged::PcatBootDevice::Floppy
1240                                    }
1241                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1242                                        get_resources::ged::PcatBootDevice::HardDrive
1243                                    }
1244                                    openvmm_defs::config::PcatBootDevice::Optical => {
1245                                        get_resources::ged::PcatBootDevice::Optical
1246                                    }
1247                                    openvmm_defs::config::PcatBootDevice::Network => {
1248                                        get_resources::ged::PcatBootDevice::Network
1249                                    }
1250                                }),
1251                        }
1252                    } else {
1253                        use get_resources::ged::UefiConsoleMode;
1254
1255                        get_resources::ged::GuestFirmwareConfig::Uefi {
1256                            enable_vpci_boot: storage.has_vtl0_nvme(),
1257                            firmware_debug: opt.uefi_debug,
1258                            disable_frontpage: opt.disable_frontpage,
1259                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1260                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1261                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1262                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1263                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1264                            },
1265                            default_boot_always_attempt: opt.default_boot_always_attempt,
1266                        }
1267                    },
1268                    com1: with_vmbus_com1_serial,
1269                    com2: with_vmbus_com2_serial,
1270                    serial_tx_only: opt.serial_tx_only,
1271                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1272                    vmbus_redirection: opt.vmbus_redirect,
1273                    vmgs,
1274                    framebuffer: opt
1275                        .vtl2_gfx
1276                        .then(|| SharedFramebufferHandle.into_resource()),
1277                    guest_request_recv,
1278                    enable_tpm: opt.tpm,
1279                    firmware_event_send: None,
1280                    secure_boot_enabled: opt.secure_boot,
1281                    secure_boot_template: match opt.secure_boot_template {
1282                        Some(SecureBootTemplateCli::Windows) => {
1283                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1284                        },
1285                        Some(SecureBootTemplateCli::UefiCa) => {
1286                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1287                        }
1288                        None => {
1289                            get_resources::ged::GuestSecureBootTemplateType::None
1290                        },
1291                    },
1292                    enable_battery: opt.battery,
1293                    no_persistent_secrets: true,
1294                    igvm_attest_test_config: None,
1295                    test_gsp_by_id: opt.test_gsp_by_id,
1296                    efi_diagnostics_log_level: {
1297                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1298                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1299                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1300                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1301                        }
1302                    },
1303                    hv_sint_enabled: false,
1304                }
1305                .into_resource(),
1306            ),
1307        ]);
1308    }
1309
1310    if opt.tpm && !opt.vtl2 {
1311        let register_layout = if cfg!(guest_arch = "x86_64") {
1312            TpmRegisterLayout::IoPort
1313        } else {
1314            TpmRegisterLayout::Mmio
1315        };
1316
1317        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1318            (
1319                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1320                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1321            )
1322        } else {
1323            (
1324                EphemeralNonVolatileStoreHandle.into_resource(),
1325                EphemeralNonVolatileStoreHandle.into_resource(),
1326            )
1327        };
1328
1329        chipset_devices.push(ChipsetDeviceHandle {
1330            name: "tpm".to_string(),
1331            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1332                device: TpmDeviceHandle {
1333                    ppi_store,
1334                    nvram_store,
1335                    nvram_size: None,
1336                    refresh_tpm_seeds: false,
1337                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1338                    register_layout,
1339                    guest_secret_key: None,
1340                    logger: None,
1341                    is_confidential_vm: false,
1342                    bios_guid,
1343                }
1344                .into_resource(),
1345                worker_host: mesh.make_host("tpm", None).await?,
1346            }
1347            .into_resource(),
1348        });
1349    }
1350
1351    let vga_firmware = if opt.pcat {
1352        Some(openvmm_pcat_locator::find_svga_bios(
1353            opt.vga_firmware.as_deref(),
1354        )?)
1355    } else {
1356        None
1357    };
1358
1359    if opt.gfx {
1360        // Channel for the video device to report dirty rectangles to the VNC worker.
1361        let (dirt_send, dirt_recv) = mesh::channel();
1362        resources.dirty_rect_recv = Some(dirt_recv);
1363
1364        vmbus_devices.extend([
1365            (
1366                DeviceVtl::Vtl0,
1367                SynthVideoHandle {
1368                    framebuffer: SharedFramebufferHandle.into_resource(),
1369                    dirt_send: Some(dirt_send),
1370                }
1371                .into_resource(),
1372            ),
1373            (
1374                DeviceVtl::Vtl0,
1375                SynthKeyboardHandle {
1376                    source: MultiplexedInputHandle {
1377                        // Save 0 for PS/2
1378                        elevation: 1,
1379                    }
1380                    .into_resource(),
1381                }
1382                .into_resource(),
1383            ),
1384            (
1385                DeviceVtl::Vtl0,
1386                SynthMouseHandle {
1387                    source: MultiplexedInputHandle {
1388                        // Save 0 for PS/2
1389                        elevation: 1,
1390                    }
1391                    .into_resource(),
1392                }
1393                .into_resource(),
1394            ),
1395        ]);
1396    }
1397
1398    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1399        if let Some(path) = path {
1400            cleanup_socket(path.as_ref());
1401            let listener = unix_socket::UnixListener::bind(path)
1402                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1403            Ok(Some(listener))
1404        } else {
1405            Ok(None)
1406        }
1407    };
1408
1409    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1410    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1411
1412    if let Some(path) = &opt.openhcl_dump_path {
1413        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1414        task.detach();
1415        vmbus_devices.push((openhcl_vtl, resource));
1416    }
1417
1418    #[cfg(guest_arch = "aarch64")]
1419    let smmu_instances: Vec<openvmm_defs::config::SmmuInstanceConfig> = opt
1420        .smmu
1421        .iter()
1422        .map(|s| openvmm_defs::config::SmmuInstanceConfig { rc_name: s.clone() })
1423        .collect();
1424
1425    #[cfg(guest_arch = "aarch64")]
1426    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1427        openvmm_defs::config::Aarch64TopologyConfig {
1428            // TODO: allow this to be configured from the command line
1429            gic_config: None,
1430            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1431            gic_msi: match opt.gic_msi {
1432                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1433                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1434                cli_args::GicMsiCli::V2m => {
1435                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1436                }
1437            },
1438            smmu: smmu_instances,
1439        },
1440    );
1441    #[cfg(guest_arch = "x86_64")]
1442    let topology_arch =
1443        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1444            apic_id_offset: opt.apic_id_offset,
1445            x2apic: opt.x2apic,
1446        });
1447
1448    let with_isolation = if let Some(isolation) = &opt.isolation {
1449        // TODO: For now, isolation is only supported with VTL2.
1450        if !opt.vtl2 {
1451            anyhow::bail!("isolation is only currently supported with vtl2");
1452        }
1453
1454        // TODO: Alias map support is not yet implemented with isolation.
1455        if !opt.no_alias_map {
1456            anyhow::bail!("alias map not supported with isolation");
1457        }
1458
1459        match isolation {
1460            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1461        }
1462    } else {
1463        None
1464    };
1465
1466    if with_hv && !opt.no_vmbus {
1467        let (shutdown_send, shutdown_recv) = mesh::channel();
1468        resources.shutdown_ic = Some(shutdown_send);
1469        let (kvp_send, kvp_recv) = mesh::channel();
1470        resources.kvp_ic = Some(kvp_send);
1471        vmbus_devices.extend(
1472            [
1473                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1474                    recv: shutdown_recv,
1475                }
1476                .into_resource(),
1477                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1478                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1479            ]
1480            .map(|r| (DeviceVtl::Vtl0, r)),
1481        );
1482    }
1483
1484    if let Some(hive_path) = &opt.imc {
1485        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1486        vmbus_devices.push((
1487            DeviceVtl::Vtl0,
1488            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1489        ));
1490    }
1491
1492    let mut virtio_devices = Vec::new();
1493    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1494        let bus = match bus {
1495            VirtioBusCli::Auto => {
1496                // Use VPCI when possible (currently only on Windows and macOS due
1497                // to KVM backend limitations).
1498                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1499                    None
1500                } else {
1501                    Some(VirtioBus::Pci)
1502                }
1503            }
1504            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1505            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1506            VirtioBusCli::Vpci => None,
1507        };
1508        if let Some(bus) = bus {
1509            virtio_devices.push((bus, resource));
1510        } else {
1511            vpci_devices.push(VpciDeviceConfig {
1512                vtl: DeviceVtl::Vtl0,
1513                instance_id: Guid::new_random(),
1514                resource: VirtioPciDeviceHandle(resource).into_resource(),
1515            });
1516        }
1517    };
1518
1519    for cli_cfg in &opt.virtio_net {
1520        if cli_cfg.underhill {
1521            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1522        }
1523        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1524        let resource = virtio_resources::net::VirtioNetHandle {
1525            max_queues: vport.max_queues,
1526            mac_address: vport.mac_address,
1527            endpoint: vport.endpoint,
1528        }
1529        .into_resource();
1530        if let Some(pcie_port) = &cli_cfg.pcie_port {
1531            pcie_devices.push(PcieDeviceConfig {
1532                port_name: pcie_port.clone(),
1533                resource: VirtioPciDeviceHandle(resource).into_resource(),
1534            });
1535        } else {
1536            add_virtio_device(VirtioBusCli::Auto, resource);
1537        }
1538    }
1539
1540    for args in &opt.virtio_fs {
1541        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1542            tag: args.tag.clone(),
1543            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1544                root_path: args.path.clone(),
1545                mount_options: args.options.clone(),
1546            },
1547        }
1548        .into_resource();
1549        if let Some(pcie_port) = &args.pcie_port {
1550            pcie_devices.push(PcieDeviceConfig {
1551                port_name: pcie_port.clone(),
1552                resource: VirtioPciDeviceHandle(resource).into_resource(),
1553            });
1554        } else {
1555            add_virtio_device(opt.virtio_fs_bus, resource);
1556        }
1557    }
1558
1559    for args in &opt.virtio_fs_shmem {
1560        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1561            tag: args.tag.clone(),
1562            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1563                root_path: args.path.clone(),
1564            },
1565        }
1566        .into_resource();
1567        if let Some(pcie_port) = &args.pcie_port {
1568            pcie_devices.push(PcieDeviceConfig {
1569                port_name: pcie_port.clone(),
1570                resource: VirtioPciDeviceHandle(resource).into_resource(),
1571            });
1572        } else {
1573            add_virtio_device(opt.virtio_fs_bus, resource);
1574        }
1575    }
1576
1577    for args in &opt.virtio_9p {
1578        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1579            tag: args.tag.clone(),
1580            root_path: args.path.clone(),
1581            debug: opt.virtio_9p_debug,
1582        }
1583        .into_resource();
1584        if let Some(pcie_port) = &args.pcie_port {
1585            pcie_devices.push(PcieDeviceConfig {
1586                port_name: pcie_port.clone(),
1587                resource: VirtioPciDeviceHandle(resource).into_resource(),
1588            });
1589        } else {
1590            add_virtio_device(VirtioBusCli::Auto, resource);
1591        }
1592    }
1593
1594    if let Some(pmem_args) = &opt.virtio_pmem {
1595        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1596            path: pmem_args.path.clone(),
1597        }
1598        .into_resource();
1599        if let Some(pcie_port) = &pmem_args.pcie_port {
1600            pcie_devices.push(PcieDeviceConfig {
1601                port_name: pcie_port.clone(),
1602                resource: VirtioPciDeviceHandle(resource).into_resource(),
1603            });
1604        } else {
1605            add_virtio_device(VirtioBusCli::Auto, resource);
1606        }
1607    }
1608
1609    if opt.virtio_rng {
1610        let resource: Resource<VirtioDeviceHandle> =
1611            virtio_resources::rng::VirtioRngHandle.into_resource();
1612        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1613            pcie_devices.push(PcieDeviceConfig {
1614                port_name: pcie_port.clone(),
1615                resource: VirtioPciDeviceHandle(resource).into_resource(),
1616            });
1617        } else {
1618            add_virtio_device(opt.virtio_rng_bus, resource);
1619        }
1620    }
1621
1622    if let Some(backend) = virtio_console_backend {
1623        let resource: Resource<VirtioDeviceHandle> =
1624            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1625        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1626            pcie_devices.push(PcieDeviceConfig {
1627                port_name: pcie_port.clone(),
1628                resource: VirtioPciDeviceHandle(resource).into_resource(),
1629            });
1630        } else {
1631            add_virtio_device(VirtioBusCli::Auto, resource);
1632        }
1633    }
1634
1635    // Handle --vhost-user arguments.
1636    #[cfg(target_os = "linux")]
1637    for vhost_cli in &opt.vhost_user {
1638        let stream =
1639            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1640                format!(
1641                    "failed to connect to vhost-user socket: {}",
1642                    vhost_cli.socket_path
1643                )
1644            })?;
1645
1646        use crate::cli_args::VhostUserDeviceTypeCli;
1647        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1648            VhostUserDeviceTypeCli::Fs {
1649                ref tag,
1650                num_queues,
1651                queue_size,
1652            } => virtio_resources::vhost_user::VhostUserFsHandle {
1653                socket: stream.into(),
1654                tag: tag.clone(),
1655                num_queues,
1656                queue_size,
1657            }
1658            .into_resource(),
1659            VhostUserDeviceTypeCli::Blk {
1660                num_queues,
1661                queue_size,
1662            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1663                socket: stream.into(),
1664                num_queues,
1665                queue_size,
1666            }
1667            .into_resource(),
1668            VhostUserDeviceTypeCli::Other {
1669                device_id,
1670                ref queue_sizes,
1671            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1672                socket: stream.into(),
1673                device_id,
1674                queue_sizes: queue_sizes.clone(),
1675            }
1676            .into_resource(),
1677        };
1678        if let Some(pcie_port) = &vhost_cli.pcie_port {
1679            pcie_devices.push(PcieDeviceConfig {
1680                port_name: pcie_port.clone(),
1681                resource: VirtioPciDeviceHandle(resource).into_resource(),
1682            });
1683        } else {
1684            add_virtio_device(VirtioBusCli::Auto, resource);
1685        }
1686    }
1687
1688    if let Some(vsock_path) = &opt.virtio_vsock_path {
1689        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1690        add_virtio_device(
1691            VirtioBusCli::Auto,
1692            virtio_resources::vsock::VirtioVsockHandle {
1693                // The guest CID does not matter since the UDS relay does not use it. It just needs
1694                // to be some non-reserved value for the guest to use.
1695                guest_cid: 0x3,
1696                base_path: vsock_path.clone(),
1697                listener,
1698            }
1699            .into_resource(),
1700        );
1701    }
1702
1703    let mut cfg = Config {
1704        chipset,
1705        load_mode,
1706        floppy_disks,
1707        pcie_root_complexes,
1708        #[cfg(target_os = "linux")]
1709        pcie_devices: {
1710            let mut devs = pcie_devices;
1711            devs.extend(vfio_pcie_devices);
1712            devs
1713        },
1714        #[cfg(not(target_os = "linux"))]
1715        pcie_devices,
1716        pcie_switches,
1717        vpci_devices,
1718        ide_disks: Vec::new(),
1719        memory: MemoryConfig {
1720            mem_size: if let Some(ref sizes) = opt.numa_memory {
1721                sizes
1722                    .iter()
1723                    .try_fold(0u64, |acc, &s| acc.checked_add(s))
1724                    .context("numa memory sizes overflow")?
1725            } else {
1726                opt.memory_size()
1727            },
1728            prefetch_memory: opt.prefetch_memory(),
1729            private_memory: opt.private_memory(),
1730            transparent_hugepages: opt.transparent_hugepages(),
1731            hugepages: opt.memory.hugepages,
1732            hugepage_size: opt.memory.hugepage_size,
1733            numa_mem_sizes: opt.numa_memory.clone(),
1734        },
1735        processor_topology: ProcessorTopologyConfig {
1736            proc_count: opt.processors,
1737            vps_per_socket: opt.vps_per_socket,
1738            enable_smt: match opt.smt {
1739                cli_args::SmtConfigCli::Auto => None,
1740                cli_args::SmtConfigCli::Force => Some(true),
1741                cli_args::SmtConfigCli::Off => Some(false),
1742            },
1743            arch: Some(topology_arch),
1744        },
1745        hypervisor: HypervisorConfig {
1746            with_hv,
1747            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1748                vtl0_alias_map: !opt.no_alias_map,
1749                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1750                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1751                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1752                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1753                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1754                        Some(LateMapVtl0MemoryPolicy::InjectException)
1755                    }
1756                },
1757            }),
1758            with_isolation,
1759        },
1760        #[cfg(windows)]
1761        kernel_vmnics,
1762        input: mesh::Receiver::new(),
1763        framebuffer,
1764        vga_firmware,
1765        vtl2_gfx: opt.vtl2_gfx,
1766        virtio_devices,
1767        vmbus: (with_hv && !opt.no_vmbus).then_some(VmbusConfig {
1768            vsock_listener: vtl0_vsock_listener,
1769            vsock_path: opt.vmbus_vsock_path.clone(),
1770            vtl2_redirect: opt.vmbus_redirect,
1771            vmbus_max_version: opt.vmbus_max_version,
1772            #[cfg(windows)]
1773            vmbusproxy_handle,
1774        }),
1775        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1776            vsock_listener: vtl2_vsock_listener,
1777            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1778            ..Default::default()
1779        }),
1780        vmbus_devices,
1781        chipset_devices,
1782        pci_chipset_devices,
1783        isa_dma_controller,
1784        chipset_capabilities: capabilities,
1785        layout: layout_config,
1786        #[cfg(windows)]
1787        vpci_resources,
1788        vmgs,
1789        secure_boot_enabled: opt.secure_boot,
1790        custom_uefi_vars,
1791        firmware_event_send: None,
1792        debugger_rpc: None,
1793        rtc_delta_milliseconds: 0,
1794        automatic_guest_reset: !opt.halt_on_reset,
1795        efi_diagnostics_log_level: {
1796            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1797                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1798                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1799                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1800            }
1801        },
1802    };
1803
1804    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1805    Ok((cfg, resources))
1806}
1807
/// Returns the terminal application to use for externally launched console
/// windows, if one is configured in the environment.
///
/// `OPENVMM_TERM` is consulted first; `HVLITE_TERM` is only read when
/// `OPENVMM_TERM` is not set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    let configured = match std::env::var_os("OPENVMM_TERM") {
        Some(term) => Some(term),
        None => std::env::var_os("HVLITE_TERM"),
    };
    configured.map(PathBuf::from)
}
1814
/// Best-effort removal of `path`, performed only when `path` is confirmed to
/// be a Unix socket.
///
/// If the file type cannot be determined, the path is treated as not being a
/// socket and is left untouched. A failure to remove the file is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let confirmed_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let confirmed_socket = {
        use std::os::unix::fs::FileTypeExt;
        match path.metadata() {
            Ok(meta) => meta.file_type().is_socket(),
            Err(_) => false,
        }
    };

    if !confirmed_socket {
        return;
    }

    // Removal errors are deliberately discarded.
    let _ = std::fs::remove_file(path);
}
1828
/// Switch ID used for a `dio` endpoint when the command line does not name a
/// switch (see `parse_endpoint`).
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1831
/// Creates a new kernel switch port, with a randomly generated port ID, on
/// the switch identified by `switch_id`.
///
/// Returns the port ID in its `openvmm_defs` form together with the created
/// port.
///
/// # Errors
///
/// Fails if `switch_id` cannot be parsed, if the switch cannot be opened, or
/// if the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // Opening the network is only an existence check for the switch; the
    // opened value is discarded immediately.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    let config_id = openvmm_defs::config::SwitchPortId {
        switch: kernel_id.switch,
        port: kernel_id.port,
    };
    Ok((config_id, switch_port))
}
1854
1855fn parse_endpoint(
1856    cli_cfg: &NicConfigCli,
1857    index: &mut usize,
1858    resources: &mut VmResources,
1859) -> anyhow::Result<NicConfig> {
1860    let _ = resources;
1861    let endpoint = match &cli_cfg.endpoint {
1862        EndpointConfigCli::Consomme { cidr, host_fwd } => {
1863            let ports = host_fwd
1864                .iter()
1865                .map(|fwd| {
1866                    use net_backend_resources::consomme::HostPortProtocol;
1867                    net_backend_resources::consomme::HostPortConfig {
1868                        protocol: match fwd.protocol {
1869                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
1870                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
1871                        },
1872                        host_address: fwd
1873                            .host_address
1874                            .map(net_backend_resources::consomme::HostIpAddress::from),
1875                        host_port: fwd.host_port,
1876                        guest_port: fwd.guest_port,
1877                    }
1878                })
1879                .collect();
1880            net_backend_resources::consomme::ConsommeHandle {
1881                cidr: cidr.clone(),
1882                ports,
1883            }
1884            .into_resource()
1885        }
1886        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
1887        EndpointConfigCli::Dio { id } => {
1888            #[cfg(windows)]
1889            {
1890                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
1891                resources.switch_ports.push(port);
1892                net_backend_resources::dio::WindowsDirectIoHandle {
1893                    switch_port_id: net_backend_resources::dio::SwitchPortId {
1894                        switch: port_id.switch,
1895                        port: port_id.port,
1896                    },
1897                }
1898                .into_resource()
1899            }
1900
1901            #[cfg(not(windows))]
1902            {
1903                let _ = id;
1904                bail!("cannot use dio on non-windows platforms")
1905            }
1906        }
1907        EndpointConfigCli::Tap { name } => {
1908            #[cfg(target_os = "linux")]
1909            {
1910                let fd = net_tap::tap::open_tap(name)
1911                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
1912                net_backend_resources::tap::TapHandle { fd }.into_resource()
1913            }
1914
1915            #[cfg(not(target_os = "linux"))]
1916            {
1917                let _ = name;
1918                bail!("TAP backend is only supported on Linux")
1919            }
1920        }
1921    };
1922
1923    // Pick a random MAC address.
1924    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
1925    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
1926
1927    // Pick a fixed instance ID based on the index.
1928    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
1929    let instance_id = Guid {
1930        data1: *index as u32,
1931        ..BASE_INSTANCE_ID
1932    };
1933    *index += 1;
1934
1935    Ok(NicConfig {
1936        vtl: cli_cfg.vtl,
1937        instance_id,
1938        endpoint,
1939        mac_address: mac_address.into(),
1940        max_queues: cli_cfg.max_queues,
1941        pcie_port: cli_cfg.pcie_port.clone(),
1942    })
1943}
1944
/// Configuration for a single NIC, as produced by `parse_endpoint` from the
/// command-line NIC options.
#[derive(Debug)]
struct NicConfig {
    /// VTL copied from the NIC's command-line configuration; returned
    /// alongside the device handle by `into_netvsp_handle`.
    vtl: DeviceVtl,
    /// Instance ID built from a fixed base GUID with the NIC's index in
    /// `data1`.
    instance_id: Guid,
    /// MAC address: a fixed `00:15:5D` prefix followed by three random bytes.
    mac_address: MacAddress,
    /// Network backend endpoint resource selected on the command line.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional queue limit copied from the command line and forwarded to the
    /// netvsp handle.
    max_queues: Option<u16>,
    /// Optional PCIe port name copied from the command line. Not consumed by
    /// `into_netvsp_handle`.
    pcie_port: Option<String>,
}
1954
1955impl NicConfig {
1956    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1957        (
1958            self.vtl,
1959            netvsp_resources::NetvspHandle {
1960                instance_id: self.instance_id,
1961                mac_address: self.mac_address,
1962                endpoint: self.endpoint,
1963                max_queues: self.max_queues,
1964            }
1965            .into_resource(),
1966        )
1967    }
1968}
1969
/// One entry in the list that `disk_open_inner` builds while walking a disk
/// description from the command line.
enum LayerOrDisk {
    /// A disk layer description, used as-is when the layered disk is built.
    Layer(DiskLayerDescription),
    /// A complete disk resource. `disk_open` returns it directly when it is the
    /// only entry, and otherwise wraps it in a `DiskLayerHandle` layer.
    Disk(Resource<DiskHandleKind>),
}
1974
1975async fn disk_open(
1976    disk_cli: &DiskCliKind,
1977    read_only: bool,
1978) -> anyhow::Result<Resource<DiskHandleKind>> {
1979    let mut layers = Vec::new();
1980    disk_open_inner(disk_cli, read_only, &mut layers).await?;
1981    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1982        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1983            unreachable!()
1984        };
1985        Ok(disk)
1986    } else {
1987        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1988            layers: layers
1989                .into_iter()
1990                .map(|layer| match layer {
1991                    LayerOrDisk::Layer(layer) => layer,
1992                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1993                        layer: DiskLayerHandle(disk).into_resource(),
1994                        read_cache: false,
1995                        write_through: false,
1996                    },
1997                })
1998                .collect(),
1999        }))
2000    }
2001}
2002
2003fn disk_open_inner<'a>(
2004    disk_cli: &'a DiskCliKind,
2005    read_only: bool,
2006    layers: &'a mut Vec<LayerOrDisk>,
2007) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
2008    Box::pin(async move {
2009        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
2010            LayerOrDisk::Layer(layer.into_resource().into())
2011        }
2012        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
2013            LayerOrDisk::Disk(disk.into_resource())
2014        }
2015        match disk_cli {
2016            &DiskCliKind::Memory(len) => {
2017                layers.push(layer(RamDiskLayerHandle {
2018                    len: Some(len),
2019                    sector_size: None,
2020                }));
2021            }
2022            DiskCliKind::File {
2023                path,
2024                create_with_len,
2025                direct,
2026            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
2027                create_disk_type(
2028                    path,
2029                    *size,
2030                    OpenDiskOptions {
2031                        read_only: false,
2032                        direct: *direct,
2033                    },
2034                )
2035                .with_context(|| format!("failed to create {}", path.display()))?
2036            } else {
2037                open_disk_type(
2038                    path,
2039                    OpenDiskOptions {
2040                        read_only,
2041                        direct: *direct,
2042                    },
2043                )
2044                .await
2045                .with_context(|| format!("failed to open {}", path.display()))?
2046            })),
2047            DiskCliKind::Blob { kind, url } => {
2048                layers.push(disk(disk_backend_resources::BlobDiskHandle {
2049                    url: url.to_owned(),
2050                    format: match kind {
2051                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
2052                        cli_args::BlobKind::Vhd1 => {
2053                            disk_backend_resources::BlobDiskFormat::FixedVhd1
2054                        }
2055                    },
2056                }))
2057            }
2058            DiskCliKind::MemoryDiff(inner) => {
2059                layers.push(layer(RamDiskLayerHandle {
2060                    len: None,
2061                    sector_size: None,
2062                }));
2063                disk_open_inner(inner, true, layers).await?;
2064            }
2065            DiskCliKind::PersistentReservationsWrapper(inner) => {
2066                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
2067                    disk_open(inner, read_only).await?,
2068                )))
2069            }
2070            DiskCliKind::DelayDiskWrapper {
2071                delay_ms,
2072                disk: inner,
2073            } => layers.push(disk(DelayDiskHandle {
2074                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
2075                disk: disk_open(inner, read_only).await?,
2076            })),
2077            DiskCliKind::Crypt {
2078                disk: inner,
2079                cipher,
2080                key_file,
2081            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
2082                disk: disk_open(inner, read_only).await?,
2083                cipher: match cipher {
2084                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
2085                },
2086                key: fs_err::read(key_file).context("failed to read key file")?,
2087            })),
2088            DiskCliKind::Sqlite {
2089                path,
2090                create_with_len,
2091            } => {
2092                // FUTURE: this code should be responsible for opening
2093                // file-handle(s) itself, and passing them into sqlite via a custom
2094                // vfs. For now though - simply check if the file exists or not, and
2095                // perform early validation of filesystem-level create options.
2096                match (create_with_len.is_some(), path.exists()) {
2097                    (true, true) => anyhow::bail!(
2098                        "cannot create new sqlite disk at {} - file already exists",
2099                        path.display()
2100                    ),
2101                    (false, false) => anyhow::bail!(
2102                        "cannot open sqlite disk at {} - file not found",
2103                        path.display()
2104                    ),
2105                    _ => {}
2106                }
2107
2108                layers.push(layer(SqliteDiskLayerHandle {
2109                    dbhd_path: path.display().to_string(),
2110                    format_dbhd: create_with_len.map(|len| {
2111                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2112                            logically_read_only: false,
2113                            len: Some(len),
2114                        }
2115                    }),
2116                }));
2117            }
2118            DiskCliKind::SqliteDiff { path, create, disk } => {
2119                // FUTURE: this code should be responsible for opening
2120                // file-handle(s) itself, and passing them into sqlite via a custom
2121                // vfs. For now though - simply check if the file exists or not, and
2122                // perform early validation of filesystem-level create options.
2123                match (create, path.exists()) {
2124                    (true, true) => anyhow::bail!(
2125                        "cannot create new sqlite disk at {} - file already exists",
2126                        path.display()
2127                    ),
2128                    (false, false) => anyhow::bail!(
2129                        "cannot open sqlite disk at {} - file not found",
2130                        path.display()
2131                    ),
2132                    _ => {}
2133                }
2134
2135                layers.push(layer(SqliteDiskLayerHandle {
2136                    dbhd_path: path.display().to_string(),
2137                    format_dbhd: create.then_some(
2138                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2139                            logically_read_only: false,
2140                            len: None,
2141                        },
2142                    ),
2143                }));
2144                disk_open_inner(disk, true, layers).await?;
2145            }
2146            DiskCliKind::AutoCacheSqlite {
2147                cache_path,
2148                key,
2149                disk,
2150            } => {
2151                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2152                    read_cache: true,
2153                    write_through: false,
2154                    layer: SqliteAutoCacheDiskLayerHandle {
2155                        cache_path: cache_path.clone(),
2156                        cache_key: key.clone(),
2157                    }
2158                    .into_resource(),
2159                }));
2160                disk_open_inner(disk, read_only, layers).await?;
2161            }
2162        }
2163        Ok(())
2164    })
2165}
2166
2167/// Get the system page size.
2168pub(crate) fn system_page_size() -> u32 {
2169    sparse_mmap::SparseMapping::page_size() as u32
2170}
2171
/// Name of the guest architecture, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when that cfg is `x86_64`, `"aarch64"`
/// otherwise.
pub(crate) const GUEST_ARCH: &str = match cfg!(guest_arch = "x86_64") {
    true => "x86_64",
    false => "aarch64",
};
2178
2179/// Open a snapshot directory and validate it against the current VM config.
2180/// Returns the shared memory fd (from memory.bin) and the saved device state.
2181fn prepare_snapshot_restore(
2182    snapshot_dir: &Path,
2183    opt: &Options,
2184) -> anyhow::Result<(
2185    openvmm_defs::worker::SharedMemoryFd,
2186    mesh::payload::message::ProtobufMessage,
2187)> {
2188    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2189
2190    // Validate manifest against current VM config.
2191    openvmm_helpers::snapshot::validate_manifest(
2192        &manifest,
2193        GUEST_ARCH,
2194        opt.memory_size(),
2195        opt.processors,
2196        system_page_size(),
2197    )?;
2198
2199    // Open memory.bin (existing file, no create, no resize).
2200    let memory_file = fs_err::OpenOptions::new()
2201        .read(true)
2202        .write(true)
2203        .open(snapshot_dir.join("memory.bin"))?;
2204
2205    // Validate file size matches expected memory size.
2206    let file_size = memory_file.metadata()?.len();
2207    if file_size != manifest.memory_size_bytes {
2208        anyhow::bail!(
2209            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2210            manifest.memory_size_bytes,
2211        );
2212    }
2213
2214    let shared_memory_fd =
2215        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2216
2217    // Reconstruct ProtobufMessage from the saved state bytes.
2218    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2219    // back to ProtobufMessage.
2220    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2221        .context("failed to decode saved state from snapshot")?;
2222
2223    Ok((shared_memory_fd, state_msg))
2224}
2225
2226fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
2227    #[cfg(windows)]
2228    pal::windows::disable_hard_error_dialog();
2229
2230    tracing_init::enable_tracing()?;
2231
2232    // Try to run as a worker host.
2233    // On success the worker runs to completion and then exits the process (does
2234    // not return). Any worker host setup errors are return and bubbled up.
2235    meshworker::run_vmm_mesh_host()?;
2236
2237    let opt = Options::parse();
2238    if let Some(path) = &opt.write_saved_state_proto {
2239        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2240            .write_to_path(path)
2241            .context("failed to write protobuf descriptors")?;
2242        return Ok(());
2243    }
2244
2245    if let Some(ref path) = opt.pidfile {
2246        std::fs::write(path, format!("{}\n", std::process::id()))
2247            .context("failed to write pidfile")?;
2248        *pidfile_path = Some(path.clone());
2249    }
2250
2251    if let Some(path) = opt.relay_console_path {
2252        let console_title = opt.relay_console_title.unwrap_or_default();
2253        return console_relay::relay_console(&path, console_title.as_str());
2254    }
2255
2256    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2257    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2258        return block_on(async {
2259            let _ = std::fs::remove_file(path);
2260            let listener =
2261                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2262
2263            let transport = if opt.ttrpc.is_some() {
2264                ttrpc::RpcTransport::Ttrpc
2265            } else {
2266                ttrpc::RpcTransport::Grpc
2267            };
2268
2269            // This is a local launch
2270            let mut handle =
2271                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2272                    listener,
2273                    transport,
2274                })
2275                .await?;
2276
2277            tracing::info!(%transport, path = %path.display(), "listening");
2278
2279            // Signal the the parent process that the server is ready.
2280            pal::close_stdout().context("failed to close stdout")?;
2281
2282            handle.join().await?;
2283
2284            Ok(())
2285        });
2286    }
2287
2288    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2289}
2290
2291fn new_hvsock_service_id(port: u32) -> Guid {
2292    // This GUID is an embedding of the AF_VSOCK port into an
2293    // AF_HYPERV service ID.
2294    Guid {
2295        data1: port,
2296        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2297    }
2298}
2299
2300async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2301    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2302    let result = run_control_inner(driver, &mut mesh, opt).await;
2303    // If setup failed before the mesh was handed to the controller, shut it
2304    // down so the child host process exits cleanly without noisy logs.
2305    if let Some(mesh) = mesh {
2306        mesh.shutdown().await;
2307    }
2308    result
2309}
2310
2311async fn run_control_inner(
2312    driver: &DefaultDriver,
2313    mesh_slot: &mut Option<VmmMesh>,
2314    opt: Options,
2315) -> anyhow::Result<()> {
2316    let mesh = mesh_slot.as_ref().unwrap();
2317    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2318
2319    let mut vnc_worker = None;
2320    if opt.gfx || opt.vnc.vnc {
2321        // Parse the listen address. Try as a full SocketAddr (host:port) first;
2322        // fall back to a bare IP, using the configured port.
2323        let addr: std::net::SocketAddr = if let Ok(sa) =
2324            opt.vnc.vnc_listen.parse::<std::net::SocketAddr>()
2325        {
2326            sa
2327        } else {
2328            let ip: std::net::IpAddr = opt.vnc.vnc_listen.parse().with_context(|| {
2329                format!(
2330                    "invalid VNC listen address: {} (expected IP address or socket address like [::1]:5900)",
2331                    opt.vnc.vnc_listen
2332                )
2333            })?;
2334            std::net::SocketAddr::new(ip, opt.vnc.vnc_port)
2335        };
2336
2337        let socket = socket2::Socket::new(
2338            if addr.is_ipv6() {
2339                socket2::Domain::IPV6
2340            } else {
2341                socket2::Domain::IPV4
2342            },
2343            socket2::Type::STREAM,
2344            None,
2345        )
2346        .with_context(|| format!("creating VNC socket for {}", addr))?;
2347
2348        if addr.is_ipv6() {
2349            if let Err(e) = socket.set_only_v6(false) {
2350                tracing::warn!(
2351                    error = %e,
2352                    "failed to enable dual-stack on IPv6 VNC socket, IPv4 clients may not be able to connect"
2353                );
2354            }
2355        }
2356        socket.set_reuse_address(true)?;
2357        socket
2358            .bind(&addr.into())
2359            .with_context(|| format!("binding VNC socket to {}", addr))?;
2360        socket
2361            .listen(128)
2362            .with_context(|| format!("listening on VNC socket {}", addr))?;
2363        let listener: TcpListener = socket.into();
2364
2365        if !addr.ip().is_loopback() {
2366            tracing::warn!(
2367                address = %addr,
2368                "VNC server listening on non-localhost address without authentication"
2369            );
2370        }
2371
2372        let input_send = vm_config.input.sender();
2373        let framebuffer = resources
2374            .framebuffer_access
2375            .take()
2376            .expect("synth video enabled");
2377
2378        let vnc_host = mesh
2379            .make_host("vnc", None)
2380            .await
2381            .context("spawning vnc process failed")?;
2382
2383        vnc_worker = Some(
2384            vnc_host
2385                .launch_worker(
2386                    vnc_worker_defs::VNC_WORKER_TCP,
2387                    VncParameters {
2388                        listener,
2389                        framebuffer,
2390                        input_send,
2391                        dirty_recv: resources.dirty_rect_recv.take(),
2392                        max_clients: opt.vnc.vnc_max_clients,
2393                        evict_oldest: opt.vnc.vnc_evict_oldest,
2394                    },
2395                )
2396                .await?,
2397        )
2398    }
2399
2400    // spin up the debug worker
2401    let gdb_worker = if let Some(port) = opt.gdb {
2402        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2403            .with_context(|| format!("binding to gdb port {}", port))?;
2404
2405        let (req_tx, req_rx) = mesh::channel();
2406        vm_config.debugger_rpc = Some(req_rx);
2407
2408        let gdb_host = mesh
2409            .make_host("gdb", None)
2410            .await
2411            .context("spawning gdbstub process failed")?;
2412
2413        Some(
2414            gdb_host
2415                .launch_worker(
2416                    debug_worker_defs::DEBUGGER_WORKER,
2417                    debug_worker_defs::DebuggerParameters {
2418                        listener,
2419                        req_chan: req_tx,
2420                        vp_count: vm_config.processor_topology.proc_count,
2421                        target_arch: if cfg!(guest_arch = "x86_64") {
2422                            debug_worker_defs::TargetArch::X86_64
2423                        } else {
2424                            debug_worker_defs::TargetArch::Aarch64
2425                        },
2426                    },
2427                )
2428                .await
2429                .context("failed to launch gdbstub worker")?,
2430        )
2431    } else {
2432        None
2433    };
2434
2435    // spin up the VM
2436    let (vm_rpc, rpc_recv) = mesh::channel();
2437    let (notify_send, notify_recv) = mesh::channel();
2438    let vm_worker = {
2439        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2440
2441        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2442            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2443            (Some(fd), Some(state_msg))
2444        } else {
2445            let shared_memory = opt
2446                .memory_backing_file()
2447                .map(|path| {
2448                    openvmm_helpers::shared_memory::open_memory_backing_file(
2449                        path,
2450                        opt.memory_size(),
2451                    )
2452                })
2453                .transpose()?;
2454            (shared_memory, None)
2455        };
2456
2457        let params = VmWorkerParameters {
2458            hypervisor: match &opt.hypervisor {
2459                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2460                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2461            },
2462            cfg: vm_config,
2463            saved_state,
2464            shared_memory,
2465            rpc: rpc_recv,
2466            notify: notify_send,
2467        };
2468        vm_host
2469            .launch_worker(VM_WORKER, params)
2470            .await
2471            .context("failed to launch vm worker")?
2472    };
2473
2474    if opt.restore_snapshot.is_some() {
2475        tracing::info!("restoring VM from snapshot");
2476    }
2477
2478    if !opt.paused {
2479        vm_rpc.call(VmRpc::Resume, ()).await?;
2480    }
2481
2482    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2483        driver.clone(),
2484        DiagDialer {
2485            driver: driver.clone(),
2486            vm_rpc: vm_rpc.clone(),
2487            openhcl_vtl: if opt.vtl2 {
2488                DeviceVtl::Vtl2
2489            } else {
2490                DeviceVtl::Vtl0
2491            },
2492        },
2493    ));
2494
2495    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2496
2497    // Create channels between the REPL and VmController.
2498    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2499    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2500
2501    let has_vtl2 = resources.vtl2_settings.is_some();
2502
2503    // Build the VmController with exclusive resources.
2504    let controller = vm_controller::VmController {
2505        mesh: mesh_slot.take().unwrap(),
2506        vm_worker,
2507        vnc_worker,
2508        gdb_worker,
2509        diag_inspector: Some(diag_inspector),
2510        vtl2_settings: resources.vtl2_settings,
2511        ged_rpc: resources.ged_rpc.clone(),
2512        vm_rpc: vm_rpc.clone(),
2513        paravisor_diag: Some(paravisor_diag),
2514        igvm_path: opt.igvm.clone(),
2515        memory_backing_file: opt.memory_backing_file().cloned(),
2516        memory: opt.memory_size(),
2517        processors: opt.processors,
2518        log_file: opt.log_file.clone(),
2519    };
2520
2521    // Spawn the VmController as a task.
2522    let controller_task = driver.spawn(
2523        "vm-controller",
2524        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2525    );
2526
2527    // Run the REPL with shareable resources.
2528    let repl_result = repl::run_repl(
2529        driver,
2530        repl::ReplResources {
2531            vm_rpc,
2532            vm_controller: vm_controller_send,
2533            vm_controller_events: vm_controller_event_recv,
2534            scsi_rpc: resources.scsi_rpc,
2535            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2536            shutdown_ic: resources.shutdown_ic,
2537            kvp_ic: resources.kvp_ic,
2538            console_in: resources.console_in,
2539            has_vtl2,
2540        },
2541    )
2542    .await;
2543
2544    // Wait for the controller task to finish (it stops the VM worker and
2545    // shuts down the mesh).
2546    controller_task.await;
2547
2548    repl_result
2549}
2550
/// Dialer used by the diagnostics client to establish its stream: each dial
/// sends a `VmRpc::ConnectHvsock` request over `vm_rpc` and wraps the
/// resulting socket for use with `driver`.
struct DiagDialer {
    /// Driver used to wrap the connected socket in a `PolledSocket`.
    driver: DefaultDriver,
    /// Channel on which the `VmRpc::ConnectHvsock` request is sent.
    vm_rpc: mesh::Sender<VmRpc>,
    /// VTL passed along in the connect request.
    openhcl_vtl: DeviceVtl,
}
2556
2557impl mesh_rpc::client::Dial for DiagDialer {
2558    type Stream = PolledSocket<unix_socket::UnixStream>;
2559
2560    async fn dial(&mut self) -> io::Result<Self::Stream> {
2561        let service_id = new_hvsock_service_id(1);
2562        let socket = self
2563            .vm_rpc
2564            .call_failable(
2565                VmRpc::ConnectHvsock,
2566                (
2567                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2568                    service_id,
2569                    self.openhcl_vtl,
2570                ),
2571            )
2572            .await
2573            .map_err(io::Error::other)?;
2574
2575        PolledSocket::new(&self.driver, socket)
2576    }
2577}
2578
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
///
/// The task that services requests is not spawned at construction time; it is
/// started the first time an inspect request is received.
pub(crate) struct DiagInspector(DiagInspectorInner);
2586
/// Internal state of a [`DiagInspector`].
enum DiagInspectorInner {
    /// No request has been handled yet; holds the driver and diagnostics
    /// client that `DiagInspector::start` uses to spawn the worker task.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// The worker task has been spawned.
    Started {
        /// Channel used to hand deferred inspect requests to the worker task.
        send: mesh::Sender<inspect::Deferred>,
        /// Handle to the spawned worker task; never read, only stored
        /// alongside the sender.
        // NOTE(review): presumably kept so the task is not dropped while the
        // inspector is alive -- confirm against `Task` drop semantics.
        _task: Task<()>,
    },
    /// Placeholder left in place while `DiagInspector::start` moves the
    /// `NotStarted` payload out; `start` treats observing it as unreachable.
    Invalid,
}
2595
2596impl DiagInspector {
2597    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2598        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2599    }
2600
2601    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2602        loop {
2603            match self.0 {
2604                DiagInspectorInner::NotStarted { .. } => {
2605                    let DiagInspectorInner::NotStarted(driver, client) =
2606                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2607                    else {
2608                        unreachable!()
2609                    };
2610                    let (send, recv) = mesh::channel();
2611                    let task = driver.clone().spawn("diag-inspect", async move {
2612                        Self::run(&client, recv).await
2613                    });
2614
2615                    self.0 = DiagInspectorInner::Started { send, _task: task };
2616                }
2617                DiagInspectorInner::Started { ref send, .. } => break send,
2618                DiagInspectorInner::Invalid => unreachable!(),
2619            }
2620        }
2621    }
2622
2623    async fn run(
2624        diag_client: &diag_client::DiagClient,
2625        mut recv: mesh::Receiver<inspect::Deferred>,
2626    ) {
2627        while let Some(deferred) = recv.next().await {
2628            let info = deferred.external_request();
2629            let result = match info.request_type {
2630                inspect::ExternalRequestType::Inspect { depth } => {
2631                    if depth == 0 {
2632                        Ok(inspect::Node::Unevaluated)
2633                    } else {
2634                        // TODO: Support taking timeouts from the command line
2635                        diag_client
2636                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2637                            .await
2638                    }
2639                }
2640                inspect::ExternalRequestType::Update { value } => {
2641                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2642                }
2643            };
2644            deferred.complete_external(
2645                result.unwrap_or_else(|err| {
2646                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2647                }),
2648                inspect::SensitivityLevel::Unspecified,
2649            )
2650        }
2651    }
2652}
2653
2654impl InspectMut for DiagInspector {
2655    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2656        self.start().send(req.defer());
2657    }
2658}