Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::StreamExt;
53use futures::executor::block_on;
54use futures::io::AllowStdIo;
55use gdma_resources::GdmaDeviceHandle;
56use gdma_resources::VportDefinition;
57use guid::Guid;
58use input_core::MultiplexedInputHandle;
59use inspect::InspectMut;
60use io::Read;
61use memory_range::MemoryRange;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64;
70use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2;
71use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86;
72use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2;
73use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
74use openvmm_defs::config::DeviceVtl;
75use openvmm_defs::config::EfiDiagnosticsLogLevelType;
76use openvmm_defs::config::HypervisorConfig;
77use openvmm_defs::config::LateMapVtl0MemoryPolicy;
78use openvmm_defs::config::LoadMode;
79use openvmm_defs::config::MemoryConfig;
80use openvmm_defs::config::PcieDeviceConfig;
81use openvmm_defs::config::PcieRootComplexConfig;
82use openvmm_defs::config::PcieRootPortConfig;
83use openvmm_defs::config::PcieSwitchConfig;
84use openvmm_defs::config::ProcessorTopologyConfig;
85use openvmm_defs::config::SerialInformation;
86use openvmm_defs::config::VirtioBus;
87use openvmm_defs::config::VmbusConfig;
88use openvmm_defs::config::VpciDeviceConfig;
89use openvmm_defs::config::Vtl2BaseAddressType;
90use openvmm_defs::config::Vtl2Config;
91use openvmm_defs::rpc::VmRpc;
92use openvmm_defs::worker::VM_WORKER;
93use openvmm_defs::worker::VmWorkerParameters;
94use openvmm_helpers::disk::OpenDiskOptions;
95use openvmm_helpers::disk::create_disk_type;
96use openvmm_helpers::disk::open_disk_type;
97use pal_async::DefaultDriver;
98use pal_async::DefaultPool;
99use pal_async::socket::PolledSocket;
100use pal_async::task::Spawn;
101use pal_async::task::Task;
102use serial_16550_resources::ComPort;
103use serial_core::resources::DisconnectedSerialBackendHandle;
104use sparse_mmap::alloc_shared_memory;
105use std::cell::RefCell;
106use std::collections::BTreeMap;
107use std::fmt::Write as _;
108use std::future::pending;
109use std::io;
110#[cfg(unix)]
111use std::io::IsTerminal;
112use std::io::Write;
113use std::net::TcpListener;
114use std::path::Path;
115use std::path::PathBuf;
116use std::sync::Arc;
117use std::thread;
118use std::time::Duration;
119use storvsp_resources::ScsiControllerRequest;
120use tpm_resources::TpmDeviceHandle;
121use tpm_resources::TpmRegisterLayout;
122use uidevices_resources::SynthKeyboardHandle;
123use uidevices_resources::SynthMouseHandle;
124use uidevices_resources::SynthVideoHandle;
125use video_core::SharedFramebufferHandle;
126use virtio_resources::VirtioPciDeviceHandle;
127use vm_manifest_builder::BaseChipsetType;
128use vm_manifest_builder::MachineArch;
129use vm_manifest_builder::VmChipsetResult;
130use vm_manifest_builder::VmManifestBuilder;
131use vm_resource::IntoResource;
132use vm_resource::Resource;
133use vm_resource::kind::DiskHandleKind;
134use vm_resource::kind::DiskLayerHandleKind;
135use vm_resource::kind::NetEndpointHandleKind;
136use vm_resource::kind::VirtioDeviceHandle;
137use vm_resource::kind::VmbusDeviceHandleKind;
138use vmbus_serial_resources::VmbusSerialDeviceHandle;
139use vmbus_serial_resources::VmbusSerialPort;
140use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
141use vmgs_resources::GuestStateEncryptionPolicy;
142use vmgs_resources::VmgsDisk;
143use vmgs_resources::VmgsFileHandle;
144use vmgs_resources::VmgsResource;
145use vmotherboard::ChipsetDeviceHandle;
146use vnc_worker_defs::VncParameters;
147
148pub fn openvmm_main() {
149    // Save the current state of the terminal so we can restore it back to
150    // normal before exiting.
151    #[cfg(unix)]
152    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
153
154    let mut pidfile_path = None;
155    let exit_code = match do_main(&mut pidfile_path) {
156        Ok(_) => 0,
157        Err(err) => {
158            eprintln!("fatal error: {:?}", err);
159            1
160        }
161    };
162
163    // Restore the terminal to its initial state.
164    #[cfg(unix)]
165    if let Some(orig_termios) = orig_termios {
166        term::set_termios(orig_termios);
167    }
168
169    // Clean up the pidfile before terminating, since pal::process::terminate
170    // skips destructors.
171    if let Some(ref path) = pidfile_path {
172        let _ = std::fs::remove_file(path);
173    }
174
175    // Terminate the process immediately without graceful shutdown of DLLs or
176    // C++ destructors or anything like that. This is all unnecessary and saves
177    // time on Windows.
178    //
179    // Do flush stdout, though, since there may be buffered data.
180    let _ = io::stdout().flush();
181    pal::process::terminate(exit_code);
182}
183
/// Handles to live VM resources that the control process keeps after
/// building the VM configuration, used to interact with the running VM.
#[derive(Default)]
struct VmResources {
    /// Writer that forwards host console input to whichever serial device
    /// claimed the console, if any.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Access to the shared framebuffer; present when a framebuffer was
    /// created (gfx/VNC/PCAT configurations).
    framebuffer_access: Option<FramebufferAccess>,
    /// Channel to the shutdown integration component, if configured.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Channel to the key-value-pair integration component, if configured.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Request channel for the SCSI controller, if one was added.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Request channel for the VTL2 NVMe controller, if one was added.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Request channel for the guest emulation device (GED), if configured.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings to send to the guest, if configured.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Kernel vmswitch ports kept alive for kernel-mode vmNICs
    /// (Windows only).
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
197
/// Tracks which serial device has claimed the interactive console, ensuring
/// at most one device is bound to it.
struct ConsoleState<'a> {
    /// Guest-facing name of the device attached to the console
    /// (e.g. "ttyS0", "ttyAMA0", "hvc0").
    device: &'a str,
    /// Writer used to forward host console input to the serial device.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
202
203/// Build a flat list of switches with their parent port assignments.
204///
205/// This function converts hierarchical CLI switch definitions into a flat list
206/// where each switch specifies its parent port directly.
207fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
208    all_switches
209        .iter()
210        .map(|switch_cli| PcieSwitchConfig {
211            name: switch_cli.name.clone(),
212            num_downstream_ports: switch_cli.num_downstream_ports,
213            parent_port: switch_cli.port_name.clone(),
214            hotplug: switch_cli.hotplug,
215        })
216        .collect()
217}
218
219async fn vm_config_from_command_line(
220    spawner: impl Spawn,
221    mesh: &VmmMesh,
222    opt: &Options,
223) -> anyhow::Result<(Config, VmResources)> {
224    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
225    // Ensure the serial driver stays alive with no tasks.
226    serial_driver.spawn("leak", pending::<()>()).detach();
227
228    let openhcl_vtl = if opt.vtl2 {
229        DeviceVtl::Vtl2
230    } else {
231        DeviceVtl::Vtl0
232    };
233
234    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
235    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
236        Ok(match cli_cfg {
237            SerialConfigCli::Console => {
238                if let Some(console_state) = console_state.borrow().as_ref() {
239                    bail!("console already set by {}", console_state.device);
240                }
241                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
242                let (serial_read, serial_write) = AsyncReadExt::split(serial);
243                *console_state.borrow_mut() = Some(ConsoleState {
244                    device,
245                    input: Box::new(serial_write),
246                });
247                thread::Builder::new()
248                    .name(name.to_owned())
249                    .spawn(move || {
250                        let _ = block_on(futures::io::copy(
251                            serial_read,
252                            &mut AllowStdIo::new(term::raw_stdout()),
253                        ));
254                    })
255                    .unwrap();
256                Some(config)
257            }
258            SerialConfigCli::Stderr => {
259                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
260                thread::Builder::new()
261                    .name(name.to_owned())
262                    .spawn(move || {
263                        let _ = block_on(futures::io::copy(
264                            serial,
265                            &mut AllowStdIo::new(term::raw_stderr()),
266                        ));
267                    })
268                    .unwrap();
269                Some(config)
270            }
271            SerialConfigCli::File(path) => {
272                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
273                let file = fs_err::File::create(path).context("failed to create file")?;
274
275                thread::Builder::new()
276                    .name(name.to_owned())
277                    .spawn(move || {
278                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
279                    })
280                    .unwrap();
281                Some(config)
282            }
283            SerialConfigCli::None => None,
284            SerialConfigCli::Pipe(path) => {
285                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
286            }
287            SerialConfigCli::Tcp(addr) => {
288                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
289            }
290            SerialConfigCli::NewConsole(app, window_title) => {
291                let path = console_relay::random_console_path();
292                let config =
293                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
294                let window_title =
295                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
296
297                console_relay::launch_console(
298                    app.or_else(openvmm_terminal_app).as_deref(),
299                    &path,
300                    ConsoleLaunchOptions {
301                        window_title: Some(window_title),
302                    },
303                )
304                .context("failed to launch console")?;
305
306                Some(config)
307            }
308        })
309    };
310
311    let mut vmbus_devices = Vec::new();
312
313    let serial0_cfg = setup_serial(
314        "com1",
315        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
316        if cfg!(guest_arch = "x86_64") {
317            "ttyS0"
318        } else {
319            "ttyAMA0"
320        },
321    )?;
322    let serial1_cfg = setup_serial(
323        "com2",
324        opt.com2.clone().unwrap_or(SerialConfigCli::None),
325        if cfg!(guest_arch = "x86_64") {
326            "ttyS1"
327        } else {
328            "ttyAMA1"
329        },
330    )?;
331    let serial2_cfg = setup_serial(
332        "com3",
333        opt.com3.clone().unwrap_or(SerialConfigCli::None),
334        if cfg!(guest_arch = "x86_64") {
335            "ttyS2"
336        } else {
337            "ttyAMA2"
338        },
339    )?;
340    let serial3_cfg = setup_serial(
341        "com4",
342        opt.com4.clone().unwrap_or(SerialConfigCli::None),
343        if cfg!(guest_arch = "x86_64") {
344            "ttyS3"
345        } else {
346            "ttyAMA3"
347        },
348    )?;
349    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
350        "vmbus_com1",
351        opt.vmbus_com1_serial
352            .clone()
353            .unwrap_or(SerialConfigCli::None),
354        "vmbus_com1",
355    )? {
356        vmbus_devices.push((
357            openhcl_vtl,
358            VmbusSerialDeviceHandle {
359                port: VmbusSerialPort::Com1,
360                backend: vmbus_com1_cfg,
361            }
362            .into_resource(),
363        ));
364        true
365    } else {
366        false
367    };
368    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
369        "vmbus_com2",
370        opt.vmbus_com2_serial
371            .clone()
372            .unwrap_or(SerialConfigCli::None),
373        "vmbus_com2",
374    )? {
375        vmbus_devices.push((
376            openhcl_vtl,
377            VmbusSerialDeviceHandle {
378                port: VmbusSerialPort::Com2,
379                backend: vmbus_com2_cfg,
380            }
381            .into_resource(),
382        ));
383        true
384    } else {
385        false
386    };
387    let debugcon_cfg = setup_serial(
388        "debugcon",
389        opt.debugcon
390            .clone()
391            .map(|cfg| cfg.serial)
392            .unwrap_or(SerialConfigCli::None),
393        "debugcon",
394    )?;
395
396    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
397        setup_serial("virtio-console", serial_cfg, "hvc0")?
398    } else {
399        None
400    };
401
402    let mut resources = VmResources::default();
403    let mut console_str = "";
404    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
405        resources.console_in = Some(input);
406        console_str = device;
407    }
408
409    if opt.shared_memory {
410        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
411    }
412    if opt.deprecated_prefetch {
413        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
414    }
415    if opt.deprecated_private_memory {
416        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
417    }
418    if opt.deprecated_thp {
419        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
420    }
421    if opt.deprecated_memory_backing_file.is_some() {
422        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
423    }
424
425    opt.validate_memory_options()?;
426
427    const MAX_PROCESSOR_COUNT: u32 = 1024;
428
429    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
430        bail!("invalid proc count: {}", opt.processors);
431    }
432
433    // Total SCSI channel count should not exceed the processor count
434    // (at most, one channel per VP).
435    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
436        bail!(
437            "invalid SCSI sub-channel count: requested {}, max {}",
438            opt.scsi_sub_channels,
439            MAX_PROCESSOR_COUNT - 1
440        );
441    }
442
443    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
444
445    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
446    for &cli_args::DiskCli {
447        vtl,
448        ref kind,
449        read_only,
450        is_dvd,
451        underhill,
452        ref pcie_port,
453    } in &opt.disk
454    {
455        if pcie_port.is_some() {
456            anyhow::bail!("`--disk` is incompatible with PCIe");
457        }
458
459        storage
460            .add(
461                vtl,
462                underhill,
463                storage_builder::DiskLocation::Scsi(None),
464                kind,
465                is_dvd,
466                read_only,
467            )
468            .await?;
469    }
470
471    for &cli_args::IdeDiskCli {
472        ref kind,
473        read_only,
474        channel,
475        device,
476        is_dvd,
477    } in &opt.ide
478    {
479        storage
480            .add(
481                DeviceVtl::Vtl0,
482                None,
483                storage_builder::DiskLocation::Ide(channel, device),
484                kind,
485                is_dvd,
486                read_only,
487            )
488            .await?;
489    }
490
491    for &cli_args::DiskCli {
492        vtl,
493        ref kind,
494        read_only,
495        is_dvd,
496        underhill,
497        ref pcie_port,
498    } in &opt.nvme
499    {
500        storage
501            .add(
502                vtl,
503                underhill,
504                storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
505                kind,
506                is_dvd,
507                read_only,
508            )
509            .await?;
510    }
511
512    for &cli_args::DiskCli {
513        vtl,
514        ref kind,
515        read_only,
516        is_dvd,
517        ref underhill,
518        ref pcie_port,
519    } in &opt.virtio_blk
520    {
521        if underhill.is_some() {
522            anyhow::bail!("underhill not supported with virtio-blk");
523        }
524        storage
525            .add(
526                vtl,
527                None,
528                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
529                kind,
530                is_dvd,
531                read_only,
532            )
533            .await?;
534    }
535
536    let mut floppy_disks = Vec::new();
537    for disk in &opt.floppy {
538        let &cli_args::FloppyDiskCli {
539            ref kind,
540            read_only,
541        } = disk;
542        floppy_disks.push(FloppyDiskConfig {
543            disk_type: disk_open(kind, read_only).await?,
544            read_only,
545        });
546    }
547
548    let mut vpci_mana_nics = [(); 3].map(|()| None);
549    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
550    let mut underhill_nics = Vec::new();
551    let mut vpci_devices = Vec::new();
552
553    let mut nic_index = 0;
554    for cli_cfg in &opt.net {
555        if cli_cfg.pcie_port.is_some() {
556            anyhow::bail!("`--net` does not support PCIe");
557        }
558        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
559        if cli_cfg.underhill {
560            if !opt.no_alias_map {
561                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
562            }
563            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
564                let vpci_instance_id = Guid::new_random();
565                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
566                    instance_id: vpci_instance_id.to_string(),
567                    subordinate_instance_id: None,
568                    max_sub_channels: None,
569                });
570                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
571            });
572            mana.1.vports.push(VportDefinition {
573                mac_address: vport.mac_address,
574                endpoint: vport.endpoint,
575            });
576        } else {
577            vmbus_devices.push(vport.into_netvsp_handle());
578        }
579    }
580
581    if opt.nic {
582        let nic_config = parse_endpoint(
583            &NicConfigCli {
584                vtl: DeviceVtl::Vtl0,
585                endpoint: EndpointConfigCli::Consomme {
586                    cidr: None,
587                    host_fwd: Vec::new(),
588                },
589                max_queues: None,
590                underhill: false,
591                pcie_port: None,
592            },
593            &mut nic_index,
594            &mut resources,
595        )?;
596        vmbus_devices.push(nic_config.into_netvsp_handle());
597    }
598
599    // Build initial PCIe devices list from CLI options. Storage devices
600    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
601    let mut pcie_devices = Vec::new();
602    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
603        tracing::info!(
604            port_name = %cli_cfg.port_name,
605            socket_addr = ?cli_cfg.socket_addr,
606            "instantiating PCIe remote device"
607        );
608
609        // Generate a deterministic instance ID based on index
610        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
611            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
612        let instance_id = Guid {
613            data1: index as u32,
614            ..PCIE_REMOTE_BASE_INSTANCE_ID
615        };
616
617        pcie_devices.push(PcieDeviceConfig {
618            port_name: cli_cfg.port_name.clone(),
619            resource: pcie_remote_resources::PcieRemoteHandle {
620                instance_id,
621                socket_addr: cli_cfg.socket_addr.clone(),
622                hu: cli_cfg.hu,
623                controller: cli_cfg.controller,
624            }
625            .into_resource(),
626        });
627    }
628
629    #[cfg(windows)]
630    let mut kernel_vmnics = Vec::new();
631    #[cfg(windows)]
632    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
633        // Pick a random MAC address.
634        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
635        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
636
637        // Pick a fixed instance ID based on the index.
638        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
639        let instance_id = Guid {
640            data1: index as u32,
641            ..BASE_INSTANCE_ID
642        };
643
644        let switch_id = if switch_id == "default" {
645            DEFAULT_SWITCH
646        } else {
647            switch_id
648        };
649        let (port_id, port) = new_switch_port(switch_id)?;
650        resources.switch_ports.push(port);
651
652        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
653            instance_id,
654            mac_address: mac_address.into(),
655            switch_port_id: port_id,
656        });
657    }
658
659    for vport in &opt.mana {
660        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
661        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
662            (vtl, None) => {
663                &mut vpci_mana_nics[vtl]
664                    .get_or_insert_with(|| {
665                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
666                    })
667                    .1
668                    .vports
669            }
670            (0, Some(pcie_port)) => {
671                &mut pcie_mana_nics
672                    .entry(pcie_port)
673                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
674                    .vports
675            }
676            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
677        };
678        vport_array.push(VportDefinition {
679            mac_address: vport.mac_address,
680            endpoint: vport.endpoint,
681        });
682    }
683
684    vpci_devices.extend(
685        vpci_mana_nics
686            .into_iter()
687            .enumerate()
688            .filter_map(|(vtl, nic)| {
689                nic.map(|(instance_id, handle)| VpciDeviceConfig {
690                    vtl: match vtl {
691                        0 => DeviceVtl::Vtl0,
692                        1 => DeviceVtl::Vtl1,
693                        2 => DeviceVtl::Vtl2,
694                        _ => unreachable!(),
695                    },
696                    instance_id,
697                    resource: handle.into_resource(),
698                })
699            }),
700    );
701
702    pcie_devices.extend(
703        pcie_mana_nics
704            .into_iter()
705            .map(|(pcie_port, handle)| PcieDeviceConfig {
706                port_name: pcie_port,
707                resource: handle.into_resource(),
708            }),
709    );
710
711    // If VTL2 is enabled, and we are not in VTL2 self allocate mode, provide an
712    // mmio gap for VTL2.
713    let use_vtl2_gap = opt.vtl2
714        && !matches!(
715            opt.igvm_vtl2_relocation_type,
716            Vtl2BaseAddressType::Vtl2Allocate { .. },
717        );
718
719    #[cfg(guest_arch = "aarch64")]
720    let arch = MachineArch::Aarch64;
721    #[cfg(guest_arch = "x86_64")]
722    let arch = MachineArch::X86_64;
723
724    let mmio_gaps: Vec<MemoryRange> = match (use_vtl2_gap, arch) {
725        (true, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86_WITH_VTL2.into(),
726        (true, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2.into(),
727        (false, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86.into(),
728        (false, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64.into(),
729    };
730
731    let mut pci_ecam_gaps = Vec::new();
732    let mut pci_mmio_gaps = Vec::new();
733
734    let mut low_mmio_start = mmio_gaps.first().context("expected mmio gap")?.start();
735    let mut high_mmio_end = mmio_gaps.last().context("expected second mmio gap")?.end();
736
737    let mut pcie_root_complexes = Vec::new();
738    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
739        let ports = opt
740            .pcie_root_port
741            .iter()
742            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
743            .map(|port_cli| PcieRootPortConfig {
744                name: port_cli.name.clone(),
745                hotplug: port_cli.hotplug,
746            })
747            .collect();
748
749        const ONE_MB: u64 = 1024 * 1024;
750        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
751        let high_mmio_size = rc_cli
752            .high_mmio
753            .checked_next_multiple_of(ONE_MB)
754            .context("high mmio rounding error")?;
755        let ecam_size = (((rc_cli.end_bus - rc_cli.start_bus) as u64) + 1) * 256 * 4096;
756
757        let low_pci_mmio_start = low_mmio_start
758            .checked_sub(low_mmio_size)
759            .context("pci low mmio underflow")?;
760        let ecam_start = low_pci_mmio_start
761            .checked_sub(ecam_size)
762            .context("pci ecam underflow")?;
763        low_mmio_start = ecam_start;
764        high_mmio_end = high_mmio_end
765            .checked_add(high_mmio_size)
766            .context("pci high mmio overflow")?;
767
768        let ecam_range = MemoryRange::new(ecam_start..ecam_start + ecam_size);
769        let low_mmio = MemoryRange::new(low_pci_mmio_start..low_pci_mmio_start + low_mmio_size);
770        let high_mmio = MemoryRange::new(high_mmio_end - high_mmio_size..high_mmio_end);
771
772        pci_ecam_gaps.push(ecam_range);
773        pci_mmio_gaps.push(low_mmio);
774        pci_mmio_gaps.push(high_mmio);
775
776        pcie_root_complexes.push(PcieRootComplexConfig {
777            index: i as u32,
778            name: rc_cli.name.clone(),
779            segment: rc_cli.segment,
780            start_bus: rc_cli.start_bus,
781            end_bus: rc_cli.end_bus,
782            ecam_range,
783            low_mmio,
784            high_mmio,
785            ports,
786        });
787    }
788
789    pci_ecam_gaps.sort();
790    pci_mmio_gaps.sort();
791
792    let pcie_switches = build_switch_list(&opt.pcie_switch);
793
794    #[cfg(target_os = "linux")]
795    let vfio_pcie_devices: Vec<PcieDeviceConfig> = opt
796        .vfio
797        .iter()
798        .map(|cli_cfg| {
799            use vm_resource::IntoResource;
800
801            let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
802            let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
803                .with_context(|| format!("failed to read IOMMU group for {}", cli_cfg.pci_id))?;
804            let group_id: u64 = iommu_group_link
805                .file_name()
806                .and_then(|s| s.to_str())
807                .context("invalid iommu_group symlink")?
808                .parse()
809                .context("failed to parse IOMMU group ID")?;
810            let group = std::fs::OpenOptions::new()
811                .read(true)
812                .write(true)
813                .open(format!("/dev/vfio/{group_id}"))
814                .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
815
816            Ok(PcieDeviceConfig {
817                port_name: cli_cfg.port_name.clone(),
818                resource: vfio_assigned_device_resources::VfioDeviceHandle {
819                    pci_id: cli_cfg.pci_id.clone(),
820                    group,
821                }
822                .into_resource(),
823            })
824        })
825        .collect::<anyhow::Result<Vec<_>>>()?;
826
827    #[cfg(windows)]
828    let vpci_resources: Vec<_> = opt
829        .device
830        .iter()
831        .map(|path| -> anyhow::Result<_> {
832            Ok(virt_whp::device::DeviceHandle(
833                whp::VpciResource::new(
834                    None,
835                    Default::default(),
836                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
837                )
838                .with_context(|| format!("opening PCI device {}", path))?,
839            ))
840        })
841        .collect::<Result<_, _>>()?;
842
843    // Create a vmbusproxy handle if needed by any devices.
844    #[cfg(windows)]
845    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
846        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
847    } else {
848        None
849    };
850
851    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
852        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
853        let (fb, fba) =
854            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
855        resources.framebuffer_access = Some(fba);
856        Some(fb)
857    } else {
858        None
859    };
860
861    let load_mode;
862    let with_hv;
863
864    let any_serial_configured = serial0_cfg.is_some()
865        || serial1_cfg.is_some()
866        || serial2_cfg.is_some()
867        || serial3_cfg.is_some();
868
869    let has_com3 = serial2_cfg.is_some();
870
    // Pick the base chipset type from the requested firmware/boot flavor, then
    // layer optional platform features onto the manifest builder.
    let mut chipset = VmManifestBuilder::new(
        if opt.igvm.is_some() {
            BaseChipsetType::HclHost
        } else if opt.pcat {
            BaseChipsetType::HypervGen1
        } else if opt.uefi {
            BaseChipsetType::HypervGen2Uefi
        } else if opt.hv {
            BaseChipsetType::HyperVGen2LinuxDirect
        } else {
            BaseChipsetType::UnenlightenedLinuxDirect
        },
        arch,
    );

    if framebuffer.is_some() {
        chipset = chipset.with_framebuffer();
    }
    if opt.guest_watchdog {
        chipset = chipset.with_guest_watchdog();
    }
    if any_serial_configured {
        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
    }
    if opt.battery {
        // Seed the battery channel with a default "battery present" update so
        // the device has an initial state.
        let (tx, rx) = mesh::channel();
        tx.send(HostBatteryUpdate::default_present());
        chipset = chipset.with_battery(rx);
    }
    if let Some(cfg) = &opt.debugcon {
        // Fall back to a disconnected serial backend if none was configured.
        chipset = chipset.with_debugcon(
            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
            cfg.port,
        );
    }
906
    // TODO: load from VMGS file if it exists
    let bios_guid = Guid::new_random();

    // Materialize the final chipset configuration from the builder.
    let VmChipsetResult {
        chipset,
        mut chipset_devices,
        pci_chipset_devices,
        capabilities,
    } = chipset
        .build()
        .context("failed to build chipset configuration")?;
918
    // Select the firmware/boot load mode. Exactly one arm runs, assigning both
    // `load_mode` and `with_hv` (declared above). Priority: snapshot restore,
    // IGVM, PCAT, UEFI, then Linux direct boot as the default.
    if opt.restore_snapshot.is_some() {
        // Snapshot restore: skip firmware loading entirely. Device state and
        // memory come from the snapshot directory.
        load_mode = LoadMode::None;
        with_hv = true;
    } else if let Some(path) = &opt.igvm {
        let file = fs_err::File::open(path)
            .context("failed to open igvm file")?
            .into();
        let cmdline = opt.cmdline.join(" ");
        with_hv = true;

        load_mode = LoadMode::Igvm {
            file,
            cmdline,
            vtl2_base_address: opt.igvm_vtl2_relocation_type,
            // Only advertise COM3 serial info when serial2 is configured.
            com_serial: has_com3.then(|| SerialInformation {
                io_port: ComPort::Com3.io_port(),
                irq: ComPort::Com3.irq().into(),
            }),
        };
    } else if opt.pcat {
        // Emit a nice error early instead of complaining about missing firmware.
        if arch != MachineArch::X86_64 {
            anyhow::bail!("pcat not supported on this architecture");
        }
        with_hv = true;

        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
        load_mode = LoadMode::Pcat {
            firmware,
            boot_order: opt
                .pcat_boot_order
                .map(|x| x.0)
                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
        };
    } else if opt.uefi {
        use openvmm_defs::config::UefiConsoleMode;

        with_hv = true;

        let firmware = fs_err::File::open(
            (opt.uefi_firmware.0)
                .as_ref()
                .context("must provide uefi firmware when booting with uefi")?,
        )
        .context("failed to open uefi firmware")?;

        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
        load_mode = LoadMode::Uefi {
            firmware: firmware.into(),
            enable_debugging: opt.uefi_debug,
            enable_memory_protections: opt.uefi_enable_memory_protections,
            disable_frontpage: opt.disable_frontpage,
            enable_tpm: opt.tpm,
            enable_battery: opt.battery,
            enable_serial: any_serial_configured,
            enable_vpci_boot: false,
            // Map the CLI console-mode enum onto the config enum.
            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
                UefiConsoleModeCli::None => UefiConsoleMode::None,
            }),
            default_boot_always_attempt: opt.default_boot_always_attempt,
            bios_guid,
        };
    } else {
        // Linux Direct
        // `panic=-1` makes the guest kernel reboot immediately on panic.
        let mut cmdline = "panic=-1 debug".to_string();

        with_hv = opt.hv;
        if with_hv && opt.pcie_root_complex.is_empty() {
            cmdline += " pci=off";
        }

        // `console_str` was computed earlier from the serial configuration.
        if !console_str.is_empty() {
            let _ = write!(&mut cmdline, " console={}", console_str);
        }

        if opt.gfx {
            cmdline += " console=tty";
        }
        // Append any user-supplied extra kernel command line fragments.
        for extra in &opt.cmdline {
            let _ = write!(&mut cmdline, " {}", extra);
        }

        let kernel = fs_err::File::open(
            (opt.kernel.0)
                .as_ref()
                .context("must provide kernel when booting with linux direct")?,
        )
        .context("failed to open kernel")?;
        let initrd = (opt.initrd.0)
            .as_ref()
            .map(fs_err::File::open)
            .transpose()
            .context("failed to open initrd")?;

        // Optionally read a user-supplied DSDT ACPI table to use instead of
        // the generated one.
        let custom_dsdt = match &opt.custom_dsdt {
            Some(path) => {
                let mut v = Vec::new();
                fs_err::File::open(path)
                    .context("failed to open custom dsdt")?
                    .read_to_end(&mut v)
                    .context("failed to read custom dsdt")?;
                Some(v)
            }
            None => None,
        };

        load_mode = LoadMode::Linux {
            kernel: kernel.into(),
            initrd: initrd.map(Into::into),
            cmdline,
            custom_dsdt,
            enable_serial: any_serial_configured,
            boot_mode: if opt.device_tree {
                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
            } else {
                openvmm_defs::config::LinuxDirectBootMode::Acpi
            },
        };
    }
1044
    // Open the guest state (VMGS) backing store, or use ephemeral state when
    // no `--vmgs` option was given. Wrapped in `Some` so the GED path below
    // can `take()` ownership of it.
    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
        let disk = VmgsDisk {
            disk: disk_open(kind, false)
                .await
                .context("failed to open vmgs disk")?,
            // Test flag selects GSP-by-ID encryption; otherwise unencrypted.
            encryption_policy: if opt.test_gsp_by_id {
                GuestStateEncryptionPolicy::GspById(true)
            } else {
                GuestStateEncryptionPolicy::None(true)
            },
        };
        // Map the CLI provisioning policy onto the VMGS resource variant.
        match provision {
            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
        }
    } else {
        VmgsResource::Ephemeral
    });
1064
    // When the guest emulation transport is in use (presumably OpenHCL/VTL2
    // scenarios — `with_get` is set earlier, out of view) and a hypervisor
    // interface is enabled, build the VTL2 settings blob and attach the guest
    // emulation log (GEL) and guest emulation device (GED) vmbus devices.
    if with_get && with_hv {
        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
            fixed: Some(Default::default()),
            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
                nic_devices: underhill_nics,
            }),
            namespace_settings: Vec::default(),
        };

        // Cache the VTL2 settings for later modification via the interactive console.
        resources.vtl2_settings = Some(vtl2_settings.clone());

        // RPC channel for sending guest requests to the GED.
        let (send, guest_request_recv) = mesh::channel();
        resources.ged_rpc = Some(send);

        // The GED takes ownership of the VMGS resource; `vmgs` was always
        // populated above, so the unwrap cannot fail.
        let vmgs = vmgs.take().unwrap();

        vmbus_devices.extend([
            (
                openhcl_vtl,
                get_resources::gel::GuestEmulationLogHandle.into_resource(),
            ),
            (
                openhcl_vtl,
                get_resources::ged::GuestEmulationDeviceHandle {
                    // Describe the VTL0 firmware the paravisor should load.
                    firmware: if opt.pcat {
                        get_resources::ged::GuestFirmwareConfig::Pcat {
                            boot_order: opt
                                .pcat_boot_order
                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
                                .map(|x| match x {
                                    openvmm_defs::config::PcatBootDevice::Floppy => {
                                        get_resources::ged::PcatBootDevice::Floppy
                                    }
                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
                                        get_resources::ged::PcatBootDevice::HardDrive
                                    }
                                    openvmm_defs::config::PcatBootDevice::Optical => {
                                        get_resources::ged::PcatBootDevice::Optical
                                    }
                                    openvmm_defs::config::PcatBootDevice::Network => {
                                        get_resources::ged::PcatBootDevice::Network
                                    }
                                }),
                        }
                    } else {
                        use get_resources::ged::UefiConsoleMode;

                        get_resources::ged::GuestFirmwareConfig::Uefi {
                            enable_vpci_boot: storage.has_vtl0_nvme(),
                            firmware_debug: opt.uefi_debug,
                            disable_frontpage: opt.disable_frontpage,
                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
                                UefiConsoleModeCli::None => UefiConsoleMode::None,
                            },
                            default_boot_always_attempt: opt.default_boot_always_attempt,
                        }
                    },
                    com1: with_vmbus_com1_serial,
                    com2: with_vmbus_com2_serial,
                    serial_tx_only: opt.serial_tx_only,
                    // Serialize the settings as a protobuf blob for the GED.
                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
                    vmbus_redirection: opt.vmbus_redirect,
                    vmgs,
                    framebuffer: opt
                        .vtl2_gfx
                        .then(|| SharedFramebufferHandle.into_resource()),
                    guest_request_recv,
                    enable_tpm: opt.tpm,
                    firmware_event_send: None,
                    secure_boot_enabled: opt.secure_boot,
                    secure_boot_template: match opt.secure_boot_template {
                        Some(SecureBootTemplateCli::Windows) => {
                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
                        },
                        Some(SecureBootTemplateCli::UefiCa) => {
                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
                        }
                        None => {
                            get_resources::ged::GuestSecureBootTemplateType::None
                        },
                    },
                    enable_battery: opt.battery,
                    no_persistent_secrets: true,
                    igvm_attest_test_config: None,
                    test_gsp_by_id: opt.test_gsp_by_id,
                    efi_diagnostics_log_level: {
                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
                        }
                    },
                    hv_sint_enabled: false,
                }
                .into_resource(),
            ),
        ]);
    }
1169
    // Add an emulated TPM chipset device (hosted in its own worker process)
    // unless VTL2 is present, in which case the paravisor provides the TPM.
    if opt.tpm && !opt.vtl2 {
        // x86_64 guests use the IO-port TPM interface; others use MMIO.
        let register_layout = if cfg!(guest_arch = "x86_64") {
            TpmRegisterLayout::IoPort
        } else {
            TpmRegisterLayout::Mmio
        };

        // Persist PPI and NVRAM state in the VMGS when one is configured;
        // otherwise keep the state in memory only.
        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
            (
                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
            )
        } else {
            (
                EphemeralNonVolatileStoreHandle.into_resource(),
                EphemeralNonVolatileStoreHandle.into_resource(),
            )
        };

        chipset_devices.push(ChipsetDeviceHandle {
            name: "tpm".to_string(),
            // Run the TPM in a remote worker host rather than in-process.
            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
                device: TpmDeviceHandle {
                    ppi_store,
                    nvram_store,
                    nvram_size: None,
                    refresh_tpm_seeds: false,
                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
                    register_layout,
                    guest_secret_key: None,
                    logger: None,
                    is_confidential_vm: false,
                    bios_guid,
                }
                .into_resource(),
                worker_host: mesh.make_host("tpm", None).await?,
            }
            .into_resource(),
        });
    }
1210
    // Compute the custom UEFI variable set: start from the selected secure
    // boot template (or empty), then apply an optional JSON-described delta.
    let custom_uefi_vars = {
        use firmware_uefi_custom_vars::CustomVars;

        // load base vars from specified template, or use an empty set of base
        // vars if none was specified.
        let base_vars = match opt.secure_boot_template {
            Some(template) => match (arch, template) {
                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
                    hyperv_secure_boot_templates::x64::microsoft_windows()
                }
                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
                }
                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
                }
                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
                }
            },
            None => CustomVars::default(),
        };

        // TODO: fallback to VMGS read if no command line flag was given

        let custom_uefi_json_data = match &opt.custom_uefi_json {
            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
            None => None,
        };

        // obtain the final custom uefi vars by applying the delta onto the base vars
        match custom_uefi_json_data {
            Some(data) => {
                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
                base_vars.apply_delta(delta)?
            }
            None => base_vars,
        }
    };
1250
    // PCAT boots need an SVGA BIOS image for the emulated VGA device.
    let vga_firmware = if opt.pcat {
        Some(openvmm_pcat_locator::find_svga_bios(
            opt.vga_firmware.as_deref(),
        )?)
    } else {
        None
    };

    // With graphics enabled, add the synthetic video/keyboard/mouse vmbus
    // devices for VTL0.
    if opt.gfx {
        vmbus_devices.extend([
            (
                DeviceVtl::Vtl0,
                SynthVideoHandle {
                    framebuffer: SharedFramebufferHandle.into_resource(),
                }
                .into_resource(),
            ),
            (
                DeviceVtl::Vtl0,
                SynthKeyboardHandle {
                    source: MultiplexedInputHandle {
                        // Save 0 for PS/2
                        elevation: 1,
                    }
                    .into_resource(),
                }
                .into_resource(),
            ),
            (
                DeviceVtl::Vtl0,
                SynthMouseHandle {
                    source: MultiplexedInputHandle {
                        // Save 0 for PS/2
                        elevation: 1,
                    }
                    .into_resource(),
                }
                .into_resource(),
            ),
        ]);
    }
1292
1293    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1294        if let Some(path) = path {
1295            cleanup_socket(path.as_ref());
1296            let listener = unix_socket::UnixListener::bind(path)
1297                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1298            Ok(Some(listener))
1299        } else {
1300            Ok(None)
1301        }
1302    };
1303
    // Bind the optional VTL0 and VTL2 hybrid-vsock listeners.
    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;

    // Attach an OpenHCL crash-dump handler device; its background task is
    // detached so it runs for the life of the process.
    if let Some(path) = &opt.openhcl_dump_path {
        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
        task.detach();
        vmbus_devices.push((openhcl_vtl, resource));
    }
1312
    // Architecture-specific processor topology: exactly one of these two
    // definitions is compiled in, depending on the guest architecture.
    #[cfg(guest_arch = "aarch64")]
    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
        openvmm_defs::config::Aarch64TopologyConfig {
            // TODO: allow this to be configured from the command line
            gic_config: None,
            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
            gic_msi: match opt.gic_msi {
                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
                cli_args::GicMsiCli::V2m => openvmm_defs::config::GicMsiConfig::V2m,
            },
        },
    );
    #[cfg(guest_arch = "x86_64")]
    let topology_arch =
        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
            apic_id_offset: opt.apic_id_offset,
            x2apic: opt.x2apic,
        });
1332
1333    let with_isolation = if let Some(isolation) = &opt.isolation {
1334        // TODO: For now, isolation is only supported with VTL2.
1335        if !opt.vtl2 {
1336            anyhow::bail!("isolation is only currently supported with vtl2");
1337        }
1338
1339        // TODO: Alias map support is not yet implement with isolation.
1340        if !opt.no_alias_map {
1341            anyhow::bail!("alias map not supported with isolation");
1342        }
1343
1344        match isolation {
1345            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1346        }
1347    } else {
1348        None
1349    };
1350
    // With a hypervisor interface present, wire up the Hyper-V integration
    // components (shutdown, KVP, timesync) as VTL0 vmbus devices, keeping the
    // control channels in `resources` for the interactive console.
    if with_hv {
        let (shutdown_send, shutdown_recv) = mesh::channel();
        resources.shutdown_ic = Some(shutdown_send);
        let (kvp_send, kvp_recv) = mesh::channel();
        resources.kvp_ic = Some(kvp_send);
        vmbus_devices.extend(
            [
                hyperv_ic_resources::shutdown::ShutdownIcHandle {
                    recv: shutdown_recv,
                }
                .into_resource(),
                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
            ]
            .map(|r| (DeviceVtl::Vtl0, r)),
        );
    }

    // Optionally expose an IMC registry hive to the guest via a vmbfs device.
    if let Some(hive_path) = &opt.imc {
        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
        vmbus_devices.push((
            DeviceVtl::Vtl0,
            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
        ));
    }
1376
    let mut virtio_devices = Vec::new();
    // Route a virtio device to the requested transport. A resolved bus of
    // `None` means virtio-over-VPCI; otherwise the device goes on the chosen
    // MMIO or PCI virtio bus.
    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
        let bus = match bus {
            VirtioBusCli::Auto => {
                // Use VPCI when possible (currently only on Windows and macOS due
                // to KVM backend limitations).
                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
                    None
                } else {
                    Some(VirtioBus::Pci)
                }
            }
            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
            VirtioBusCli::Pci => Some(VirtioBus::Pci),
            VirtioBusCli::Vpci => None,
        };
        if let Some(bus) = bus {
            virtio_devices.push((bus, resource));
        } else {
            // VPCI path: wrap the virtio device as a VPCI device for VTL0.
            vpci_devices.push(VpciDeviceConfig {
                vtl: DeviceVtl::Vtl0,
                instance_id: Guid::new_random(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        }
    };
1403
    // Virtio network devices. Each device either attaches to an explicit PCIe
    // port or goes through the generic virtio routing helper above; the same
    // pattern repeats for the other virtio device kinds below.
    for cli_cfg in &opt.virtio_net {
        if cli_cfg.underhill {
            anyhow::bail!("use --net uh:[...] to add underhill NICs")
        }
        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
        let resource = virtio_resources::net::VirtioNetHandle {
            max_queues: vport.max_queues,
            mac_address: vport.mac_address,
            endpoint: vport.endpoint,
        }
        .into_resource();
        if let Some(pcie_port) = &cli_cfg.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(VirtioBusCli::Auto, resource);
        }
    }

    // Virtio-fs devices backed by a host directory.
    for args in &opt.virtio_fs {
        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
            tag: args.tag.clone(),
            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
                root_path: args.path.clone(),
                mount_options: args.options.clone(),
            },
        }
        .into_resource();
        if let Some(pcie_port) = &args.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(opt.virtio_fs_bus, resource);
        }
    }

    // Virtio-fs devices backed by shared-memory sections.
    for args in &opt.virtio_fs_shmem {
        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
            tag: args.tag.clone(),
            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
                root_path: args.path.clone(),
            },
        }
        .into_resource();
        if let Some(pcie_port) = &args.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(opt.virtio_fs_bus, resource);
        }
    }

    // Virtio 9p (Plan 9 filesystem) devices.
    for args in &opt.virtio_9p {
        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
            tag: args.tag.clone(),
            root_path: args.path.clone(),
            debug: opt.virtio_9p_debug,
        }
        .into_resource();
        if let Some(pcie_port) = &args.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(VirtioBusCli::Auto, resource);
        }
    }

    // Optional virtio-pmem (persistent memory) device.
    if let Some(pmem_args) = &opt.virtio_pmem {
        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
            path: pmem_args.path.clone(),
        }
        .into_resource();
        if let Some(pcie_port) = &pmem_args.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(VirtioBusCli::Auto, resource);
        }
    }

    // Optional virtio-rng entropy device.
    if opt.virtio_rng {
        let resource: Resource<VirtioDeviceHandle> =
            virtio_resources::rng::VirtioRngHandle.into_resource();
        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(opt.virtio_rng_bus, resource);
        }
    }

    // Optional virtio console, using a backend configured earlier.
    if let Some(backend) = virtio_console_backend {
        let resource: Resource<VirtioDeviceHandle> =
            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(VirtioBusCli::Auto, resource);
        }
    }
1519
    // Handle --vhost-user arguments. Linux only: connect to each vhost-user
    // backend over its Unix socket and wrap it as the matching virtio device.
    #[cfg(target_os = "linux")]
    for vhost_cli in &opt.vhost_user {
        let stream =
            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
                format!(
                    "failed to connect to vhost-user socket: {}",
                    vhost_cli.socket_path
                )
            })?;

        use crate::cli_args::VhostUserDeviceTypeCli;
        // Map the CLI device type onto the corresponding vhost-user handle.
        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
            VhostUserDeviceTypeCli::Fs {
                ref tag,
                num_queues,
                queue_size,
            } => virtio_resources::vhost_user::VhostUserFsHandle {
                socket: stream.into(),
                tag: tag.clone(),
                num_queues,
                queue_size,
            }
            .into_resource(),
            VhostUserDeviceTypeCli::Blk {
                num_queues,
                queue_size,
            } => virtio_resources::vhost_user::VhostUserBlkHandle {
                socket: stream.into(),
                num_queues,
                queue_size,
            }
            .into_resource(),
            VhostUserDeviceTypeCli::Other {
                device_id,
                ref queue_sizes,
            } => virtio_resources::vhost_user::VhostUserGenericHandle {
                socket: stream.into(),
                device_id,
                queue_sizes: queue_sizes.clone(),
            }
            .into_resource(),
        };
        if let Some(pcie_port) = &vhost_cli.pcie_port {
            pcie_devices.push(PcieDeviceConfig {
                port_name: pcie_port.clone(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        } else {
            add_virtio_device(VirtioBusCli::Auto, resource);
        }
    }
1572
    // Optional virtio-vsock device relayed over a Unix domain socket. The
    // unwrap is safe: `vsock_listener(Some(..))` returns `Some` on success.
    if let Some(vsock_path) = &opt.virtio_vsock_path {
        let listener = vsock_listener(Some(vsock_path))?.unwrap();
        add_virtio_device(
            VirtioBusCli::Auto,
            virtio_resources::vsock::VirtioVsockHandle {
                // The guest CID does not matter since the UDS relay does not use it. It just needs
                // to be some non-reserved value for the guest to use.
                guest_cid: 0x3,
                base_path: vsock_path.clone(),
                listener,
            }
            .into_resource(),
        );
    }
1587
1588    let mut cfg = Config {
1589        chipset,
1590        load_mode,
1591        floppy_disks,
1592        pcie_root_complexes,
1593        #[cfg(target_os = "linux")]
1594        pcie_devices: {
1595            let mut devs = pcie_devices;
1596            devs.extend(vfio_pcie_devices);
1597            devs
1598        },
1599        #[cfg(not(target_os = "linux"))]
1600        pcie_devices,
1601        pcie_switches,
1602        vpci_devices,
1603        ide_disks: Vec::new(),
1604        memory: MemoryConfig {
1605            mem_size: if let Some(ref sizes) = opt.numa_memory {
1606                sizes
1607                    .iter()
1608                    .try_fold(0u64, |acc, &s| acc.checked_add(s))
1609                    .context("numa memory sizes overflow")?
1610            } else {
1611                opt.memory_size()
1612            },
1613            mmio_gaps,
1614            prefetch_memory: opt.prefetch_memory(),
1615            private_memory: opt.private_memory(),
1616            transparent_hugepages: opt.transparent_hugepages(),
1617            hugepages: opt.memory.hugepages,
1618            hugepage_size: opt.memory.hugepage_size,
1619            pci_ecam_gaps,
1620            pci_mmio_gaps,
1621            numa_mem_sizes: opt.numa_memory.clone(),
1622        },
1623        processor_topology: ProcessorTopologyConfig {
1624            proc_count: opt.processors,
1625            vps_per_socket: opt.vps_per_socket,
1626            enable_smt: match opt.smt {
1627                cli_args::SmtConfigCli::Auto => None,
1628                cli_args::SmtConfigCli::Force => Some(true),
1629                cli_args::SmtConfigCli::Off => Some(false),
1630            },
1631            arch: Some(topology_arch),
1632        },
1633        hypervisor: HypervisorConfig {
1634            with_hv,
1635            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1636                vtl0_alias_map: !opt.no_alias_map,
1637                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1638                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1639                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1640                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1641                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1642                        Some(LateMapVtl0MemoryPolicy::InjectException)
1643                    }
1644                },
1645            }),
1646            with_isolation,
1647        },
1648        #[cfg(windows)]
1649        kernel_vmnics,
1650        input: mesh::Receiver::new(),
1651        framebuffer,
1652        vga_firmware,
1653        vtl2_gfx: opt.vtl2_gfx,
1654        virtio_devices,
1655        vmbus: with_hv.then_some(VmbusConfig {
1656            vsock_listener: vtl0_vsock_listener,
1657            vsock_path: opt.vmbus_vsock_path.clone(),
1658            vtl2_redirect: opt.vmbus_redirect,
1659            vmbus_max_version: opt.vmbus_max_version,
1660            #[cfg(windows)]
1661            vmbusproxy_handle,
1662        }),
1663        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1664            vsock_listener: vtl2_vsock_listener,
1665            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1666            ..Default::default()
1667        }),
1668        vmbus_devices,
1669        chipset_devices,
1670        pci_chipset_devices,
1671        chipset_capabilities: capabilities,
1672        #[cfg(windows)]
1673        vpci_resources,
1674        vmgs,
1675        secure_boot_enabled: opt.secure_boot,
1676        custom_uefi_vars,
1677        firmware_event_send: None,
1678        debugger_rpc: None,
1679        generation_id_recv: None,
1680        rtc_delta_milliseconds: 0,
1681        automatic_guest_reset: !opt.halt_on_reset,
1682        efi_diagnostics_log_level: {
1683            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1684                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1685                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1686                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1687            }
1688        },
1689    };
1690
1691    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1692    Ok((cfg, resources))
1693}
1694
/// Returns the terminal application to use for externally launched console
/// windows, taken from the `OPENVMM_TERM` environment variable (falling back
/// to the legacy `HVLITE_TERM`), or `None` if neither is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    let term = std::env::var_os("OPENVMM_TERM").or_else(|| std::env::var_os("HVLITE_TERM"));
    term.map(PathBuf::from)
}
1701
// Best-effort removal of `path`, but only when it is verifiably a Unix socket
// (so we never delete an unrelated file that happens to live at that path).
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let is_socket = match path.metadata() {
        Ok(meta) => std::os::unix::fs::FileTypeExt::is_socket(&meta.file_type()),
        Err(_) => false,
    };

    if is_socket {
        // Opportunistic cleanup; ignore failures.
        let _ = std::fs::remove_file(path);
    }
}
1715
/// Switch ID used for DirectIO endpoints when no switch is specified on the
/// command line.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1718
/// Creates a new port on the vmswitch identified by `switch_id` (a GUID
/// string), returning the config-level port ID together with the kernel
/// switch port object (callers store the latter to keep the port alive).
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };
    // Fail early with a clear error if the switch does not exist.
    let _ = vmswitch::hcn::Network::open(&id.switch)
        .with_context(|| format!("could not find switch {}", id.switch))?;

    let port = vmswitch::kernel::SwitchPort::new(&id).context("failed to create switch port")?;

    let id = openvmm_defs::config::SwitchPortId {
        switch: id.switch,
        port: id.port,
    };
    Ok((id, port))
}
1741
/// Builds a [`NicConfig`] from the command-line NIC configuration.
///
/// `index` is a running counter used to derive a deterministic per-NIC
/// instance ID; it is incremented on each call. On Windows, DirectIO
/// endpoints register their switch port in `resources`.
fn parse_endpoint(
    cli_cfg: &NicConfigCli,
    index: &mut usize,
    resources: &mut VmResources,
) -> anyhow::Result<NicConfig> {
    // `resources` is only touched by the cfg(windows) DirectIO arm below.
    let _ = resources;
    let endpoint = match &cli_cfg.endpoint {
        EndpointConfigCli::Consomme { cidr, host_fwd } => {
            // Translate CLI host-forwarding rules into consomme port configs.
            let ports = host_fwd
                .iter()
                .map(|fwd| {
                    use net_backend_resources::consomme::HostPortProtocol;
                    net_backend_resources::consomme::HostPortConfig {
                        protocol: match fwd.protocol {
                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
                        },
                        host_address: fwd
                            .host_address
                            .map(net_backend_resources::consomme::HostIpAddress::from),
                        host_port: fwd.host_port,
                        guest_port: fwd.guest_port,
                    }
                })
                .collect();
            net_backend_resources::consomme::ConsommeHandle {
                cidr: cidr.clone(),
                ports,
            }
            .into_resource()
        }
        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
        EndpointConfigCli::Dio { id } => {
            #[cfg(windows)]
            {
                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
                // The switch port must outlive the VM; stash it in resources.
                resources.switch_ports.push(port);
                net_backend_resources::dio::WindowsDirectIoHandle {
                    switch_port_id: net_backend_resources::dio::SwitchPortId {
                        switch: port_id.switch,
                        port: port_id.port,
                    },
                }
                .into_resource()
            }

            #[cfg(not(windows))]
            {
                let _ = id;
                bail!("cannot use dio on non-windows platforms")
            }
        }
        EndpointConfigCli::Tap { name } => {
            #[cfg(target_os = "linux")]
            {
                let fd = net_tap::tap::open_tap(name)
                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
                net_backend_resources::tap::TapHandle { fd }.into_resource()
            }

            #[cfg(not(target_os = "linux"))]
            {
                let _ = name;
                bail!("TAP backend is only supported on Linux")
            }
        }
    };

    // Pick a random MAC address.
    // Only the low three bytes are randomized; 00:15:5D is kept as a fixed
    // prefix (presumably the Microsoft OUI — confirm against other users).
    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
    getrandom::fill(&mut mac_address[3..]).expect("rng failure");

    // Pick a fixed instance ID based on the index.
    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
    let instance_id = Guid {
        data1: *index as u32,
        ..BASE_INSTANCE_ID
    };
    *index += 1;

    Ok(NicConfig {
        vtl: cli_cfg.vtl,
        instance_id,
        endpoint,
        mac_address: mac_address.into(),
        max_queues: cli_cfg.max_queues,
        pcie_port: cli_cfg.pcie_port.clone(),
    })
}
1831
/// Configuration for a single guest NIC, assembled from the command line by
/// [`parse_endpoint`].
#[derive(Debug)]
struct NicConfig {
    /// VTL the device is offered to.
    vtl: DeviceVtl,
    /// Per-NIC instance ID, derived deterministically from the NIC index.
    instance_id: Guid,
    /// Randomly generated MAC address.
    mac_address: MacAddress,
    /// Backend endpoint resource (consomme, DirectIO, TAP, or null).
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional limit on the number of queues.
    max_queues: Option<u16>,
    /// Optional PCIe port identifier from the CLI.
    pcie_port: Option<String>,
}
1841
1842impl NicConfig {
1843    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1844        (
1845            self.vtl,
1846            netvsp_resources::NetvspHandle {
1847                instance_id: self.instance_id,
1848                mac_address: self.mac_address,
1849                endpoint: self.endpoint,
1850                max_queues: self.max_queues,
1851            }
1852            .into_resource(),
1853        )
1854    }
1855}
1856
/// An entry in the disk stack built by [`disk_open_inner`]: either a proper
/// disk layer, or a full disk (which [`disk_open`] wraps into a layer unless
/// it is the sole entry).
enum LayerOrDisk {
    Layer(DiskLayerDescription),
    Disk(Resource<DiskHandleKind>),
}
1861
1862async fn disk_open(
1863    disk_cli: &DiskCliKind,
1864    read_only: bool,
1865) -> anyhow::Result<Resource<DiskHandleKind>> {
1866    let mut layers = Vec::new();
1867    disk_open_inner(disk_cli, read_only, &mut layers).await?;
1868    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1869        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1870            unreachable!()
1871        };
1872        Ok(disk)
1873    } else {
1874        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1875            layers: layers
1876                .into_iter()
1877                .map(|layer| match layer {
1878                    LayerOrDisk::Layer(layer) => layer,
1879                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1880                        layer: DiskLayerHandle(disk).into_resource(),
1881                        read_cache: false,
1882                        write_through: false,
1883                    },
1884                })
1885                .collect(),
1886        }))
1887    }
1888}
1889
/// Recursively translates a [`DiskCliKind`] into entries appended to
/// `layers` (topmost entry first).
///
/// Returns a boxed future because the function is recursive: diff and
/// wrapper disk kinds re-enter it (or [`disk_open`]) for their inner disk.
fn disk_open_inner<'a>(
    disk_cli: &'a DiskCliKind,
    read_only: bool,
    layers: &'a mut Vec<LayerOrDisk>,
) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
    Box::pin(async move {
        // Shorthand constructors for the two entry kinds.
        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
            LayerOrDisk::Layer(layer.into_resource().into())
        }
        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
            LayerOrDisk::Disk(disk.into_resource())
        }
        match disk_cli {
            // Fixed-size RAM-backed layer.
            &DiskCliKind::Memory(len) => {
                layers.push(layer(RamDiskLayerHandle {
                    len: Some(len),
                    sector_size: None,
                }));
            }
            // File-backed disk; optionally created fresh with a given length.
            DiskCliKind::File {
                path,
                create_with_len,
                direct,
            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
                create_disk_type(
                    path,
                    *size,
                    OpenDiskOptions {
                        read_only: false,
                        direct: *direct,
                    },
                )
                .with_context(|| format!("failed to create {}", path.display()))?
            } else {
                open_disk_type(
                    path,
                    OpenDiskOptions {
                        read_only,
                        direct: *direct,
                    },
                )
                .await
                .with_context(|| format!("failed to open {}", path.display()))?
            })),
            // Disk backed by a blob at a URL, in flat or fixed-VHD1 format.
            DiskCliKind::Blob { kind, url } => {
                layers.push(disk(disk_backend_resources::BlobDiskHandle {
                    url: url.to_owned(),
                    format: match kind {
                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
                        cli_args::BlobKind::Vhd1 => {
                            disk_backend_resources::BlobDiskFormat::FixedVhd1
                        }
                    },
                }))
            }
            // Writable RAM layer over a read-only inner disk.
            DiskCliKind::MemoryDiff(inner) => {
                layers.push(layer(RamDiskLayerHandle {
                    len: None,
                    sector_size: None,
                }));
                disk_open_inner(inner, true, layers).await?;
            }
            // Wrapper adding persistent-reservation support to the inner disk.
            DiskCliKind::PersistentReservationsWrapper(inner) => {
                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
                    disk_open(inner, read_only).await?,
                )))
            }
            // Wrapper that delays I/O to the inner disk by a fixed duration.
            DiskCliKind::DelayDiskWrapper {
                delay_ms,
                disk: inner,
            } => layers.push(disk(DelayDiskHandle {
                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
                disk: disk_open(inner, read_only).await?,
            })),
            // Encrypted wrapper; the key is read from a file on the host.
            DiskCliKind::Crypt {
                disk: inner,
                cipher,
                key_file,
            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
                disk: disk_open(inner, read_only).await?,
                cipher: match cipher {
                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
                },
                key: fs_err::read(key_file).context("failed to read key file")?,
            })),
            // Standalone sqlite-backed layer.
            DiskCliKind::Sqlite {
                path,
                create_with_len,
            } => {
                // FUTURE: this code should be responsible for opening
                // file-handle(s) itself, and passing them into sqlite via a custom
                // vfs. For now though - simply check if the file exists or not, and
                // perform early validation of filesystem-level create options.
                match (create_with_len.is_some(), path.exists()) {
                    (true, true) => anyhow::bail!(
                        "cannot create new sqlite disk at {} - file already exists",
                        path.display()
                    ),
                    (false, false) => anyhow::bail!(
                        "cannot open sqlite disk at {} - file not found",
                        path.display()
                    ),
                    _ => {}
                }

                layers.push(layer(SqliteDiskLayerHandle {
                    dbhd_path: path.display().to_string(),
                    format_dbhd: create_with_len.map(|len| {
                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                            logically_read_only: false,
                            len: Some(len),
                        }
                    }),
                }));
            }
            // Sqlite-backed diff layer over a read-only inner disk.
            DiskCliKind::SqliteDiff { path, create, disk } => {
                // FUTURE: this code should be responsible for opening
                // file-handle(s) itself, and passing them into sqlite via a custom
                // vfs. For now though - simply check if the file exists or not, and
                // perform early validation of filesystem-level create options.
                match (create, path.exists()) {
                    (true, true) => anyhow::bail!(
                        "cannot create new sqlite disk at {} - file already exists",
                        path.display()
                    ),
                    (false, false) => anyhow::bail!(
                        "cannot open sqlite disk at {} - file not found",
                        path.display()
                    ),
                    _ => {}
                }

                layers.push(layer(SqliteDiskLayerHandle {
                    dbhd_path: path.display().to_string(),
                    format_dbhd: create.then_some(
                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                            logically_read_only: false,
                            len: None,
                        },
                    ),
                }));
                disk_open_inner(disk, true, layers).await?;
            }
            // Sqlite-backed read cache layered over the inner disk.
            DiskCliKind::AutoCacheSqlite {
                cache_path,
                key,
                disk,
            } => {
                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
                    read_cache: true,
                    write_through: false,
                    layer: SqliteAutoCacheDiskLayerHandle {
                        cache_path: cache_path.clone(),
                        cache_key: key.clone(),
                    }
                    .into_resource(),
                }));
                disk_open_inner(disk, read_only, layers).await?;
            }
        }
        Ok(())
    })
}
2053
2054/// Get the system page size.
2055pub(crate) fn system_page_size() -> u32 {
2056    sparse_mmap::SparseMapping::page_size() as u32
2057}
2058
/// The guest architecture string, derived from the compile-time `guest_arch` cfg.
///
/// NOTE: any `guest_arch` value other than `x86_64` falls through to
/// `"aarch64"`.
pub(crate) const GUEST_ARCH: &str = if cfg!(guest_arch = "x86_64") {
    "x86_64"
} else {
    "aarch64"
};
2065
2066/// Open a snapshot directory and validate it against the current VM config.
2067/// Returns the shared memory fd (from memory.bin) and the saved device state.
2068fn prepare_snapshot_restore(
2069    snapshot_dir: &Path,
2070    opt: &Options,
2071) -> anyhow::Result<(
2072    openvmm_defs::worker::SharedMemoryFd,
2073    mesh::payload::message::ProtobufMessage,
2074)> {
2075    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2076
2077    // Validate manifest against current VM config.
2078    openvmm_helpers::snapshot::validate_manifest(
2079        &manifest,
2080        GUEST_ARCH,
2081        opt.memory_size(),
2082        opt.processors,
2083        system_page_size(),
2084    )?;
2085
2086    // Open memory.bin (existing file, no create, no resize).
2087    let memory_file = fs_err::OpenOptions::new()
2088        .read(true)
2089        .write(true)
2090        .open(snapshot_dir.join("memory.bin"))?;
2091
2092    // Validate file size matches expected memory size.
2093    let file_size = memory_file.metadata()?.len();
2094    if file_size != manifest.memory_size_bytes {
2095        anyhow::bail!(
2096            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2097            manifest.memory_size_bytes,
2098        );
2099    }
2100
2101    let shared_memory_fd =
2102        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2103
2104    // Reconstruct ProtobufMessage from the saved state bytes.
2105    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2106    // back to ProtobufMessage.
2107    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2108        .context("failed to decode saved state from snapshot")?;
2109
2110    Ok((shared_memory_fd, state_msg))
2111}
2112
/// Body of the entry point: initializes tracing, handles the worker-host and
/// one-shot utility modes, and otherwise runs the interactive control process.
///
/// `pidfile_path` is populated with the path of the written pidfile (if any)
/// so the caller can clean it up on exit.
fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
    #[cfg(windows)]
    pal::windows::disable_hard_error_dialog();

    tracing_init::enable_tracing()?;

    // Try to run as a worker host.
    // On success the worker runs to completion and then exits the process (does
    // not return). Any worker host setup errors are returned and bubbled up.
    meshworker::run_vmm_mesh_host()?;

    let opt = Options::parse();
    // One-shot mode: dump the saved-state protobuf descriptors and exit.
    if let Some(path) = &opt.write_saved_state_proto {
        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
            .write_to_path(path)
            .context("failed to write protobuf descriptors")?;
        return Ok(());
    }

    if let Some(ref path) = opt.pidfile {
        std::fs::write(path, format!("{}\n", std::process::id()))
            .context("failed to write pidfile")?;
        *pidfile_path = Some(path.clone());
    }

    // Console-relay mode: act as the far end of a relayed console and exit.
    if let Some(path) = opt.relay_console_path {
        let console_title = opt.relay_console_title.unwrap_or_default();
        return console_relay::relay_console(&path, console_title.as_str());
    }

    // Server mode: run a ttrpc/grpc control server on a Unix socket.
    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
        return block_on(async {
            let _ = std::fs::remove_file(path);
            let listener =
                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;

            let transport = if opt.ttrpc.is_some() {
                ttrpc::RpcTransport::Ttrpc
            } else {
                ttrpc::RpcTransport::Grpc
            };

            // This is a local launch
            let mut handle =
                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
                    listener,
                    transport,
                })
                .await?;

            tracing::info!(%transport, path = %path.display(), "listening");

            // Signal the parent process that the server is ready.
            pal::close_stdout().context("failed to close stdout")?;

            handle.join().await?;

            Ok(())
        });
    }

    // Default: run the interactive control process.
    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
}
2177
2178fn new_hvsock_service_id(port: u32) -> Guid {
2179    // This GUID is an embedding of the AF_VSOCK port into an
2180    // AF_HYPERV service ID.
2181    Guid {
2182        data1: port,
2183        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2184    }
2185}
2186
2187async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2188    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2189    let result = run_control_inner(driver, &mut mesh, opt).await;
2190    // If setup failed before the mesh was handed to the controller, shut it
2191    // down so the child host process exits cleanly without noisy logs.
2192    if let Some(mesh) = mesh {
2193        mesh.shutdown().await;
2194    }
2195    result
2196}
2197
/// Configures and launches the VM and its auxiliary workers, then drives the
/// interactive REPL until it exits.
///
/// Takes the mesh out of `mesh_slot` once it is handed to the
/// [`vm_controller::VmController`]; if this function fails before that point,
/// the caller is responsible for shutting the mesh down.
async fn run_control_inner(
    driver: &DefaultDriver,
    mesh_slot: &mut Option<VmmMesh>,
    opt: Options,
) -> anyhow::Result<()> {
    let mesh = mesh_slot.as_ref().unwrap();
    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;

    // spin up the VNC worker when graphics output is requested
    let mut vnc_worker = None;
    if opt.gfx || opt.vnc {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;

        let input_send = vm_config.input.sender();
        let framebuffer = resources
            .framebuffer_access
            .take()
            .expect("synth video enabled");

        let vnc_host = mesh
            .make_host("vnc", None)
            .await
            .context("spawning vnc process failed")?;

        vnc_worker = Some(
            vnc_host
                .launch_worker(
                    vnc_worker_defs::VNC_WORKER_TCP,
                    VncParameters {
                        listener,
                        framebuffer,
                        input_send,
                    },
                )
                .await?,
        )
    }

    // spin up the debug worker
    let gdb_worker = if let Some(port) = opt.gdb {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
            .with_context(|| format!("binding to gdb port {}", port))?;

        let (req_tx, req_rx) = mesh::channel();
        vm_config.debugger_rpc = Some(req_rx);

        let gdb_host = mesh
            .make_host("gdb", None)
            .await
            .context("spawning gdbstub process failed")?;

        Some(
            gdb_host
                .launch_worker(
                    debug_worker_defs::DEBUGGER_WORKER,
                    debug_worker_defs::DebuggerParameters {
                        listener,
                        req_chan: req_tx,
                        vp_count: vm_config.processor_topology.proc_count,
                        target_arch: if cfg!(guest_arch = "x86_64") {
                            debug_worker_defs::TargetArch::X86_64
                        } else {
                            debug_worker_defs::TargetArch::Aarch64
                        },
                    },
                )
                .await
                .context("failed to launch gdbstub worker")?,
        )
    } else {
        None
    };

    // spin up the VM
    let (vm_rpc, rpc_recv) = mesh::channel();
    let (notify_send, notify_recv) = mesh::channel();
    let vm_worker = {
        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;

        // When restoring from a snapshot, guest memory comes from the
        // snapshot's memory.bin; otherwise from the optional backing file.
        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
            (Some(fd), Some(state_msg))
        } else {
            let shared_memory = opt
                .memory_backing_file()
                .map(|path| {
                    openvmm_helpers::shared_memory::open_memory_backing_file(
                        path,
                        opt.memory_size(),
                    )
                })
                .transpose()?;
            (shared_memory, None)
        };

        let params = VmWorkerParameters {
            hypervisor: match &opt.hypervisor {
                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
            },
            cfg: vm_config,
            saved_state,
            shared_memory,
            rpc: rpc_recv,
            notify: notify_send,
        };
        vm_host
            .launch_worker(VM_WORKER, params)
            .await
            .context("failed to launch vm worker")?
    };

    if opt.restore_snapshot.is_some() {
        tracing::info!("restoring VM from snapshot");
    }

    if !opt.paused {
        vm_rpc.call(VmRpc::Resume, ()).await?;
    }

    // Client for the paravisor diagnostics service, connected on demand over
    // hvsock via the VM worker.
    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
        driver.clone(),
        DiagDialer {
            driver: driver.clone(),
            vm_rpc: vm_rpc.clone(),
            openhcl_vtl: if opt.vtl2 {
                DeviceVtl::Vtl2
            } else {
                DeviceVtl::Vtl0
            },
        },
    ));

    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());

    // Create channels between the REPL and VmController.
    let (vm_controller_send, vm_controller_recv) = mesh::channel();
    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();

    let has_vtl2 = resources.vtl2_settings.is_some();

    // Build the VmController with exclusive resources.
    let controller = vm_controller::VmController {
        mesh: mesh_slot.take().unwrap(),
        vm_worker,
        vnc_worker,
        gdb_worker,
        diag_inspector: Some(diag_inspector),
        vtl2_settings: resources.vtl2_settings,
        ged_rpc: resources.ged_rpc.clone(),
        vm_rpc: vm_rpc.clone(),
        paravisor_diag: Some(paravisor_diag),
        igvm_path: opt.igvm.clone(),
        memory_backing_file: opt.memory_backing_file().cloned(),
        memory: opt.memory_size(),
        processors: opt.processors,
        log_file: opt.log_file.clone(),
    };

    // Spawn the VmController as a task.
    let controller_task = driver.spawn(
        "vm-controller",
        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
    );

    // Run the REPL with shareable resources.
    let repl_result = repl::run_repl(
        driver,
        repl::ReplResources {
            vm_rpc,
            vm_controller: vm_controller_send,
            vm_controller_events: vm_controller_event_recv,
            scsi_rpc: resources.scsi_rpc,
            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
            shutdown_ic: resources.shutdown_ic,
            kvp_ic: resources.kvp_ic,
            console_in: resources.console_in,
            has_vtl2,
        },
    )
    .await;

    // Wait for the controller task to finish (it stops the VM worker and
    // shuts down the mesh).
    controller_task.await;

    repl_result
}
2386
/// [`mesh_rpc::client::Dial`] implementation that connects to the paravisor
/// diagnostics service over hvsock, via the VM worker's `ConnectHvsock` RPC.
struct DiagDialer {
    driver: DefaultDriver,
    vm_rpc: mesh::Sender<VmRpc>,
    /// VTL the paravisor runs in (VTL2 when `--vtl2` is set, else VTL0).
    openhcl_vtl: DeviceVtl,
}
2392
2393impl mesh_rpc::client::Dial for DiagDialer {
2394    type Stream = PolledSocket<unix_socket::UnixStream>;
2395
2396    async fn dial(&mut self) -> io::Result<Self::Stream> {
2397        let service_id = new_hvsock_service_id(1);
2398        let socket = self
2399            .vm_rpc
2400            .call_failable(
2401                VmRpc::ConnectHvsock,
2402                (
2403                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2404                    service_id,
2405                    self.openhcl_vtl,
2406                ),
2407            )
2408            .await
2409            .map_err(io::Error::other)?;
2410
2411        PolledSocket::new(&self.driver, socket)
2412    }
2413}
2414
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
///
/// The background relay task is spawned lazily on the first inspect request.
pub(crate) struct DiagInspector(DiagInspectorInner);
2422
/// State machine for [`DiagInspector`]'s lazily-started relay task.
enum DiagInspectorInner {
    /// Task not yet spawned; holds what is needed to start it.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// Relay task running; `send` forwards deferred inspect requests to it.
    Started {
        send: mesh::Sender<inspect::Deferred>,
        _task: Task<()>,
    },
    /// Transient placeholder used only during the NotStarted -> Started swap.
    Invalid,
}
2431
impl DiagInspector {
    /// Creates an inspector that will lazily spawn its relay task on the
    /// first inspect request.
    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
        Self(DiagInspectorInner::NotStarted(driver, diag_client))
    }

    /// Returns the sender for the relay task, spawning the task on first use.
    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
        loop {
            match self.0 {
                DiagInspectorInner::NotStarted { .. } => {
                    // Move the driver and client out of `self.0`, leaving
                    // `Invalid` in place until the started state is stored.
                    let DiagInspectorInner::NotStarted(driver, client) =
                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
                    else {
                        unreachable!()
                    };
                    let (send, recv) = mesh::channel();
                    let task = driver.clone().spawn("diag-inspect", async move {
                        Self::run(&client, recv).await
                    });

                    self.0 = DiagInspectorInner::Started { send, _task: task };
                    // Loop again to return the sender from the new state.
                }
                DiagInspectorInner::Started { ref send, .. } => break send,
                DiagInspectorInner::Invalid => unreachable!(),
            }
        }
    }

    /// Relay loop: forwards deferred inspect/update requests to the guest via
    /// the diag client and completes them with the response (or an error node).
    async fn run(
        diag_client: &diag_client::DiagClient,
        mut recv: mesh::Receiver<inspect::Deferred>,
    ) {
        while let Some(deferred) = recv.next().await {
            let info = deferred.external_request();
            let result = match info.request_type {
                inspect::ExternalRequestType::Inspect { depth } => {
                    if depth == 0 {
                        // Depth exhausted: report unevaluated rather than
                        // querying the guest.
                        Ok(inspect::Node::Unevaluated)
                    } else {
                        // TODO: Support taking timeouts from the command line
                        diag_client
                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
                            .await
                    }
                }
                inspect::ExternalRequestType::Update { value } => {
                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
                }
            };
            // Surface failures as a failed node instead of dropping the
            // request.
            deferred.complete_external(
                result.unwrap_or_else(|err| {
                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
                }),
                inspect::SensitivityLevel::Unspecified,
            )
        }
    }
}
2489
impl InspectMut for DiagInspector {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        // Lazily start the relay task on first use, then hand the deferred
        // request off to it.
        self.start().send(req.defer());
    }
}