Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::StreamExt;
53use futures::executor::block_on;
54use futures::io::AllowStdIo;
55use gdma_resources::GdmaDeviceHandle;
56use gdma_resources::VportDefinition;
57use guid::Guid;
58use input_core::MultiplexedInputHandle;
59use inspect::InspectMut;
60use io::Read;
61use mesh::CancelContext;
62use mesh::CellUpdater;
63use mesh::rpc::RpcSend;
64use meshworker::VmmMesh;
65use net_backend_resources::mac_address::MacAddress;
66use nvme_resources::NvmeControllerRequest;
67use openvmm_defs::config::Config;
68use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
69use openvmm_defs::config::DeviceVtl;
70use openvmm_defs::config::EfiDiagnosticsLogLevelType;
71use openvmm_defs::config::HypervisorConfig;
72use openvmm_defs::config::LateMapVtl0MemoryPolicy;
73use openvmm_defs::config::LoadMode;
74use openvmm_defs::config::MemoryConfig;
75use openvmm_defs::config::PcieDeviceConfig;
76use openvmm_defs::config::PcieMmioRangeConfig;
77use openvmm_defs::config::PcieRootComplexConfig;
78use openvmm_defs::config::PcieRootPortConfig;
79use openvmm_defs::config::PcieSwitchConfig;
80use openvmm_defs::config::ProcessorTopologyConfig;
81use openvmm_defs::config::SerialInformation;
82use openvmm_defs::config::VirtioBus;
83use openvmm_defs::config::VmbusConfig;
84use openvmm_defs::config::VpciDeviceConfig;
85use openvmm_defs::config::Vtl2Config;
86use openvmm_defs::rpc::VmRpc;
87use openvmm_defs::worker::VM_WORKER;
88use openvmm_defs::worker::VmWorkerParameters;
89use openvmm_helpers::disk::OpenDiskOptions;
90use openvmm_helpers::disk::create_disk_type;
91use openvmm_helpers::disk::open_disk_type;
92use pal_async::DefaultDriver;
93use pal_async::DefaultPool;
94use pal_async::socket::PolledSocket;
95use pal_async::task::Spawn;
96use pal_async::task::Task;
97use serial_16550_resources::ComPort;
98use serial_core::resources::DisconnectedSerialBackendHandle;
99use sparse_mmap::alloc_shared_memory;
100use std::cell::RefCell;
101use std::collections::BTreeMap;
102use std::fmt::Write as _;
103use std::future::pending;
104use std::io;
105#[cfg(unix)]
106use std::io::IsTerminal;
107use std::io::Write;
108use std::net::TcpListener;
109use std::path::Path;
110use std::path::PathBuf;
111use std::sync::Arc;
112use std::thread;
113use std::time::Duration;
114use storvsp_resources::ScsiControllerRequest;
115use tpm_resources::TpmDeviceHandle;
116use tpm_resources::TpmRegisterLayout;
117use uidevices_resources::SynthKeyboardHandle;
118use uidevices_resources::SynthMouseHandle;
119use uidevices_resources::SynthVideoHandle;
120use video_core::SharedFramebufferHandle;
121use virtio_resources::VirtioPciDeviceHandle;
122use vm_manifest_builder::BaseChipsetType;
123use vm_manifest_builder::MachineArch;
124use vm_manifest_builder::VmChipsetResult;
125use vm_manifest_builder::VmManifestBuilder;
126use vm_resource::IntoResource;
127use vm_resource::Resource;
128use vm_resource::kind::DiskHandleKind;
129use vm_resource::kind::DiskLayerHandleKind;
130use vm_resource::kind::NetEndpointHandleKind;
131use vm_resource::kind::VirtioDeviceHandle;
132use vm_resource::kind::VmbusDeviceHandleKind;
133use vmbus_serial_resources::VmbusSerialDeviceHandle;
134use vmbus_serial_resources::VmbusSerialPort;
135use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
136use vmgs_resources::GuestStateEncryptionPolicy;
137use vmgs_resources::VmgsDisk;
138use vmgs_resources::VmgsFileHandle;
139use vmgs_resources::VmgsResource;
140use vmotherboard::ChipsetDeviceHandle;
141use vnc_worker_defs::VncParameters;
142
143/// RAII guard that removes the pidfile when dropped. Ensures the pidfile is
144/// cleaned up even if [`do_main`] panics.
145struct PidfileGuard(Option<PathBuf>);
146
147impl Drop for PidfileGuard {
148    fn drop(&mut self) {
149        if let Some(path) = &self.0 {
150            let _ = fs_err::remove_file(path);
151        }
152    }
153}
154
155pub fn openvmm_main() {
156    // Save the current state of the terminal so we can restore it back to
157    // normal before exiting.
158    #[cfg(unix)]
159    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
160
161    let mut pidfile_guard = PidfileGuard(None);
162    let exit_code = match do_main(&mut pidfile_guard.0) {
163        Ok(_) => 0,
164        Err(err) => {
165            eprintln!("fatal error: {:?}", err);
166            1
167        }
168    };
169
170    // Restore the terminal to its initial state.
171    #[cfg(unix)]
172    if let Some(orig_termios) = orig_termios {
173        term::set_termios(orig_termios);
174    }
175
176    // Clean up the pidfile before terminating, since pal::process::terminate
177    // skips destructors.
178    drop(pidfile_guard);
179
180    // Terminate the process immediately without graceful shutdown of DLLs or
181    // C++ destructors or anything like that. This is all unnecessary and saves
182    // time on Windows.
183    //
184    // Do flush stdout, though, since there may be buffered data.
185    let _ = io::stdout().flush();
186    pal::process::terminate(exit_code);
187}
188
/// Host-side handles produced while turning the command line into a VM
/// configuration.
///
/// `vm_config_from_command_line` returns one of these alongside the `Config`.
/// Every field starts out empty (`None` / empty `Vec`) via `Default` and is
/// filled in only when the corresponding option is in use.
#[derive(Default)]
struct VmResources {
    /// Writer for console input; set from the `ConsoleState` input when one of
    /// the serial devices is configured as the console.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Access handle for the framebuffer; set when a framebuffer is created.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` messages.
    // NOTE(review): not populated in the part of the file visible here;
    // presumably wired up when the shutdown IC device is configured.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` messages.
    // NOTE(review): presumably wired up when the KVP IC device is configured.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest` messages.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest` messages.
    // NOTE(review): the name suggests this targets a VTL2 NVMe controller;
    // confirm against where it is assigned.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for `GuestEmulationRequest` messages.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings document, when one has been built.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Switch ports created for `--kernel-vmnic` entries (Windows only); one
    /// is pushed per kernel VM NIC.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
202
/// Records which serial device has claimed the interactive console.
///
/// At most one device may do so: the serial setup code refuses a second
/// `Console` request and names the current holder in the error.
struct ConsoleState<'a> {
    /// Device name string supplied for the serial port (for example `"ttyS0"`);
    /// reported in the "console already set by" error.
    device: &'a str,
    /// Write half of the console's serial connection; later moved into
    /// `VmResources::console_in`.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
207
208/// Build a flat list of switches with their parent port assignments.
209///
210/// This function converts hierarchical CLI switch definitions into a flat list
211/// where each switch specifies its parent port directly.
212fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
213    all_switches
214        .iter()
215        .map(|switch_cli| PcieSwitchConfig {
216            name: switch_cli.name.clone(),
217            num_downstream_ports: switch_cli.num_downstream_ports,
218            parent_port: switch_cli.port_name.clone(),
219            hotplug: switch_cli.hotplug,
220            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
221        })
222        .collect()
223}
224
225async fn vm_config_from_command_line(
226    spawner: impl Spawn,
227    mesh: &VmmMesh,
228    opt: &Options,
229) -> anyhow::Result<(Config, VmResources)> {
230    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
231    // Ensure the serial driver stays alive with no tasks.
232    serial_driver.spawn("leak", pending::<()>()).detach();
233
234    let openhcl_vtl = if opt.vtl2 {
235        DeviceVtl::Vtl2
236    } else {
237        DeviceVtl::Vtl0
238    };
239
240    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
241    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
242        Ok(match cli_cfg {
243            SerialConfigCli::Console => {
244                if let Some(console_state) = console_state.borrow().as_ref() {
245                    bail!("console already set by {}", console_state.device);
246                }
247                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
248                let (serial_read, serial_write) = AsyncReadExt::split(serial);
249                *console_state.borrow_mut() = Some(ConsoleState {
250                    device,
251                    input: Box::new(serial_write),
252                });
253                thread::Builder::new()
254                    .name(name.to_owned())
255                    .spawn(move || {
256                        let _ = block_on(futures::io::copy(
257                            serial_read,
258                            &mut AllowStdIo::new(term::raw_stdout()),
259                        ));
260                    })
261                    .unwrap();
262                Some(config)
263            }
264            SerialConfigCli::Stderr => {
265                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
266                thread::Builder::new()
267                    .name(name.to_owned())
268                    .spawn(move || {
269                        let _ = block_on(futures::io::copy(
270                            serial,
271                            &mut AllowStdIo::new(term::raw_stderr()),
272                        ));
273                    })
274                    .unwrap();
275                Some(config)
276            }
277            SerialConfigCli::File(path) => {
278                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
279                let file = fs_err::File::create(path).context("failed to create file")?;
280
281                thread::Builder::new()
282                    .name(name.to_owned())
283                    .spawn(move || {
284                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
285                    })
286                    .unwrap();
287                Some(config)
288            }
289            SerialConfigCli::None => None,
290            SerialConfigCli::Pipe(path) => {
291                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
292            }
293            SerialConfigCli::Tcp(addr) => {
294                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
295            }
296            SerialConfigCli::NewConsole(app, window_title) => {
297                let path = console_relay::random_console_path();
298                let config =
299                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
300                let window_title =
301                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
302
303                console_relay::launch_console(
304                    app.or_else(openvmm_terminal_app).as_deref(),
305                    &path,
306                    ConsoleLaunchOptions {
307                        window_title: Some(window_title),
308                    },
309                )
310                .context("failed to launch console")?;
311
312                Some(config)
313            }
314        })
315    };
316
317    let mut vmbus_devices = Vec::new();
318
319    let serial0_cfg = setup_serial(
320        "com1",
321        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
322        if cfg!(guest_arch = "x86_64") {
323            "ttyS0"
324        } else {
325            "ttyAMA0"
326        },
327    )?;
328    let serial1_cfg = setup_serial(
329        "com2",
330        opt.com2.clone().unwrap_or(SerialConfigCli::None),
331        if cfg!(guest_arch = "x86_64") {
332            "ttyS1"
333        } else {
334            "ttyAMA1"
335        },
336    )?;
337    let serial2_cfg = setup_serial(
338        "com3",
339        opt.com3.clone().unwrap_or(SerialConfigCli::None),
340        if cfg!(guest_arch = "x86_64") {
341            "ttyS2"
342        } else {
343            "ttyAMA2"
344        },
345    )?;
346    let serial3_cfg = setup_serial(
347        "com4",
348        opt.com4.clone().unwrap_or(SerialConfigCli::None),
349        if cfg!(guest_arch = "x86_64") {
350            "ttyS3"
351        } else {
352            "ttyAMA3"
353        },
354    )?;
355    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
356        "vmbus_com1",
357        opt.vmbus_com1_serial
358            .clone()
359            .unwrap_or(SerialConfigCli::None),
360        "vmbus_com1",
361    )? {
362        vmbus_devices.push((
363            openhcl_vtl,
364            VmbusSerialDeviceHandle {
365                port: VmbusSerialPort::Com1,
366                backend: vmbus_com1_cfg,
367            }
368            .into_resource(),
369        ));
370        true
371    } else {
372        false
373    };
374    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
375        "vmbus_com2",
376        opt.vmbus_com2_serial
377            .clone()
378            .unwrap_or(SerialConfigCli::None),
379        "vmbus_com2",
380    )? {
381        vmbus_devices.push((
382            openhcl_vtl,
383            VmbusSerialDeviceHandle {
384                port: VmbusSerialPort::Com2,
385                backend: vmbus_com2_cfg,
386            }
387            .into_resource(),
388        ));
389        true
390    } else {
391        false
392    };
393    let debugcon_cfg = setup_serial(
394        "debugcon",
395        opt.debugcon
396            .clone()
397            .map(|cfg| cfg.serial)
398            .unwrap_or(SerialConfigCli::None),
399        "debugcon",
400    )?;
401
402    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
403        setup_serial("virtio-console", serial_cfg, "hvc0")?
404    } else {
405        None
406    };
407
408    let mut resources = VmResources::default();
409    let mut console_str = "";
410    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
411        resources.console_in = Some(input);
412        console_str = device;
413    }
414
415    if opt.shared_memory {
416        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
417    }
418    if opt.deprecated_prefetch {
419        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
420    }
421    if opt.deprecated_private_memory {
422        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
423    }
424    if opt.deprecated_thp {
425        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
426    }
427    if opt.deprecated_memory_backing_file.is_some() {
428        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
429    }
430
431    opt.validate_memory_options()?;
432
433    const MAX_PROCESSOR_COUNT: u32 = 1024;
434
435    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
436        bail!("invalid proc count: {}", opt.processors);
437    }
438
439    // Total SCSI channel count should not exceed the processor count
440    // (at most, one channel per VP).
441    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
442        bail!(
443            "invalid SCSI sub-channel count: requested {}, max {}",
444            opt.scsi_sub_channels,
445            MAX_PROCESSOR_COUNT - 1
446        );
447    }
448
449    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
450
451    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
452    for &cli_args::DiskCli {
453        vtl,
454        ref kind,
455        read_only,
456        is_dvd,
457        underhill,
458        ref pcie_port,
459    } in &opt.disk
460    {
461        if pcie_port.is_some() {
462            anyhow::bail!("`--disk` is incompatible with PCIe");
463        }
464
465        storage
466            .add(
467                vtl,
468                underhill,
469                storage_builder::DiskLocation::Scsi(None),
470                kind,
471                is_dvd,
472                read_only,
473            )
474            .await?;
475    }
476
477    for &cli_args::IdeDiskCli {
478        ref kind,
479        read_only,
480        channel,
481        device,
482        is_dvd,
483    } in &opt.ide
484    {
485        storage
486            .add(
487                DeviceVtl::Vtl0,
488                None,
489                storage_builder::DiskLocation::Ide(channel, device),
490                kind,
491                is_dvd,
492                read_only,
493            )
494            .await?;
495    }
496
497    for &cli_args::DiskCli {
498        vtl,
499        ref kind,
500        read_only,
501        is_dvd,
502        underhill,
503        ref pcie_port,
504    } in &opt.nvme
505    {
506        storage
507            .add(
508                vtl,
509                underhill,
510                storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
511                kind,
512                is_dvd,
513                read_only,
514            )
515            .await?;
516    }
517
518    for &cli_args::DiskCli {
519        vtl,
520        ref kind,
521        read_only,
522        is_dvd,
523        ref underhill,
524        ref pcie_port,
525    } in &opt.virtio_blk
526    {
527        if underhill.is_some() {
528            anyhow::bail!("underhill not supported with virtio-blk");
529        }
530        storage
531            .add(
532                vtl,
533                None,
534                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
535                kind,
536                is_dvd,
537                read_only,
538            )
539            .await?;
540    }
541
542    let mut floppy_disks = Vec::new();
543    for disk in &opt.floppy {
544        let &cli_args::FloppyDiskCli {
545            ref kind,
546            read_only,
547        } = disk;
548        floppy_disks.push(FloppyDiskConfig {
549            disk_type: disk_open(kind, read_only).await?,
550            read_only,
551        });
552    }
553
554    let mut vpci_mana_nics = [(); 3].map(|()| None);
555    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
556    let mut underhill_nics = Vec::new();
557    let mut vpci_devices = Vec::new();
558
559    let mut nic_index = 0;
560    for cli_cfg in &opt.net {
561        if cli_cfg.pcie_port.is_some() {
562            anyhow::bail!("`--net` does not support PCIe");
563        }
564        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
565        if cli_cfg.underhill {
566            if !opt.no_alias_map {
567                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
568            }
569            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
570                let vpci_instance_id = Guid::new_random();
571                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
572                    instance_id: vpci_instance_id.to_string(),
573                    subordinate_instance_id: None,
574                    max_sub_channels: None,
575                });
576                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
577            });
578            mana.1.vports.push(VportDefinition {
579                mac_address: vport.mac_address,
580                endpoint: vport.endpoint,
581            });
582        } else {
583            vmbus_devices.push(vport.into_netvsp_handle());
584        }
585    }
586
587    if opt.nic {
588        let nic_config = parse_endpoint(
589            &NicConfigCli {
590                vtl: DeviceVtl::Vtl0,
591                endpoint: EndpointConfigCli::Consomme {
592                    cidr: None,
593                    host_fwd: Vec::new(),
594                },
595                max_queues: None,
596                underhill: false,
597                pcie_port: None,
598            },
599            &mut nic_index,
600            &mut resources,
601        )?;
602        vmbus_devices.push(nic_config.into_netvsp_handle());
603    }
604
605    // Build initial PCIe devices list from CLI options. Storage devices
606    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
607    let mut pcie_devices = Vec::new();
608    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
609        tracing::info!(
610            port_name = %cli_cfg.port_name,
611            socket_addr = ?cli_cfg.socket_addr,
612            "instantiating PCIe remote device"
613        );
614
615        // Generate a deterministic instance ID based on index
616        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
617            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
618        let instance_id = Guid {
619            data1: index as u32,
620            ..PCIE_REMOTE_BASE_INSTANCE_ID
621        };
622
623        pcie_devices.push(PcieDeviceConfig {
624            port_name: cli_cfg.port_name.clone(),
625            resource: pcie_remote_resources::PcieRemoteHandle {
626                instance_id,
627                socket_addr: cli_cfg.socket_addr.clone(),
628                hu: cli_cfg.hu,
629                controller: cli_cfg.controller,
630            }
631            .into_resource(),
632        });
633    }
634
635    #[cfg(windows)]
636    let mut kernel_vmnics = Vec::new();
637    #[cfg(windows)]
638    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
639        // Pick a random MAC address.
640        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
641        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
642
643        // Pick a fixed instance ID based on the index.
644        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
645        let instance_id = Guid {
646            data1: index as u32,
647            ..BASE_INSTANCE_ID
648        };
649
650        let switch_id = if switch_id == "default" {
651            DEFAULT_SWITCH
652        } else {
653            switch_id
654        };
655        let (port_id, port) = new_switch_port(switch_id)?;
656        resources.switch_ports.push(port);
657
658        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
659            instance_id,
660            mac_address: mac_address.into(),
661            switch_port_id: port_id,
662        });
663    }
664
665    for vport in &opt.mana {
666        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
667        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
668            (vtl, None) => {
669                &mut vpci_mana_nics[vtl]
670                    .get_or_insert_with(|| {
671                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
672                    })
673                    .1
674                    .vports
675            }
676            (0, Some(pcie_port)) => {
677                &mut pcie_mana_nics
678                    .entry(pcie_port)
679                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
680                    .vports
681            }
682            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
683        };
684        vport_array.push(VportDefinition {
685            mac_address: vport.mac_address,
686            endpoint: vport.endpoint,
687        });
688    }
689
690    vpci_devices.extend(
691        vpci_mana_nics
692            .into_iter()
693            .enumerate()
694            .filter_map(|(vtl, nic)| {
695                nic.map(|(instance_id, handle)| VpciDeviceConfig {
696                    vtl: match vtl {
697                        0 => DeviceVtl::Vtl0,
698                        1 => DeviceVtl::Vtl1,
699                        2 => DeviceVtl::Vtl2,
700                        _ => unreachable!(),
701                    },
702                    instance_id,
703                    resource: handle.into_resource(),
704                })
705            }),
706    );
707
708    pcie_devices.extend(
709        pcie_mana_nics
710            .into_iter()
711            .map(|(pcie_port, handle)| PcieDeviceConfig {
712                port_name: pcie_port,
713                resource: handle.into_resource(),
714            }),
715    );
716
717    let mut pcie_root_complexes = Vec::new();
718
719    #[cfg(guest_arch = "aarch64")]
720    let arch = MachineArch::Aarch64;
721    #[cfg(guest_arch = "x86_64")]
722    let arch = MachineArch::X86_64;
723    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
724        let ports = opt
725            .pcie_root_port
726            .iter()
727            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
728            .map(|port_cli| PcieRootPortConfig {
729                name: port_cli.name.clone(),
730                hotplug: port_cli.hotplug,
731                acs_capabilities_supported: port_cli.acs_capabilities_supported,
732            })
733            .collect();
734
735        const ONE_MB: u64 = 1024 * 1024;
736        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
737        let high_mmio_size = rc_cli
738            .high_mmio
739            .checked_next_multiple_of(ONE_MB)
740            .context("high mmio rounding error")?;
741        pcie_root_complexes.push(PcieRootComplexConfig {
742            index: i as u32,
743            name: rc_cli.name.clone(),
744            segment: rc_cli.segment,
745            start_bus: rc_cli.start_bus,
746            end_bus: rc_cli.end_bus,
747            low_mmio: PcieMmioRangeConfig::Dynamic {
748                size: low_mmio_size,
749            },
750            high_mmio: PcieMmioRangeConfig::Dynamic {
751                size: high_mmio_size,
752            },
753            ports,
754        });
755    }
756
757    let pcie_switches = build_switch_list(&opt.pcie_switch);
758
759    #[cfg(target_os = "linux")]
760    let vfio_pcie_devices: Vec<PcieDeviceConfig> = opt
761        .vfio
762        .iter()
763        .map(|cli_cfg| {
764            use vm_resource::IntoResource;
765
766            let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
767            let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
768                .with_context(|| format!("failed to read IOMMU group for {}", cli_cfg.pci_id))?;
769            let group_id: u64 = iommu_group_link
770                .file_name()
771                .and_then(|s| s.to_str())
772                .context("invalid iommu_group symlink")?
773                .parse()
774                .context("failed to parse IOMMU group ID")?;
775            let group = std::fs::OpenOptions::new()
776                .read(true)
777                .write(true)
778                .open(format!("/dev/vfio/{group_id}"))
779                .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
780
781            Ok(PcieDeviceConfig {
782                port_name: cli_cfg.port_name.clone(),
783                resource: vfio_assigned_device_resources::VfioDeviceHandle {
784                    pci_id: cli_cfg.pci_id.clone(),
785                    group,
786                }
787                .into_resource(),
788            })
789        })
790        .collect::<anyhow::Result<Vec<_>>>()?;
791
792    #[cfg(windows)]
793    let vpci_resources: Vec<_> = opt
794        .device
795        .iter()
796        .map(|path| -> anyhow::Result<_> {
797            Ok(virt_whp::device::DeviceHandle(
798                whp::VpciResource::new(
799                    None,
800                    Default::default(),
801                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
802                )
803                .with_context(|| format!("opening PCI device {}", path))?,
804            ))
805        })
806        .collect::<Result<_, _>>()?;
807
808    // Create a vmbusproxy handle if needed by any devices.
809    #[cfg(windows)]
810    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
811        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
812    } else {
813        None
814    };
815
816    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
817        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
818        let (fb, fba) =
819            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
820        resources.framebuffer_access = Some(fba);
821        Some(fb)
822    } else {
823        None
824    };
825
826    let load_mode;
827    let with_hv;
828
829    let any_serial_configured = serial0_cfg.is_some()
830        || serial1_cfg.is_some()
831        || serial2_cfg.is_some()
832        || serial3_cfg.is_some();
833
834    let has_com3 = serial2_cfg.is_some();
835
836    let mut chipset = VmManifestBuilder::new(
837        if opt.igvm.is_some() {
838            BaseChipsetType::HclHost
839        } else if opt.pcat {
840            BaseChipsetType::HypervGen1
841        } else if opt.uefi {
842            BaseChipsetType::HypervGen2Uefi
843        } else if opt.hv {
844            BaseChipsetType::HyperVGen2LinuxDirect
845        } else {
846            BaseChipsetType::UnenlightenedLinuxDirect
847        },
848        arch,
849    );
850
851    if framebuffer.is_some() {
852        chipset = chipset.with_framebuffer();
853    }
854    if opt.guest_watchdog {
855        chipset = chipset.with_guest_watchdog();
856    }
857    if any_serial_configured {
858        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
859    }
860    if opt.battery {
861        let (tx, rx) = mesh::channel();
862        tx.send(HostBatteryUpdate::default_present());
863        chipset = chipset.with_battery(rx);
864    }
865    if let Some(cfg) = &opt.debugcon {
866        chipset = chipset.with_debugcon(
867            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
868            cfg.port,
869        );
870    }
871
872    // TODO: load from VMGS file if it exists
873    let bios_guid = Guid::new_random();
874
875    let layout_config = chipset.layout_config();
876    let VmChipsetResult {
877        chipset,
878        mut chipset_devices,
879        pci_chipset_devices,
880        capabilities,
881    } = chipset
882        .build()
883        .context("failed to build chipset configuration")?;
884
885    if opt.restore_snapshot.is_some() {
886        // Snapshot restore: skip firmware loading entirely. Device state and
887        // memory come from the snapshot directory.
888        load_mode = LoadMode::None;
889        with_hv = true;
890    } else if let Some(path) = &opt.igvm {
891        let file = fs_err::File::open(path)
892            .context("failed to open igvm file")?
893            .into();
894        let cmdline = opt.cmdline.join(" ");
895        with_hv = true;
896
897        load_mode = LoadMode::Igvm {
898            file,
899            cmdline,
900            vtl2_base_address: opt.igvm_vtl2_relocation_type,
901            com_serial: has_com3.then(|| SerialInformation {
902                io_port: ComPort::Com3.io_port(),
903                irq: ComPort::Com3.irq().into(),
904            }),
905        };
906    } else if opt.pcat {
907        // Emit a nice error early instead of complaining about missing firmware.
908        if arch != MachineArch::X86_64 {
909            anyhow::bail!("pcat not supported on this architecture");
910        }
911        with_hv = true;
912
913        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
914        load_mode = LoadMode::Pcat {
915            firmware,
916            boot_order: opt
917                .pcat_boot_order
918                .map(|x| x.0)
919                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
920        };
921    } else if opt.uefi {
922        use openvmm_defs::config::UefiConsoleMode;
923
924        with_hv = true;
925
926        let firmware = fs_err::File::open(
927            (opt.uefi_firmware.0)
928                .as_ref()
929                .context("must provide uefi firmware when booting with uefi")?,
930        )
931        .context("failed to open uefi firmware")?;
932
933        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
934        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
935        load_mode = LoadMode::Uefi {
936            firmware: firmware.into(),
937            enable_debugging: opt.uefi_debug,
938            enable_memory_protections: opt.uefi_enable_memory_protections,
939            disable_frontpage: opt.disable_frontpage,
940            enable_tpm: opt.tpm,
941            enable_battery: opt.battery,
942            enable_serial: any_serial_configured,
943            enable_vpci_boot: false,
944            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
945                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
946                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
947                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
948                UefiConsoleModeCli::None => UefiConsoleMode::None,
949            }),
950            default_boot_always_attempt: opt.default_boot_always_attempt,
951            bios_guid,
952        };
953    } else {
954        // Linux Direct
955        let mut cmdline = "panic=-1 debug".to_string();
956
957        with_hv = opt.hv;
958        if with_hv && opt.pcie_root_complex.is_empty() {
959            cmdline += " pci=off";
960        }
961
962        if !console_str.is_empty() {
963            let _ = write!(&mut cmdline, " console={}", console_str);
964        }
965
966        if opt.gfx {
967            cmdline += " console=tty";
968        }
969        for extra in &opt.cmdline {
970            let _ = write!(&mut cmdline, " {}", extra);
971        }
972
973        let kernel = fs_err::File::open(
974            (opt.kernel.0)
975                .as_ref()
976                .context("must provide kernel when booting with linux direct")?,
977        )
978        .context("failed to open kernel")?;
979        let initrd = (opt.initrd.0)
980            .as_ref()
981            .map(fs_err::File::open)
982            .transpose()
983            .context("failed to open initrd")?;
984
985        let custom_dsdt = match &opt.custom_dsdt {
986            Some(path) => {
987                let mut v = Vec::new();
988                fs_err::File::open(path)
989                    .context("failed to open custom dsdt")?
990                    .read_to_end(&mut v)
991                    .context("failed to read custom dsdt")?;
992                Some(v)
993            }
994            None => None,
995        };
996
997        load_mode = LoadMode::Linux {
998            kernel: kernel.into(),
999            initrd: initrd.map(Into::into),
1000            cmdline,
1001            custom_dsdt,
1002            enable_serial: any_serial_configured,
1003            boot_mode: if opt.device_tree {
1004                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1005            } else {
1006                openvmm_defs::config::LinuxDirectBootMode::Acpi
1007            },
1008        };
1009    }
1010
1011    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1012        let disk = VmgsDisk {
1013            disk: disk_open(kind, false)
1014                .await
1015                .context("failed to open vmgs disk")?,
1016            encryption_policy: if opt.test_gsp_by_id {
1017                GuestStateEncryptionPolicy::GspById(true)
1018            } else {
1019                GuestStateEncryptionPolicy::None(true)
1020            },
1021        };
1022        match provision {
1023            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1024            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1025            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1026        }
1027    } else {
1028        VmgsResource::Ephemeral
1029    });
1030
1031    if with_get && with_hv {
1032        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1033            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1034            fixed: Some(Default::default()),
1035            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1036                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1037                nic_devices: underhill_nics,
1038            }),
1039            namespace_settings: Vec::default(),
1040        };
1041
1042        // Cache the VTL2 settings for later modification via the interactive console.
1043        resources.vtl2_settings = Some(vtl2_settings.clone());
1044
1045        let (send, guest_request_recv) = mesh::channel();
1046        resources.ged_rpc = Some(send);
1047
1048        let vmgs = vmgs.take().unwrap();
1049
1050        vmbus_devices.extend([
1051            (
1052                openhcl_vtl,
1053                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1054            ),
1055            (
1056                openhcl_vtl,
1057                get_resources::ged::GuestEmulationDeviceHandle {
1058                    firmware: if opt.pcat {
1059                        get_resources::ged::GuestFirmwareConfig::Pcat {
1060                            boot_order: opt
1061                                .pcat_boot_order
1062                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1063                                .map(|x| match x {
1064                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1065                                        get_resources::ged::PcatBootDevice::Floppy
1066                                    }
1067                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1068                                        get_resources::ged::PcatBootDevice::HardDrive
1069                                    }
1070                                    openvmm_defs::config::PcatBootDevice::Optical => {
1071                                        get_resources::ged::PcatBootDevice::Optical
1072                                    }
1073                                    openvmm_defs::config::PcatBootDevice::Network => {
1074                                        get_resources::ged::PcatBootDevice::Network
1075                                    }
1076                                }),
1077                        }
1078                    } else {
1079                        use get_resources::ged::UefiConsoleMode;
1080
1081                        get_resources::ged::GuestFirmwareConfig::Uefi {
1082                            enable_vpci_boot: storage.has_vtl0_nvme(),
1083                            firmware_debug: opt.uefi_debug,
1084                            disable_frontpage: opt.disable_frontpage,
1085                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1086                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1087                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1088                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1089                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1090                            },
1091                            default_boot_always_attempt: opt.default_boot_always_attempt,
1092                        }
1093                    },
1094                    com1: with_vmbus_com1_serial,
1095                    com2: with_vmbus_com2_serial,
1096                    serial_tx_only: opt.serial_tx_only,
1097                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1098                    vmbus_redirection: opt.vmbus_redirect,
1099                    vmgs,
1100                    framebuffer: opt
1101                        .vtl2_gfx
1102                        .then(|| SharedFramebufferHandle.into_resource()),
1103                    guest_request_recv,
1104                    enable_tpm: opt.tpm,
1105                    firmware_event_send: None,
1106                    secure_boot_enabled: opt.secure_boot,
1107                    secure_boot_template: match opt.secure_boot_template {
1108                        Some(SecureBootTemplateCli::Windows) => {
1109                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1110                        },
1111                        Some(SecureBootTemplateCli::UefiCa) => {
1112                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1113                        }
1114                        None => {
1115                            get_resources::ged::GuestSecureBootTemplateType::None
1116                        },
1117                    },
1118                    enable_battery: opt.battery,
1119                    no_persistent_secrets: true,
1120                    igvm_attest_test_config: None,
1121                    test_gsp_by_id: opt.test_gsp_by_id,
1122                    efi_diagnostics_log_level: {
1123                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1124                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1125                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1126                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1127                        }
1128                    },
1129                    hv_sint_enabled: false,
1130                }
1131                .into_resource(),
1132            ),
1133        ]);
1134    }
1135
1136    if opt.tpm && !opt.vtl2 {
1137        let register_layout = if cfg!(guest_arch = "x86_64") {
1138            TpmRegisterLayout::IoPort
1139        } else {
1140            TpmRegisterLayout::Mmio
1141        };
1142
1143        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1144            (
1145                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1146                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1147            )
1148        } else {
1149            (
1150                EphemeralNonVolatileStoreHandle.into_resource(),
1151                EphemeralNonVolatileStoreHandle.into_resource(),
1152            )
1153        };
1154
1155        chipset_devices.push(ChipsetDeviceHandle {
1156            name: "tpm".to_string(),
1157            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1158                device: TpmDeviceHandle {
1159                    ppi_store,
1160                    nvram_store,
1161                    nvram_size: None,
1162                    refresh_tpm_seeds: false,
1163                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1164                    register_layout,
1165                    guest_secret_key: None,
1166                    logger: None,
1167                    is_confidential_vm: false,
1168                    bios_guid,
1169                }
1170                .into_resource(),
1171                worker_host: mesh.make_host("tpm", None).await?,
1172            }
1173            .into_resource(),
1174        });
1175    }
1176
1177    let custom_uefi_vars = {
1178        use firmware_uefi_custom_vars::CustomVars;
1179
1180        // load base vars from specified template, or use an empty set of base
1181        // vars if none was specified.
1182        let base_vars = match opt.secure_boot_template {
1183            Some(template) => match (arch, template) {
1184                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1185                    hyperv_secure_boot_templates::x64::microsoft_windows()
1186                }
1187                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1188                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1189                }
1190                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1191                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1192                }
1193                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1194                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1195                }
1196            },
1197            None => CustomVars::default(),
1198        };
1199
1200        // TODO: fallback to VMGS read if no command line flag was given
1201
1202        let custom_uefi_json_data = match &opt.custom_uefi_json {
1203            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1204            None => None,
1205        };
1206
1207        // obtain the final custom uefi vars by applying the delta onto the base vars
1208        match custom_uefi_json_data {
1209            Some(data) => {
1210                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1211                base_vars.apply_delta(delta)?
1212            }
1213            None => base_vars,
1214        }
1215    };
1216
1217    let vga_firmware = if opt.pcat {
1218        Some(openvmm_pcat_locator::find_svga_bios(
1219            opt.vga_firmware.as_deref(),
1220        )?)
1221    } else {
1222        None
1223    };
1224
1225    if opt.gfx {
1226        vmbus_devices.extend([
1227            (
1228                DeviceVtl::Vtl0,
1229                SynthVideoHandle {
1230                    framebuffer: SharedFramebufferHandle.into_resource(),
1231                }
1232                .into_resource(),
1233            ),
1234            (
1235                DeviceVtl::Vtl0,
1236                SynthKeyboardHandle {
1237                    source: MultiplexedInputHandle {
1238                        // Save 0 for PS/2
1239                        elevation: 1,
1240                    }
1241                    .into_resource(),
1242                }
1243                .into_resource(),
1244            ),
1245            (
1246                DeviceVtl::Vtl0,
1247                SynthMouseHandle {
1248                    source: MultiplexedInputHandle {
1249                        // Save 0 for PS/2
1250                        elevation: 1,
1251                    }
1252                    .into_resource(),
1253                }
1254                .into_resource(),
1255            ),
1256        ]);
1257    }
1258
1259    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1260        if let Some(path) = path {
1261            cleanup_socket(path.as_ref());
1262            let listener = unix_socket::UnixListener::bind(path)
1263                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1264            Ok(Some(listener))
1265        } else {
1266            Ok(None)
1267        }
1268    };
1269
1270    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1271    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1272
1273    if let Some(path) = &opt.openhcl_dump_path {
1274        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1275        task.detach();
1276        vmbus_devices.push((openhcl_vtl, resource));
1277    }
1278
1279    #[cfg(guest_arch = "aarch64")]
1280    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1281        openvmm_defs::config::Aarch64TopologyConfig {
1282            // TODO: allow this to be configured from the command line
1283            gic_config: None,
1284            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1285            gic_msi: match opt.gic_msi {
1286                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1287                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1288                cli_args::GicMsiCli::V2m => {
1289                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1290                }
1291            },
1292        },
1293    );
1294    #[cfg(guest_arch = "x86_64")]
1295    let topology_arch =
1296        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1297            apic_id_offset: opt.apic_id_offset,
1298            x2apic: opt.x2apic,
1299        });
1300
1301    let with_isolation = if let Some(isolation) = &opt.isolation {
1302        // TODO: For now, isolation is only supported with VTL2.
1303        if !opt.vtl2 {
1304            anyhow::bail!("isolation is only currently supported with vtl2");
1305        }
1306
        // TODO: Alias map support is not yet implemented with isolation.
1308        if !opt.no_alias_map {
1309            anyhow::bail!("alias map not supported with isolation");
1310        }
1311
1312        match isolation {
1313            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1314        }
1315    } else {
1316        None
1317    };
1318
1319    if with_hv {
1320        let (shutdown_send, shutdown_recv) = mesh::channel();
1321        resources.shutdown_ic = Some(shutdown_send);
1322        let (kvp_send, kvp_recv) = mesh::channel();
1323        resources.kvp_ic = Some(kvp_send);
1324        vmbus_devices.extend(
1325            [
1326                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1327                    recv: shutdown_recv,
1328                }
1329                .into_resource(),
1330                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1331                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1332            ]
1333            .map(|r| (DeviceVtl::Vtl0, r)),
1334        );
1335    }
1336
1337    if let Some(hive_path) = &opt.imc {
1338        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1339        vmbus_devices.push((
1340            DeviceVtl::Vtl0,
1341            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1342        ));
1343    }
1344
1345    let mut virtio_devices = Vec::new();
1346    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1347        let bus = match bus {
1348            VirtioBusCli::Auto => {
1349                // Use VPCI when possible (currently only on Windows and macOS due
1350                // to KVM backend limitations).
1351                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1352                    None
1353                } else {
1354                    Some(VirtioBus::Pci)
1355                }
1356            }
1357            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1358            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1359            VirtioBusCli::Vpci => None,
1360        };
1361        if let Some(bus) = bus {
1362            virtio_devices.push((bus, resource));
1363        } else {
1364            vpci_devices.push(VpciDeviceConfig {
1365                vtl: DeviceVtl::Vtl0,
1366                instance_id: Guid::new_random(),
1367                resource: VirtioPciDeviceHandle(resource).into_resource(),
1368            });
1369        }
1370    };
1371
1372    for cli_cfg in &opt.virtio_net {
1373        if cli_cfg.underhill {
1374            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1375        }
1376        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1377        let resource = virtio_resources::net::VirtioNetHandle {
1378            max_queues: vport.max_queues,
1379            mac_address: vport.mac_address,
1380            endpoint: vport.endpoint,
1381        }
1382        .into_resource();
1383        if let Some(pcie_port) = &cli_cfg.pcie_port {
1384            pcie_devices.push(PcieDeviceConfig {
1385                port_name: pcie_port.clone(),
1386                resource: VirtioPciDeviceHandle(resource).into_resource(),
1387            });
1388        } else {
1389            add_virtio_device(VirtioBusCli::Auto, resource);
1390        }
1391    }
1392
1393    for args in &opt.virtio_fs {
1394        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1395            tag: args.tag.clone(),
1396            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1397                root_path: args.path.clone(),
1398                mount_options: args.options.clone(),
1399            },
1400        }
1401        .into_resource();
1402        if let Some(pcie_port) = &args.pcie_port {
1403            pcie_devices.push(PcieDeviceConfig {
1404                port_name: pcie_port.clone(),
1405                resource: VirtioPciDeviceHandle(resource).into_resource(),
1406            });
1407        } else {
1408            add_virtio_device(opt.virtio_fs_bus, resource);
1409        }
1410    }
1411
1412    for args in &opt.virtio_fs_shmem {
1413        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1414            tag: args.tag.clone(),
1415            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1416                root_path: args.path.clone(),
1417            },
1418        }
1419        .into_resource();
1420        if let Some(pcie_port) = &args.pcie_port {
1421            pcie_devices.push(PcieDeviceConfig {
1422                port_name: pcie_port.clone(),
1423                resource: VirtioPciDeviceHandle(resource).into_resource(),
1424            });
1425        } else {
1426            add_virtio_device(opt.virtio_fs_bus, resource);
1427        }
1428    }
1429
1430    for args in &opt.virtio_9p {
1431        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1432            tag: args.tag.clone(),
1433            root_path: args.path.clone(),
1434            debug: opt.virtio_9p_debug,
1435        }
1436        .into_resource();
1437        if let Some(pcie_port) = &args.pcie_port {
1438            pcie_devices.push(PcieDeviceConfig {
1439                port_name: pcie_port.clone(),
1440                resource: VirtioPciDeviceHandle(resource).into_resource(),
1441            });
1442        } else {
1443            add_virtio_device(VirtioBusCli::Auto, resource);
1444        }
1445    }
1446
1447    if let Some(pmem_args) = &opt.virtio_pmem {
1448        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1449            path: pmem_args.path.clone(),
1450        }
1451        .into_resource();
1452        if let Some(pcie_port) = &pmem_args.pcie_port {
1453            pcie_devices.push(PcieDeviceConfig {
1454                port_name: pcie_port.clone(),
1455                resource: VirtioPciDeviceHandle(resource).into_resource(),
1456            });
1457        } else {
1458            add_virtio_device(VirtioBusCli::Auto, resource);
1459        }
1460    }
1461
1462    if opt.virtio_rng {
1463        let resource: Resource<VirtioDeviceHandle> =
1464            virtio_resources::rng::VirtioRngHandle.into_resource();
1465        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1466            pcie_devices.push(PcieDeviceConfig {
1467                port_name: pcie_port.clone(),
1468                resource: VirtioPciDeviceHandle(resource).into_resource(),
1469            });
1470        } else {
1471            add_virtio_device(opt.virtio_rng_bus, resource);
1472        }
1473    }
1474
1475    if let Some(backend) = virtio_console_backend {
1476        let resource: Resource<VirtioDeviceHandle> =
1477            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1478        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1479            pcie_devices.push(PcieDeviceConfig {
1480                port_name: pcie_port.clone(),
1481                resource: VirtioPciDeviceHandle(resource).into_resource(),
1482            });
1483        } else {
1484            add_virtio_device(VirtioBusCli::Auto, resource);
1485        }
1486    }
1487
1488    // Handle --vhost-user arguments.
1489    #[cfg(target_os = "linux")]
1490    for vhost_cli in &opt.vhost_user {
1491        let stream =
1492            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1493                format!(
1494                    "failed to connect to vhost-user socket: {}",
1495                    vhost_cli.socket_path
1496                )
1497            })?;
1498
1499        use crate::cli_args::VhostUserDeviceTypeCli;
1500        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1501            VhostUserDeviceTypeCli::Fs {
1502                ref tag,
1503                num_queues,
1504                queue_size,
1505            } => virtio_resources::vhost_user::VhostUserFsHandle {
1506                socket: stream.into(),
1507                tag: tag.clone(),
1508                num_queues,
1509                queue_size,
1510            }
1511            .into_resource(),
1512            VhostUserDeviceTypeCli::Blk {
1513                num_queues,
1514                queue_size,
1515            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1516                socket: stream.into(),
1517                num_queues,
1518                queue_size,
1519            }
1520            .into_resource(),
1521            VhostUserDeviceTypeCli::Other {
1522                device_id,
1523                ref queue_sizes,
1524            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1525                socket: stream.into(),
1526                device_id,
1527                queue_sizes: queue_sizes.clone(),
1528            }
1529            .into_resource(),
1530        };
1531        if let Some(pcie_port) = &vhost_cli.pcie_port {
1532            pcie_devices.push(PcieDeviceConfig {
1533                port_name: pcie_port.clone(),
1534                resource: VirtioPciDeviceHandle(resource).into_resource(),
1535            });
1536        } else {
1537            add_virtio_device(VirtioBusCli::Auto, resource);
1538        }
1539    }
1540
1541    if let Some(vsock_path) = &opt.virtio_vsock_path {
1542        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1543        add_virtio_device(
1544            VirtioBusCli::Auto,
1545            virtio_resources::vsock::VirtioVsockHandle {
1546                // The guest CID does not matter since the UDS relay does not use it. It just needs
1547                // to be some non-reserved value for the guest to use.
1548                guest_cid: 0x3,
1549                base_path: vsock_path.clone(),
1550                listener,
1551            }
1552            .into_resource(),
1553        );
1554    }
1555
1556    let mut cfg = Config {
1557        chipset,
1558        load_mode,
1559        floppy_disks,
1560        pcie_root_complexes,
1561        #[cfg(target_os = "linux")]
1562        pcie_devices: {
1563            let mut devs = pcie_devices;
1564            devs.extend(vfio_pcie_devices);
1565            devs
1566        },
1567        #[cfg(not(target_os = "linux"))]
1568        pcie_devices,
1569        pcie_switches,
1570        vpci_devices,
1571        ide_disks: Vec::new(),
1572        memory: MemoryConfig {
1573            mem_size: if let Some(ref sizes) = opt.numa_memory {
1574                sizes
1575                    .iter()
1576                    .try_fold(0u64, |acc, &s| acc.checked_add(s))
1577                    .context("numa memory sizes overflow")?
1578            } else {
1579                opt.memory_size()
1580            },
1581            prefetch_memory: opt.prefetch_memory(),
1582            private_memory: opt.private_memory(),
1583            transparent_hugepages: opt.transparent_hugepages(),
1584            hugepages: opt.memory.hugepages,
1585            hugepage_size: opt.memory.hugepage_size,
1586            numa_mem_sizes: opt.numa_memory.clone(),
1587        },
1588        processor_topology: ProcessorTopologyConfig {
1589            proc_count: opt.processors,
1590            vps_per_socket: opt.vps_per_socket,
1591            enable_smt: match opt.smt {
1592                cli_args::SmtConfigCli::Auto => None,
1593                cli_args::SmtConfigCli::Force => Some(true),
1594                cli_args::SmtConfigCli::Off => Some(false),
1595            },
1596            arch: Some(topology_arch),
1597        },
1598        hypervisor: HypervisorConfig {
1599            with_hv,
1600            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1601                vtl0_alias_map: !opt.no_alias_map,
1602                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1603                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1604                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1605                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1606                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1607                        Some(LateMapVtl0MemoryPolicy::InjectException)
1608                    }
1609                },
1610            }),
1611            with_isolation,
1612        },
1613        #[cfg(windows)]
1614        kernel_vmnics,
1615        input: mesh::Receiver::new(),
1616        framebuffer,
1617        vga_firmware,
1618        vtl2_gfx: opt.vtl2_gfx,
1619        virtio_devices,
1620        vmbus: with_hv.then_some(VmbusConfig {
1621            vsock_listener: vtl0_vsock_listener,
1622            vsock_path: opt.vmbus_vsock_path.clone(),
1623            vtl2_redirect: opt.vmbus_redirect,
1624            vmbus_max_version: opt.vmbus_max_version,
1625            #[cfg(windows)]
1626            vmbusproxy_handle,
1627        }),
1628        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1629            vsock_listener: vtl2_vsock_listener,
1630            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1631            ..Default::default()
1632        }),
1633        vmbus_devices,
1634        chipset_devices,
1635        pci_chipset_devices,
1636        chipset_capabilities: capabilities,
1637        layout: layout_config,
1638        #[cfg(windows)]
1639        vpci_resources,
1640        vmgs,
1641        secure_boot_enabled: opt.secure_boot,
1642        custom_uefi_vars,
1643        firmware_event_send: None,
1644        debugger_rpc: None,
1645        generation_id_recv: None,
1646        rtc_delta_milliseconds: 0,
1647        automatic_guest_reset: !opt.halt_on_reset,
1648        efi_diagnostics_log_level: {
1649            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1650                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1651                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1652                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1653            }
1654        },
1655    };
1656
1657    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1658    Ok((cfg, resources))
1659}
1660
/// Returns the terminal application to use for externally launched console
/// windows, as configured through the environment.
///
/// `OPENVMM_TERM` takes precedence; `HVLITE_TERM` is only consulted when
/// `OPENVMM_TERM` is not set. Returns `None` when neither variable is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    ["OPENVMM_TERM", "HVLITE_TERM"]
        .iter()
        .find_map(std::env::var_os)
        .map(PathBuf::from)
}
1667
/// Best-effort removal of `path`, performed only when the path is confirmed
/// to be a Unix socket.
///
/// If the file type cannot be determined (for example because the path does
/// not exist) nothing is removed. A failure to remove the socket is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = matches!(pal::windows::fs::is_unix_socket(path), Ok(true));
    #[cfg(not(windows))]
    let is_socket = {
        use std::os::unix::fs::FileTypeExt;
        path.metadata()
            .map(|meta| meta.file_type().is_socket())
            .unwrap_or(false)
    };

    if !is_socket {
        return;
    }
    let _ = std::fs::remove_file(path);
}
1681
/// ID of the switch used for DirectIO NICs when the command line does not
/// name one.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";

/// Creates a kernel switch port, with a freshly generated random port ID, on
/// the switch identified by `switch_id`.
///
/// Returns the port's ID in its `openvmm_defs` representation together with
/// the port object itself.
///
/// # Errors
///
/// Fails if `switch_id` cannot be parsed, if the switch cannot be opened, or
/// if the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // The network is opened only to confirm that the switch exists; the
    // handle itself is discarded right away.
    drop(
        vmswitch::hcn::Network::open(&kernel_id.switch)
            .with_context(|| format!("could not find switch {}", kernel_id.switch))?,
    );

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    // Re-express the kernel ID in the form used by the VM configuration.
    let vmswitch::kernel::SwitchPortId { switch, port } = kernel_id;
    let config_id = openvmm_defs::config::SwitchPortId { switch, port };
    Ok((config_id, switch_port))
}
1707
1708fn parse_endpoint(
1709    cli_cfg: &NicConfigCli,
1710    index: &mut usize,
1711    resources: &mut VmResources,
1712) -> anyhow::Result<NicConfig> {
1713    let _ = resources;
1714    let endpoint = match &cli_cfg.endpoint {
1715        EndpointConfigCli::Consomme { cidr, host_fwd } => {
1716            let ports = host_fwd
1717                .iter()
1718                .map(|fwd| {
1719                    use net_backend_resources::consomme::HostPortProtocol;
1720                    net_backend_resources::consomme::HostPortConfig {
1721                        protocol: match fwd.protocol {
1722                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
1723                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
1724                        },
1725                        host_address: fwd
1726                            .host_address
1727                            .map(net_backend_resources::consomme::HostIpAddress::from),
1728                        host_port: fwd.host_port,
1729                        guest_port: fwd.guest_port,
1730                    }
1731                })
1732                .collect();
1733            net_backend_resources::consomme::ConsommeHandle {
1734                cidr: cidr.clone(),
1735                ports,
1736            }
1737            .into_resource()
1738        }
1739        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
1740        EndpointConfigCli::Dio { id } => {
1741            #[cfg(windows)]
1742            {
1743                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
1744                resources.switch_ports.push(port);
1745                net_backend_resources::dio::WindowsDirectIoHandle {
1746                    switch_port_id: net_backend_resources::dio::SwitchPortId {
1747                        switch: port_id.switch,
1748                        port: port_id.port,
1749                    },
1750                }
1751                .into_resource()
1752            }
1753
1754            #[cfg(not(windows))]
1755            {
1756                let _ = id;
1757                bail!("cannot use dio on non-windows platforms")
1758            }
1759        }
1760        EndpointConfigCli::Tap { name } => {
1761            #[cfg(target_os = "linux")]
1762            {
1763                let fd = net_tap::tap::open_tap(name)
1764                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
1765                net_backend_resources::tap::TapHandle { fd }.into_resource()
1766            }
1767
1768            #[cfg(not(target_os = "linux"))]
1769            {
1770                let _ = name;
1771                bail!("TAP backend is only supported on Linux")
1772            }
1773        }
1774    };
1775
1776    // Pick a random MAC address.
1777    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
1778    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
1779
1780    // Pick a fixed instance ID based on the index.
1781    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
1782    let instance_id = Guid {
1783        data1: *index as u32,
1784        ..BASE_INSTANCE_ID
1785    };
1786    *index += 1;
1787
1788    Ok(NicConfig {
1789        vtl: cli_cfg.vtl,
1790        instance_id,
1791        endpoint,
1792        mac_address: mac_address.into(),
1793        max_queues: cli_cfg.max_queues,
1794        pcie_port: cli_cfg.pcie_port.clone(),
1795    })
1796}
1797
1798#[derive(Debug)]
1799struct NicConfig {
1800    vtl: DeviceVtl,
1801    instance_id: Guid,
1802    mac_address: MacAddress,
1803    endpoint: Resource<NetEndpointHandleKind>,
1804    max_queues: Option<u16>,
1805    pcie_port: Option<String>,
1806}
1807
1808impl NicConfig {
1809    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1810        (
1811            self.vtl,
1812            netvsp_resources::NetvspHandle {
1813                instance_id: self.instance_id,
1814                mac_address: self.mac_address,
1815                endpoint: self.endpoint,
1816                max_queues: self.max_queues,
1817            }
1818            .into_resource(),
1819        )
1820    }
1821}
1822
1823enum LayerOrDisk {
1824    Layer(DiskLayerDescription),
1825    Disk(Resource<DiskHandleKind>),
1826}
1827
1828async fn disk_open(
1829    disk_cli: &DiskCliKind,
1830    read_only: bool,
1831) -> anyhow::Result<Resource<DiskHandleKind>> {
1832    let mut layers = Vec::new();
1833    disk_open_inner(disk_cli, read_only, &mut layers).await?;
1834    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1835        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1836            unreachable!()
1837        };
1838        Ok(disk)
1839    } else {
1840        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1841            layers: layers
1842                .into_iter()
1843                .map(|layer| match layer {
1844                    LayerOrDisk::Layer(layer) => layer,
1845                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1846                        layer: DiskLayerHandle(disk).into_resource(),
1847                        read_cache: false,
1848                        write_through: false,
1849                    },
1850                })
1851                .collect(),
1852        }))
1853    }
1854}
1855
1856fn disk_open_inner<'a>(
1857    disk_cli: &'a DiskCliKind,
1858    read_only: bool,
1859    layers: &'a mut Vec<LayerOrDisk>,
1860) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
1861    Box::pin(async move {
1862        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
1863            LayerOrDisk::Layer(layer.into_resource().into())
1864        }
1865        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
1866            LayerOrDisk::Disk(disk.into_resource())
1867        }
1868        match disk_cli {
1869            &DiskCliKind::Memory(len) => {
1870                layers.push(layer(RamDiskLayerHandle {
1871                    len: Some(len),
1872                    sector_size: None,
1873                }));
1874            }
1875            DiskCliKind::File {
1876                path,
1877                create_with_len,
1878                direct,
1879            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
1880                create_disk_type(
1881                    path,
1882                    *size,
1883                    OpenDiskOptions {
1884                        read_only: false,
1885                        direct: *direct,
1886                    },
1887                )
1888                .with_context(|| format!("failed to create {}", path.display()))?
1889            } else {
1890                open_disk_type(
1891                    path,
1892                    OpenDiskOptions {
1893                        read_only,
1894                        direct: *direct,
1895                    },
1896                )
1897                .await
1898                .with_context(|| format!("failed to open {}", path.display()))?
1899            })),
1900            DiskCliKind::Blob { kind, url } => {
1901                layers.push(disk(disk_backend_resources::BlobDiskHandle {
1902                    url: url.to_owned(),
1903                    format: match kind {
1904                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
1905                        cli_args::BlobKind::Vhd1 => {
1906                            disk_backend_resources::BlobDiskFormat::FixedVhd1
1907                        }
1908                    },
1909                }))
1910            }
1911            DiskCliKind::MemoryDiff(inner) => {
1912                layers.push(layer(RamDiskLayerHandle {
1913                    len: None,
1914                    sector_size: None,
1915                }));
1916                disk_open_inner(inner, true, layers).await?;
1917            }
1918            DiskCliKind::PersistentReservationsWrapper(inner) => {
1919                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
1920                    disk_open(inner, read_only).await?,
1921                )))
1922            }
1923            DiskCliKind::DelayDiskWrapper {
1924                delay_ms,
1925                disk: inner,
1926            } => layers.push(disk(DelayDiskHandle {
1927                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
1928                disk: disk_open(inner, read_only).await?,
1929            })),
1930            DiskCliKind::Crypt {
1931                disk: inner,
1932                cipher,
1933                key_file,
1934            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
1935                disk: disk_open(inner, read_only).await?,
1936                cipher: match cipher {
1937                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
1938                },
1939                key: fs_err::read(key_file).context("failed to read key file")?,
1940            })),
1941            DiskCliKind::Sqlite {
1942                path,
1943                create_with_len,
1944            } => {
1945                // FUTURE: this code should be responsible for opening
1946                // file-handle(s) itself, and passing them into sqlite via a custom
1947                // vfs. For now though - simply check if the file exists or not, and
1948                // perform early validation of filesystem-level create options.
1949                match (create_with_len.is_some(), path.exists()) {
1950                    (true, true) => anyhow::bail!(
1951                        "cannot create new sqlite disk at {} - file already exists",
1952                        path.display()
1953                    ),
1954                    (false, false) => anyhow::bail!(
1955                        "cannot open sqlite disk at {} - file not found",
1956                        path.display()
1957                    ),
1958                    _ => {}
1959                }
1960
1961                layers.push(layer(SqliteDiskLayerHandle {
1962                    dbhd_path: path.display().to_string(),
1963                    format_dbhd: create_with_len.map(|len| {
1964                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
1965                            logically_read_only: false,
1966                            len: Some(len),
1967                        }
1968                    }),
1969                }));
1970            }
1971            DiskCliKind::SqliteDiff { path, create, disk } => {
1972                // FUTURE: this code should be responsible for opening
1973                // file-handle(s) itself, and passing them into sqlite via a custom
1974                // vfs. For now though - simply check if the file exists or not, and
1975                // perform early validation of filesystem-level create options.
1976                match (create, path.exists()) {
1977                    (true, true) => anyhow::bail!(
1978                        "cannot create new sqlite disk at {} - file already exists",
1979                        path.display()
1980                    ),
1981                    (false, false) => anyhow::bail!(
1982                        "cannot open sqlite disk at {} - file not found",
1983                        path.display()
1984                    ),
1985                    _ => {}
1986                }
1987
1988                layers.push(layer(SqliteDiskLayerHandle {
1989                    dbhd_path: path.display().to_string(),
1990                    format_dbhd: create.then_some(
1991                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
1992                            logically_read_only: false,
1993                            len: None,
1994                        },
1995                    ),
1996                }));
1997                disk_open_inner(disk, true, layers).await?;
1998            }
1999            DiskCliKind::AutoCacheSqlite {
2000                cache_path,
2001                key,
2002                disk,
2003            } => {
2004                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2005                    read_cache: true,
2006                    write_through: false,
2007                    layer: SqliteAutoCacheDiskLayerHandle {
2008                        cache_path: cache_path.clone(),
2009                        cache_key: key.clone(),
2010                    }
2011                    .into_resource(),
2012                }));
2013                disk_open_inner(disk, read_only, layers).await?;
2014            }
2015        }
2016        Ok(())
2017    })
2018}
2019
2020/// Get the system page size.
2021pub(crate) fn system_page_size() -> u32 {
2022    sparse_mmap::SparseMapping::page_size() as u32
2023}
2024
/// The guest architecture string, derived from the compile-time `guest_arch`
/// cfg: `"x86_64"` when `guest_arch = "x86_64"`, `"aarch64"` otherwise.
#[cfg(guest_arch = "x86_64")]
pub(crate) const GUEST_ARCH: &str = "x86_64";

/// The guest architecture string, derived from the compile-time `guest_arch`
/// cfg: `"x86_64"` when `guest_arch = "x86_64"`, `"aarch64"` otherwise.
#[cfg(not(guest_arch = "x86_64"))]
pub(crate) const GUEST_ARCH: &str = "aarch64";
2031
2032/// Open a snapshot directory and validate it against the current VM config.
2033/// Returns the shared memory fd (from memory.bin) and the saved device state.
2034fn prepare_snapshot_restore(
2035    snapshot_dir: &Path,
2036    opt: &Options,
2037) -> anyhow::Result<(
2038    openvmm_defs::worker::SharedMemoryFd,
2039    mesh::payload::message::ProtobufMessage,
2040)> {
2041    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2042
2043    // Validate manifest against current VM config.
2044    openvmm_helpers::snapshot::validate_manifest(
2045        &manifest,
2046        GUEST_ARCH,
2047        opt.memory_size(),
2048        opt.processors,
2049        system_page_size(),
2050    )?;
2051
2052    // Open memory.bin (existing file, no create, no resize).
2053    let memory_file = fs_err::OpenOptions::new()
2054        .read(true)
2055        .write(true)
2056        .open(snapshot_dir.join("memory.bin"))?;
2057
2058    // Validate file size matches expected memory size.
2059    let file_size = memory_file.metadata()?.len();
2060    if file_size != manifest.memory_size_bytes {
2061        anyhow::bail!(
2062            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2063            manifest.memory_size_bytes,
2064        );
2065    }
2066
2067    let shared_memory_fd =
2068        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2069
2070    // Reconstruct ProtobufMessage from the saved state bytes.
2071    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2072    // back to ProtobufMessage.
2073    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2074        .context("failed to decode saved state from snapshot")?;
2075
2076    Ok((shared_memory_fd, state_msg))
2077}
2078
2079fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
2080    #[cfg(windows)]
2081    pal::windows::disable_hard_error_dialog();
2082
2083    tracing_init::enable_tracing()?;
2084
2085    // Try to run as a worker host.
2086    // On success the worker runs to completion and then exits the process (does
2087    // not return). Any worker host setup errors are return and bubbled up.
2088    meshworker::run_vmm_mesh_host()?;
2089
2090    let opt = Options::parse();
2091    if let Some(path) = &opt.write_saved_state_proto {
2092        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2093            .write_to_path(path)
2094            .context("failed to write protobuf descriptors")?;
2095        return Ok(());
2096    }
2097
2098    if let Some(ref path) = opt.pidfile {
2099        std::fs::write(path, format!("{}\n", std::process::id()))
2100            .context("failed to write pidfile")?;
2101        *pidfile_path = Some(path.clone());
2102    }
2103
2104    if let Some(path) = opt.relay_console_path {
2105        let console_title = opt.relay_console_title.unwrap_or_default();
2106        return console_relay::relay_console(&path, console_title.as_str());
2107    }
2108
2109    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2110    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2111        return block_on(async {
2112            let _ = std::fs::remove_file(path);
2113            let listener =
2114                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2115
2116            let transport = if opt.ttrpc.is_some() {
2117                ttrpc::RpcTransport::Ttrpc
2118            } else {
2119                ttrpc::RpcTransport::Grpc
2120            };
2121
2122            // This is a local launch
2123            let mut handle =
2124                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2125                    listener,
2126                    transport,
2127                })
2128                .await?;
2129
2130            tracing::info!(%transport, path = %path.display(), "listening");
2131
2132            // Signal the the parent process that the server is ready.
2133            pal::close_stdout().context("failed to close stdout")?;
2134
2135            handle.join().await?;
2136
2137            Ok(())
2138        });
2139    }
2140
2141    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2142}
2143
2144fn new_hvsock_service_id(port: u32) -> Guid {
2145    // This GUID is an embedding of the AF_VSOCK port into an
2146    // AF_HYPERV service ID.
2147    Guid {
2148        data1: port,
2149        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2150    }
2151}
2152
2153async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2154    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2155    let result = run_control_inner(driver, &mut mesh, opt).await;
2156    // If setup failed before the mesh was handed to the controller, shut it
2157    // down so the child host process exits cleanly without noisy logs.
2158    if let Some(mesh) = mesh {
2159        mesh.shutdown().await;
2160    }
2161    result
2162}
2163
2164async fn run_control_inner(
2165    driver: &DefaultDriver,
2166    mesh_slot: &mut Option<VmmMesh>,
2167    opt: Options,
2168) -> anyhow::Result<()> {
2169    let mesh = mesh_slot.as_ref().unwrap();
2170    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2171
2172    let mut vnc_worker = None;
2173    if opt.gfx || opt.vnc {
2174        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
2175            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;
2176
2177        let input_send = vm_config.input.sender();
2178        let framebuffer = resources
2179            .framebuffer_access
2180            .take()
2181            .expect("synth video enabled");
2182
2183        let vnc_host = mesh
2184            .make_host("vnc", None)
2185            .await
2186            .context("spawning vnc process failed")?;
2187
2188        vnc_worker = Some(
2189            vnc_host
2190                .launch_worker(
2191                    vnc_worker_defs::VNC_WORKER_TCP,
2192                    VncParameters {
2193                        listener,
2194                        framebuffer,
2195                        input_send,
2196                    },
2197                )
2198                .await?,
2199        )
2200    }
2201
2202    // spin up the debug worker
2203    let gdb_worker = if let Some(port) = opt.gdb {
2204        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2205            .with_context(|| format!("binding to gdb port {}", port))?;
2206
2207        let (req_tx, req_rx) = mesh::channel();
2208        vm_config.debugger_rpc = Some(req_rx);
2209
2210        let gdb_host = mesh
2211            .make_host("gdb", None)
2212            .await
2213            .context("spawning gdbstub process failed")?;
2214
2215        Some(
2216            gdb_host
2217                .launch_worker(
2218                    debug_worker_defs::DEBUGGER_WORKER,
2219                    debug_worker_defs::DebuggerParameters {
2220                        listener,
2221                        req_chan: req_tx,
2222                        vp_count: vm_config.processor_topology.proc_count,
2223                        target_arch: if cfg!(guest_arch = "x86_64") {
2224                            debug_worker_defs::TargetArch::X86_64
2225                        } else {
2226                            debug_worker_defs::TargetArch::Aarch64
2227                        },
2228                    },
2229                )
2230                .await
2231                .context("failed to launch gdbstub worker")?,
2232        )
2233    } else {
2234        None
2235    };
2236
2237    // spin up the VM
2238    let (vm_rpc, rpc_recv) = mesh::channel();
2239    let (notify_send, notify_recv) = mesh::channel();
2240    let vm_worker = {
2241        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2242
2243        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2244            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2245            (Some(fd), Some(state_msg))
2246        } else {
2247            let shared_memory = opt
2248                .memory_backing_file()
2249                .map(|path| {
2250                    openvmm_helpers::shared_memory::open_memory_backing_file(
2251                        path,
2252                        opt.memory_size(),
2253                    )
2254                })
2255                .transpose()?;
2256            (shared_memory, None)
2257        };
2258
2259        let params = VmWorkerParameters {
2260            hypervisor: match &opt.hypervisor {
2261                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2262                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2263            },
2264            cfg: vm_config,
2265            saved_state,
2266            shared_memory,
2267            rpc: rpc_recv,
2268            notify: notify_send,
2269        };
2270        vm_host
2271            .launch_worker(VM_WORKER, params)
2272            .await
2273            .context("failed to launch vm worker")?
2274    };
2275
2276    if opt.restore_snapshot.is_some() {
2277        tracing::info!("restoring VM from snapshot");
2278    }
2279
2280    if !opt.paused {
2281        vm_rpc.call(VmRpc::Resume, ()).await?;
2282    }
2283
2284    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2285        driver.clone(),
2286        DiagDialer {
2287            driver: driver.clone(),
2288            vm_rpc: vm_rpc.clone(),
2289            openhcl_vtl: if opt.vtl2 {
2290                DeviceVtl::Vtl2
2291            } else {
2292                DeviceVtl::Vtl0
2293            },
2294        },
2295    ));
2296
2297    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2298
2299    // Create channels between the REPL and VmController.
2300    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2301    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2302
2303    let has_vtl2 = resources.vtl2_settings.is_some();
2304
2305    // Build the VmController with exclusive resources.
2306    let controller = vm_controller::VmController {
2307        mesh: mesh_slot.take().unwrap(),
2308        vm_worker,
2309        vnc_worker,
2310        gdb_worker,
2311        diag_inspector: Some(diag_inspector),
2312        vtl2_settings: resources.vtl2_settings,
2313        ged_rpc: resources.ged_rpc.clone(),
2314        vm_rpc: vm_rpc.clone(),
2315        paravisor_diag: Some(paravisor_diag),
2316        igvm_path: opt.igvm.clone(),
2317        memory_backing_file: opt.memory_backing_file().cloned(),
2318        memory: opt.memory_size(),
2319        processors: opt.processors,
2320        log_file: opt.log_file.clone(),
2321    };
2322
2323    // Spawn the VmController as a task.
2324    let controller_task = driver.spawn(
2325        "vm-controller",
2326        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2327    );
2328
2329    // Run the REPL with shareable resources.
2330    let repl_result = repl::run_repl(
2331        driver,
2332        repl::ReplResources {
2333            vm_rpc,
2334            vm_controller: vm_controller_send,
2335            vm_controller_events: vm_controller_event_recv,
2336            scsi_rpc: resources.scsi_rpc,
2337            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2338            shutdown_ic: resources.shutdown_ic,
2339            kvp_ic: resources.kvp_ic,
2340            console_in: resources.console_in,
2341            has_vtl2,
2342        },
2343    )
2344    .await;
2345
2346    // Wait for the controller task to finish (it stops the VM worker and
2347    // shuts down the mesh).
2348    controller_task.await;
2349
2350    repl_result
2351}
2352
2353struct DiagDialer {
2354    driver: DefaultDriver,
2355    vm_rpc: mesh::Sender<VmRpc>,
2356    openhcl_vtl: DeviceVtl,
2357}
2358
2359impl mesh_rpc::client::Dial for DiagDialer {
2360    type Stream = PolledSocket<unix_socket::UnixStream>;
2361
2362    async fn dial(&mut self) -> io::Result<Self::Stream> {
2363        let service_id = new_hvsock_service_id(1);
2364        let socket = self
2365            .vm_rpc
2366            .call_failable(
2367                VmRpc::ConnectHvsock,
2368                (
2369                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2370                    service_id,
2371                    self.openhcl_vtl,
2372                ),
2373            )
2374            .await
2375            .map_err(io::Error::other)?;
2376
2377        PolledSocket::new(&self.driver, socket)
2378    }
2379}
2380
2381/// An object that implements [`InspectMut`] by sending an inspect request over
2382/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
2383/// the response back into the inspect tree.
2384///
2385/// This also caches the TTRPC connection to the guest so that only the first
2386/// inspect request has to wait for the connection to be established.
2387pub(crate) struct DiagInspector(DiagInspectorInner);
2388
2389enum DiagInspectorInner {
2390    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
2391    Started {
2392        send: mesh::Sender<inspect::Deferred>,
2393        _task: Task<()>,
2394    },
2395    Invalid,
2396}
2397
2398impl DiagInspector {
2399    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2400        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2401    }
2402
2403    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2404        loop {
2405            match self.0 {
2406                DiagInspectorInner::NotStarted { .. } => {
2407                    let DiagInspectorInner::NotStarted(driver, client) =
2408                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2409                    else {
2410                        unreachable!()
2411                    };
2412                    let (send, recv) = mesh::channel();
2413                    let task = driver.clone().spawn("diag-inspect", async move {
2414                        Self::run(&client, recv).await
2415                    });
2416
2417                    self.0 = DiagInspectorInner::Started { send, _task: task };
2418                }
2419                DiagInspectorInner::Started { ref send, .. } => break send,
2420                DiagInspectorInner::Invalid => unreachable!(),
2421            }
2422        }
2423    }
2424
2425    async fn run(
2426        diag_client: &diag_client::DiagClient,
2427        mut recv: mesh::Receiver<inspect::Deferred>,
2428    ) {
2429        while let Some(deferred) = recv.next().await {
2430            let info = deferred.external_request();
2431            let result = match info.request_type {
2432                inspect::ExternalRequestType::Inspect { depth } => {
2433                    if depth == 0 {
2434                        Ok(inspect::Node::Unevaluated)
2435                    } else {
2436                        // TODO: Support taking timeouts from the command line
2437                        diag_client
2438                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2439                            .await
2440                    }
2441                }
2442                inspect::ExternalRequestType::Update { value } => {
2443                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2444                }
2445            };
2446            deferred.complete_external(
2447                result.unwrap_or_else(|err| {
2448                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2449                }),
2450                inspect::SensitivityLevel::Unspecified,
2451            )
2452        }
2453    }
2454}
2455
2456impl InspectMut for DiagInspector {
2457    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2458        self.start().send(req.defer());
2459    }
2460}