// openvmm_entry/lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::StreamExt;
53use futures::executor::block_on;
54use futures::io::AllowStdIo;
55use gdma_resources::GdmaDeviceHandle;
56use gdma_resources::VportDefinition;
57use guid::Guid;
58use input_core::MultiplexedInputHandle;
59use inspect::InspectMut;
60use io::Read;
61use memory_range::MemoryRange;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64;
70use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2;
71use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86;
72use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2;
73use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
74use openvmm_defs::config::DeviceVtl;
75use openvmm_defs::config::EfiDiagnosticsLogLevelType;
76use openvmm_defs::config::HypervisorConfig;
77use openvmm_defs::config::LateMapVtl0MemoryPolicy;
78use openvmm_defs::config::LoadMode;
79use openvmm_defs::config::MemoryConfig;
80use openvmm_defs::config::PcieDeviceConfig;
81use openvmm_defs::config::PcieRootComplexConfig;
82use openvmm_defs::config::PcieRootPortConfig;
83use openvmm_defs::config::PcieSwitchConfig;
84use openvmm_defs::config::ProcessorTopologyConfig;
85use openvmm_defs::config::SerialInformation;
86use openvmm_defs::config::VirtioBus;
87use openvmm_defs::config::VmbusConfig;
88use openvmm_defs::config::VpciDeviceConfig;
89use openvmm_defs::config::Vtl2BaseAddressType;
90use openvmm_defs::config::Vtl2Config;
91use openvmm_defs::rpc::VmRpc;
92use openvmm_defs::worker::VM_WORKER;
93use openvmm_defs::worker::VmWorkerParameters;
94use openvmm_helpers::disk::create_disk_type;
95use openvmm_helpers::disk::open_disk_type;
96use pal_async::DefaultDriver;
97use pal_async::DefaultPool;
98use pal_async::socket::PolledSocket;
99use pal_async::task::Spawn;
100use pal_async::task::Task;
101use serial_16550_resources::ComPort;
102use serial_core::resources::DisconnectedSerialBackendHandle;
103use sparse_mmap::alloc_shared_memory;
104use std::cell::RefCell;
105use std::collections::BTreeMap;
106use std::fmt::Write as _;
107use std::future::pending;
108use std::io;
109#[cfg(unix)]
110use std::io::IsTerminal;
111use std::io::Write;
112use std::net::TcpListener;
113use std::path::Path;
114use std::path::PathBuf;
115use std::sync::Arc;
116use std::thread;
117use std::time::Duration;
118use storvsp_resources::ScsiControllerRequest;
119use tpm_resources::TpmDeviceHandle;
120use tpm_resources::TpmRegisterLayout;
121use uidevices_resources::SynthKeyboardHandle;
122use uidevices_resources::SynthMouseHandle;
123use uidevices_resources::SynthVideoHandle;
124use video_core::SharedFramebufferHandle;
125use virtio_resources::VirtioPciDeviceHandle;
126use vm_manifest_builder::BaseChipsetType;
127use vm_manifest_builder::MachineArch;
128use vm_manifest_builder::VmChipsetResult;
129use vm_manifest_builder::VmManifestBuilder;
130use vm_resource::IntoResource;
131use vm_resource::Resource;
132use vm_resource::kind::DiskHandleKind;
133use vm_resource::kind::DiskLayerHandleKind;
134use vm_resource::kind::NetEndpointHandleKind;
135use vm_resource::kind::VirtioDeviceHandle;
136use vm_resource::kind::VmbusDeviceHandleKind;
137use vmbus_serial_resources::VmbusSerialDeviceHandle;
138use vmbus_serial_resources::VmbusSerialPort;
139use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
140use vmgs_resources::GuestStateEncryptionPolicy;
141use vmgs_resources::VmgsDisk;
142use vmgs_resources::VmgsFileHandle;
143use vmgs_resources::VmgsResource;
144use vmotherboard::ChipsetDeviceHandle;
145use vnc_worker_defs::VncParameters;
146
147pub fn openvmm_main() {
148    // Save the current state of the terminal so we can restore it back to
149    // normal before exiting.
150    #[cfg(unix)]
151    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
152
153    let mut pidfile_path = None;
154    let exit_code = match do_main(&mut pidfile_path) {
155        Ok(_) => 0,
156        Err(err) => {
157            eprintln!("fatal error: {:?}", err);
158            1
159        }
160    };
161
162    // Restore the terminal to its initial state.
163    #[cfg(unix)]
164    if let Some(orig_termios) = orig_termios {
165        term::set_termios(orig_termios);
166    }
167
168    // Clean up the pidfile before terminating, since pal::process::terminate
169    // skips destructors.
170    if let Some(ref path) = pidfile_path {
171        let _ = std::fs::remove_file(path);
172    }
173
174    // Terminate the process immediately without graceful shutdown of DLLs or
175    // C++ destructors or anything like that. This is all unnecessary and saves
176    // time on Windows.
177    //
178    // Do flush stdout, though, since there may be buffered data.
179    let _ = io::stdout().flush();
180    pal::process::terminate(exit_code);
181}
182
/// Host-side handles to a configured VM's devices, collected while building
/// the [`Config`] so the control process can drive them after the VM starts.
#[derive(Default)]
struct VmResources {
    // Writer that forwards interactive console input to whichever serial
    // device claimed the console (taken from `ConsoleState` during setup).
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    // Access to the shared framebuffer; populated only when graphics/VNC/PCAT
    // options require a framebuffer.
    framebuffer_access: Option<FramebufferAccess>,
    // Channel to the shutdown integration component, if configured.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    // Channel to the key-value pair (KVP) integration component, if configured.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    // Request channel to the synthetic SCSI controller, if configured.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    // Request channel to the VTL2 NVMe controller, if configured.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    // Request channel to the guest emulation device (GED), if configured.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    // VTL2 settings to send to the paravisor, if configured.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    // Kernel vmswitch ports created for kernel-mode vmNICs; kept alive here so
    // the ports are not torn down while the VM is running (Windows only).
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
196
/// Records which serial device has claimed the interactive console, so a
/// second `SerialConfigCli::Console` request can be rejected with an error
/// naming the current owner.
struct ConsoleState<'a> {
    // Guest-facing device name that owns the console (e.g. "ttyS0", "hvc0"),
    // used in the "console already set by" error message.
    device: &'a str,
    // Writer that carries console input to the owning serial device; later
    // moved into `VmResources::console_in`.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
201
202/// Build a flat list of switches with their parent port assignments.
203///
204/// This function converts hierarchical CLI switch definitions into a flat list
205/// where each switch specifies its parent port directly.
206fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
207    all_switches
208        .iter()
209        .map(|switch_cli| PcieSwitchConfig {
210            name: switch_cli.name.clone(),
211            num_downstream_ports: switch_cli.num_downstream_ports,
212            parent_port: switch_cli.port_name.clone(),
213            hotplug: switch_cli.hotplug,
214        })
215        .collect()
216}
217
218async fn vm_config_from_command_line(
219    spawner: impl Spawn,
220    mesh: &VmmMesh,
221    opt: &Options,
222) -> anyhow::Result<(Config, VmResources)> {
223    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
224    // Ensure the serial driver stays alive with no tasks.
225    serial_driver.spawn("leak", pending::<()>()).detach();
226
227    let openhcl_vtl = if opt.vtl2 {
228        DeviceVtl::Vtl2
229    } else {
230        DeviceVtl::Vtl0
231    };
232
233    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
234    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
235        Ok(match cli_cfg {
236            SerialConfigCli::Console => {
237                if let Some(console_state) = console_state.borrow().as_ref() {
238                    bail!("console already set by {}", console_state.device);
239                }
240                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
241                let (serial_read, serial_write) = AsyncReadExt::split(serial);
242                *console_state.borrow_mut() = Some(ConsoleState {
243                    device,
244                    input: Box::new(serial_write),
245                });
246                thread::Builder::new()
247                    .name(name.to_owned())
248                    .spawn(move || {
249                        let _ = block_on(futures::io::copy(
250                            serial_read,
251                            &mut AllowStdIo::new(term::raw_stdout()),
252                        ));
253                    })
254                    .unwrap();
255                Some(config)
256            }
257            SerialConfigCli::Stderr => {
258                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
259                thread::Builder::new()
260                    .name(name.to_owned())
261                    .spawn(move || {
262                        let _ = block_on(futures::io::copy(
263                            serial,
264                            &mut AllowStdIo::new(term::raw_stderr()),
265                        ));
266                    })
267                    .unwrap();
268                Some(config)
269            }
270            SerialConfigCli::File(path) => {
271                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
272                let file = fs_err::File::create(path).context("failed to create file")?;
273
274                thread::Builder::new()
275                    .name(name.to_owned())
276                    .spawn(move || {
277                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
278                    })
279                    .unwrap();
280                Some(config)
281            }
282            SerialConfigCli::None => None,
283            SerialConfigCli::Pipe(path) => {
284                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
285            }
286            SerialConfigCli::Tcp(addr) => {
287                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
288            }
289            SerialConfigCli::NewConsole(app, window_title) => {
290                let path = console_relay::random_console_path();
291                let config =
292                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
293                let window_title =
294                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
295
296                console_relay::launch_console(
297                    app.or_else(openvmm_terminal_app).as_deref(),
298                    &path,
299                    ConsoleLaunchOptions {
300                        window_title: Some(window_title),
301                    },
302                )
303                .context("failed to launch console")?;
304
305                Some(config)
306            }
307        })
308    };
309
310    let mut vmbus_devices = Vec::new();
311
312    let serial0_cfg = setup_serial(
313        "com1",
314        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
315        if cfg!(guest_arch = "x86_64") {
316            "ttyS0"
317        } else {
318            "ttyAMA0"
319        },
320    )?;
321    let serial1_cfg = setup_serial(
322        "com2",
323        opt.com2.clone().unwrap_or(SerialConfigCli::None),
324        if cfg!(guest_arch = "x86_64") {
325            "ttyS1"
326        } else {
327            "ttyAMA1"
328        },
329    )?;
330    let serial2_cfg = setup_serial(
331        "com3",
332        opt.com3.clone().unwrap_or(SerialConfigCli::None),
333        if cfg!(guest_arch = "x86_64") {
334            "ttyS2"
335        } else {
336            "ttyAMA2"
337        },
338    )?;
339    let serial3_cfg = setup_serial(
340        "com4",
341        opt.com4.clone().unwrap_or(SerialConfigCli::None),
342        if cfg!(guest_arch = "x86_64") {
343            "ttyS3"
344        } else {
345            "ttyAMA3"
346        },
347    )?;
348    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
349        "vmbus_com1",
350        opt.vmbus_com1_serial
351            .clone()
352            .unwrap_or(SerialConfigCli::None),
353        "vmbus_com1",
354    )? {
355        vmbus_devices.push((
356            openhcl_vtl,
357            VmbusSerialDeviceHandle {
358                port: VmbusSerialPort::Com1,
359                backend: vmbus_com1_cfg,
360            }
361            .into_resource(),
362        ));
363        true
364    } else {
365        false
366    };
367    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
368        "vmbus_com2",
369        opt.vmbus_com2_serial
370            .clone()
371            .unwrap_or(SerialConfigCli::None),
372        "vmbus_com2",
373    )? {
374        vmbus_devices.push((
375            openhcl_vtl,
376            VmbusSerialDeviceHandle {
377                port: VmbusSerialPort::Com2,
378                backend: vmbus_com2_cfg,
379            }
380            .into_resource(),
381        ));
382        true
383    } else {
384        false
385    };
386    let debugcon_cfg = setup_serial(
387        "debugcon",
388        opt.debugcon
389            .clone()
390            .map(|cfg| cfg.serial)
391            .unwrap_or(SerialConfigCli::None),
392        "debugcon",
393    )?;
394
395    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
396        setup_serial("virtio-console", serial_cfg, "hvc0")?
397    } else {
398        None
399    };
400
401    let mut resources = VmResources::default();
402    let mut console_str = "";
403    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
404        resources.console_in = Some(input);
405        console_str = device;
406    }
407
408    if opt.shared_memory {
409        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
410    }
411
412    const MAX_PROCESSOR_COUNT: u32 = 1024;
413
414    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
415        bail!("invalid proc count: {}", opt.processors);
416    }
417
418    // Total SCSI channel count should not exceed the processor count
419    // (at most, one channel per VP).
420    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
421        bail!(
422            "invalid SCSI sub-channel count: requested {}, max {}",
423            opt.scsi_sub_channels,
424            MAX_PROCESSOR_COUNT - 1
425        );
426    }
427
428    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
429
430    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
431    for &cli_args::DiskCli {
432        vtl,
433        ref kind,
434        read_only,
435        is_dvd,
436        underhill,
437        ref pcie_port,
438    } in &opt.disk
439    {
440        if pcie_port.is_some() {
441            anyhow::bail!("`--disk` is incompatible with PCIe");
442        }
443
444        storage.add(
445            vtl,
446            underhill,
447            storage_builder::DiskLocation::Scsi(None),
448            kind,
449            is_dvd,
450            read_only,
451        )?;
452    }
453
454    for &cli_args::IdeDiskCli {
455        ref kind,
456        read_only,
457        channel,
458        device,
459        is_dvd,
460    } in &opt.ide
461    {
462        storage.add(
463            DeviceVtl::Vtl0,
464            None,
465            storage_builder::DiskLocation::Ide(channel, device),
466            kind,
467            is_dvd,
468            read_only,
469        )?;
470    }
471
472    for &cli_args::DiskCli {
473        vtl,
474        ref kind,
475        read_only,
476        is_dvd,
477        underhill,
478        ref pcie_port,
479    } in &opt.nvme
480    {
481        storage.add(
482            vtl,
483            underhill,
484            storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
485            kind,
486            is_dvd,
487            read_only,
488        )?;
489    }
490
491    for &cli_args::DiskCli {
492        vtl,
493        ref kind,
494        read_only,
495        is_dvd,
496        ref underhill,
497        ref pcie_port,
498    } in &opt.virtio_blk
499    {
500        if underhill.is_some() {
501            anyhow::bail!("underhill not supported with virtio-blk");
502        }
503        storage.add(
504            vtl,
505            None,
506            storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
507            kind,
508            is_dvd,
509            read_only,
510        )?;
511    }
512
513    let floppy_disks: Vec<_> = opt
514        .floppy
515        .iter()
516        .map(|disk| -> anyhow::Result<_> {
517            let &cli_args::FloppyDiskCli {
518                ref kind,
519                read_only,
520            } = disk;
521            Ok(FloppyDiskConfig {
522                disk_type: disk_open(kind, read_only)?,
523                read_only,
524            })
525        })
526        .collect::<Result<Vec<_>, _>>()?;
527
528    let mut vpci_mana_nics = [(); 3].map(|()| None);
529    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
530    let mut underhill_nics = Vec::new();
531    let mut vpci_devices = Vec::new();
532
533    let mut nic_index = 0;
534    for cli_cfg in &opt.net {
535        if cli_cfg.pcie_port.is_some() {
536            anyhow::bail!("`--net` does not support PCIe");
537        }
538        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
539        if cli_cfg.underhill {
540            if !opt.no_alias_map {
541                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
542            }
543            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
544                let vpci_instance_id = Guid::new_random();
545                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
546                    instance_id: vpci_instance_id.to_string(),
547                    subordinate_instance_id: None,
548                    max_sub_channels: None,
549                });
550                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
551            });
552            mana.1.vports.push(VportDefinition {
553                mac_address: vport.mac_address,
554                endpoint: vport.endpoint,
555            });
556        } else {
557            vmbus_devices.push(vport.into_netvsp_handle());
558        }
559    }
560
561    if opt.nic {
562        let nic_config = parse_endpoint(
563            &NicConfigCli {
564                vtl: DeviceVtl::Vtl0,
565                endpoint: EndpointConfigCli::Consomme { cidr: None },
566                max_queues: None,
567                underhill: false,
568                pcie_port: None,
569            },
570            &mut nic_index,
571            &mut resources,
572        )?;
573        vmbus_devices.push(nic_config.into_netvsp_handle());
574    }
575
576    // Build initial PCIe devices list from CLI options. Storage devices
577    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
578    let mut pcie_devices = Vec::new();
579    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
580        tracing::info!(
581            port_name = %cli_cfg.port_name,
582            socket_addr = ?cli_cfg.socket_addr,
583            "instantiating PCIe remote device"
584        );
585
586        // Generate a deterministic instance ID based on index
587        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
588            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
589        let instance_id = Guid {
590            data1: index as u32,
591            ..PCIE_REMOTE_BASE_INSTANCE_ID
592        };
593
594        pcie_devices.push(PcieDeviceConfig {
595            port_name: cli_cfg.port_name.clone(),
596            resource: pcie_remote_resources::PcieRemoteHandle {
597                instance_id,
598                socket_addr: cli_cfg.socket_addr.clone(),
599                hu: cli_cfg.hu,
600                controller: cli_cfg.controller,
601            }
602            .into_resource(),
603        });
604    }
605
606    #[cfg(windows)]
607    let mut kernel_vmnics = Vec::new();
608    #[cfg(windows)]
609    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
610        // Pick a random MAC address.
611        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
612        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
613
614        // Pick a fixed instance ID based on the index.
615        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
616        let instance_id = Guid {
617            data1: index as u32,
618            ..BASE_INSTANCE_ID
619        };
620
621        let switch_id = if switch_id == "default" {
622            DEFAULT_SWITCH
623        } else {
624            switch_id
625        };
626        let (port_id, port) = new_switch_port(switch_id)?;
627        resources.switch_ports.push(port);
628
629        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
630            instance_id,
631            mac_address: mac_address.into(),
632            switch_port_id: port_id,
633        });
634    }
635
636    for vport in &opt.mana {
637        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
638        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
639            (vtl, None) => {
640                &mut vpci_mana_nics[vtl]
641                    .get_or_insert_with(|| {
642                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
643                    })
644                    .1
645                    .vports
646            }
647            (0, Some(pcie_port)) => {
648                &mut pcie_mana_nics
649                    .entry(pcie_port)
650                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
651                    .vports
652            }
653            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
654        };
655        vport_array.push(VportDefinition {
656            mac_address: vport.mac_address,
657            endpoint: vport.endpoint,
658        });
659    }
660
661    vpci_devices.extend(
662        vpci_mana_nics
663            .into_iter()
664            .enumerate()
665            .filter_map(|(vtl, nic)| {
666                nic.map(|(instance_id, handle)| VpciDeviceConfig {
667                    vtl: match vtl {
668                        0 => DeviceVtl::Vtl0,
669                        1 => DeviceVtl::Vtl1,
670                        2 => DeviceVtl::Vtl2,
671                        _ => unreachable!(),
672                    },
673                    instance_id,
674                    resource: handle.into_resource(),
675                })
676            }),
677    );
678
679    pcie_devices.extend(
680        pcie_mana_nics
681            .into_iter()
682            .map(|(pcie_port, handle)| PcieDeviceConfig {
683                port_name: pcie_port,
684                resource: handle.into_resource(),
685            }),
686    );
687
688    // If VTL2 is enabled, and we are not in VTL2 self allocate mode, provide an
689    // mmio gap for VTL2.
690    let use_vtl2_gap = opt.vtl2
691        && !matches!(
692            opt.igvm_vtl2_relocation_type,
693            Vtl2BaseAddressType::Vtl2Allocate { .. },
694        );
695
696    #[cfg(guest_arch = "aarch64")]
697    let arch = MachineArch::Aarch64;
698    #[cfg(guest_arch = "x86_64")]
699    let arch = MachineArch::X86_64;
700
701    let mmio_gaps: Vec<MemoryRange> = match (use_vtl2_gap, arch) {
702        (true, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86_WITH_VTL2.into(),
703        (true, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2.into(),
704        (false, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86.into(),
705        (false, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64.into(),
706    };
707
708    let mut pci_ecam_gaps = Vec::new();
709    let mut pci_mmio_gaps = Vec::new();
710
711    let mut low_mmio_start = mmio_gaps.first().context("expected mmio gap")?.start();
712    let mut high_mmio_end = mmio_gaps.last().context("expected second mmio gap")?.end();
713
714    let mut pcie_root_complexes = Vec::new();
715    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
716        let ports = opt
717            .pcie_root_port
718            .iter()
719            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
720            .map(|port_cli| PcieRootPortConfig {
721                name: port_cli.name.clone(),
722                hotplug: port_cli.hotplug,
723            })
724            .collect();
725
726        const ONE_MB: u64 = 1024 * 1024;
727        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
728        let high_mmio_size = rc_cli
729            .high_mmio
730            .checked_next_multiple_of(ONE_MB)
731            .context("high mmio rounding error")?;
732        let ecam_size = (((rc_cli.end_bus - rc_cli.start_bus) as u64) + 1) * 256 * 4096;
733
734        let low_pci_mmio_start = low_mmio_start
735            .checked_sub(low_mmio_size)
736            .context("pci low mmio underflow")?;
737        let ecam_start = low_pci_mmio_start
738            .checked_sub(ecam_size)
739            .context("pci ecam underflow")?;
740        low_mmio_start = ecam_start;
741        high_mmio_end = high_mmio_end
742            .checked_add(high_mmio_size)
743            .context("pci high mmio overflow")?;
744
745        let ecam_range = MemoryRange::new(ecam_start..ecam_start + ecam_size);
746        let low_mmio = MemoryRange::new(low_pci_mmio_start..low_pci_mmio_start + low_mmio_size);
747        let high_mmio = MemoryRange::new(high_mmio_end - high_mmio_size..high_mmio_end);
748
749        pci_ecam_gaps.push(ecam_range);
750        pci_mmio_gaps.push(low_mmio);
751        pci_mmio_gaps.push(high_mmio);
752
753        pcie_root_complexes.push(PcieRootComplexConfig {
754            index: i as u32,
755            name: rc_cli.name.clone(),
756            segment: rc_cli.segment,
757            start_bus: rc_cli.start_bus,
758            end_bus: rc_cli.end_bus,
759            ecam_range,
760            low_mmio,
761            high_mmio,
762            ports,
763        });
764    }
765
766    pci_ecam_gaps.sort();
767    pci_mmio_gaps.sort();
768
769    let pcie_switches = build_switch_list(&opt.pcie_switch);
770
771    #[cfg(windows)]
772    let vpci_resources: Vec<_> = opt
773        .device
774        .iter()
775        .map(|path| -> anyhow::Result<_> {
776            Ok(virt_whp::device::DeviceHandle(
777                whp::VpciResource::new(
778                    None,
779                    Default::default(),
780                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
781                )
782                .with_context(|| format!("opening PCI device {}", path))?,
783            ))
784        })
785        .collect::<Result<_, _>>()?;
786
787    // Create a vmbusproxy handle if needed by any devices.
788    #[cfg(windows)]
789    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
790        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
791    } else {
792        None
793    };
794
795    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
796        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
797        let (fb, fba) =
798            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
799        resources.framebuffer_access = Some(fba);
800        Some(fb)
801    } else {
802        None
803    };
804
805    let load_mode;
806    let with_hv;
807
808    let any_serial_configured = serial0_cfg.is_some()
809        || serial1_cfg.is_some()
810        || serial2_cfg.is_some()
811        || serial3_cfg.is_some();
812
813    let has_com3 = serial2_cfg.is_some();
814
815    let mut chipset = VmManifestBuilder::new(
816        if opt.igvm.is_some() {
817            BaseChipsetType::HclHost
818        } else if opt.pcat {
819            BaseChipsetType::HypervGen1
820        } else if opt.uefi {
821            BaseChipsetType::HypervGen2Uefi
822        } else if opt.hv {
823            BaseChipsetType::HyperVGen2LinuxDirect
824        } else {
825            BaseChipsetType::UnenlightenedLinuxDirect
826        },
827        arch,
828    );
829
830    if framebuffer.is_some() {
831        chipset = chipset.with_framebuffer();
832    }
833    if opt.guest_watchdog {
834        chipset = chipset.with_guest_watchdog();
835    }
836    if any_serial_configured {
837        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
838    }
839    if opt.battery {
840        let (tx, rx) = mesh::channel();
841        tx.send(HostBatteryUpdate::default_present());
842        chipset = chipset.with_battery(rx);
843    }
844    if let Some(cfg) = &opt.debugcon {
845        chipset = chipset.with_debugcon(
846            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
847            cfg.port,
848        );
849    }
850
851    // TODO: load from VMGS file if it exists
852    let bios_guid = Guid::new_random();
853
854    let VmChipsetResult {
855        chipset,
856        mut chipset_devices,
857    } = chipset
858        .build()
859        .context("failed to build chipset configuration")?;
860
861    if opt.restore_snapshot.is_some() {
862        // Snapshot restore: skip firmware loading entirely. Device state and
863        // memory come from the snapshot directory.
864        load_mode = LoadMode::None;
865        with_hv = true;
866    } else if let Some(path) = &opt.igvm {
867        let file = fs_err::File::open(path)
868            .context("failed to open igvm file")?
869            .into();
870        let cmdline = opt.cmdline.join(" ");
871        with_hv = true;
872
873        load_mode = LoadMode::Igvm {
874            file,
875            cmdline,
876            vtl2_base_address: opt.igvm_vtl2_relocation_type,
877            com_serial: has_com3.then(|| SerialInformation {
878                io_port: ComPort::Com3.io_port(),
879                irq: ComPort::Com3.irq().into(),
880            }),
881        };
882    } else if opt.pcat {
883        // Emit a nice error early instead of complaining about missing firmware.
884        if arch != MachineArch::X86_64 {
885            anyhow::bail!("pcat not supported on this architecture");
886        }
887        with_hv = true;
888
889        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
890        load_mode = LoadMode::Pcat {
891            firmware,
892            boot_order: opt
893                .pcat_boot_order
894                .map(|x| x.0)
895                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
896        };
897    } else if opt.uefi {
898        use openvmm_defs::config::UefiConsoleMode;
899
900        with_hv = true;
901
902        let firmware = fs_err::File::open(
903            (opt.uefi_firmware.0)
904                .as_ref()
905                .context("must provide uefi firmware when booting with uefi")?,
906        )
907        .context("failed to open uefi firmware")?;
908
909        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
910        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
911        load_mode = LoadMode::Uefi {
912            firmware: firmware.into(),
913            enable_debugging: opt.uefi_debug,
914            enable_memory_protections: opt.uefi_enable_memory_protections,
915            disable_frontpage: opt.disable_frontpage,
916            enable_tpm: opt.tpm,
917            enable_battery: opt.battery,
918            enable_serial: any_serial_configured,
919            enable_vpci_boot: false,
920            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
921                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
922                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
923                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
924                UefiConsoleModeCli::None => UefiConsoleMode::None,
925            }),
926            default_boot_always_attempt: opt.default_boot_always_attempt,
927            bios_guid,
928        };
929    } else {
930        // Linux Direct
931        let mut cmdline = "panic=-1 debug".to_string();
932
933        with_hv = opt.hv;
934        if with_hv && opt.pcie_root_complex.is_empty() {
935            cmdline += " pci=off";
936        }
937
938        if !console_str.is_empty() {
939            let _ = write!(&mut cmdline, " console={}", console_str);
940        }
941
942        if opt.gfx {
943            cmdline += " console=tty";
944        }
945        for extra in &opt.cmdline {
946            let _ = write!(&mut cmdline, " {}", extra);
947        }
948
949        let kernel = fs_err::File::open(
950            (opt.kernel.0)
951                .as_ref()
952                .context("must provide kernel when booting with linux direct")?,
953        )
954        .context("failed to open kernel")?;
955        let initrd = (opt.initrd.0)
956            .as_ref()
957            .map(fs_err::File::open)
958            .transpose()
959            .context("failed to open initrd")?;
960
961        let custom_dsdt = match &opt.custom_dsdt {
962            Some(path) => {
963                let mut v = Vec::new();
964                fs_err::File::open(path)
965                    .context("failed to open custom dsdt")?
966                    .read_to_end(&mut v)
967                    .context("failed to read custom dsdt")?;
968                Some(v)
969            }
970            None => None,
971        };
972
973        load_mode = LoadMode::Linux {
974            kernel: kernel.into(),
975            initrd: initrd.map(Into::into),
976            cmdline,
977            custom_dsdt,
978            enable_serial: any_serial_configured,
979            boot_mode: if opt.device_tree {
980                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
981            } else {
982                openvmm_defs::config::LinuxDirectBootMode::Acpi
983            },
984        };
985    }
986
987    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
988        let disk = VmgsDisk {
989            disk: disk_open(kind, false).context("failed to open vmgs disk")?,
990            encryption_policy: if opt.test_gsp_by_id {
991                GuestStateEncryptionPolicy::GspById(true)
992            } else {
993                GuestStateEncryptionPolicy::None(true)
994            },
995        };
996        match provision {
997            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
998            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
999            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1000        }
1001    } else {
1002        VmgsResource::Ephemeral
1003    });
1004
1005    if with_get && with_hv {
1006        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1007            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1008            fixed: Some(Default::default()),
1009            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1010                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1011                nic_devices: underhill_nics,
1012            }),
1013            namespace_settings: Vec::default(),
1014        };
1015
1016        // Cache the VTL2 settings for later modification via the interactive console.
1017        resources.vtl2_settings = Some(vtl2_settings.clone());
1018
1019        let (send, guest_request_recv) = mesh::channel();
1020        resources.ged_rpc = Some(send);
1021
1022        let vmgs = vmgs.take().unwrap();
1023
1024        vmbus_devices.extend([
1025            (
1026                openhcl_vtl,
1027                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1028            ),
1029            (
1030                openhcl_vtl,
1031                get_resources::ged::GuestEmulationDeviceHandle {
1032                    firmware: if opt.pcat {
1033                        get_resources::ged::GuestFirmwareConfig::Pcat {
1034                            boot_order: opt
1035                                .pcat_boot_order
1036                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1037                                .map(|x| match x {
1038                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1039                                        get_resources::ged::PcatBootDevice::Floppy
1040                                    }
1041                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1042                                        get_resources::ged::PcatBootDevice::HardDrive
1043                                    }
1044                                    openvmm_defs::config::PcatBootDevice::Optical => {
1045                                        get_resources::ged::PcatBootDevice::Optical
1046                                    }
1047                                    openvmm_defs::config::PcatBootDevice::Network => {
1048                                        get_resources::ged::PcatBootDevice::Network
1049                                    }
1050                                }),
1051                        }
1052                    } else {
1053                        use get_resources::ged::UefiConsoleMode;
1054
1055                        get_resources::ged::GuestFirmwareConfig::Uefi {
1056                            enable_vpci_boot: storage.has_vtl0_nvme(),
1057                            firmware_debug: opt.uefi_debug,
1058                            disable_frontpage: opt.disable_frontpage,
1059                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1060                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1061                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1062                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1063                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1064                            },
1065                            default_boot_always_attempt: opt.default_boot_always_attempt,
1066                        }
1067                    },
1068                    com1: with_vmbus_com1_serial,
1069                    com2: with_vmbus_com2_serial,
1070                    serial_tx_only: opt.serial_tx_only,
1071                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1072                    vmbus_redirection: opt.vmbus_redirect,
1073                    vmgs,
1074                    framebuffer: opt
1075                        .vtl2_gfx
1076                        .then(|| SharedFramebufferHandle.into_resource()),
1077                    guest_request_recv,
1078                    enable_tpm: opt.tpm,
1079                    firmware_event_send: None,
1080                    secure_boot_enabled: opt.secure_boot,
1081                    secure_boot_template: match opt.secure_boot_template {
1082                        Some(SecureBootTemplateCli::Windows) => {
1083                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1084                        },
1085                        Some(SecureBootTemplateCli::UefiCa) => {
1086                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1087                        }
1088                        None => {
1089                            get_resources::ged::GuestSecureBootTemplateType::None
1090                        },
1091                    },
1092                    enable_battery: opt.battery,
1093                    no_persistent_secrets: true,
1094                    igvm_attest_test_config: None,
1095                    test_gsp_by_id: opt.test_gsp_by_id,
1096                    efi_diagnostics_log_level: {
1097                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1098                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1099                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1100                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1101                        }
1102                    },
1103                    hv_sint_enabled: false,
1104                }
1105                .into_resource(),
1106            ),
1107        ]);
1108    }
1109
1110    if opt.tpm && !opt.vtl2 {
1111        let register_layout = if cfg!(guest_arch = "x86_64") {
1112            TpmRegisterLayout::IoPort
1113        } else {
1114            TpmRegisterLayout::Mmio
1115        };
1116
1117        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1118            (
1119                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1120                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1121            )
1122        } else {
1123            (
1124                EphemeralNonVolatileStoreHandle.into_resource(),
1125                EphemeralNonVolatileStoreHandle.into_resource(),
1126            )
1127        };
1128
1129        chipset_devices.push(ChipsetDeviceHandle {
1130            name: "tpm".to_string(),
1131            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1132                device: TpmDeviceHandle {
1133                    ppi_store,
1134                    nvram_store,
1135                    nvram_size: None,
1136                    refresh_tpm_seeds: false,
1137                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1138                    register_layout,
1139                    guest_secret_key: None,
1140                    logger: None,
1141                    is_confidential_vm: false,
1142                    bios_guid,
1143                }
1144                .into_resource(),
1145                worker_host: mesh.make_host("tpm", None).await?,
1146            }
1147            .into_resource(),
1148        });
1149    }
1150
1151    let custom_uefi_vars = {
1152        use firmware_uefi_custom_vars::CustomVars;
1153
1154        // load base vars from specified template, or use an empty set of base
1155        // vars if none was specified.
1156        let base_vars = match opt.secure_boot_template {
1157            Some(template) => match (arch, template) {
1158                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1159                    hyperv_secure_boot_templates::x64::microsoft_windows()
1160                }
1161                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1162                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1163                }
1164                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1165                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1166                }
1167                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1168                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1169                }
1170            },
1171            None => CustomVars::default(),
1172        };
1173
1174        // TODO: fallback to VMGS read if no command line flag was given
1175
1176        let custom_uefi_json_data = match &opt.custom_uefi_json {
1177            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1178            None => None,
1179        };
1180
1181        // obtain the final custom uefi vars by applying the delta onto the base vars
1182        match custom_uefi_json_data {
1183            Some(data) => {
1184                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1185                base_vars.apply_delta(delta)?
1186            }
1187            None => base_vars,
1188        }
1189    };
1190
1191    let vga_firmware = if opt.pcat {
1192        Some(openvmm_pcat_locator::find_svga_bios(
1193            opt.vga_firmware.as_deref(),
1194        )?)
1195    } else {
1196        None
1197    };
1198
1199    if opt.gfx {
1200        vmbus_devices.extend([
1201            (
1202                DeviceVtl::Vtl0,
1203                SynthVideoHandle {
1204                    framebuffer: SharedFramebufferHandle.into_resource(),
1205                }
1206                .into_resource(),
1207            ),
1208            (
1209                DeviceVtl::Vtl0,
1210                SynthKeyboardHandle {
1211                    source: MultiplexedInputHandle {
1212                        // Save 0 for PS/2
1213                        elevation: 1,
1214                    }
1215                    .into_resource(),
1216                }
1217                .into_resource(),
1218            ),
1219            (
1220                DeviceVtl::Vtl0,
1221                SynthMouseHandle {
1222                    source: MultiplexedInputHandle {
1223                        // Save 0 for PS/2
1224                        elevation: 1,
1225                    }
1226                    .into_resource(),
1227                }
1228                .into_resource(),
1229            ),
1230        ]);
1231    }
1232
    // Helper: bind a Unix-domain listener for hybrid vsock at `path`,
    // removing any stale socket file left over from a previous run.
    // Returns `Ok(None)` when no path was configured.
    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
        if let Some(path) = path {
            // Best-effort removal of a leftover socket so bind() can succeed.
            cleanup_socket(path.as_ref());
            let listener = unix_socket::UnixListener::bind(path)
                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
            Ok(Some(listener))
        } else {
            Ok(None)
        }
    };
1243
1244    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1245    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1246
1247    if let Some(path) = &opt.openhcl_dump_path {
1248        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1249        task.detach();
1250        vmbus_devices.push((openhcl_vtl, resource));
1251    }
1252
1253    #[cfg(guest_arch = "aarch64")]
1254    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1255        openvmm_defs::config::Aarch64TopologyConfig {
1256            // TODO: allow this to be configured from the command line
1257            gic_config: None,
1258            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1259        },
1260    );
1261    #[cfg(guest_arch = "x86_64")]
1262    let topology_arch =
1263        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1264            apic_id_offset: opt.apic_id_offset,
1265            x2apic: opt.x2apic,
1266        });
1267
1268    let with_isolation = if let Some(isolation) = &opt.isolation {
1269        // TODO: For now, isolation is only supported with VTL2.
1270        if !opt.vtl2 {
1271            anyhow::bail!("isolation is only currently supported with vtl2");
1272        }
1273
        // TODO: Alias map support is not yet implemented with isolation.
1275        if !opt.no_alias_map {
1276            anyhow::bail!("alias map not supported with isolation");
1277        }
1278
1279        match isolation {
1280            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1281        }
1282    } else {
1283        None
1284    };
1285
1286    if with_hv {
1287        let (shutdown_send, shutdown_recv) = mesh::channel();
1288        resources.shutdown_ic = Some(shutdown_send);
1289        let (kvp_send, kvp_recv) = mesh::channel();
1290        resources.kvp_ic = Some(kvp_send);
1291        vmbus_devices.extend(
1292            [
1293                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1294                    recv: shutdown_recv,
1295                }
1296                .into_resource(),
1297                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1298                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1299            ]
1300            .map(|r| (DeviceVtl::Vtl0, r)),
1301        );
1302    }
1303
1304    if let Some(hive_path) = &opt.imc {
1305        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1306        vmbus_devices.push((
1307            DeviceVtl::Vtl0,
1308            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1309        ));
1310    }
1311
    let mut virtio_devices = Vec::new();
    // Helper: route a virtio device to the requested transport. A concrete
    // bus (`Some(Mmio | Pci)`) lands in `virtio_devices`; `None` means VPCI,
    // in which case the device is wrapped in a virtio-over-VPCI handle and
    // added to `vpci_devices` for VTL0 instead.
    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
        let bus = match bus {
            VirtioBusCli::Auto => {
                // Use VPCI when possible (currently only on Windows and macOS due
                // to KVM backend limitations).
                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
                    None
                } else {
                    Some(VirtioBus::Pci)
                }
            }
            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
            VirtioBusCli::Pci => Some(VirtioBus::Pci),
            VirtioBusCli::Vpci => None,
        };
        if let Some(bus) = bus {
            virtio_devices.push((bus, resource));
        } else {
            // VPCI path: each device gets a freshly generated instance id.
            vpci_devices.push(VpciDeviceConfig {
                vtl: DeviceVtl::Vtl0,
                instance_id: Guid::new_random(),
                resource: VirtioPciDeviceHandle(resource).into_resource(),
            });
        }
    };
1338
1339    for cli_cfg in &opt.virtio_net {
1340        if cli_cfg.underhill {
1341            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1342        }
1343        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1344        let resource = virtio_resources::net::VirtioNetHandle {
1345            max_queues: vport.max_queues,
1346            mac_address: vport.mac_address,
1347            endpoint: vport.endpoint,
1348        }
1349        .into_resource();
1350        if let Some(pcie_port) = &cli_cfg.pcie_port {
1351            pcie_devices.push(PcieDeviceConfig {
1352                port_name: pcie_port.clone(),
1353                resource: VirtioPciDeviceHandle(resource).into_resource(),
1354            });
1355        } else {
1356            add_virtio_device(VirtioBusCli::Auto, resource);
1357        }
1358    }
1359
1360    for args in &opt.virtio_fs {
1361        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1362            tag: args.tag.clone(),
1363            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1364                root_path: args.path.clone(),
1365                mount_options: args.options.clone(),
1366            },
1367        }
1368        .into_resource();
1369        if let Some(pcie_port) = &args.pcie_port {
1370            pcie_devices.push(PcieDeviceConfig {
1371                port_name: pcie_port.clone(),
1372                resource: VirtioPciDeviceHandle(resource).into_resource(),
1373            });
1374        } else {
1375            add_virtio_device(opt.virtio_fs_bus, resource);
1376        }
1377    }
1378
1379    for args in &opt.virtio_fs_shmem {
1380        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1381            tag: args.tag.clone(),
1382            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1383                root_path: args.path.clone(),
1384            },
1385        }
1386        .into_resource();
1387        if let Some(pcie_port) = &args.pcie_port {
1388            pcie_devices.push(PcieDeviceConfig {
1389                port_name: pcie_port.clone(),
1390                resource: VirtioPciDeviceHandle(resource).into_resource(),
1391            });
1392        } else {
1393            add_virtio_device(opt.virtio_fs_bus, resource);
1394        }
1395    }
1396
1397    for args in &opt.virtio_9p {
1398        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1399            tag: args.tag.clone(),
1400            root_path: args.path.clone(),
1401            debug: opt.virtio_9p_debug,
1402        }
1403        .into_resource();
1404        if let Some(pcie_port) = &args.pcie_port {
1405            pcie_devices.push(PcieDeviceConfig {
1406                port_name: pcie_port.clone(),
1407                resource: VirtioPciDeviceHandle(resource).into_resource(),
1408            });
1409        } else {
1410            add_virtio_device(VirtioBusCli::Auto, resource);
1411        }
1412    }
1413
1414    if let Some(pmem_args) = &opt.virtio_pmem {
1415        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1416            path: pmem_args.path.clone(),
1417        }
1418        .into_resource();
1419        if let Some(pcie_port) = &pmem_args.pcie_port {
1420            pcie_devices.push(PcieDeviceConfig {
1421                port_name: pcie_port.clone(),
1422                resource: VirtioPciDeviceHandle(resource).into_resource(),
1423            });
1424        } else {
1425            add_virtio_device(VirtioBusCli::Auto, resource);
1426        }
1427    }
1428
1429    if opt.virtio_rng {
1430        let resource: Resource<VirtioDeviceHandle> =
1431            virtio_resources::rng::VirtioRngHandle.into_resource();
1432        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1433            pcie_devices.push(PcieDeviceConfig {
1434                port_name: pcie_port.clone(),
1435                resource: VirtioPciDeviceHandle(resource).into_resource(),
1436            });
1437        } else {
1438            add_virtio_device(opt.virtio_rng_bus, resource);
1439        }
1440    }
1441
1442    if let Some(backend) = virtio_console_backend {
1443        let resource: Resource<VirtioDeviceHandle> =
1444            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1445        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1446            pcie_devices.push(PcieDeviceConfig {
1447                port_name: pcie_port.clone(),
1448                resource: VirtioPciDeviceHandle(resource).into_resource(),
1449            });
1450        } else {
1451            add_virtio_device(VirtioBusCli::Auto, resource);
1452        }
1453    }
1454
1455    // Handle --vhost-user arguments.
1456    #[cfg(target_os = "linux")]
1457    for vhost_cli in &opt.vhost_user {
1458        let stream =
1459            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1460                format!(
1461                    "failed to connect to vhost-user socket: {}",
1462                    vhost_cli.socket_path
1463                )
1464            })?;
1465
1466        use crate::cli_args::VhostUserDeviceTypeCli;
1467        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1468            VhostUserDeviceTypeCli::Fs { ref tag } => {
1469                virtio_resources::vhost_user::VhostUserFsHandle {
1470                    socket: stream.into(),
1471                    tag: tag.clone(),
1472                }
1473                .into_resource()
1474            }
1475            VhostUserDeviceTypeCli::Blk => virtio_resources::vhost_user::VhostUserDeviceHandle {
1476                socket: stream.into(),
1477                device_id: virtio::spec::VirtioDeviceType::BLK.0,
1478            }
1479            .into_resource(),
1480            VhostUserDeviceTypeCli::Other { device_id } => {
1481                virtio_resources::vhost_user::VhostUserDeviceHandle {
1482                    socket: stream.into(),
1483                    device_id,
1484                }
1485                .into_resource()
1486            }
1487        };
1488        if let Some(pcie_port) = &vhost_cli.pcie_port {
1489            pcie_devices.push(PcieDeviceConfig {
1490                port_name: pcie_port.clone(),
1491                resource: VirtioPciDeviceHandle(resource).into_resource(),
1492            });
1493        } else {
1494            add_virtio_device(VirtioBusCli::Auto, resource);
1495        }
1496    }
1497
1498    if let Some(vsock_path) = &opt.virtio_vsock_path {
1499        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1500        add_virtio_device(
1501            VirtioBusCli::Auto,
1502            virtio_resources::vsock::VirtioVsockHandle {
1503                // The guest CID does not matter since the UDS relay does not use it. It just needs
1504                // to be some non-reserved value for the guest to use.
1505                guest_cid: 0x3,
1506                base_path: vsock_path.clone(),
1507                listener,
1508            }
1509            .into_resource(),
1510        );
1511    }
1512
1513    let mut cfg = Config {
1514        chipset,
1515        load_mode,
1516        floppy_disks,
1517        pcie_root_complexes,
1518        pcie_devices,
1519        pcie_switches,
1520        vpci_devices,
1521        ide_disks: Vec::new(),
1522        memory: MemoryConfig {
1523            mem_size: opt.memory,
1524            mmio_gaps,
1525            prefetch_memory: opt.prefetch,
1526            private_memory: opt.private_memory,
1527            transparent_hugepages: opt.thp,
1528            pci_ecam_gaps,
1529            pci_mmio_gaps,
1530        },
1531        processor_topology: ProcessorTopologyConfig {
1532            proc_count: opt.processors,
1533            vps_per_socket: opt.vps_per_socket,
1534            enable_smt: match opt.smt {
1535                cli_args::SmtConfigCli::Auto => None,
1536                cli_args::SmtConfigCli::Force => Some(true),
1537                cli_args::SmtConfigCli::Off => Some(false),
1538            },
1539            arch: Some(topology_arch),
1540        },
1541        hypervisor: HypervisorConfig {
1542            with_hv,
1543            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1544                vtl0_alias_map: !opt.no_alias_map,
1545                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1546                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1547                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1548                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1549                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1550                        Some(LateMapVtl0MemoryPolicy::InjectException)
1551                    }
1552                },
1553            }),
1554            with_isolation,
1555            user_mode_hv_enlightenments: opt.no_enlightenments,
1556            user_mode_apic: opt.user_mode_apic,
1557        },
1558        #[cfg(windows)]
1559        kernel_vmnics,
1560        input: mesh::Receiver::new(),
1561        framebuffer,
1562        vga_firmware,
1563        vtl2_gfx: opt.vtl2_gfx,
1564        virtio_devices,
1565        vmbus: with_hv.then_some(VmbusConfig {
1566            vsock_listener: vtl0_vsock_listener,
1567            vsock_path: opt.vmbus_vsock_path.clone(),
1568            vtl2_redirect: opt.vmbus_redirect,
1569            vmbus_max_version: opt.vmbus_max_version,
1570            #[cfg(windows)]
1571            vmbusproxy_handle,
1572        }),
1573        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1574            vsock_listener: vtl2_vsock_listener,
1575            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1576            ..Default::default()
1577        }),
1578        vmbus_devices,
1579        chipset_devices,
1580        #[cfg(windows)]
1581        vpci_resources,
1582        vmgs,
1583        secure_boot_enabled: opt.secure_boot,
1584        custom_uefi_vars,
1585        firmware_event_send: None,
1586        debugger_rpc: None,
1587        generation_id_recv: None,
1588        rtc_delta_milliseconds: 0,
1589        automatic_guest_reset: !opt.halt_on_reset,
1590        efi_diagnostics_log_level: {
1591            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1592                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1593                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1594                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1595            }
1596        },
1597    };
1598
1599    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1600    Ok((cfg, resources))
1601}
1602
/// Gets the terminal to use for externally launched console windows.
///
/// Checks `OPENVMM_TERM` first, then falls back to the legacy `HVLITE_TERM`
/// variable; returns `None` if neither is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    ["OPENVMM_TERM", "HVLITE_TERM"]
        .iter()
        .find_map(|name| std::env::var_os(name))
        .map(PathBuf::from)
}
1609
// Tries to remove `path`, but only if it is confirmed to be a Unix socket.
// Anything else (regular files, directories, nonexistent paths) is left
// untouched, and removal failures are ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let should_remove = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let should_remove = match path.metadata() {
        Ok(meta) => std::os::unix::fs::FileTypeExt::is_socket(&meta.file_type()),
        // Can't stat it (e.g. it doesn't exist): nothing to clean up.
        Err(_) => false,
    };

    if should_remove {
        let _ = std::fs::remove_file(path);
    }
}
1623
/// Switch id used for DirectIO NIC endpoints when none is given on the
/// command line (see `parse_endpoint`). This appears to be the well-known
/// GUID of the Hyper-V "Default Switch" network — TODO confirm.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1626
/// Creates a new port with a random GUID on the vmswitch identified by
/// `switch_id`, returning both the openvmm config id for the port and the
/// kernel switch-port handle.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // Probe the switch up front so that a bad id produces a clear
    // "could not find switch" error before attempting to create a port.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    let config_id = openvmm_defs::config::SwitchPortId {
        switch: kernel_id.switch,
        port: kernel_id.port,
    };
    Ok((config_id, port))
}
1649
/// Builds a [`NicConfig`] from a CLI NIC description.
///
/// `index` is bumped per NIC and used to derive a stable instance ID; any
/// backend-owned objects (Windows switch ports) are parked in `resources` so
/// they stay alive for the VM's lifetime.
fn parse_endpoint(
    cli_cfg: &NicConfigCli,
    index: &mut usize,
    resources: &mut VmResources,
) -> anyhow::Result<NicConfig> {
    // `resources` is only touched in the cfg(windows) DIO arm below; this
    // silences the unused-variable warning on other platforms.
    let _ = resources;
    let endpoint = match &cli_cfg.endpoint {
        EndpointConfigCli::Consomme { cidr } => {
            net_backend_resources::consomme::ConsommeHandle { cidr: cidr.clone() }.into_resource()
        }
        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
        EndpointConfigCli::Dio { id } => {
            #[cfg(windows)]
            {
                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
                // Keep the kernel switch port alive alongside the VM.
                resources.switch_ports.push(port);
                net_backend_resources::dio::WindowsDirectIoHandle {
                    switch_port_id: net_backend_resources::dio::SwitchPortId {
                        switch: port_id.switch,
                        port: port_id.port,
                    },
                }
                .into_resource()
            }

            #[cfg(not(windows))]
            {
                let _ = id;
                bail!("cannot use dio on non-windows platforms")
            }
        }
        EndpointConfigCli::Tap { name } => {
            #[cfg(target_os = "linux")]
            {
                let fd = net_tap::tap::open_tap(name)
                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
                net_backend_resources::tap::TapHandle { fd }.into_resource()
            }

            #[cfg(not(target_os = "linux"))]
            {
                let _ = name;
                bail!("TAP backend is only supported on Linux")
            }
        }
    };

    // Pick a random MAC address in the Microsoft OUI (00:15:5D) range.
    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
    getrandom::fill(&mut mac_address[3..]).expect("rng failure");

    // Pick a fixed instance ID based on the index, so NIC identity is stable
    // across runs with the same command line.
    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
    let instance_id = Guid {
        data1: *index as u32,
        ..BASE_INSTANCE_ID
    };
    *index += 1;

    Ok(NicConfig {
        vtl: cli_cfg.vtl,
        instance_id,
        endpoint,
        mac_address: mac_address.into(),
        max_queues: cli_cfg.max_queues,
        pcie_port: cli_cfg.pcie_port.clone(),
    })
}
1718
/// Configuration for a single guest NIC, assembled from the command line by
/// `parse_endpoint`.
#[derive(Debug)]
struct NicConfig {
    // VTL the device is offered to.
    vtl: DeviceVtl,
    // Stable instance ID derived from the NIC index.
    instance_id: Guid,
    // Randomly generated MAC with the 00:15:5D prefix.
    mac_address: MacAddress,
    // Backend endpoint resource (consomme, DIO, TAP, or null).
    endpoint: Resource<NetEndpointHandleKind>,
    // Optional cap on the number of device queues.
    max_queues: Option<u16>,
    // Optional PCIe port name from the CLI; not used by the vmbus
    // (netvsp) path — see `into_netvsp_handle`.
    pcie_port: Option<String>,
}
1728
1729impl NicConfig {
1730    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1731        (
1732            self.vtl,
1733            netvsp_resources::NetvspHandle {
1734                instance_id: self.instance_id,
1735                mac_address: self.mac_address,
1736                endpoint: self.endpoint,
1737                max_queues: self.max_queues,
1738            }
1739            .into_resource(),
1740        )
1741    }
1742}
1743
/// Intermediate item produced while flattening a `DiskCliKind` tree: either a
/// disk layer description or a complete disk resource.
enum LayerOrDisk {
    /// A layer to be stacked into a layered disk.
    Layer(DiskLayerDescription),
    /// A fully formed disk resource.
    Disk(Resource<DiskHandleKind>),
}
1748
1749fn disk_open(disk_cli: &DiskCliKind, read_only: bool) -> anyhow::Result<Resource<DiskHandleKind>> {
1750    let mut layers = Vec::new();
1751    disk_open_inner(disk_cli, read_only, &mut layers)?;
1752    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1753        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1754            unreachable!()
1755        };
1756        Ok(disk)
1757    } else {
1758        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1759            layers: layers
1760                .into_iter()
1761                .map(|layer| match layer {
1762                    LayerOrDisk::Layer(layer) => layer,
1763                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1764                        layer: DiskLayerHandle(disk).into_resource(),
1765                        read_cache: false,
1766                        write_through: false,
1767                    },
1768                })
1769                .collect(),
1770        }))
1771    }
1772}
1773
/// Recursively flattens `disk_cli` into `layers`.
///
/// Diff-style kinds (MemoryDiff, SqliteDiff, AutoCacheSqlite) push their
/// delta/cache layer first and then recurse into the backing disk, so the
/// vector is ordered from top layer down to the base.
fn disk_open_inner(
    disk_cli: &DiskCliKind,
    read_only: bool,
    layers: &mut Vec<LayerOrDisk>,
) -> anyhow::Result<()> {
    // Small shorthands for wrapping resources in the LayerOrDisk variants.
    fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
        LayerOrDisk::Layer(layer.into_resource().into())
    }
    fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
        LayerOrDisk::Disk(disk.into_resource())
    }
    match disk_cli {
        &DiskCliKind::Memory(len) => {
            layers.push(layer(RamDiskLayerHandle {
                len: Some(len),
                sector_size: None,
            }));
        }
        DiskCliKind::File {
            path,
            create_with_len,
        } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
            create_disk_type(path, *size)
                .with_context(|| format!("failed to create {}", path.display()))?
        } else {
            open_disk_type(path, read_only)
                .with_context(|| format!("failed to open {}", path.display()))?
        })),
        DiskCliKind::Blob { kind, url } => {
            layers.push(disk(disk_backend_resources::BlobDiskHandle {
                url: url.to_owned(),
                format: match kind {
                    cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
                    cli_args::BlobKind::Vhd1 => disk_backend_resources::BlobDiskFormat::FixedVhd1,
                },
            }))
        }
        DiskCliKind::MemoryDiff(inner) => {
            // RAM delta layer on top; the backing disk is opened read-only.
            layers.push(layer(RamDiskLayerHandle {
                len: None,
                sector_size: None,
            }));
            disk_open_inner(inner, true, layers)?;
        }
        DiskCliKind::PersistentReservationsWrapper(inner) => layers.push(disk(
            disk_backend_resources::DiskWithReservationsHandle(disk_open(inner, read_only)?),
        )),
        DiskCliKind::DelayDiskWrapper {
            delay_ms,
            disk: inner,
        } => layers.push(disk(DelayDiskHandle {
            delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
            disk: disk_open(inner, read_only)?,
        })),
        DiskCliKind::Crypt {
            disk: inner,
            cipher,
            key_file,
        } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
            disk: disk_open(inner, read_only)?,
            cipher: match cipher {
                cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
            },
            key: fs_err::read(key_file).context("failed to read key file")?,
        })),
        DiskCliKind::Sqlite {
            path,
            create_with_len,
        } => {
            // FUTURE: this code should be responsible for opening
            // file-handle(s) itself, and passing them into sqlite via a custom
            // vfs. For now though - simply check if the file exists or not, and
            // perform early validation of filesystem-level create options.
            match (create_with_len.is_some(), path.exists()) {
                (true, true) => anyhow::bail!(
                    "cannot create new sqlite disk at {} - file already exists",
                    path.display()
                ),
                (false, false) => anyhow::bail!(
                    "cannot open sqlite disk at {} - file not found",
                    path.display()
                ),
                _ => {}
            }

            layers.push(layer(SqliteDiskLayerHandle {
                dbhd_path: path.display().to_string(),
                format_dbhd: create_with_len.map(|len| {
                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                        logically_read_only: false,
                        len: Some(len),
                    }
                }),
            }));
        }
        DiskCliKind::SqliteDiff { path, create, disk } => {
            // FUTURE: this code should be responsible for opening
            // file-handle(s) itself, and passing them into sqlite via a custom
            // vfs. For now though - simply check if the file exists or not, and
            // perform early validation of filesystem-level create options.
            match (create, path.exists()) {
                (true, true) => anyhow::bail!(
                    "cannot create new sqlite disk at {} - file already exists",
                    path.display()
                ),
                (false, false) => anyhow::bail!(
                    "cannot open sqlite disk at {} - file not found",
                    path.display()
                ),
                _ => {}
            }

            layers.push(layer(SqliteDiskLayerHandle {
                dbhd_path: path.display().to_string(),
                format_dbhd: create.then_some(
                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                        logically_read_only: false,
                        len: None,
                    },
                ),
            }));
            // The backing disk under a sqlite diff is always read-only.
            disk_open_inner(disk, true, layers)?;
        }
        DiskCliKind::AutoCacheSqlite {
            cache_path,
            key,
            disk,
        } => {
            // Read-cache layer on top of the (recursively opened) disk.
            layers.push(LayerOrDisk::Layer(DiskLayerDescription {
                read_cache: true,
                write_through: false,
                layer: SqliteAutoCacheDiskLayerHandle {
                    cache_path: cache_path.clone(),
                    cache_key: key.clone(),
                }
                .into_resource(),
            }));
            disk_open_inner(disk, read_only, layers)?;
        }
    }
    Ok(())
}
1916
1917/// Get the system page size.
1918pub(crate) fn system_page_size() -> u32 {
1919    sparse_mmap::SparseMapping::page_size() as u32
1920}
1921
/// The guest architecture string, derived from the compile-time `guest_arch` cfg.
/// Any non-x86_64 guest architecture is reported as "aarch64".
pub(crate) const GUEST_ARCH: &str = if cfg!(guest_arch = "x86_64") {
    "x86_64"
} else {
    "aarch64"
};
1928
1929/// Open a snapshot directory and validate it against the current VM config.
1930/// Returns the shared memory fd (from memory.bin) and the saved device state.
1931fn prepare_snapshot_restore(
1932    snapshot_dir: &Path,
1933    opt: &Options,
1934) -> anyhow::Result<(
1935    openvmm_defs::worker::SharedMemoryFd,
1936    mesh::payload::message::ProtobufMessage,
1937)> {
1938    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
1939
1940    // Validate manifest against current VM config.
1941    openvmm_helpers::snapshot::validate_manifest(
1942        &manifest,
1943        GUEST_ARCH,
1944        opt.memory,
1945        opt.processors,
1946        system_page_size(),
1947    )?;
1948
1949    // Open memory.bin (existing file, no create, no resize).
1950    let memory_file = fs_err::OpenOptions::new()
1951        .read(true)
1952        .write(true)
1953        .open(snapshot_dir.join("memory.bin"))?;
1954
1955    // Validate file size matches expected memory size.
1956    let file_size = memory_file.metadata()?.len();
1957    if file_size != manifest.memory_size_bytes {
1958        anyhow::bail!(
1959            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
1960            manifest.memory_size_bytes,
1961        );
1962    }
1963
1964    let shared_memory_fd =
1965        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
1966
1967    // Reconstruct ProtobufMessage from the saved state bytes.
1968    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
1969    // back to ProtobufMessage.
1970    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
1971        .context("failed to decode saved state from snapshot")?;
1972
1973    Ok((shared_memory_fd, state_msg))
1974}
1975
/// Main body of the control process.
///
/// Dispatches to the one-shot modes (worker host, saved-state proto dump,
/// console relay, ttrpc/grpc server) before falling through to the
/// interactive control loop. On success, `pidfile_path` receives the path of
/// the written pidfile, if any.
fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
    #[cfg(windows)]
    pal::windows::disable_hard_error_dialog();

    tracing_init::enable_tracing()?;

    // Try to run as a worker host.
    // On success the worker runs to completion and then exits the process (does
    // not return). Any worker host setup errors are returned and bubbled up.
    meshworker::run_vmm_mesh_host()?;

    let opt = Options::parse();
    if let Some(path) = &opt.write_saved_state_proto {
        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
            .write_to_path(path)
            .context("failed to write protobuf descriptors")?;
        return Ok(());
    }

    // Record this process's pid for external tooling; remember the path so
    // the caller can clean it up on exit.
    if let Some(ref path) = opt.pidfile {
        std::fs::write(path, format!("{}\n", std::process::id()))
            .context("failed to write pidfile")?;
        *pidfile_path = Some(path.clone());
    }

    if let Some(path) = opt.relay_console_path {
        let console_title = opt.relay_console_title.unwrap_or_default();
        return console_relay::relay_console(&path, console_title.as_str());
    }

    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
        return block_on(async {
            // Remove any stale socket left behind by a previous run.
            let _ = std::fs::remove_file(path);
            let listener =
                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;

            let transport = if opt.ttrpc.is_some() {
                ttrpc::RpcTransport::Ttrpc
            } else {
                ttrpc::RpcTransport::Grpc
            };

            // This is a local launch
            let mut handle =
                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
                    listener,
                    transport,
                })
                .await?;

            tracing::info!(%transport, path = %path.display(), "listening");

            // Signal the parent process that the server is ready.
            pal::close_stdout().context("failed to close stdout")?;

            handle.join().await?;

            Ok(())
        });
    }

    // Default mode: run the interactive control loop on the local pool.
    DefaultPool::run_with(async |driver| {
        let mesh = VmmMesh::new(&driver, opt.single_process)?;
        run_control(&driver, mesh, opt).await
    })
}
2043
2044fn new_hvsock_service_id(port: u32) -> Guid {
2045    // This GUID is an embedding of the AF_VSOCK port into an
2046    // AF_HYPERV service ID.
2047    Guid {
2048        data1: port,
2049        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2050    }
2051}
2052
/// Runs the interactive control loop: launches the optional VNC and gdbstub
/// workers, launches the VM worker (fresh or restored from a snapshot), then
/// drives the REPL until it exits.
async fn run_control(driver: &DefaultDriver, mesh: VmmMesh, opt: Options) -> anyhow::Result<()> {
    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, &mesh, &opt).await?;

    // Spin up the VNC worker (framebuffer + input forwarding) if requested.
    let mut vnc_worker = None;
    if opt.gfx || opt.vnc {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;

        let input_send = vm_config.input.sender();
        let framebuffer = resources
            .framebuffer_access
            .take()
            .expect("synth video enabled");

        let vnc_host = mesh
            .make_host("vnc", None)
            .await
            .context("spawning vnc process failed")?;

        vnc_worker = Some(
            vnc_host
                .launch_worker(
                    vnc_worker_defs::VNC_WORKER_TCP,
                    VncParameters {
                        listener,
                        framebuffer,
                        input_send,
                    },
                )
                .await?,
        )
    }

    // spin up the debug worker
    let gdb_worker = if let Some(port) = opt.gdb {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
            .with_context(|| format!("binding to gdb port {}", port))?;

        // The VM worker services debugger requests over this channel.
        let (req_tx, req_rx) = mesh::channel();
        vm_config.debugger_rpc = Some(req_rx);

        let gdb_host = mesh
            .make_host("gdb", None)
            .await
            .context("spawning gdbstub process failed")?;

        Some(
            gdb_host
                .launch_worker(
                    debug_worker_defs::DEBUGGER_WORKER,
                    debug_worker_defs::DebuggerParameters {
                        listener,
                        req_chan: req_tx,
                        vp_count: vm_config.processor_topology.proc_count,
                        target_arch: if cfg!(guest_arch = "x86_64") {
                            debug_worker_defs::TargetArch::X86_64
                        } else {
                            debug_worker_defs::TargetArch::Aarch64
                        },
                    },
                )
                .await
                .context("failed to launch gdbstub worker")?,
        )
    } else {
        None
    };

    // spin up the VM
    let (vm_rpc, rpc_recv) = mesh::channel();
    let (notify_send, notify_recv) = mesh::channel();
    let vm_worker = {
        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;

        // Either restore memory and device state from a snapshot, or set up
        // optional file-backed memory for a fresh boot.
        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
            (Some(fd), Some(state_msg))
        } else {
            let shared_memory = opt
                .memory_backing_file
                .as_ref()
                .map(|path| {
                    openvmm_helpers::shared_memory::open_memory_backing_file(path, opt.memory)
                })
                .transpose()?;
            (shared_memory, None)
        };

        let params = VmWorkerParameters {
            hypervisor: match &opt.hypervisor {
                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
            },
            cfg: vm_config,
            saved_state,
            shared_memory,
            rpc: rpc_recv,
            notify: notify_send,
        };
        vm_host
            .launch_worker(VM_WORKER, params)
            .await
            .context("failed to launch vm worker")?
    };

    if opt.restore_snapshot.is_some() {
        tracing::info!("restoring VM from snapshot");
    }

    if !opt.paused {
        vm_rpc.call(VmRpc::Resume, ()).await?;
    }

    // Diagnostics client for the paravisor, dialed lazily over hvsock.
    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
        driver.clone(),
        DiagDialer {
            driver: driver.clone(),
            vm_rpc: vm_rpc.clone(),
            openhcl_vtl: if opt.vtl2 {
                DeviceVtl::Vtl2
            } else {
                DeviceVtl::Vtl0
            },
        },
    ));

    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());

    // Create channels between the REPL and VmController.
    let (vm_controller_send, vm_controller_recv) = mesh::channel();
    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();

    let has_vtl2 = resources.vtl2_settings.is_some();

    // Build the VmController with exclusive resources.
    let controller = vm_controller::VmController {
        mesh,
        vm_worker,
        vnc_worker,
        gdb_worker,
        diag_inspector,
        vtl2_settings: resources.vtl2_settings,
        ged_rpc: resources.ged_rpc.clone(),
        vm_rpc: vm_rpc.clone(),
        paravisor_diag,
        igvm_path: opt.igvm.clone(),
        memory_backing_file: opt.memory_backing_file.clone(),
        memory: opt.memory,
        processors: opt.processors,
        log_file: opt.log_file.clone(),
    };

    // Spawn the VmController as a task.
    let controller_task = driver.spawn(
        "vm-controller",
        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
    );

    // Run the REPL with shareable resources.
    let repl_result = repl::run_repl(
        driver,
        repl::ReplResources {
            vm_rpc,
            vm_controller: vm_controller_send,
            vm_controller_events: vm_controller_event_recv,
            scsi_rpc: resources.scsi_rpc,
            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
            shutdown_ic: resources.shutdown_ic,
            kvp_ic: resources.kvp_ic,
            console_in: resources.console_in,
            has_vtl2,
        },
    )
    .await;

    // Wait for the controller task to finish (it stops the VM worker and
    // shuts down the mesh).
    controller_task.await;

    repl_result
}
2234
/// [`mesh_rpc::client::Dial`] implementation that connects to the guest's
/// diagnostics service over hvsock via the VM worker.
struct DiagDialer {
    driver: DefaultDriver,
    vm_rpc: mesh::Sender<VmRpc>,
    // VTL to connect to: Vtl2 when --vtl2 is passed, otherwise Vtl0
    // (see run_control).
    openhcl_vtl: DeviceVtl,
}
2240
impl mesh_rpc::client::Dial for DiagDialer {
    type Stream = PolledSocket<unix_socket::UnixStream>;

    // Connects to AF_VSOCK port 1 in the configured VTL, asking the VM worker
    // to establish the hvsock connection on our behalf.
    async fn dial(&mut self) -> io::Result<Self::Stream> {
        let service_id = new_hvsock_service_id(1);
        let socket = self
            .vm_rpc
            .call_failable(
                VmRpc::ConnectHvsock,
                (
                    // Bound the connect attempt so a wedged guest can't hang
                    // the dialer indefinitely.
                    CancelContext::new().with_timeout(Duration::from_secs(2)),
                    service_id,
                    self.openhcl_vtl,
                ),
            )
            .await
            .map_err(io::Error::other)?;

        PolledSocket::new(&self.driver, socket)
    }
}
2262
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
pub(crate) struct DiagInspector(DiagInspectorInner);

/// Lazily-started state backing [`DiagInspector`].
enum DiagInspectorInner {
    /// The background task has not been spawned yet.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// The background task is running and accepting deferred requests.
    Started {
        send: mesh::Sender<inspect::Deferred>,
        _task: Task<()>,
    },
    /// Transient placeholder used while swapping states in `start`.
    Invalid,
}
2279
impl DiagInspector {
    /// Creates an inspector that will lazily spawn its background task on the
    /// first inspect request.
    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
        Self(DiagInspectorInner::NotStarted(driver, diag_client))
    }

    /// Ensures the background task is running and returns the channel used to
    /// forward deferred inspect requests to it.
    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
        loop {
            match self.0 {
                DiagInspectorInner::NotStarted { .. } => {
                    // Move the driver and client out of `self`, leaving the
                    // `Invalid` placeholder behind while the started state is
                    // constructed.
                    let DiagInspectorInner::NotStarted(driver, client) =
                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
                    else {
                        unreachable!()
                    };
                    let (send, recv) = mesh::channel();
                    let task = driver.clone().spawn("diag-inspect", async move {
                        Self::run(&client, recv).await
                    });

                    self.0 = DiagInspectorInner::Started { send, _task: task };
                    // Loop around to return the sender from the new state.
                }
                DiagInspectorInner::Started { ref send, .. } => break send,
                DiagInspectorInner::Invalid => unreachable!(),
            }
        }
    }

    /// Services deferred inspect requests: forwards each to the diag client
    /// and completes it with the result (or an error node on failure).
    async fn run(
        diag_client: &diag_client::DiagClient,
        mut recv: mesh::Receiver<inspect::Deferred>,
    ) {
        while let Some(deferred) = recv.next().await {
            let info = deferred.external_request();
            let result = match info.request_type {
                inspect::ExternalRequestType::Inspect { depth } => {
                    if depth == 0 {
                        // Depth exhausted: report the node without recursing.
                        Ok(inspect::Node::Unevaluated)
                    } else {
                        // TODO: Support taking timeouts from the command line
                        diag_client
                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
                            .await
                    }
                }
                inspect::ExternalRequestType::Update { value } => {
                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
                }
            };
            deferred.complete_external(
                result.unwrap_or_else(|err| {
                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
                }),
                inspect::SensitivityLevel::Unspecified,
            )
        }
    }
}
2337
impl InspectMut for DiagInspector {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        // Lazily start the background task, then hand it the deferred request.
        self.start().send(req.defer());
    }
}