Skip to main content

openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use cxl_spec::test::CxlTestDeviceHandle;
42use disk_backend_resources::DelayDiskHandle;
43use disk_backend_resources::DiskLayerDescription;
44use disk_backend_resources::layer::DiskLayerHandle;
45use disk_backend_resources::layer::RamDiskLayerHandle;
46use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
47use disk_backend_resources::layer::SqliteDiskLayerHandle;
48use floppy_resources::FloppyDiskConfig;
49use framebuffer::FRAMEBUFFER_SIZE;
50use framebuffer::FramebufferAccess;
51use futures::AsyncReadExt;
52use futures::AsyncWrite;
53use futures::StreamExt;
54use futures::executor::block_on;
55use futures::io::AllowStdIo;
56use gdma_resources::GdmaDeviceHandle;
57use gdma_resources::VportDefinition;
58use guid::Guid;
59use input_core::MultiplexedInputHandle;
60use inspect::InspectMut;
61use io::Read;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
70use openvmm_defs::config::DeviceVtl;
71use openvmm_defs::config::EfiDiagnosticsLogLevelType;
72use openvmm_defs::config::HypervisorConfig;
73use openvmm_defs::config::LateMapVtl0MemoryPolicy;
74use openvmm_defs::config::LoadMode;
75use openvmm_defs::config::MemoryConfig;
76use openvmm_defs::config::PcieDeviceConfig;
77use openvmm_defs::config::PcieMmioRangeConfig;
78use openvmm_defs::config::PcieRootComplexConfig;
79use openvmm_defs::config::PcieRootPortConfig;
80use openvmm_defs::config::PcieSwitchConfig;
81use openvmm_defs::config::ProcessorTopologyConfig;
82use openvmm_defs::config::RootComplexCxlConfig;
83use openvmm_defs::config::SerialInformation;
84use openvmm_defs::config::VirtioBus;
85use openvmm_defs::config::VmbusConfig;
86use openvmm_defs::config::VpciDeviceConfig;
87use openvmm_defs::config::Vtl2Config;
88use openvmm_defs::rpc::VmRpc;
89use openvmm_defs::worker::VM_WORKER;
90use openvmm_defs::worker::VmWorkerParameters;
91use openvmm_helpers::disk::OpenDiskOptions;
92use openvmm_helpers::disk::create_disk_type;
93use openvmm_helpers::disk::open_disk_type;
94use pal_async::DefaultDriver;
95use pal_async::DefaultPool;
96use pal_async::socket::PolledSocket;
97use pal_async::task::Spawn;
98use pal_async::task::Task;
99use serial_16550_resources::ComPort;
100use serial_core::resources::DisconnectedSerialBackendHandle;
101use sparse_mmap::alloc_shared_memory;
102use std::cell::RefCell;
103use std::collections::BTreeMap;
104use std::fmt::Write as _;
105use std::future::pending;
106use std::io;
107#[cfg(unix)]
108use std::io::IsTerminal;
109use std::io::Write;
110use std::net::TcpListener;
111use std::path::Path;
112use std::path::PathBuf;
113use std::sync::Arc;
114use std::thread;
115use std::time::Duration;
116use storvsp_resources::ScsiControllerRequest;
117use tpm_resources::TpmDeviceHandle;
118use tpm_resources::TpmRegisterLayout;
119use uidevices_resources::SynthKeyboardHandle;
120use uidevices_resources::SynthMouseHandle;
121use uidevices_resources::SynthVideoHandle;
122use video_core::SharedFramebufferHandle;
123use virtio_resources::VirtioPciDeviceHandle;
124use vm_manifest_builder::BaseChipsetType;
125use vm_manifest_builder::MachineArch;
126use vm_manifest_builder::VmChipsetResult;
127use vm_manifest_builder::VmManifestBuilder;
128use vm_resource::IntoResource;
129use vm_resource::Resource;
130use vm_resource::kind::DiskHandleKind;
131use vm_resource::kind::DiskLayerHandleKind;
132use vm_resource::kind::NetEndpointHandleKind;
133use vm_resource::kind::VirtioDeviceHandle;
134use vm_resource::kind::VmbusDeviceHandleKind;
135use vmbus_serial_resources::VmbusSerialDeviceHandle;
136use vmbus_serial_resources::VmbusSerialPort;
137use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
138use vmgs_resources::GuestStateEncryptionPolicy;
139use vmgs_resources::VmgsDisk;
140use vmgs_resources::VmgsFileHandle;
141use vmgs_resources::VmgsResource;
142use vmotherboard::ChipsetDeviceHandle;
143use vnc_worker_defs::VncParameters;
144
145/// RAII guard that removes the pidfile when dropped. Ensures the pidfile is
146/// cleaned up even if [`do_main`] panics.
147struct PidfileGuard(Option<PathBuf>);
148
149impl Drop for PidfileGuard {
150    fn drop(&mut self) {
151        if let Some(path) = &self.0 {
152            let _ = fs_err::remove_file(path);
153        }
154    }
155}
156
157pub fn openvmm_main() {
158    // Save the current state of the terminal so we can restore it back to
159    // normal before exiting.
160    #[cfg(unix)]
161    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
162
163    let mut pidfile_guard = PidfileGuard(None);
164    let exit_code = match do_main(&mut pidfile_guard.0) {
165        Ok(_) => 0,
166        Err(err) => {
167            eprintln!("fatal error: {:?}", err);
168            1
169        }
170    };
171
172    // Restore the terminal to its initial state.
173    #[cfg(unix)]
174    if let Some(orig_termios) = orig_termios {
175        term::set_termios(orig_termios);
176    }
177
178    // Clean up the pidfile before terminating, since pal::process::terminate
179    // skips destructors.
180    drop(pidfile_guard);
181
182    // Terminate the process immediately without graceful shutdown of DLLs or
183    // C++ destructors or anything like that. This is all unnecessary and saves
184    // time on Windows.
185    //
186    // Do flush stdout, though, since there may be buffered data.
187    let _ = io::stdout().flush();
188    pal::process::terminate(exit_code);
189}
190
/// Host-side handles gathered while translating the command line into a VM
/// configuration. `vm_config_from_command_line` returns one of these next to
/// the `Config` it builds.
///
/// The struct is created with `Default`, so every field starts out empty and
/// is filled in only when the corresponding option is used.
#[derive(Default)]
struct VmResources {
    /// Write half of the console serial connection; set when one of the
    /// serial ports is configured as the interactive console.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Framebuffer access handle.
    /// NOTE(review): populated outside the visible part of this file -- confirm
    /// when it is set.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` requests (presumably to the shutdown
    /// integration component -- confirm).
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` requests (presumably to the KVP integration
    /// component -- confirm).
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest` messages.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest` messages; by its name, for the NVMe
    /// controller offered to VTL2 -- TODO confirm.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for `GuestEmulationRequest` messages.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings, when any were produced.
    /// NOTE(review): assigned outside the visible part of this file.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Switch ports created for each `kernel_vmnic` option (Windows only).
    /// Presumably stored here to keep the ports alive -- confirm.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
204
205struct ConsoleState<'a> {
206    device: &'a str,
207    input: Box<dyn AsyncWrite + Unpin + Send>,
208}
209
210/// Build a flat list of switches with their parent port assignments.
211///
212/// This function converts hierarchical CLI switch definitions into a flat list
213/// where each switch specifies its parent port directly.
214fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
215    all_switches
216        .iter()
217        .map(|switch_cli| PcieSwitchConfig {
218            name: switch_cli.name.clone(),
219            num_downstream_ports: switch_cli.num_downstream_ports,
220            parent_port: switch_cli.port_name.clone(),
221            hotplug: switch_cli.hotplug,
222            acs_capabilities_supported: switch_cli.acs_capabilities_supported,
223        })
224        .collect()
225}
226
227async fn vm_config_from_command_line(
228    spawner: impl Spawn,
229    mesh: &VmmMesh,
230    opt: &Options,
231) -> anyhow::Result<(Config, VmResources)> {
232    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
233    // Ensure the serial driver stays alive with no tasks.
234    serial_driver.spawn("leak", pending::<()>()).detach();
235
236    let openhcl_vtl = if opt.vtl2 {
237        DeviceVtl::Vtl2
238    } else {
239        DeviceVtl::Vtl0
240    };
241
242    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
243    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
244        Ok(match cli_cfg {
245            SerialConfigCli::Console => {
246                if let Some(console_state) = console_state.borrow().as_ref() {
247                    bail!("console already set by {}", console_state.device);
248                }
249                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
250                let (serial_read, serial_write) = AsyncReadExt::split(serial);
251                *console_state.borrow_mut() = Some(ConsoleState {
252                    device,
253                    input: Box::new(serial_write),
254                });
255                thread::Builder::new()
256                    .name(name.to_owned())
257                    .spawn(move || {
258                        let _ = block_on(futures::io::copy(
259                            serial_read,
260                            &mut AllowStdIo::new(term::raw_stdout()),
261                        ));
262                    })
263                    .unwrap();
264                Some(config)
265            }
266            SerialConfigCli::Stderr => {
267                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
268                thread::Builder::new()
269                    .name(name.to_owned())
270                    .spawn(move || {
271                        let _ = block_on(futures::io::copy(
272                            serial,
273                            &mut AllowStdIo::new(term::raw_stderr()),
274                        ));
275                    })
276                    .unwrap();
277                Some(config)
278            }
279            SerialConfigCli::File(path) => {
280                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
281                let file = fs_err::File::create(path).context("failed to create file")?;
282
283                thread::Builder::new()
284                    .name(name.to_owned())
285                    .spawn(move || {
286                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
287                    })
288                    .unwrap();
289                Some(config)
290            }
291            SerialConfigCli::None => None,
292            SerialConfigCli::Pipe(path) => {
293                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
294            }
295            SerialConfigCli::Tcp(addr) => {
296                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
297            }
298            SerialConfigCli::NewConsole(app, window_title) => {
299                let path = console_relay::random_console_path();
300                let config =
301                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
302                let window_title =
303                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
304
305                console_relay::launch_console(
306                    app.or_else(openvmm_terminal_app).as_deref(),
307                    &path,
308                    ConsoleLaunchOptions {
309                        window_title: Some(window_title),
310                    },
311                )
312                .context("failed to launch console")?;
313
314                Some(config)
315            }
316        })
317    };
318
319    let mut vmbus_devices = Vec::new();
320
321    let serial0_cfg = setup_serial(
322        "com1",
323        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
324        if cfg!(guest_arch = "x86_64") {
325            "ttyS0"
326        } else {
327            "ttyAMA0"
328        },
329    )?;
330    let serial1_cfg = setup_serial(
331        "com2",
332        opt.com2.clone().unwrap_or(SerialConfigCli::None),
333        if cfg!(guest_arch = "x86_64") {
334            "ttyS1"
335        } else {
336            "ttyAMA1"
337        },
338    )?;
339    let serial2_cfg = setup_serial(
340        "com3",
341        opt.com3.clone().unwrap_or(SerialConfigCli::None),
342        if cfg!(guest_arch = "x86_64") {
343            "ttyS2"
344        } else {
345            "ttyAMA2"
346        },
347    )?;
348    let serial3_cfg = setup_serial(
349        "com4",
350        opt.com4.clone().unwrap_or(SerialConfigCli::None),
351        if cfg!(guest_arch = "x86_64") {
352            "ttyS3"
353        } else {
354            "ttyAMA3"
355        },
356    )?;
357    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
358        "vmbus_com1",
359        opt.vmbus_com1_serial
360            .clone()
361            .unwrap_or(SerialConfigCli::None),
362        "vmbus_com1",
363    )? {
364        vmbus_devices.push((
365            openhcl_vtl,
366            VmbusSerialDeviceHandle {
367                port: VmbusSerialPort::Com1,
368                backend: vmbus_com1_cfg,
369            }
370            .into_resource(),
371        ));
372        true
373    } else {
374        false
375    };
376    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
377        "vmbus_com2",
378        opt.vmbus_com2_serial
379            .clone()
380            .unwrap_or(SerialConfigCli::None),
381        "vmbus_com2",
382    )? {
383        vmbus_devices.push((
384            openhcl_vtl,
385            VmbusSerialDeviceHandle {
386                port: VmbusSerialPort::Com2,
387                backend: vmbus_com2_cfg,
388            }
389            .into_resource(),
390        ));
391        true
392    } else {
393        false
394    };
395    let debugcon_cfg = setup_serial(
396        "debugcon",
397        opt.debugcon
398            .clone()
399            .map(|cfg| cfg.serial)
400            .unwrap_or(SerialConfigCli::None),
401        "debugcon",
402    )?;
403
404    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
405        setup_serial("virtio-console", serial_cfg, "hvc0")?
406    } else {
407        None
408    };
409
410    let mut resources = VmResources::default();
411    let mut console_str = "";
412    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
413        resources.console_in = Some(input);
414        console_str = device;
415    }
416
417    if opt.shared_memory {
418        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
419    }
420    if opt.deprecated_prefetch {
421        tracing::warn!("--prefetch is deprecated; use --memory prefetch=on");
422    }
423    if opt.deprecated_private_memory {
424        tracing::warn!("--private-memory is deprecated; use --memory shared=off");
425    }
426    if opt.deprecated_thp {
427        tracing::warn!("--thp is deprecated; use --memory shared=off,thp=on");
428    }
429    if opt.deprecated_memory_backing_file.is_some() {
430        tracing::warn!("--memory-backing-file is deprecated; use --memory file=<path>");
431    }
432
433    opt.validate_memory_options()?;
434
435    const MAX_PROCESSOR_COUNT: u32 = 1024;
436
437    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
438        bail!("invalid proc count: {}", opt.processors);
439    }
440
441    // Total SCSI channel count should not exceed the processor count
442    // (at most, one channel per VP).
443    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
444        bail!(
445            "invalid SCSI sub-channel count: requested {}, max {}",
446            opt.scsi_sub_channels,
447            MAX_PROCESSOR_COUNT - 1
448        );
449    }
450
451    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
452
453    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
454    for &cli_args::DiskCli {
455        vtl,
456        ref kind,
457        read_only,
458        is_dvd,
459        underhill,
460        ref pcie_port,
461    } in &opt.disk
462    {
463        if pcie_port.is_some() {
464            anyhow::bail!("`--disk` is incompatible with PCIe");
465        }
466
467        storage
468            .add(
469                vtl,
470                underhill,
471                storage_builder::DiskLocation::Scsi(None),
472                kind,
473                is_dvd,
474                read_only,
475            )
476            .await?;
477    }
478
479    for &cli_args::IdeDiskCli {
480        ref kind,
481        read_only,
482        channel,
483        device,
484        is_dvd,
485    } in &opt.ide
486    {
487        storage
488            .add(
489                DeviceVtl::Vtl0,
490                None,
491                storage_builder::DiskLocation::Ide(channel, device),
492                kind,
493                is_dvd,
494                read_only,
495            )
496            .await?;
497    }
498
499    for &cli_args::DiskCli {
500        vtl,
501        ref kind,
502        read_only,
503        is_dvd,
504        underhill,
505        ref pcie_port,
506    } in &opt.nvme
507    {
508        storage
509            .add(
510                vtl,
511                underhill,
512                storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
513                kind,
514                is_dvd,
515                read_only,
516            )
517            .await?;
518    }
519
520    for &cli_args::DiskCli {
521        vtl,
522        ref kind,
523        read_only,
524        is_dvd,
525        ref underhill,
526        ref pcie_port,
527    } in &opt.virtio_blk
528    {
529        if underhill.is_some() {
530            anyhow::bail!("underhill not supported with virtio-blk");
531        }
532        storage
533            .add(
534                vtl,
535                None,
536                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
537                kind,
538                is_dvd,
539                read_only,
540            )
541            .await?;
542    }
543
544    let mut floppy_disks = Vec::new();
545    for disk in &opt.floppy {
546        let &cli_args::FloppyDiskCli {
547            ref kind,
548            read_only,
549        } = disk;
550        floppy_disks.push(FloppyDiskConfig {
551            disk_type: disk_open(kind, read_only).await?,
552            read_only,
553        });
554    }
555
556    let mut vpci_mana_nics = [(); 3].map(|()| None);
557    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
558    let mut underhill_nics = Vec::new();
559    let mut vpci_devices = Vec::new();
560
561    let mut nic_index = 0;
562    for cli_cfg in &opt.net {
563        if cli_cfg.pcie_port.is_some() {
564            anyhow::bail!("`--net` does not support PCIe");
565        }
566        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
567        if cli_cfg.underhill {
568            if !opt.no_alias_map {
569                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
570            }
571            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
572                let vpci_instance_id = Guid::new_random();
573                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
574                    instance_id: vpci_instance_id.to_string(),
575                    subordinate_instance_id: None,
576                    max_sub_channels: None,
577                });
578                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
579            });
580            mana.1.vports.push(VportDefinition {
581                mac_address: vport.mac_address,
582                endpoint: vport.endpoint,
583            });
584        } else {
585            vmbus_devices.push(vport.into_netvsp_handle());
586        }
587    }
588
589    if opt.nic {
590        let nic_config = parse_endpoint(
591            &NicConfigCli {
592                vtl: DeviceVtl::Vtl0,
593                endpoint: EndpointConfigCli::Consomme {
594                    cidr: None,
595                    host_fwd: Vec::new(),
596                },
597                max_queues: None,
598                underhill: false,
599                pcie_port: None,
600            },
601            &mut nic_index,
602            &mut resources,
603        )?;
604        vmbus_devices.push(nic_config.into_netvsp_handle());
605    }
606
607    // Build initial PCIe devices list from CLI options. Storage devices
608    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
609    let mut pcie_devices = Vec::new();
610    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
611        tracing::info!(
612            port_name = %cli_cfg.port_name,
613            socket_addr = ?cli_cfg.socket_addr,
614            "instantiating PCIe remote device"
615        );
616
617        // Generate a deterministic instance ID based on index
618        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
619            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
620        let instance_id = Guid {
621            data1: index as u32,
622            ..PCIE_REMOTE_BASE_INSTANCE_ID
623        };
624
625        pcie_devices.push(PcieDeviceConfig {
626            port_name: cli_cfg.port_name.clone(),
627            resource: pcie_remote_resources::PcieRemoteHandle {
628                instance_id,
629                socket_addr: cli_cfg.socket_addr.clone(),
630                hu: cli_cfg.hu,
631                controller: cli_cfg.controller,
632            }
633            .into_resource(),
634        });
635    }
636
637    #[cfg(windows)]
638    let mut kernel_vmnics = Vec::new();
639    #[cfg(windows)]
640    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
641        // Pick a random MAC address.
642        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
643        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
644
645        // Pick a fixed instance ID based on the index.
646        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
647        let instance_id = Guid {
648            data1: index as u32,
649            ..BASE_INSTANCE_ID
650        };
651
652        let switch_id = if switch_id == "default" {
653            DEFAULT_SWITCH
654        } else {
655            switch_id
656        };
657        let (port_id, port) = new_switch_port(switch_id)?;
658        resources.switch_ports.push(port);
659
660        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
661            instance_id,
662            mac_address: mac_address.into(),
663            switch_port_id: port_id,
664        });
665    }
666
667    for vport in &opt.mana {
668        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
669        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
670            (vtl, None) => {
671                &mut vpci_mana_nics[vtl]
672                    .get_or_insert_with(|| {
673                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
674                    })
675                    .1
676                    .vports
677            }
678            (0, Some(pcie_port)) => {
679                &mut pcie_mana_nics
680                    .entry(pcie_port)
681                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
682                    .vports
683            }
684            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
685        };
686        vport_array.push(VportDefinition {
687            mac_address: vport.mac_address,
688            endpoint: vport.endpoint,
689        });
690    }
691
692    vpci_devices.extend(
693        vpci_mana_nics
694            .into_iter()
695            .enumerate()
696            .filter_map(|(vtl, nic)| {
697                nic.map(|(instance_id, handle)| VpciDeviceConfig {
698                    vtl: match vtl {
699                        0 => DeviceVtl::Vtl0,
700                        1 => DeviceVtl::Vtl1,
701                        2 => DeviceVtl::Vtl2,
702                        _ => unreachable!(),
703                    },
704                    instance_id,
705                    resource: handle.into_resource(),
706                })
707            }),
708    );
709
710    pcie_devices.extend(
711        pcie_mana_nics
712            .into_iter()
713            .map(|(pcie_port, handle)| PcieDeviceConfig {
714                port_name: pcie_port,
715                resource: handle.into_resource(),
716            }),
717    );
718
719    for cxl_test in &opt.cxl_test {
720        pcie_devices.push(PcieDeviceConfig {
721            port_name: cxl_test.pcie_port.clone(),
722            resource: CxlTestDeviceHandle {
723                hdm_size_bytes: cxl_test.hdm_size,
724            }
725            .into_resource(),
726        });
727    }
728
729    #[cfg(guest_arch = "aarch64")]
730    let arch = MachineArch::Aarch64;
731    #[cfg(guest_arch = "x86_64")]
732    let arch = MachineArch::X86_64;
733
734    let mut pcie_root_complexes = Vec::new();
735    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
736        let ports: Vec<PcieRootPortConfig> = opt
737            .pcie_root_port
738            .iter()
739            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
740            .map(|port_cli| PcieRootPortConfig {
741                name: port_cli.name.clone(),
742                hotplug: port_cli.hotplug,
743                acs_capabilities_supported: port_cli.acs_capabilities_supported,
744                cxl: port_cli.cxl,
745            })
746            .collect();
747
748        const ONE_MB: u64 = 1024 * 1024;
749        // Keep all PCI windows 1MB-granular to match layout and downstream placement rules.
750        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
751        let high_mmio_size = rc_cli
752            .high_mmio
753            .checked_next_multiple_of(ONE_MB)
754            .context("high mmio rounding error")?;
755
756        // Count CXL-capable ports under the root bus. If the root bus has CXL root ports, it needs CHBCR.
757        let cxl_port_count = ports.iter().filter(|port| port.cxl).count() as u64;
758
759        let cxl = if cxl_port_count != 0 {
760            Some(RootComplexCxlConfig {
761                hdm_size: rc_cli.hdm,
762                hdm_window_restrictions: rc_cli.hdm_window_restrictions.bits(),
763            })
764        } else {
765            None
766        };
767        pcie_root_complexes.push(PcieRootComplexConfig {
768            index: i as u32,
769            name: rc_cli.name.clone(),
770            segment: rc_cli.segment,
771            start_bus: rc_cli.start_bus,
772            end_bus: rc_cli.end_bus,
773            low_mmio: PcieMmioRangeConfig::Dynamic {
774                size: low_mmio_size,
775            },
776            high_mmio: PcieMmioRangeConfig::Dynamic {
777                size: high_mmio_size,
778            },
779            cxl,
780            ports,
781        });
782    }
783
784    let pcie_switches = build_switch_list(&opt.pcie_switch);
785
786    #[cfg(target_os = "linux")]
787    let vfio_pcie_devices: Vec<PcieDeviceConfig> = {
788        use std::collections::HashMap;
789        use vm_resource::IntoResource;
790
791        // Process --iommu flags: open /dev/iommu for each declared context.
792        let mut iommu_map: HashMap<String, std::fs::File> = HashMap::new();
793        for iommu_cli in &opt.iommu {
794            anyhow::ensure!(
795                !iommu_map.contains_key(&iommu_cli.id),
796                "duplicate --iommu id={}",
797                iommu_cli.id
798            );
799            let file = std::fs::OpenOptions::new()
800                .read(true)
801                .write(true)
802                .open("/dev/iommu")
803                .context("failed to open /dev/iommu (is iommufd available?)")?;
804            iommu_map.insert(iommu_cli.id.clone(), file);
805        }
806
807        opt.vfio
808            .iter()
809            .map(|cli_cfg| {
810                let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
811
812                if let Some(iommu_id) = &cli_cfg.iommu {
813                    // cdev + iommufd path
814                    let iommufd = iommu_map.get(iommu_id).with_context(|| {
815                        format!(
816                            "--vfio device {} references iommu={iommu_id}, \
817                             but no --iommu id={iommu_id} was specified",
818                            cli_cfg.pci_id
819                        )
820                    })?;
821                    // Clone the iommufd fd so the per-iommu manager can own it.
822                    // The first device for a given iommu ID uses the cloned fd
823                    // to create the IoasManager; subsequent devices reuse the
824                    // existing manager and the cloned fd is dropped.
825                    let iommufd = iommufd.try_clone().with_context(|| {
826                        format!("failed to dup iommufd fd for iommu={iommu_id}")
827                    })?;
828
829                    // Open the cdev device node.
830                    let vfio_dev_dir = sysfs_path.join("vfio-dev");
831                    let entry = std::fs::read_dir(&vfio_dev_dir)
832                        .with_context(|| {
833                            format!(
834                                "failed to read {}: is {} bound to vfio-pci?",
835                                vfio_dev_dir.display(),
836                                cli_cfg.pci_id
837                            )
838                        })?
839                        .next()
840                        .context("no vfio-dev entry found")?
841                        .context("failed to read vfio-dev entry")?;
842                    let dev_path = Path::new("/dev/vfio/devices").join(entry.file_name());
843                    let cdev = std::fs::OpenOptions::new()
844                        .read(true)
845                        .write(true)
846                        .open(&dev_path)
847                        .with_context(|| format!("failed to open {}", dev_path.display()))?;
848
849                    Ok(PcieDeviceConfig {
850                        port_name: cli_cfg.port_name.clone(),
851                        resource: vfio_assigned_device_resources::VfioCdevDeviceHandle {
852                            pci_id: cli_cfg.pci_id.clone(),
853                            cdev,
854                            iommufd,
855                            iommu_id: iommu_id.clone(),
856                        }
857                        .into_resource(),
858                    })
859                } else {
860                    // Legacy group/container path
861                    let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
862                        .with_context(|| {
863                            format!("failed to read IOMMU group for {}", cli_cfg.pci_id)
864                        })?;
865                    let group_id: u64 = iommu_group_link
866                        .file_name()
867                        .and_then(|s| s.to_str())
868                        .context("invalid iommu_group symlink")?
869                        .parse()
870                        .context("failed to parse IOMMU group ID")?;
871                    let group = std::fs::OpenOptions::new()
872                        .read(true)
873                        .write(true)
874                        .open(format!("/dev/vfio/{group_id}"))
875                        .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
876
877                    Ok(PcieDeviceConfig {
878                        port_name: cli_cfg.port_name.clone(),
879                        resource: vfio_assigned_device_resources::VfioDeviceHandle {
880                            pci_id: cli_cfg.pci_id.clone(),
881                            group,
882                        }
883                        .into_resource(),
884                    })
885                }
886            })
887            .collect::<anyhow::Result<Vec<_>>>()?
888    };
889
890    #[cfg(windows)]
891    let vpci_resources: Vec<_> = opt
892        .device
893        .iter()
894        .map(|path| -> anyhow::Result<_> {
895            Ok(virt_whp::device::DeviceHandle(
896                whp::VpciResource::new(
897                    None,
898                    Default::default(),
899                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
900                )
901                .with_context(|| format!("opening PCI device {}", path))?,
902            ))
903        })
904        .collect::<Result<_, _>>()?;
905
906    // Create a vmbusproxy handle if needed by any devices.
907    #[cfg(windows)]
908    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
909        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
910    } else {
911        None
912    };
913
914    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
915        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
916        let (fb, fba) =
917            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
918        resources.framebuffer_access = Some(fba);
919        Some(fb)
920    } else {
921        None
922    };
923
924    let load_mode;
925    let with_hv;
926
927    let any_serial_configured = serial0_cfg.is_some()
928        || serial1_cfg.is_some()
929        || serial2_cfg.is_some()
930        || serial3_cfg.is_some();
931
932    let has_com3 = serial2_cfg.is_some();
933
934    let mut chipset = VmManifestBuilder::new(
935        if opt.igvm.is_some() {
936            BaseChipsetType::HclHost
937        } else if opt.pcat {
938            BaseChipsetType::HypervGen1
939        } else if opt.uefi {
940            BaseChipsetType::HypervGen2Uefi
941        } else if opt.hv {
942            BaseChipsetType::HyperVGen2LinuxDirect
943        } else {
944            BaseChipsetType::UnenlightenedLinuxDirect
945        },
946        arch,
947    );
948
949    if framebuffer.is_some() {
950        chipset = chipset.with_framebuffer();
951    }
952    if opt.guest_watchdog {
953        chipset = chipset.with_guest_watchdog();
954    }
955    if any_serial_configured {
956        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
957    }
958    if opt.battery {
959        let (tx, rx) = mesh::channel();
960        tx.send(HostBatteryUpdate::default_present());
961        chipset = chipset.with_battery(rx);
962    }
963    if let Some(cfg) = &opt.debugcon {
964        chipset = chipset.with_debugcon(
965            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
966            cfg.port,
967        );
968    }
969
970    let custom_uefi_vars = {
971        use firmware_uefi_custom_vars::CustomVars;
972
973        // load base vars from specified template, or use an empty set of base
974        // vars if none was specified.
975        let base_vars = match opt.secure_boot_template {
976            Some(template) => match (arch, template) {
977                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
978                    hyperv_secure_boot_templates::x64::microsoft_windows()
979                }
980                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
981                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
982                }
983                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
984                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
985                }
986                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
987                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
988                }
989            },
990            None => CustomVars::default(),
991        };
992
993        // TODO: fallback to VMGS read if no command line flag was given
994
995        let custom_uefi_json_data = match &opt.custom_uefi_json {
996            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
997            None => None,
998        };
999
1000        // obtain the final custom uefi vars by applying the delta onto the base vars
1001        match custom_uefi_json_data {
1002            Some(data) => {
1003                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1004                base_vars.apply_delta(delta)?
1005            }
1006            None => base_vars,
1007        }
1008    };
1009
1010    let efi_diagnostics_log_level = match opt.efi_diagnostics_log_level.unwrap_or_default() {
1011        EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1012        EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1013        EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1014    };
1015
1016    if opt.uefi {
1017        let log_level = match efi_diagnostics_log_level {
1018            EfiDiagnosticsLogLevelType::Default => {
1019                firmware_uefi_resources::LogLevel::make_default()
1020            }
1021            EfiDiagnosticsLogLevelType::Info => firmware_uefi_resources::LogLevel::make_info(),
1022            EfiDiagnosticsLogLevelType::Full => firmware_uefi_resources::LogLevel::make_full(),
1023        };
1024        let nvram_storage = if opt.vmgs.is_some() {
1025            VmgsFileHandle::new(vmgs_format::FileId::BIOS_NVRAM, true).into_resource()
1026        } else {
1027            EphemeralNonVolatileStoreHandle.into_resource()
1028        };
1029        chipset = chipset.with_uefi(vm_manifest_builder::UefiManifest::new(
1030            arch,
1031            custom_uefi_vars.clone(),
1032            opt.secure_boot,
1033            log_level,
1034            nvram_storage,
1035            None,
1036        ));
1037    }
1038
1039    // TODO: load from VMGS file if it exists
1040    let bios_guid = Guid::new_random();
1041
1042    let layout_config = chipset.layout_config();
1043    let VmChipsetResult {
1044        chipset,
1045        mut chipset_devices,
1046        pci_chipset_devices,
1047        isa_dma_controller,
1048        capabilities,
1049    } = chipset
1050        .build()
1051        .context("failed to build chipset configuration")?;
1052
1053    if opt.restore_snapshot.is_some() {
1054        // Snapshot restore: skip firmware loading entirely. Device state and
1055        // memory come from the snapshot directory.
1056        load_mode = LoadMode::None;
1057        with_hv = true;
1058    } else if let Some(path) = &opt.igvm {
1059        let file = fs_err::File::open(path)
1060            .context("failed to open igvm file")?
1061            .into();
1062        let cmdline = opt.cmdline.join(" ");
1063        with_hv = true;
1064
1065        load_mode = LoadMode::Igvm {
1066            file,
1067            cmdline,
1068            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1069            com_serial: has_com3.then(|| SerialInformation {
1070                io_port: ComPort::Com3.io_port(),
1071                irq: ComPort::Com3.irq().into(),
1072            }),
1073        };
1074    } else if opt.pcat {
1075        // Emit a nice error early instead of complaining about missing firmware.
1076        if arch != MachineArch::X86_64 {
1077            anyhow::bail!("pcat not supported on this architecture");
1078        }
1079        with_hv = true;
1080
1081        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1082        load_mode = LoadMode::Pcat {
1083            firmware,
1084            boot_order: opt
1085                .pcat_boot_order
1086                .map(|x| x.0)
1087                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1088        };
1089    } else if opt.uefi {
1090        use openvmm_defs::config::UefiConsoleMode;
1091
1092        with_hv = true;
1093
1094        let firmware = fs_err::File::open(
1095            (opt.uefi_firmware.0)
1096                .as_ref()
1097                .context("must provide uefi firmware when booting with uefi")?,
1098        )
1099        .context("failed to open uefi firmware")?;
1100
1101        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1102        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1103        load_mode = LoadMode::Uefi {
1104            firmware: firmware.into(),
1105            enable_debugging: opt.uefi_debug,
1106            enable_memory_protections: opt.uefi_enable_memory_protections,
1107            disable_frontpage: opt.disable_frontpage,
1108            enable_tpm: opt.tpm,
1109            enable_battery: opt.battery,
1110            enable_serial: any_serial_configured,
1111            enable_vpci_boot: false,
1112            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1113                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1114                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1115                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1116                UefiConsoleModeCli::None => UefiConsoleMode::None,
1117            }),
1118            default_boot_always_attempt: opt.default_boot_always_attempt,
1119            bios_guid,
1120        };
1121    } else {
1122        // Linux Direct
1123        let mut cmdline = "panic=-1 debug".to_string();
1124
1125        with_hv = opt.hv;
1126        if with_hv && opt.pcie_root_complex.is_empty() {
1127            cmdline += " pci=off";
1128        }
1129
1130        if !console_str.is_empty() {
1131            let _ = write!(&mut cmdline, " console={}", console_str);
1132        }
1133
1134        if opt.gfx {
1135            cmdline += " console=tty";
1136        }
1137        for extra in &opt.cmdline {
1138            let _ = write!(&mut cmdline, " {}", extra);
1139        }
1140
1141        let kernel = fs_err::File::open(
1142            (opt.kernel.0)
1143                .as_ref()
1144                .context("must provide kernel when booting with linux direct")?,
1145        )
1146        .context("failed to open kernel")?;
1147        let initrd = (opt.initrd.0)
1148            .as_ref()
1149            .map(fs_err::File::open)
1150            .transpose()
1151            .context("failed to open initrd")?;
1152
1153        let custom_dsdt = match &opt.custom_dsdt {
1154            Some(path) => {
1155                let mut v = Vec::new();
1156                fs_err::File::open(path)
1157                    .context("failed to open custom dsdt")?
1158                    .read_to_end(&mut v)
1159                    .context("failed to read custom dsdt")?;
1160                Some(v)
1161            }
1162            None => None,
1163        };
1164
1165        load_mode = LoadMode::Linux {
1166            kernel: kernel.into(),
1167            initrd: initrd.map(Into::into),
1168            cmdline,
1169            custom_dsdt,
1170            enable_serial: any_serial_configured,
1171            boot_mode: if opt.device_tree {
1172                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1173            } else {
1174                openvmm_defs::config::LinuxDirectBootMode::Acpi
1175            },
1176        };
1177    }
1178
1179    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1180        let disk = VmgsDisk {
1181            disk: disk_open(kind, false)
1182                .await
1183                .context("failed to open vmgs disk")?,
1184            encryption_policy: if opt.test_gsp_by_id {
1185                GuestStateEncryptionPolicy::GspById(true)
1186            } else {
1187                GuestStateEncryptionPolicy::None(true)
1188            },
1189        };
1190        match provision {
1191            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1192            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1193            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1194        }
1195    } else {
1196        VmgsResource::Ephemeral
1197    });
1198
1199    if with_get && with_hv {
1200        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1201            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1202            fixed: Some(Default::default()),
1203            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1204                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1205                nic_devices: underhill_nics,
1206            }),
1207            namespace_settings: Vec::default(),
1208        };
1209
1210        // Cache the VTL2 settings for later modification via the interactive console.
1211        resources.vtl2_settings = Some(vtl2_settings.clone());
1212
1213        let (send, guest_request_recv) = mesh::channel();
1214        resources.ged_rpc = Some(send);
1215
1216        let vmgs = vmgs.take().unwrap();
1217
1218        vmbus_devices.extend([
1219            (
1220                openhcl_vtl,
1221                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1222            ),
1223            (
1224                openhcl_vtl,
1225                get_resources::ged::GuestEmulationDeviceHandle {
1226                    firmware: if opt.pcat {
1227                        get_resources::ged::GuestFirmwareConfig::Pcat {
1228                            boot_order: opt
1229                                .pcat_boot_order
1230                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1231                                .map(|x| match x {
1232                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1233                                        get_resources::ged::PcatBootDevice::Floppy
1234                                    }
1235                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1236                                        get_resources::ged::PcatBootDevice::HardDrive
1237                                    }
1238                                    openvmm_defs::config::PcatBootDevice::Optical => {
1239                                        get_resources::ged::PcatBootDevice::Optical
1240                                    }
1241                                    openvmm_defs::config::PcatBootDevice::Network => {
1242                                        get_resources::ged::PcatBootDevice::Network
1243                                    }
1244                                }),
1245                        }
1246                    } else {
1247                        use get_resources::ged::UefiConsoleMode;
1248
1249                        get_resources::ged::GuestFirmwareConfig::Uefi {
1250                            enable_vpci_boot: storage.has_vtl0_nvme(),
1251                            firmware_debug: opt.uefi_debug,
1252                            disable_frontpage: opt.disable_frontpage,
1253                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1254                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1255                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1256                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1257                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1258                            },
1259                            default_boot_always_attempt: opt.default_boot_always_attempt,
1260                        }
1261                    },
1262                    com1: with_vmbus_com1_serial,
1263                    com2: with_vmbus_com2_serial,
1264                    serial_tx_only: opt.serial_tx_only,
1265                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1266                    vmbus_redirection: opt.vmbus_redirect,
1267                    vmgs,
1268                    framebuffer: opt
1269                        .vtl2_gfx
1270                        .then(|| SharedFramebufferHandle.into_resource()),
1271                    guest_request_recv,
1272                    enable_tpm: opt.tpm,
1273                    firmware_event_send: None,
1274                    secure_boot_enabled: opt.secure_boot,
1275                    secure_boot_template: match opt.secure_boot_template {
1276                        Some(SecureBootTemplateCli::Windows) => {
1277                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1278                        },
1279                        Some(SecureBootTemplateCli::UefiCa) => {
1280                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1281                        }
1282                        None => {
1283                            get_resources::ged::GuestSecureBootTemplateType::None
1284                        },
1285                    },
1286                    enable_battery: opt.battery,
1287                    no_persistent_secrets: true,
1288                    igvm_attest_test_config: None,
1289                    test_gsp_by_id: opt.test_gsp_by_id,
1290                    efi_diagnostics_log_level: {
1291                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1292                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1293                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1294                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1295                        }
1296                    },
1297                    hv_sint_enabled: false,
1298                }
1299                .into_resource(),
1300            ),
1301        ]);
1302    }
1303
1304    if opt.tpm && !opt.vtl2 {
1305        let register_layout = if cfg!(guest_arch = "x86_64") {
1306            TpmRegisterLayout::IoPort
1307        } else {
1308            TpmRegisterLayout::Mmio
1309        };
1310
1311        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1312            (
1313                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1314                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1315            )
1316        } else {
1317            (
1318                EphemeralNonVolatileStoreHandle.into_resource(),
1319                EphemeralNonVolatileStoreHandle.into_resource(),
1320            )
1321        };
1322
1323        chipset_devices.push(ChipsetDeviceHandle {
1324            name: "tpm".to_string(),
1325            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1326                device: TpmDeviceHandle {
1327                    ppi_store,
1328                    nvram_store,
1329                    nvram_size: None,
1330                    refresh_tpm_seeds: false,
1331                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1332                    register_layout,
1333                    guest_secret_key: None,
1334                    logger: None,
1335                    is_confidential_vm: false,
1336                    bios_guid,
1337                }
1338                .into_resource(),
1339                worker_host: mesh.make_host("tpm", None).await?,
1340            }
1341            .into_resource(),
1342        });
1343    }
1344
1345    let vga_firmware = if opt.pcat {
1346        Some(openvmm_pcat_locator::find_svga_bios(
1347            opt.vga_firmware.as_deref(),
1348        )?)
1349    } else {
1350        None
1351    };
1352
1353    if opt.gfx {
1354        vmbus_devices.extend([
1355            (
1356                DeviceVtl::Vtl0,
1357                SynthVideoHandle {
1358                    framebuffer: SharedFramebufferHandle.into_resource(),
1359                }
1360                .into_resource(),
1361            ),
1362            (
1363                DeviceVtl::Vtl0,
1364                SynthKeyboardHandle {
1365                    source: MultiplexedInputHandle {
1366                        // Save 0 for PS/2
1367                        elevation: 1,
1368                    }
1369                    .into_resource(),
1370                }
1371                .into_resource(),
1372            ),
1373            (
1374                DeviceVtl::Vtl0,
1375                SynthMouseHandle {
1376                    source: MultiplexedInputHandle {
1377                        // Save 0 for PS/2
1378                        elevation: 1,
1379                    }
1380                    .into_resource(),
1381                }
1382                .into_resource(),
1383            ),
1384        ]);
1385    }
1386
1387    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1388        if let Some(path) = path {
1389            cleanup_socket(path.as_ref());
1390            let listener = unix_socket::UnixListener::bind(path)
1391                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1392            Ok(Some(listener))
1393        } else {
1394            Ok(None)
1395        }
1396    };
1397
1398    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1399    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1400
1401    if let Some(path) = &opt.openhcl_dump_path {
1402        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1403        task.detach();
1404        vmbus_devices.push((openhcl_vtl, resource));
1405    }
1406
1407    #[cfg(guest_arch = "aarch64")]
1408    let smmu_instances: Vec<openvmm_defs::config::SmmuInstanceConfig> = opt
1409        .smmu
1410        .iter()
1411        .map(|s| openvmm_defs::config::SmmuInstanceConfig { rc_name: s.clone() })
1412        .collect();
1413
1414    #[cfg(guest_arch = "aarch64")]
1415    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1416        openvmm_defs::config::Aarch64TopologyConfig {
1417            // TODO: allow this to be configured from the command line
1418            gic_config: None,
1419            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1420            gic_msi: match opt.gic_msi {
1421                cli_args::GicMsiCli::Auto => openvmm_defs::config::GicMsiConfig::Auto,
1422                cli_args::GicMsiCli::Its => openvmm_defs::config::GicMsiConfig::Its,
1423                cli_args::GicMsiCli::V2m => {
1424                    openvmm_defs::config::GicMsiConfig::V2m { spi_count: None }
1425                }
1426            },
1427            smmu: smmu_instances,
1428        },
1429    );
1430    #[cfg(guest_arch = "x86_64")]
1431    let topology_arch =
1432        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1433            apic_id_offset: opt.apic_id_offset,
1434            x2apic: opt.x2apic,
1435        });
1436
1437    let with_isolation = if let Some(isolation) = &opt.isolation {
1438        // TODO: For now, isolation is only supported with VTL2.
1439        if !opt.vtl2 {
1440            anyhow::bail!("isolation is only currently supported with vtl2");
1441        }
1442
1443        // TODO: Alias map support is not yet implemented with isolation.
1444        if !opt.no_alias_map {
1445            anyhow::bail!("alias map not supported with isolation");
1446        }
1447
1448        match isolation {
1449            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1450        }
1451    } else {
1452        None
1453    };
1454
1455    if with_hv {
1456        let (shutdown_send, shutdown_recv) = mesh::channel();
1457        resources.shutdown_ic = Some(shutdown_send);
1458        let (kvp_send, kvp_recv) = mesh::channel();
1459        resources.kvp_ic = Some(kvp_send);
1460        vmbus_devices.extend(
1461            [
1462                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1463                    recv: shutdown_recv,
1464                }
1465                .into_resource(),
1466                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1467                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1468            ]
1469            .map(|r| (DeviceVtl::Vtl0, r)),
1470        );
1471    }
1472
1473    if let Some(hive_path) = &opt.imc {
1474        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1475        vmbus_devices.push((
1476            DeviceVtl::Vtl0,
1477            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1478        ));
1479    }
1480
1481    let mut virtio_devices = Vec::new();
1482    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1483        let bus = match bus {
1484            VirtioBusCli::Auto => {
1485                // Use VPCI when possible (currently only on Windows and macOS due
1486                // to KVM backend limitations).
1487                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1488                    None
1489                } else {
1490                    Some(VirtioBus::Pci)
1491                }
1492            }
1493            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1494            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1495            VirtioBusCli::Vpci => None,
1496        };
1497        if let Some(bus) = bus {
1498            virtio_devices.push((bus, resource));
1499        } else {
1500            vpci_devices.push(VpciDeviceConfig {
1501                vtl: DeviceVtl::Vtl0,
1502                instance_id: Guid::new_random(),
1503                resource: VirtioPciDeviceHandle(resource).into_resource(),
1504            });
1505        }
1506    };
1507
1508    for cli_cfg in &opt.virtio_net {
1509        if cli_cfg.underhill {
1510            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1511        }
1512        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1513        let resource = virtio_resources::net::VirtioNetHandle {
1514            max_queues: vport.max_queues,
1515            mac_address: vport.mac_address,
1516            endpoint: vport.endpoint,
1517        }
1518        .into_resource();
1519        if let Some(pcie_port) = &cli_cfg.pcie_port {
1520            pcie_devices.push(PcieDeviceConfig {
1521                port_name: pcie_port.clone(),
1522                resource: VirtioPciDeviceHandle(resource).into_resource(),
1523            });
1524        } else {
1525            add_virtio_device(VirtioBusCli::Auto, resource);
1526        }
1527    }
1528
1529    for args in &opt.virtio_fs {
1530        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1531            tag: args.tag.clone(),
1532            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1533                root_path: args.path.clone(),
1534                mount_options: args.options.clone(),
1535            },
1536        }
1537        .into_resource();
1538        if let Some(pcie_port) = &args.pcie_port {
1539            pcie_devices.push(PcieDeviceConfig {
1540                port_name: pcie_port.clone(),
1541                resource: VirtioPciDeviceHandle(resource).into_resource(),
1542            });
1543        } else {
1544            add_virtio_device(opt.virtio_fs_bus, resource);
1545        }
1546    }
1547
1548    for args in &opt.virtio_fs_shmem {
1549        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1550            tag: args.tag.clone(),
1551            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1552                root_path: args.path.clone(),
1553            },
1554        }
1555        .into_resource();
1556        if let Some(pcie_port) = &args.pcie_port {
1557            pcie_devices.push(PcieDeviceConfig {
1558                port_name: pcie_port.clone(),
1559                resource: VirtioPciDeviceHandle(resource).into_resource(),
1560            });
1561        } else {
1562            add_virtio_device(opt.virtio_fs_bus, resource);
1563        }
1564    }
1565
1566    for args in &opt.virtio_9p {
1567        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1568            tag: args.tag.clone(),
1569            root_path: args.path.clone(),
1570            debug: opt.virtio_9p_debug,
1571        }
1572        .into_resource();
1573        if let Some(pcie_port) = &args.pcie_port {
1574            pcie_devices.push(PcieDeviceConfig {
1575                port_name: pcie_port.clone(),
1576                resource: VirtioPciDeviceHandle(resource).into_resource(),
1577            });
1578        } else {
1579            add_virtio_device(VirtioBusCli::Auto, resource);
1580        }
1581    }
1582
1583    if let Some(pmem_args) = &opt.virtio_pmem {
1584        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1585            path: pmem_args.path.clone(),
1586        }
1587        .into_resource();
1588        if let Some(pcie_port) = &pmem_args.pcie_port {
1589            pcie_devices.push(PcieDeviceConfig {
1590                port_name: pcie_port.clone(),
1591                resource: VirtioPciDeviceHandle(resource).into_resource(),
1592            });
1593        } else {
1594            add_virtio_device(VirtioBusCli::Auto, resource);
1595        }
1596    }
1597
1598    if opt.virtio_rng {
1599        let resource: Resource<VirtioDeviceHandle> =
1600            virtio_resources::rng::VirtioRngHandle.into_resource();
1601        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1602            pcie_devices.push(PcieDeviceConfig {
1603                port_name: pcie_port.clone(),
1604                resource: VirtioPciDeviceHandle(resource).into_resource(),
1605            });
1606        } else {
1607            add_virtio_device(opt.virtio_rng_bus, resource);
1608        }
1609    }
1610
1611    if let Some(backend) = virtio_console_backend {
1612        let resource: Resource<VirtioDeviceHandle> =
1613            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1614        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1615            pcie_devices.push(PcieDeviceConfig {
1616                port_name: pcie_port.clone(),
1617                resource: VirtioPciDeviceHandle(resource).into_resource(),
1618            });
1619        } else {
1620            add_virtio_device(VirtioBusCli::Auto, resource);
1621        }
1622    }
1623
1624    // Handle --vhost-user arguments.
1625    #[cfg(target_os = "linux")]
1626    for vhost_cli in &opt.vhost_user {
1627        let stream =
1628            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1629                format!(
1630                    "failed to connect to vhost-user socket: {}",
1631                    vhost_cli.socket_path
1632                )
1633            })?;
1634
1635        use crate::cli_args::VhostUserDeviceTypeCli;
1636        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1637            VhostUserDeviceTypeCli::Fs {
1638                ref tag,
1639                num_queues,
1640                queue_size,
1641            } => virtio_resources::vhost_user::VhostUserFsHandle {
1642                socket: stream.into(),
1643                tag: tag.clone(),
1644                num_queues,
1645                queue_size,
1646            }
1647            .into_resource(),
1648            VhostUserDeviceTypeCli::Blk {
1649                num_queues,
1650                queue_size,
1651            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1652                socket: stream.into(),
1653                num_queues,
1654                queue_size,
1655            }
1656            .into_resource(),
1657            VhostUserDeviceTypeCli::Other {
1658                device_id,
1659                ref queue_sizes,
1660            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1661                socket: stream.into(),
1662                device_id,
1663                queue_sizes: queue_sizes.clone(),
1664            }
1665            .into_resource(),
1666        };
1667        if let Some(pcie_port) = &vhost_cli.pcie_port {
1668            pcie_devices.push(PcieDeviceConfig {
1669                port_name: pcie_port.clone(),
1670                resource: VirtioPciDeviceHandle(resource).into_resource(),
1671            });
1672        } else {
1673            add_virtio_device(VirtioBusCli::Auto, resource);
1674        }
1675    }
1676
1677    if let Some(vsock_path) = &opt.virtio_vsock_path {
1678        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1679        add_virtio_device(
1680            VirtioBusCli::Auto,
1681            virtio_resources::vsock::VirtioVsockHandle {
1682                // The guest CID does not matter since the UDS relay does not use it. It just needs
1683                // to be some non-reserved value for the guest to use.
1684                guest_cid: 0x3,
1685                base_path: vsock_path.clone(),
1686                listener,
1687            }
1688            .into_resource(),
1689        );
1690    }
1691
1692    let mut cfg = Config {
1693        chipset,
1694        load_mode,
1695        floppy_disks,
1696        pcie_root_complexes,
1697        #[cfg(target_os = "linux")]
1698        pcie_devices: {
1699            let mut devs = pcie_devices;
1700            devs.extend(vfio_pcie_devices);
1701            devs
1702        },
1703        #[cfg(not(target_os = "linux"))]
1704        pcie_devices,
1705        pcie_switches,
1706        vpci_devices,
1707        ide_disks: Vec::new(),
1708        memory: MemoryConfig {
1709            mem_size: if let Some(ref sizes) = opt.numa_memory {
1710                sizes
1711                    .iter()
1712                    .try_fold(0u64, |acc, &s| acc.checked_add(s))
1713                    .context("numa memory sizes overflow")?
1714            } else {
1715                opt.memory_size()
1716            },
1717            prefetch_memory: opt.prefetch_memory(),
1718            private_memory: opt.private_memory(),
1719            transparent_hugepages: opt.transparent_hugepages(),
1720            hugepages: opt.memory.hugepages,
1721            hugepage_size: opt.memory.hugepage_size,
1722            numa_mem_sizes: opt.numa_memory.clone(),
1723        },
1724        processor_topology: ProcessorTopologyConfig {
1725            proc_count: opt.processors,
1726            vps_per_socket: opt.vps_per_socket,
1727            enable_smt: match opt.smt {
1728                cli_args::SmtConfigCli::Auto => None,
1729                cli_args::SmtConfigCli::Force => Some(true),
1730                cli_args::SmtConfigCli::Off => Some(false),
1731            },
1732            arch: Some(topology_arch),
1733        },
1734        hypervisor: HypervisorConfig {
1735            with_hv,
1736            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1737                vtl0_alias_map: !opt.no_alias_map,
1738                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1739                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1740                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1741                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1742                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1743                        Some(LateMapVtl0MemoryPolicy::InjectException)
1744                    }
1745                },
1746            }),
1747            with_isolation,
1748        },
1749        #[cfg(windows)]
1750        kernel_vmnics,
1751        input: mesh::Receiver::new(),
1752        framebuffer,
1753        vga_firmware,
1754        vtl2_gfx: opt.vtl2_gfx,
1755        virtio_devices,
1756        vmbus: with_hv.then_some(VmbusConfig {
1757            vsock_listener: vtl0_vsock_listener,
1758            vsock_path: opt.vmbus_vsock_path.clone(),
1759            vtl2_redirect: opt.vmbus_redirect,
1760            vmbus_max_version: opt.vmbus_max_version,
1761            #[cfg(windows)]
1762            vmbusproxy_handle,
1763        }),
1764        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1765            vsock_listener: vtl2_vsock_listener,
1766            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1767            ..Default::default()
1768        }),
1769        vmbus_devices,
1770        chipset_devices,
1771        pci_chipset_devices,
1772        isa_dma_controller,
1773        chipset_capabilities: capabilities,
1774        layout: layout_config,
1775        #[cfg(windows)]
1776        vpci_resources,
1777        vmgs,
1778        secure_boot_enabled: opt.secure_boot,
1779        custom_uefi_vars,
1780        firmware_event_send: None,
1781        debugger_rpc: None,
1782        rtc_delta_milliseconds: 0,
1783        automatic_guest_reset: !opt.halt_on_reset,
1784        efi_diagnostics_log_level: {
1785            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1786                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1787                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1788                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1789            }
1790        },
1791    };
1792
1793    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1794    Ok((cfg, resources))
1795}
1796
/// Returns the terminal application to use for externally launched console
/// windows, as configured through the environment.
///
/// `OPENVMM_TERM` is consulted first; `HVLITE_TERM` is only read when
/// `OPENVMM_TERM` is not set. Returns `None` when neither variable is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    // Listed in priority order; `find_map` stops at the first variable that is set.
    const TERM_VARS: [&str; 2] = ["OPENVMM_TERM", "HVLITE_TERM"];
    TERM_VARS
        .iter()
        .find_map(|name| std::env::var_os(name))
        .map(PathBuf::from)
}
1803
// Best-effort removal of `path`, performed only when `path` is confirmed to
// be a Unix socket. Anything else (a regular file, a directory, a path whose
// type cannot be determined) is left untouched, and a failure to remove the
// socket is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let is_socket = {
        use std::os::unix::fs::FileTypeExt;
        // A metadata error counts as "not a socket".
        matches!(path.metadata(), Ok(meta) if meta.file_type().is_socket())
    };

    if !is_socket {
        return;
    }
    let _ = std::fs::remove_file(path);
}
1817
/// Switch ID used for a `dio` network endpoint when the command line does not
/// specify one (Windows only).
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1820
/// Creates a new port, with a randomly generated port ID, on the switch
/// identified by `switch_id` (Windows only).
///
/// Returns the port's identity in `openvmm_defs` form together with the
/// kernel switch port object.
///
/// # Errors
///
/// Fails if `switch_id` cannot be parsed, if the switch cannot be opened, or
/// if the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let kernel_id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // Open the network only to confirm that the switch exists; the handle is
    // dropped immediately.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let switch_port =
        vmswitch::kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    // Re-express the kernel identifier as the config-level identifier.
    let vmswitch::kernel::SwitchPortId { switch, port } = kernel_id;
    Ok((
        openvmm_defs::config::SwitchPortId { switch, port },
        switch_port,
    ))
}
1843
1844fn parse_endpoint(
1845    cli_cfg: &NicConfigCli,
1846    index: &mut usize,
1847    resources: &mut VmResources,
1848) -> anyhow::Result<NicConfig> {
1849    let _ = resources;
1850    let endpoint = match &cli_cfg.endpoint {
1851        EndpointConfigCli::Consomme { cidr, host_fwd } => {
1852            let ports = host_fwd
1853                .iter()
1854                .map(|fwd| {
1855                    use net_backend_resources::consomme::HostPortProtocol;
1856                    net_backend_resources::consomme::HostPortConfig {
1857                        protocol: match fwd.protocol {
1858                            cli_args::HostPortProtocolCli::Tcp => HostPortProtocol::Tcp,
1859                            cli_args::HostPortProtocolCli::Udp => HostPortProtocol::Udp,
1860                        },
1861                        host_address: fwd
1862                            .host_address
1863                            .map(net_backend_resources::consomme::HostIpAddress::from),
1864                        host_port: fwd.host_port,
1865                        guest_port: fwd.guest_port,
1866                    }
1867                })
1868                .collect();
1869            net_backend_resources::consomme::ConsommeHandle {
1870                cidr: cidr.clone(),
1871                ports,
1872            }
1873            .into_resource()
1874        }
1875        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
1876        EndpointConfigCli::Dio { id } => {
1877            #[cfg(windows)]
1878            {
1879                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
1880                resources.switch_ports.push(port);
1881                net_backend_resources::dio::WindowsDirectIoHandle {
1882                    switch_port_id: net_backend_resources::dio::SwitchPortId {
1883                        switch: port_id.switch,
1884                        port: port_id.port,
1885                    },
1886                }
1887                .into_resource()
1888            }
1889
1890            #[cfg(not(windows))]
1891            {
1892                let _ = id;
1893                bail!("cannot use dio on non-windows platforms")
1894            }
1895        }
1896        EndpointConfigCli::Tap { name } => {
1897            #[cfg(target_os = "linux")]
1898            {
1899                let fd = net_tap::tap::open_tap(name)
1900                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
1901                net_backend_resources::tap::TapHandle { fd }.into_resource()
1902            }
1903
1904            #[cfg(not(target_os = "linux"))]
1905            {
1906                let _ = name;
1907                bail!("TAP backend is only supported on Linux")
1908            }
1909        }
1910    };
1911
1912    // Pick a random MAC address.
1913    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
1914    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
1915
1916    // Pick a fixed instance ID based on the index.
1917    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
1918    let instance_id = Guid {
1919        data1: *index as u32,
1920        ..BASE_INSTANCE_ID
1921    };
1922    *index += 1;
1923
1924    Ok(NicConfig {
1925        vtl: cli_cfg.vtl,
1926        instance_id,
1927        endpoint,
1928        mac_address: mac_address.into(),
1929        max_queues: cli_cfg.max_queues,
1930        pcie_port: cli_cfg.pcie_port.clone(),
1931    })
1932}
1933
1934#[derive(Debug)]
1935struct NicConfig {
1936    vtl: DeviceVtl,
1937    instance_id: Guid,
1938    mac_address: MacAddress,
1939    endpoint: Resource<NetEndpointHandleKind>,
1940    max_queues: Option<u16>,
1941    pcie_port: Option<String>,
1942}
1943
1944impl NicConfig {
1945    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1946        (
1947            self.vtl,
1948            netvsp_resources::NetvspHandle {
1949                instance_id: self.instance_id,
1950                mac_address: self.mac_address,
1951                endpoint: self.endpoint,
1952                max_queues: self.max_queues,
1953            }
1954            .into_resource(),
1955        )
1956    }
1957}
1958
1959enum LayerOrDisk {
1960    Layer(DiskLayerDescription),
1961    Disk(Resource<DiskHandleKind>),
1962}
1963
1964async fn disk_open(
1965    disk_cli: &DiskCliKind,
1966    read_only: bool,
1967) -> anyhow::Result<Resource<DiskHandleKind>> {
1968    let mut layers = Vec::new();
1969    disk_open_inner(disk_cli, read_only, &mut layers).await?;
1970    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1971        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1972            unreachable!()
1973        };
1974        Ok(disk)
1975    } else {
1976        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1977            layers: layers
1978                .into_iter()
1979                .map(|layer| match layer {
1980                    LayerOrDisk::Layer(layer) => layer,
1981                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1982                        layer: DiskLayerHandle(disk).into_resource(),
1983                        read_cache: false,
1984                        write_through: false,
1985                    },
1986                })
1987                .collect(),
1988        }))
1989    }
1990}
1991
1992fn disk_open_inner<'a>(
1993    disk_cli: &'a DiskCliKind,
1994    read_only: bool,
1995    layers: &'a mut Vec<LayerOrDisk>,
1996) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
1997    Box::pin(async move {
1998        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
1999            LayerOrDisk::Layer(layer.into_resource().into())
2000        }
2001        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
2002            LayerOrDisk::Disk(disk.into_resource())
2003        }
2004        match disk_cli {
2005            &DiskCliKind::Memory(len) => {
2006                layers.push(layer(RamDiskLayerHandle {
2007                    len: Some(len),
2008                    sector_size: None,
2009                }));
2010            }
2011            DiskCliKind::File {
2012                path,
2013                create_with_len,
2014                direct,
2015            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
2016                create_disk_type(
2017                    path,
2018                    *size,
2019                    OpenDiskOptions {
2020                        read_only: false,
2021                        direct: *direct,
2022                    },
2023                )
2024                .with_context(|| format!("failed to create {}", path.display()))?
2025            } else {
2026                open_disk_type(
2027                    path,
2028                    OpenDiskOptions {
2029                        read_only,
2030                        direct: *direct,
2031                    },
2032                )
2033                .await
2034                .with_context(|| format!("failed to open {}", path.display()))?
2035            })),
2036            DiskCliKind::Blob { kind, url } => {
2037                layers.push(disk(disk_backend_resources::BlobDiskHandle {
2038                    url: url.to_owned(),
2039                    format: match kind {
2040                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
2041                        cli_args::BlobKind::Vhd1 => {
2042                            disk_backend_resources::BlobDiskFormat::FixedVhd1
2043                        }
2044                    },
2045                }))
2046            }
2047            DiskCliKind::MemoryDiff(inner) => {
2048                layers.push(layer(RamDiskLayerHandle {
2049                    len: None,
2050                    sector_size: None,
2051                }));
2052                disk_open_inner(inner, true, layers).await?;
2053            }
2054            DiskCliKind::PersistentReservationsWrapper(inner) => {
2055                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
2056                    disk_open(inner, read_only).await?,
2057                )))
2058            }
2059            DiskCliKind::DelayDiskWrapper {
2060                delay_ms,
2061                disk: inner,
2062            } => layers.push(disk(DelayDiskHandle {
2063                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
2064                disk: disk_open(inner, read_only).await?,
2065            })),
2066            DiskCliKind::Crypt {
2067                disk: inner,
2068                cipher,
2069                key_file,
2070            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
2071                disk: disk_open(inner, read_only).await?,
2072                cipher: match cipher {
2073                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
2074                },
2075                key: fs_err::read(key_file).context("failed to read key file")?,
2076            })),
2077            DiskCliKind::Sqlite {
2078                path,
2079                create_with_len,
2080            } => {
2081                // FUTURE: this code should be responsible for opening
2082                // file-handle(s) itself, and passing them into sqlite via a custom
2083                // vfs. For now though - simply check if the file exists or not, and
2084                // perform early validation of filesystem-level create options.
2085                match (create_with_len.is_some(), path.exists()) {
2086                    (true, true) => anyhow::bail!(
2087                        "cannot create new sqlite disk at {} - file already exists",
2088                        path.display()
2089                    ),
2090                    (false, false) => anyhow::bail!(
2091                        "cannot open sqlite disk at {} - file not found",
2092                        path.display()
2093                    ),
2094                    _ => {}
2095                }
2096
2097                layers.push(layer(SqliteDiskLayerHandle {
2098                    dbhd_path: path.display().to_string(),
2099                    format_dbhd: create_with_len.map(|len| {
2100                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2101                            logically_read_only: false,
2102                            len: Some(len),
2103                        }
2104                    }),
2105                }));
2106            }
2107            DiskCliKind::SqliteDiff { path, create, disk } => {
2108                // FUTURE: this code should be responsible for opening
2109                // file-handle(s) itself, and passing them into sqlite via a custom
2110                // vfs. For now though - simply check if the file exists or not, and
2111                // perform early validation of filesystem-level create options.
2112                match (create, path.exists()) {
2113                    (true, true) => anyhow::bail!(
2114                        "cannot create new sqlite disk at {} - file already exists",
2115                        path.display()
2116                    ),
2117                    (false, false) => anyhow::bail!(
2118                        "cannot open sqlite disk at {} - file not found",
2119                        path.display()
2120                    ),
2121                    _ => {}
2122                }
2123
2124                layers.push(layer(SqliteDiskLayerHandle {
2125                    dbhd_path: path.display().to_string(),
2126                    format_dbhd: create.then_some(
2127                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2128                            logically_read_only: false,
2129                            len: None,
2130                        },
2131                    ),
2132                }));
2133                disk_open_inner(disk, true, layers).await?;
2134            }
2135            DiskCliKind::AutoCacheSqlite {
2136                cache_path,
2137                key,
2138                disk,
2139            } => {
2140                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2141                    read_cache: true,
2142                    write_through: false,
2143                    layer: SqliteAutoCacheDiskLayerHandle {
2144                        cache_path: cache_path.clone(),
2145                        cache_key: key.clone(),
2146                    }
2147                    .into_resource(),
2148                }));
2149                disk_open_inner(disk, read_only, layers).await?;
2150            }
2151        }
2152        Ok(())
2153    })
2154}
2155
2156/// Get the system page size.
2157pub(crate) fn system_page_size() -> u32 {
2158    sparse_mmap::SparseMapping::page_size() as u32
2159}
2160
/// Name of the guest architecture, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when `guest_arch = "x86_64"`, `"aarch64"`
/// otherwise.
pub(crate) const GUEST_ARCH: &str = match cfg!(guest_arch = "x86_64") {
    true => "x86_64",
    false => "aarch64",
};
2167
2168/// Open a snapshot directory and validate it against the current VM config.
2169/// Returns the shared memory fd (from memory.bin) and the saved device state.
2170fn prepare_snapshot_restore(
2171    snapshot_dir: &Path,
2172    opt: &Options,
2173) -> anyhow::Result<(
2174    openvmm_defs::worker::SharedMemoryFd,
2175    mesh::payload::message::ProtobufMessage,
2176)> {
2177    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2178
2179    // Validate manifest against current VM config.
2180    openvmm_helpers::snapshot::validate_manifest(
2181        &manifest,
2182        GUEST_ARCH,
2183        opt.memory_size(),
2184        opt.processors,
2185        system_page_size(),
2186    )?;
2187
2188    // Open memory.bin (existing file, no create, no resize).
2189    let memory_file = fs_err::OpenOptions::new()
2190        .read(true)
2191        .write(true)
2192        .open(snapshot_dir.join("memory.bin"))?;
2193
2194    // Validate file size matches expected memory size.
2195    let file_size = memory_file.metadata()?.len();
2196    if file_size != manifest.memory_size_bytes {
2197        anyhow::bail!(
2198            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2199            manifest.memory_size_bytes,
2200        );
2201    }
2202
2203    let shared_memory_fd =
2204        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2205
2206    // Reconstruct ProtobufMessage from the saved state bytes.
2207    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2208    // back to ProtobufMessage.
2209    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2210        .context("failed to decode saved state from snapshot")?;
2211
2212    Ok((shared_memory_fd, state_msg))
2213}
2214
2215fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
2216    #[cfg(windows)]
2217    pal::windows::disable_hard_error_dialog();
2218
2219    tracing_init::enable_tracing()?;
2220
2221    // Try to run as a worker host.
2222    // On success the worker runs to completion and then exits the process (does
2223    // not return). Any worker host setup errors are return and bubbled up.
2224    meshworker::run_vmm_mesh_host()?;
2225
2226    let opt = Options::parse();
2227    if let Some(path) = &opt.write_saved_state_proto {
2228        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2229            .write_to_path(path)
2230            .context("failed to write protobuf descriptors")?;
2231        return Ok(());
2232    }
2233
2234    if let Some(ref path) = opt.pidfile {
2235        std::fs::write(path, format!("{}\n", std::process::id()))
2236            .context("failed to write pidfile")?;
2237        *pidfile_path = Some(path.clone());
2238    }
2239
2240    if let Some(path) = opt.relay_console_path {
2241        let console_title = opt.relay_console_title.unwrap_or_default();
2242        return console_relay::relay_console(&path, console_title.as_str());
2243    }
2244
2245    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2246    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2247        return block_on(async {
2248            let _ = std::fs::remove_file(path);
2249            let listener =
2250                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2251
2252            let transport = if opt.ttrpc.is_some() {
2253                ttrpc::RpcTransport::Ttrpc
2254            } else {
2255                ttrpc::RpcTransport::Grpc
2256            };
2257
2258            // This is a local launch
2259            let mut handle =
2260                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2261                    listener,
2262                    transport,
2263                })
2264                .await?;
2265
2266            tracing::info!(%transport, path = %path.display(), "listening");
2267
2268            // Signal the the parent process that the server is ready.
2269            pal::close_stdout().context("failed to close stdout")?;
2270
2271            handle.join().await?;
2272
2273            Ok(())
2274        });
2275    }
2276
2277    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
2278}
2279
2280fn new_hvsock_service_id(port: u32) -> Guid {
2281    // This GUID is an embedding of the AF_VSOCK port into an
2282    // AF_HYPERV service ID.
2283    Guid {
2284        data1: port,
2285        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2286    }
2287}
2288
2289async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2290    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2291    let result = run_control_inner(driver, &mut mesh, opt).await;
2292    // If setup failed before the mesh was handed to the controller, shut it
2293    // down so the child host process exits cleanly without noisy logs.
2294    if let Some(mesh) = mesh {
2295        mesh.shutdown().await;
2296    }
2297    result
2298}
2299
2300async fn run_control_inner(
2301    driver: &DefaultDriver,
2302    mesh_slot: &mut Option<VmmMesh>,
2303    opt: Options,
2304) -> anyhow::Result<()> {
2305    let mesh = mesh_slot.as_ref().unwrap();
2306    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2307
2308    let mut vnc_worker = None;
2309    if opt.gfx || opt.vnc {
2310        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
2311            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;
2312
2313        let input_send = vm_config.input.sender();
2314        let framebuffer = resources
2315            .framebuffer_access
2316            .take()
2317            .expect("synth video enabled");
2318
2319        let vnc_host = mesh
2320            .make_host("vnc", None)
2321            .await
2322            .context("spawning vnc process failed")?;
2323
2324        vnc_worker = Some(
2325            vnc_host
2326                .launch_worker(
2327                    vnc_worker_defs::VNC_WORKER_TCP,
2328                    VncParameters {
2329                        listener,
2330                        framebuffer,
2331                        input_send,
2332                    },
2333                )
2334                .await?,
2335        )
2336    }
2337
2338    // spin up the debug worker
2339    let gdb_worker = if let Some(port) = opt.gdb {
2340        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2341            .with_context(|| format!("binding to gdb port {}", port))?;
2342
2343        let (req_tx, req_rx) = mesh::channel();
2344        vm_config.debugger_rpc = Some(req_rx);
2345
2346        let gdb_host = mesh
2347            .make_host("gdb", None)
2348            .await
2349            .context("spawning gdbstub process failed")?;
2350
2351        Some(
2352            gdb_host
2353                .launch_worker(
2354                    debug_worker_defs::DEBUGGER_WORKER,
2355                    debug_worker_defs::DebuggerParameters {
2356                        listener,
2357                        req_chan: req_tx,
2358                        vp_count: vm_config.processor_topology.proc_count,
2359                        target_arch: if cfg!(guest_arch = "x86_64") {
2360                            debug_worker_defs::TargetArch::X86_64
2361                        } else {
2362                            debug_worker_defs::TargetArch::Aarch64
2363                        },
2364                    },
2365                )
2366                .await
2367                .context("failed to launch gdbstub worker")?,
2368        )
2369    } else {
2370        None
2371    };
2372
2373    // spin up the VM
2374    let (vm_rpc, rpc_recv) = mesh::channel();
2375    let (notify_send, notify_recv) = mesh::channel();
2376    let vm_worker = {
2377        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2378
2379        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2380            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2381            (Some(fd), Some(state_msg))
2382        } else {
2383            let shared_memory = opt
2384                .memory_backing_file()
2385                .map(|path| {
2386                    openvmm_helpers::shared_memory::open_memory_backing_file(
2387                        path,
2388                        opt.memory_size(),
2389                    )
2390                })
2391                .transpose()?;
2392            (shared_memory, None)
2393        };
2394
2395        let params = VmWorkerParameters {
2396            hypervisor: match &opt.hypervisor {
2397                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
2398                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
2399            },
2400            cfg: vm_config,
2401            saved_state,
2402            shared_memory,
2403            rpc: rpc_recv,
2404            notify: notify_send,
2405        };
2406        vm_host
2407            .launch_worker(VM_WORKER, params)
2408            .await
2409            .context("failed to launch vm worker")?
2410    };
2411
2412    if opt.restore_snapshot.is_some() {
2413        tracing::info!("restoring VM from snapshot");
2414    }
2415
2416    if !opt.paused {
2417        vm_rpc.call(VmRpc::Resume, ()).await?;
2418    }
2419
2420    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2421        driver.clone(),
2422        DiagDialer {
2423            driver: driver.clone(),
2424            vm_rpc: vm_rpc.clone(),
2425            openhcl_vtl: if opt.vtl2 {
2426                DeviceVtl::Vtl2
2427            } else {
2428                DeviceVtl::Vtl0
2429            },
2430        },
2431    ));
2432
2433    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2434
2435    // Create channels between the REPL and VmController.
2436    let (vm_controller_send, vm_controller_recv) = mesh::channel();
2437    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();
2438
2439    let has_vtl2 = resources.vtl2_settings.is_some();
2440
2441    // Build the VmController with exclusive resources.
2442    let controller = vm_controller::VmController {
2443        mesh: mesh_slot.take().unwrap(),
2444        vm_worker,
2445        vnc_worker,
2446        gdb_worker,
2447        diag_inspector: Some(diag_inspector),
2448        vtl2_settings: resources.vtl2_settings,
2449        ged_rpc: resources.ged_rpc.clone(),
2450        vm_rpc: vm_rpc.clone(),
2451        paravisor_diag: Some(paravisor_diag),
2452        igvm_path: opt.igvm.clone(),
2453        memory_backing_file: opt.memory_backing_file().cloned(),
2454        memory: opt.memory_size(),
2455        processors: opt.processors,
2456        log_file: opt.log_file.clone(),
2457    };
2458
2459    // Spawn the VmController as a task.
2460    let controller_task = driver.spawn(
2461        "vm-controller",
2462        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
2463    );
2464
2465    // Run the REPL with shareable resources.
2466    let repl_result = repl::run_repl(
2467        driver,
2468        repl::ReplResources {
2469            vm_rpc,
2470            vm_controller: vm_controller_send,
2471            vm_controller_events: vm_controller_event_recv,
2472            scsi_rpc: resources.scsi_rpc,
2473            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
2474            shutdown_ic: resources.shutdown_ic,
2475            kvp_ic: resources.kvp_ic,
2476            console_in: resources.console_in,
2477            has_vtl2,
2478        },
2479    )
2480    .await;
2481
2482    // Wait for the controller task to finish (it stops the VM worker and
2483    // shuts down the mesh).
2484    controller_task.await;
2485
2486    repl_result
2487}
2488
/// Dialer that opens the diagnostics connection to the guest by asking the VM
/// worker for an hvsock connection (see the `Dial` implementation below).
struct DiagDialer {
    /// Driver used to wrap the connected socket in a `PolledSocket`.
    driver: DefaultDriver,
    /// Channel over which `VmRpc::ConnectHvsock` requests are sent.
    vm_rpc: mesh::Sender<VmRpc>,
    /// VTL value forwarded with every connect request.
    openhcl_vtl: DeviceVtl,
}
2494
2495impl mesh_rpc::client::Dial for DiagDialer {
2496    type Stream = PolledSocket<unix_socket::UnixStream>;
2497
2498    async fn dial(&mut self) -> io::Result<Self::Stream> {
2499        let service_id = new_hvsock_service_id(1);
2500        let socket = self
2501            .vm_rpc
2502            .call_failable(
2503                VmRpc::ConnectHvsock,
2504                (
2505                    CancelContext::new().with_timeout(Duration::from_secs(2)),
2506                    service_id,
2507                    self.openhcl_vtl,
2508                ),
2509            )
2510            .await
2511            .map_err(io::Error::other)?;
2512
2513        PolledSocket::new(&self.driver, socket)
2514    }
2515}
2516
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
///
/// The task that services requests is not spawned at construction time; it is
/// spawned on the first inspect request and reused for all later ones.
pub(crate) struct DiagInspector(DiagInspectorInner);
2524
/// Internal state of a [`DiagInspector`].
enum DiagInspectorInner {
    /// No task has been spawned yet. Holds the driver and the diagnostics
    /// client that will be handed to the task when it is spawned.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// The `diag-inspect` task has been spawned.
    Started {
        /// Channel used to forward deferred inspect requests to the task.
        send: mesh::Sender<inspect::Deferred>,
        /// Handle to the spawned task. Never read; presumably held so the task
        /// stays alive as long as the inspector does (confirm against `Task`
        /// drop semantics).
        _task: Task<()>,
    },
    /// Placeholder left in place while `DiagInspector::start` moves the
    /// contents out of `NotStarted`. Reaching it in any other context hits
    /// `unreachable!()`.
    Invalid,
}
2533
2534impl DiagInspector {
2535    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
2536        Self(DiagInspectorInner::NotStarted(driver, diag_client))
2537    }
2538
2539    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
2540        loop {
2541            match self.0 {
2542                DiagInspectorInner::NotStarted { .. } => {
2543                    let DiagInspectorInner::NotStarted(driver, client) =
2544                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
2545                    else {
2546                        unreachable!()
2547                    };
2548                    let (send, recv) = mesh::channel();
2549                    let task = driver.clone().spawn("diag-inspect", async move {
2550                        Self::run(&client, recv).await
2551                    });
2552
2553                    self.0 = DiagInspectorInner::Started { send, _task: task };
2554                }
2555                DiagInspectorInner::Started { ref send, .. } => break send,
2556                DiagInspectorInner::Invalid => unreachable!(),
2557            }
2558        }
2559    }
2560
2561    async fn run(
2562        diag_client: &diag_client::DiagClient,
2563        mut recv: mesh::Receiver<inspect::Deferred>,
2564    ) {
2565        while let Some(deferred) = recv.next().await {
2566            let info = deferred.external_request();
2567            let result = match info.request_type {
2568                inspect::ExternalRequestType::Inspect { depth } => {
2569                    if depth == 0 {
2570                        Ok(inspect::Node::Unevaluated)
2571                    } else {
2572                        // TODO: Support taking timeouts from the command line
2573                        diag_client
2574                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
2575                            .await
2576                    }
2577                }
2578                inspect::ExternalRequestType::Update { value } => {
2579                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
2580                }
2581            };
2582            deferred.complete_external(
2583                result.unwrap_or_else(|err| {
2584                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
2585                }),
2586                inspect::SensitivityLevel::Unspecified,
2587            )
2588        }
2589    }
2590}
2591
2592impl InspectMut for DiagInspector {
2593    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
2594        self.start().send(req.defer());
2595    }
2596}