// openvmm_entry/lib.rs
1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod repl;
15mod serial_io;
16mod storage_builder;
17mod tracing_init;
18mod ttrpc;
19mod vm_controller;
20
21// `pub` so that the missing_docs warning fires for options without
22// documentation.
23pub use cli_args::Options;
24use console_relay::ConsoleLaunchOptions;
25
26use crate::cli_args::SecureBootTemplateCli;
27use anyhow::Context;
28use anyhow::bail;
29use chipset_resources::battery::HostBatteryUpdate;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::StreamExt;
53use futures::executor::block_on;
54use futures::io::AllowStdIo;
55use gdma_resources::GdmaDeviceHandle;
56use gdma_resources::VportDefinition;
57use guid::Guid;
58use input_core::MultiplexedInputHandle;
59use inspect::InspectMut;
60use io::Read;
61use memory_range::MemoryRange;
62use mesh::CancelContext;
63use mesh::CellUpdater;
64use mesh::rpc::RpcSend;
65use meshworker::VmmMesh;
66use net_backend_resources::mac_address::MacAddress;
67use nvme_resources::NvmeControllerRequest;
68use openvmm_defs::config::Config;
69use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64;
70use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2;
71use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86;
72use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2;
73use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
74use openvmm_defs::config::DeviceVtl;
75use openvmm_defs::config::EfiDiagnosticsLogLevelType;
76use openvmm_defs::config::HypervisorConfig;
77use openvmm_defs::config::LateMapVtl0MemoryPolicy;
78use openvmm_defs::config::LoadMode;
79use openvmm_defs::config::MemoryConfig;
80use openvmm_defs::config::PcieDeviceConfig;
81use openvmm_defs::config::PcieRootComplexConfig;
82use openvmm_defs::config::PcieRootPortConfig;
83use openvmm_defs::config::PcieSwitchConfig;
84use openvmm_defs::config::ProcessorTopologyConfig;
85use openvmm_defs::config::SerialInformation;
86use openvmm_defs::config::VirtioBus;
87use openvmm_defs::config::VmbusConfig;
88use openvmm_defs::config::VpciDeviceConfig;
89use openvmm_defs::config::Vtl2BaseAddressType;
90use openvmm_defs::config::Vtl2Config;
91use openvmm_defs::rpc::VmRpc;
92use openvmm_defs::worker::VM_WORKER;
93use openvmm_defs::worker::VmWorkerParameters;
94use openvmm_helpers::disk::OpenDiskOptions;
95use openvmm_helpers::disk::create_disk_type;
96use openvmm_helpers::disk::open_disk_type;
97use pal_async::DefaultDriver;
98use pal_async::DefaultPool;
99use pal_async::socket::PolledSocket;
100use pal_async::task::Spawn;
101use pal_async::task::Task;
102use serial_16550_resources::ComPort;
103use serial_core::resources::DisconnectedSerialBackendHandle;
104use sparse_mmap::alloc_shared_memory;
105use std::cell::RefCell;
106use std::collections::BTreeMap;
107use std::fmt::Write as _;
108use std::future::pending;
109use std::io;
110#[cfg(unix)]
111use std::io::IsTerminal;
112use std::io::Write;
113use std::net::TcpListener;
114use std::path::Path;
115use std::path::PathBuf;
116use std::sync::Arc;
117use std::thread;
118use std::time::Duration;
119use storvsp_resources::ScsiControllerRequest;
120use tpm_resources::TpmDeviceHandle;
121use tpm_resources::TpmRegisterLayout;
122use uidevices_resources::SynthKeyboardHandle;
123use uidevices_resources::SynthMouseHandle;
124use uidevices_resources::SynthVideoHandle;
125use video_core::SharedFramebufferHandle;
126use virtio_resources::VirtioPciDeviceHandle;
127use vm_manifest_builder::BaseChipsetType;
128use vm_manifest_builder::MachineArch;
129use vm_manifest_builder::VmChipsetResult;
130use vm_manifest_builder::VmManifestBuilder;
131use vm_resource::IntoResource;
132use vm_resource::Resource;
133use vm_resource::kind::DiskHandleKind;
134use vm_resource::kind::DiskLayerHandleKind;
135use vm_resource::kind::NetEndpointHandleKind;
136use vm_resource::kind::VirtioDeviceHandle;
137use vm_resource::kind::VmbusDeviceHandleKind;
138use vmbus_serial_resources::VmbusSerialDeviceHandle;
139use vmbus_serial_resources::VmbusSerialPort;
140use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
141use vmgs_resources::GuestStateEncryptionPolicy;
142use vmgs_resources::VmgsDisk;
143use vmgs_resources::VmgsFileHandle;
144use vmgs_resources::VmgsResource;
145use vmotherboard::ChipsetDeviceHandle;
146use vnc_worker_defs::VncParameters;
147
148pub fn openvmm_main() {
149    // Save the current state of the terminal so we can restore it back to
150    // normal before exiting.
151    #[cfg(unix)]
152    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
153
154    let mut pidfile_path = None;
155    let exit_code = match do_main(&mut pidfile_path) {
156        Ok(_) => 0,
157        Err(err) => {
158            eprintln!("fatal error: {:?}", err);
159            1
160        }
161    };
162
163    // Restore the terminal to its initial state.
164    #[cfg(unix)]
165    if let Some(orig_termios) = orig_termios {
166        term::set_termios(orig_termios);
167    }
168
169    // Clean up the pidfile before terminating, since pal::process::terminate
170    // skips destructors.
171    if let Some(ref path) = pidfile_path {
172        let _ = std::fs::remove_file(path);
173    }
174
175    // Terminate the process immediately without graceful shutdown of DLLs or
176    // C++ destructors or anything like that. This is all unnecessary and saves
177    // time on Windows.
178    //
179    // Do flush stdout, though, since there may be buffered data.
180    let _ = io::stdout().flush();
181    pal::process::terminate(exit_code);
182}
183
/// Host-process resources produced while building the VM configuration and
/// retained for use after the VM is launched.
#[derive(Default)]
struct VmResources {
    /// Write half of the serial device that claimed the interactive console;
    /// console input is forwarded into the guest through this.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Access to the shared framebuffer; populated when graphics, VTL2
    /// graphics, VNC, or PCAT is enabled.
    framebuffer_access: Option<FramebufferAccess>,
    /// Request channel for the shutdown integration component, if configured.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Request channel for the KVP integration component, if configured.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Request channel for the SCSI controller, if configured.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Request channel for the VTL2 NVMe controller, if configured.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Request channel for the guest emulation device, if configured.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// VTL2 settings to provide to the guest, if any.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Kernel vmswitch ports created for kernel-mode vmNICs; held here to
    /// keep them alive for the lifetime of the VM.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
197
/// Tracks which serial device has claimed the interactive console, along
/// with the write half used to forward console input to it.
struct ConsoleState<'a> {
    /// Name of the guest-visible device bound to the console
    /// (e.g. "ttyS0", "ttyAMA0", "hvc0").
    device: &'a str,
    /// Write half of the serial pair; console input is written here.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
202
203/// Build a flat list of switches with their parent port assignments.
204///
205/// This function converts hierarchical CLI switch definitions into a flat list
206/// where each switch specifies its parent port directly.
207fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
208    all_switches
209        .iter()
210        .map(|switch_cli| PcieSwitchConfig {
211            name: switch_cli.name.clone(),
212            num_downstream_ports: switch_cli.num_downstream_ports,
213            parent_port: switch_cli.port_name.clone(),
214            hotplug: switch_cli.hotplug,
215        })
216        .collect()
217}
218
219async fn vm_config_from_command_line(
220    spawner: impl Spawn,
221    mesh: &VmmMesh,
222    opt: &Options,
223) -> anyhow::Result<(Config, VmResources)> {
224    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
225    // Ensure the serial driver stays alive with no tasks.
226    serial_driver.spawn("leak", pending::<()>()).detach();
227
228    let openhcl_vtl = if opt.vtl2 {
229        DeviceVtl::Vtl2
230    } else {
231        DeviceVtl::Vtl0
232    };
233
234    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
235    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
236        Ok(match cli_cfg {
237            SerialConfigCli::Console => {
238                if let Some(console_state) = console_state.borrow().as_ref() {
239                    bail!("console already set by {}", console_state.device);
240                }
241                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
242                let (serial_read, serial_write) = AsyncReadExt::split(serial);
243                *console_state.borrow_mut() = Some(ConsoleState {
244                    device,
245                    input: Box::new(serial_write),
246                });
247                thread::Builder::new()
248                    .name(name.to_owned())
249                    .spawn(move || {
250                        let _ = block_on(futures::io::copy(
251                            serial_read,
252                            &mut AllowStdIo::new(term::raw_stdout()),
253                        ));
254                    })
255                    .unwrap();
256                Some(config)
257            }
258            SerialConfigCli::Stderr => {
259                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
260                thread::Builder::new()
261                    .name(name.to_owned())
262                    .spawn(move || {
263                        let _ = block_on(futures::io::copy(
264                            serial,
265                            &mut AllowStdIo::new(term::raw_stderr()),
266                        ));
267                    })
268                    .unwrap();
269                Some(config)
270            }
271            SerialConfigCli::File(path) => {
272                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
273                let file = fs_err::File::create(path).context("failed to create file")?;
274
275                thread::Builder::new()
276                    .name(name.to_owned())
277                    .spawn(move || {
278                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
279                    })
280                    .unwrap();
281                Some(config)
282            }
283            SerialConfigCli::None => None,
284            SerialConfigCli::Pipe(path) => {
285                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
286            }
287            SerialConfigCli::Tcp(addr) => {
288                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
289            }
290            SerialConfigCli::NewConsole(app, window_title) => {
291                let path = console_relay::random_console_path();
292                let config =
293                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
294                let window_title =
295                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
296
297                console_relay::launch_console(
298                    app.or_else(openvmm_terminal_app).as_deref(),
299                    &path,
300                    ConsoleLaunchOptions {
301                        window_title: Some(window_title),
302                    },
303                )
304                .context("failed to launch console")?;
305
306                Some(config)
307            }
308        })
309    };
310
311    let mut vmbus_devices = Vec::new();
312
313    let serial0_cfg = setup_serial(
314        "com1",
315        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
316        if cfg!(guest_arch = "x86_64") {
317            "ttyS0"
318        } else {
319            "ttyAMA0"
320        },
321    )?;
322    let serial1_cfg = setup_serial(
323        "com2",
324        opt.com2.clone().unwrap_or(SerialConfigCli::None),
325        if cfg!(guest_arch = "x86_64") {
326            "ttyS1"
327        } else {
328            "ttyAMA1"
329        },
330    )?;
331    let serial2_cfg = setup_serial(
332        "com3",
333        opt.com3.clone().unwrap_or(SerialConfigCli::None),
334        if cfg!(guest_arch = "x86_64") {
335            "ttyS2"
336        } else {
337            "ttyAMA2"
338        },
339    )?;
340    let serial3_cfg = setup_serial(
341        "com4",
342        opt.com4.clone().unwrap_or(SerialConfigCli::None),
343        if cfg!(guest_arch = "x86_64") {
344            "ttyS3"
345        } else {
346            "ttyAMA3"
347        },
348    )?;
349    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
350        "vmbus_com1",
351        opt.vmbus_com1_serial
352            .clone()
353            .unwrap_or(SerialConfigCli::None),
354        "vmbus_com1",
355    )? {
356        vmbus_devices.push((
357            openhcl_vtl,
358            VmbusSerialDeviceHandle {
359                port: VmbusSerialPort::Com1,
360                backend: vmbus_com1_cfg,
361            }
362            .into_resource(),
363        ));
364        true
365    } else {
366        false
367    };
368    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
369        "vmbus_com2",
370        opt.vmbus_com2_serial
371            .clone()
372            .unwrap_or(SerialConfigCli::None),
373        "vmbus_com2",
374    )? {
375        vmbus_devices.push((
376            openhcl_vtl,
377            VmbusSerialDeviceHandle {
378                port: VmbusSerialPort::Com2,
379                backend: vmbus_com2_cfg,
380            }
381            .into_resource(),
382        ));
383        true
384    } else {
385        false
386    };
387    let debugcon_cfg = setup_serial(
388        "debugcon",
389        opt.debugcon
390            .clone()
391            .map(|cfg| cfg.serial)
392            .unwrap_or(SerialConfigCli::None),
393        "debugcon",
394    )?;
395
396    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
397        setup_serial("virtio-console", serial_cfg, "hvc0")?
398    } else {
399        None
400    };
401
402    let mut resources = VmResources::default();
403    let mut console_str = "";
404    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
405        resources.console_in = Some(input);
406        console_str = device;
407    }
408
409    if opt.shared_memory {
410        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
411    }
412
413    const MAX_PROCESSOR_COUNT: u32 = 1024;
414
415    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
416        bail!("invalid proc count: {}", opt.processors);
417    }
418
419    // Total SCSI channel count should not exceed the processor count
420    // (at most, one channel per VP).
421    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
422        bail!(
423            "invalid SCSI sub-channel count: requested {}, max {}",
424            opt.scsi_sub_channels,
425            MAX_PROCESSOR_COUNT - 1
426        );
427    }
428
429    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
430
431    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
432    for &cli_args::DiskCli {
433        vtl,
434        ref kind,
435        read_only,
436        is_dvd,
437        underhill,
438        ref pcie_port,
439    } in &opt.disk
440    {
441        if pcie_port.is_some() {
442            anyhow::bail!("`--disk` is incompatible with PCIe");
443        }
444
445        storage
446            .add(
447                vtl,
448                underhill,
449                storage_builder::DiskLocation::Scsi(None),
450                kind,
451                is_dvd,
452                read_only,
453            )
454            .await?;
455    }
456
457    for &cli_args::IdeDiskCli {
458        ref kind,
459        read_only,
460        channel,
461        device,
462        is_dvd,
463    } in &opt.ide
464    {
465        storage
466            .add(
467                DeviceVtl::Vtl0,
468                None,
469                storage_builder::DiskLocation::Ide(channel, device),
470                kind,
471                is_dvd,
472                read_only,
473            )
474            .await?;
475    }
476
477    for &cli_args::DiskCli {
478        vtl,
479        ref kind,
480        read_only,
481        is_dvd,
482        underhill,
483        ref pcie_port,
484    } in &opt.nvme
485    {
486        storage
487            .add(
488                vtl,
489                underhill,
490                storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
491                kind,
492                is_dvd,
493                read_only,
494            )
495            .await?;
496    }
497
498    for &cli_args::DiskCli {
499        vtl,
500        ref kind,
501        read_only,
502        is_dvd,
503        ref underhill,
504        ref pcie_port,
505    } in &opt.virtio_blk
506    {
507        if underhill.is_some() {
508            anyhow::bail!("underhill not supported with virtio-blk");
509        }
510        storage
511            .add(
512                vtl,
513                None,
514                storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
515                kind,
516                is_dvd,
517                read_only,
518            )
519            .await?;
520    }
521
522    let mut floppy_disks = Vec::new();
523    for disk in &opt.floppy {
524        let &cli_args::FloppyDiskCli {
525            ref kind,
526            read_only,
527        } = disk;
528        floppy_disks.push(FloppyDiskConfig {
529            disk_type: disk_open(kind, read_only).await?,
530            read_only,
531        });
532    }
533
534    let mut vpci_mana_nics = [(); 3].map(|()| None);
535    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
536    let mut underhill_nics = Vec::new();
537    let mut vpci_devices = Vec::new();
538
539    let mut nic_index = 0;
540    for cli_cfg in &opt.net {
541        if cli_cfg.pcie_port.is_some() {
542            anyhow::bail!("`--net` does not support PCIe");
543        }
544        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
545        if cli_cfg.underhill {
546            if !opt.no_alias_map {
547                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
548            }
549            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
550                let vpci_instance_id = Guid::new_random();
551                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
552                    instance_id: vpci_instance_id.to_string(),
553                    subordinate_instance_id: None,
554                    max_sub_channels: None,
555                });
556                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
557            });
558            mana.1.vports.push(VportDefinition {
559                mac_address: vport.mac_address,
560                endpoint: vport.endpoint,
561            });
562        } else {
563            vmbus_devices.push(vport.into_netvsp_handle());
564        }
565    }
566
567    if opt.nic {
568        let nic_config = parse_endpoint(
569            &NicConfigCli {
570                vtl: DeviceVtl::Vtl0,
571                endpoint: EndpointConfigCli::Consomme { cidr: None },
572                max_queues: None,
573                underhill: false,
574                pcie_port: None,
575            },
576            &mut nic_index,
577            &mut resources,
578        )?;
579        vmbus_devices.push(nic_config.into_netvsp_handle());
580    }
581
582    // Build initial PCIe devices list from CLI options. Storage devices
583    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
584    let mut pcie_devices = Vec::new();
585    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
586        tracing::info!(
587            port_name = %cli_cfg.port_name,
588            socket_addr = ?cli_cfg.socket_addr,
589            "instantiating PCIe remote device"
590        );
591
592        // Generate a deterministic instance ID based on index
593        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
594            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
595        let instance_id = Guid {
596            data1: index as u32,
597            ..PCIE_REMOTE_BASE_INSTANCE_ID
598        };
599
600        pcie_devices.push(PcieDeviceConfig {
601            port_name: cli_cfg.port_name.clone(),
602            resource: pcie_remote_resources::PcieRemoteHandle {
603                instance_id,
604                socket_addr: cli_cfg.socket_addr.clone(),
605                hu: cli_cfg.hu,
606                controller: cli_cfg.controller,
607            }
608            .into_resource(),
609        });
610    }
611
612    #[cfg(windows)]
613    let mut kernel_vmnics = Vec::new();
614    #[cfg(windows)]
615    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
616        // Pick a random MAC address.
617        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
618        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
619
620        // Pick a fixed instance ID based on the index.
621        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
622        let instance_id = Guid {
623            data1: index as u32,
624            ..BASE_INSTANCE_ID
625        };
626
627        let switch_id = if switch_id == "default" {
628            DEFAULT_SWITCH
629        } else {
630            switch_id
631        };
632        let (port_id, port) = new_switch_port(switch_id)?;
633        resources.switch_ports.push(port);
634
635        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
636            instance_id,
637            mac_address: mac_address.into(),
638            switch_port_id: port_id,
639        });
640    }
641
642    for vport in &opt.mana {
643        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
644        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
645            (vtl, None) => {
646                &mut vpci_mana_nics[vtl]
647                    .get_or_insert_with(|| {
648                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
649                    })
650                    .1
651                    .vports
652            }
653            (0, Some(pcie_port)) => {
654                &mut pcie_mana_nics
655                    .entry(pcie_port)
656                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
657                    .vports
658            }
659            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
660        };
661        vport_array.push(VportDefinition {
662            mac_address: vport.mac_address,
663            endpoint: vport.endpoint,
664        });
665    }
666
667    vpci_devices.extend(
668        vpci_mana_nics
669            .into_iter()
670            .enumerate()
671            .filter_map(|(vtl, nic)| {
672                nic.map(|(instance_id, handle)| VpciDeviceConfig {
673                    vtl: match vtl {
674                        0 => DeviceVtl::Vtl0,
675                        1 => DeviceVtl::Vtl1,
676                        2 => DeviceVtl::Vtl2,
677                        _ => unreachable!(),
678                    },
679                    instance_id,
680                    resource: handle.into_resource(),
681                })
682            }),
683    );
684
685    pcie_devices.extend(
686        pcie_mana_nics
687            .into_iter()
688            .map(|(pcie_port, handle)| PcieDeviceConfig {
689                port_name: pcie_port,
690                resource: handle.into_resource(),
691            }),
692    );
693
694    // If VTL2 is enabled, and we are not in VTL2 self allocate mode, provide an
695    // mmio gap for VTL2.
696    let use_vtl2_gap = opt.vtl2
697        && !matches!(
698            opt.igvm_vtl2_relocation_type,
699            Vtl2BaseAddressType::Vtl2Allocate { .. },
700        );
701
702    #[cfg(guest_arch = "aarch64")]
703    let arch = MachineArch::Aarch64;
704    #[cfg(guest_arch = "x86_64")]
705    let arch = MachineArch::X86_64;
706
707    let mmio_gaps: Vec<MemoryRange> = match (use_vtl2_gap, arch) {
708        (true, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86_WITH_VTL2.into(),
709        (true, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2.into(),
710        (false, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86.into(),
711        (false, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64.into(),
712    };
713
714    let mut pci_ecam_gaps = Vec::new();
715    let mut pci_mmio_gaps = Vec::new();
716
717    let mut low_mmio_start = mmio_gaps.first().context("expected mmio gap")?.start();
718    let mut high_mmio_end = mmio_gaps.last().context("expected second mmio gap")?.end();
719
720    let mut pcie_root_complexes = Vec::new();
721    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
722        let ports = opt
723            .pcie_root_port
724            .iter()
725            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
726            .map(|port_cli| PcieRootPortConfig {
727                name: port_cli.name.clone(),
728                hotplug: port_cli.hotplug,
729            })
730            .collect();
731
732        const ONE_MB: u64 = 1024 * 1024;
733        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
734        let high_mmio_size = rc_cli
735            .high_mmio
736            .checked_next_multiple_of(ONE_MB)
737            .context("high mmio rounding error")?;
738        let ecam_size = (((rc_cli.end_bus - rc_cli.start_bus) as u64) + 1) * 256 * 4096;
739
740        let low_pci_mmio_start = low_mmio_start
741            .checked_sub(low_mmio_size)
742            .context("pci low mmio underflow")?;
743        let ecam_start = low_pci_mmio_start
744            .checked_sub(ecam_size)
745            .context("pci ecam underflow")?;
746        low_mmio_start = ecam_start;
747        high_mmio_end = high_mmio_end
748            .checked_add(high_mmio_size)
749            .context("pci high mmio overflow")?;
750
751        let ecam_range = MemoryRange::new(ecam_start..ecam_start + ecam_size);
752        let low_mmio = MemoryRange::new(low_pci_mmio_start..low_pci_mmio_start + low_mmio_size);
753        let high_mmio = MemoryRange::new(high_mmio_end - high_mmio_size..high_mmio_end);
754
755        pci_ecam_gaps.push(ecam_range);
756        pci_mmio_gaps.push(low_mmio);
757        pci_mmio_gaps.push(high_mmio);
758
759        pcie_root_complexes.push(PcieRootComplexConfig {
760            index: i as u32,
761            name: rc_cli.name.clone(),
762            segment: rc_cli.segment,
763            start_bus: rc_cli.start_bus,
764            end_bus: rc_cli.end_bus,
765            ecam_range,
766            low_mmio,
767            high_mmio,
768            ports,
769        });
770    }
771
772    pci_ecam_gaps.sort();
773    pci_mmio_gaps.sort();
774
775    let pcie_switches = build_switch_list(&opt.pcie_switch);
776
777    #[cfg(target_os = "linux")]
778    let vfio_pcie_devices: Vec<PcieDeviceConfig> = opt
779        .vfio
780        .iter()
781        .map(|cli_cfg| {
782            use vm_resource::IntoResource;
783
784            let sysfs_path = Path::new("/sys/bus/pci/devices").join(&cli_cfg.pci_id);
785            let iommu_group_link = std::fs::read_link(sysfs_path.join("iommu_group"))
786                .with_context(|| format!("failed to read IOMMU group for {}", cli_cfg.pci_id))?;
787            let group_id: u64 = iommu_group_link
788                .file_name()
789                .and_then(|s| s.to_str())
790                .context("invalid iommu_group symlink")?
791                .parse()
792                .context("failed to parse IOMMU group ID")?;
793            let group = std::fs::OpenOptions::new()
794                .read(true)
795                .write(true)
796                .open(format!("/dev/vfio/{group_id}"))
797                .with_context(|| format!("failed to open /dev/vfio/{group_id}"))?;
798
799            Ok(PcieDeviceConfig {
800                port_name: cli_cfg.port_name.clone(),
801                resource: vfio_assigned_device_resources::VfioDeviceHandle {
802                    pci_id: cli_cfg.pci_id.clone(),
803                    group,
804                }
805                .into_resource(),
806            })
807        })
808        .collect::<anyhow::Result<Vec<_>>>()?;
809
810    #[cfg(windows)]
811    let vpci_resources: Vec<_> = opt
812        .device
813        .iter()
814        .map(|path| -> anyhow::Result<_> {
815            Ok(virt_whp::device::DeviceHandle(
816                whp::VpciResource::new(
817                    None,
818                    Default::default(),
819                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
820                )
821                .with_context(|| format!("opening PCI device {}", path))?,
822            ))
823        })
824        .collect::<Result<_, _>>()?;
825
826    // Create a vmbusproxy handle if needed by any devices.
827    #[cfg(windows)]
828    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
829        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
830    } else {
831        None
832    };
833
834    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
835        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
836        let (fb, fba) =
837            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
838        resources.framebuffer_access = Some(fba);
839        Some(fb)
840    } else {
841        None
842    };
843
844    let load_mode;
845    let with_hv;
846
847    let any_serial_configured = serial0_cfg.is_some()
848        || serial1_cfg.is_some()
849        || serial2_cfg.is_some()
850        || serial3_cfg.is_some();
851
852    let has_com3 = serial2_cfg.is_some();
853
854    let mut chipset = VmManifestBuilder::new(
855        if opt.igvm.is_some() {
856            BaseChipsetType::HclHost
857        } else if opt.pcat {
858            BaseChipsetType::HypervGen1
859        } else if opt.uefi {
860            BaseChipsetType::HypervGen2Uefi
861        } else if opt.hv {
862            BaseChipsetType::HyperVGen2LinuxDirect
863        } else {
864            BaseChipsetType::UnenlightenedLinuxDirect
865        },
866        arch,
867    );
868
869    if framebuffer.is_some() {
870        chipset = chipset.with_framebuffer();
871    }
872    if opt.guest_watchdog {
873        chipset = chipset.with_guest_watchdog();
874    }
875    if any_serial_configured {
876        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
877    }
878    if opt.battery {
879        let (tx, rx) = mesh::channel();
880        tx.send(HostBatteryUpdate::default_present());
881        chipset = chipset.with_battery(rx);
882    }
883    if let Some(cfg) = &opt.debugcon {
884        chipset = chipset.with_debugcon(
885            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
886            cfg.port,
887        );
888    }
889
890    // TODO: load from VMGS file if it exists
891    let bios_guid = Guid::new_random();
892
893    let VmChipsetResult {
894        chipset,
895        mut chipset_devices,
896        pci_chipset_devices,
897        capabilities,
898    } = chipset
899        .build()
900        .context("failed to build chipset configuration")?;
901
902    if opt.restore_snapshot.is_some() {
903        // Snapshot restore: skip firmware loading entirely. Device state and
904        // memory come from the snapshot directory.
905        load_mode = LoadMode::None;
906        with_hv = true;
907    } else if let Some(path) = &opt.igvm {
908        let file = fs_err::File::open(path)
909            .context("failed to open igvm file")?
910            .into();
911        let cmdline = opt.cmdline.join(" ");
912        with_hv = true;
913
914        load_mode = LoadMode::Igvm {
915            file,
916            cmdline,
917            vtl2_base_address: opt.igvm_vtl2_relocation_type,
918            com_serial: has_com3.then(|| SerialInformation {
919                io_port: ComPort::Com3.io_port(),
920                irq: ComPort::Com3.irq().into(),
921            }),
922        };
923    } else if opt.pcat {
924        // Emit a nice error early instead of complaining about missing firmware.
925        if arch != MachineArch::X86_64 {
926            anyhow::bail!("pcat not supported on this architecture");
927        }
928        with_hv = true;
929
930        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
931        load_mode = LoadMode::Pcat {
932            firmware,
933            boot_order: opt
934                .pcat_boot_order
935                .map(|x| x.0)
936                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
937        };
938    } else if opt.uefi {
939        use openvmm_defs::config::UefiConsoleMode;
940
941        with_hv = true;
942
943        let firmware = fs_err::File::open(
944            (opt.uefi_firmware.0)
945                .as_ref()
946                .context("must provide uefi firmware when booting with uefi")?,
947        )
948        .context("failed to open uefi firmware")?;
949
950        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
951        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
952        load_mode = LoadMode::Uefi {
953            firmware: firmware.into(),
954            enable_debugging: opt.uefi_debug,
955            enable_memory_protections: opt.uefi_enable_memory_protections,
956            disable_frontpage: opt.disable_frontpage,
957            enable_tpm: opt.tpm,
958            enable_battery: opt.battery,
959            enable_serial: any_serial_configured,
960            enable_vpci_boot: false,
961            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
962                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
963                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
964                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
965                UefiConsoleModeCli::None => UefiConsoleMode::None,
966            }),
967            default_boot_always_attempt: opt.default_boot_always_attempt,
968            bios_guid,
969        };
970    } else {
971        // Linux Direct
972        let mut cmdline = "panic=-1 debug".to_string();
973
974        with_hv = opt.hv;
975        if with_hv && opt.pcie_root_complex.is_empty() {
976            cmdline += " pci=off";
977        }
978
979        if !console_str.is_empty() {
980            let _ = write!(&mut cmdline, " console={}", console_str);
981        }
982
983        if opt.gfx {
984            cmdline += " console=tty";
985        }
986        for extra in &opt.cmdline {
987            let _ = write!(&mut cmdline, " {}", extra);
988        }
989
990        let kernel = fs_err::File::open(
991            (opt.kernel.0)
992                .as_ref()
993                .context("must provide kernel when booting with linux direct")?,
994        )
995        .context("failed to open kernel")?;
996        let initrd = (opt.initrd.0)
997            .as_ref()
998            .map(fs_err::File::open)
999            .transpose()
1000            .context("failed to open initrd")?;
1001
1002        let custom_dsdt = match &opt.custom_dsdt {
1003            Some(path) => {
1004                let mut v = Vec::new();
1005                fs_err::File::open(path)
1006                    .context("failed to open custom dsdt")?
1007                    .read_to_end(&mut v)
1008                    .context("failed to read custom dsdt")?;
1009                Some(v)
1010            }
1011            None => None,
1012        };
1013
1014        load_mode = LoadMode::Linux {
1015            kernel: kernel.into(),
1016            initrd: initrd.map(Into::into),
1017            cmdline,
1018            custom_dsdt,
1019            enable_serial: any_serial_configured,
1020            boot_mode: if opt.device_tree {
1021                openvmm_defs::config::LinuxDirectBootMode::DeviceTree
1022            } else {
1023                openvmm_defs::config::LinuxDirectBootMode::Acpi
1024            },
1025        };
1026    }
1027
1028    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1029        let disk = VmgsDisk {
1030            disk: disk_open(kind, false)
1031                .await
1032                .context("failed to open vmgs disk")?,
1033            encryption_policy: if opt.test_gsp_by_id {
1034                GuestStateEncryptionPolicy::GspById(true)
1035            } else {
1036                GuestStateEncryptionPolicy::None(true)
1037            },
1038        };
1039        match provision {
1040            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1041            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1042            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1043        }
1044    } else {
1045        VmgsResource::Ephemeral
1046    });
1047
1048    if with_get && with_hv {
1049        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1050            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1051            fixed: Some(Default::default()),
1052            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1053                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1054                nic_devices: underhill_nics,
1055            }),
1056            namespace_settings: Vec::default(),
1057        };
1058
1059        // Cache the VTL2 settings for later modification via the interactive console.
1060        resources.vtl2_settings = Some(vtl2_settings.clone());
1061
1062        let (send, guest_request_recv) = mesh::channel();
1063        resources.ged_rpc = Some(send);
1064
1065        let vmgs = vmgs.take().unwrap();
1066
1067        vmbus_devices.extend([
1068            (
1069                openhcl_vtl,
1070                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1071            ),
1072            (
1073                openhcl_vtl,
1074                get_resources::ged::GuestEmulationDeviceHandle {
1075                    firmware: if opt.pcat {
1076                        get_resources::ged::GuestFirmwareConfig::Pcat {
1077                            boot_order: opt
1078                                .pcat_boot_order
1079                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1080                                .map(|x| match x {
1081                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1082                                        get_resources::ged::PcatBootDevice::Floppy
1083                                    }
1084                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1085                                        get_resources::ged::PcatBootDevice::HardDrive
1086                                    }
1087                                    openvmm_defs::config::PcatBootDevice::Optical => {
1088                                        get_resources::ged::PcatBootDevice::Optical
1089                                    }
1090                                    openvmm_defs::config::PcatBootDevice::Network => {
1091                                        get_resources::ged::PcatBootDevice::Network
1092                                    }
1093                                }),
1094                        }
1095                    } else {
1096                        use get_resources::ged::UefiConsoleMode;
1097
1098                        get_resources::ged::GuestFirmwareConfig::Uefi {
1099                            enable_vpci_boot: storage.has_vtl0_nvme(),
1100                            firmware_debug: opt.uefi_debug,
1101                            disable_frontpage: opt.disable_frontpage,
1102                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1103                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1104                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1105                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1106                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1107                            },
1108                            default_boot_always_attempt: opt.default_boot_always_attempt,
1109                        }
1110                    },
1111                    com1: with_vmbus_com1_serial,
1112                    com2: with_vmbus_com2_serial,
1113                    serial_tx_only: opt.serial_tx_only,
1114                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1115                    vmbus_redirection: opt.vmbus_redirect,
1116                    vmgs,
1117                    framebuffer: opt
1118                        .vtl2_gfx
1119                        .then(|| SharedFramebufferHandle.into_resource()),
1120                    guest_request_recv,
1121                    enable_tpm: opt.tpm,
1122                    firmware_event_send: None,
1123                    secure_boot_enabled: opt.secure_boot,
1124                    secure_boot_template: match opt.secure_boot_template {
1125                        Some(SecureBootTemplateCli::Windows) => {
1126                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1127                        },
1128                        Some(SecureBootTemplateCli::UefiCa) => {
1129                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1130                        }
1131                        None => {
1132                            get_resources::ged::GuestSecureBootTemplateType::None
1133                        },
1134                    },
1135                    enable_battery: opt.battery,
1136                    no_persistent_secrets: true,
1137                    igvm_attest_test_config: None,
1138                    test_gsp_by_id: opt.test_gsp_by_id,
1139                    efi_diagnostics_log_level: {
1140                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1141                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1142                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1143                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1144                        }
1145                    },
1146                    hv_sint_enabled: false,
1147                }
1148                .into_resource(),
1149            ),
1150        ]);
1151    }
1152
1153    if opt.tpm && !opt.vtl2 {
1154        let register_layout = if cfg!(guest_arch = "x86_64") {
1155            TpmRegisterLayout::IoPort
1156        } else {
1157            TpmRegisterLayout::Mmio
1158        };
1159
1160        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1161            (
1162                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1163                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1164            )
1165        } else {
1166            (
1167                EphemeralNonVolatileStoreHandle.into_resource(),
1168                EphemeralNonVolatileStoreHandle.into_resource(),
1169            )
1170        };
1171
1172        chipset_devices.push(ChipsetDeviceHandle {
1173            name: "tpm".to_string(),
1174            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1175                device: TpmDeviceHandle {
1176                    ppi_store,
1177                    nvram_store,
1178                    nvram_size: None,
1179                    refresh_tpm_seeds: false,
1180                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1181                    register_layout,
1182                    guest_secret_key: None,
1183                    logger: None,
1184                    is_confidential_vm: false,
1185                    bios_guid,
1186                }
1187                .into_resource(),
1188                worker_host: mesh.make_host("tpm", None).await?,
1189            }
1190            .into_resource(),
1191        });
1192    }
1193
1194    let custom_uefi_vars = {
1195        use firmware_uefi_custom_vars::CustomVars;
1196
1197        // load base vars from specified template, or use an empty set of base
1198        // vars if none was specified.
1199        let base_vars = match opt.secure_boot_template {
1200            Some(template) => match (arch, template) {
1201                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1202                    hyperv_secure_boot_templates::x64::microsoft_windows()
1203                }
1204                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1205                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1206                }
1207                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1208                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1209                }
1210                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1211                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1212                }
1213            },
1214            None => CustomVars::default(),
1215        };
1216
1217        // TODO: fallback to VMGS read if no command line flag was given
1218
1219        let custom_uefi_json_data = match &opt.custom_uefi_json {
1220            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1221            None => None,
1222        };
1223
1224        // obtain the final custom uefi vars by applying the delta onto the base vars
1225        match custom_uefi_json_data {
1226            Some(data) => {
1227                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1228                base_vars.apply_delta(delta)?
1229            }
1230            None => base_vars,
1231        }
1232    };
1233
1234    let vga_firmware = if opt.pcat {
1235        Some(openvmm_pcat_locator::find_svga_bios(
1236            opt.vga_firmware.as_deref(),
1237        )?)
1238    } else {
1239        None
1240    };
1241
1242    if opt.gfx {
1243        vmbus_devices.extend([
1244            (
1245                DeviceVtl::Vtl0,
1246                SynthVideoHandle {
1247                    framebuffer: SharedFramebufferHandle.into_resource(),
1248                }
1249                .into_resource(),
1250            ),
1251            (
1252                DeviceVtl::Vtl0,
1253                SynthKeyboardHandle {
1254                    source: MultiplexedInputHandle {
1255                        // Save 0 for PS/2
1256                        elevation: 1,
1257                    }
1258                    .into_resource(),
1259                }
1260                .into_resource(),
1261            ),
1262            (
1263                DeviceVtl::Vtl0,
1264                SynthMouseHandle {
1265                    source: MultiplexedInputHandle {
1266                        // Save 0 for PS/2
1267                        elevation: 1,
1268                    }
1269                    .into_resource(),
1270                }
1271                .into_resource(),
1272            ),
1273        ]);
1274    }
1275
1276    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1277        if let Some(path) = path {
1278            cleanup_socket(path.as_ref());
1279            let listener = unix_socket::UnixListener::bind(path)
1280                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1281            Ok(Some(listener))
1282        } else {
1283            Ok(None)
1284        }
1285    };
1286
1287    let vtl0_vsock_listener = vsock_listener(opt.vmbus_vsock_path.as_deref())?;
1288    let vtl2_vsock_listener = vsock_listener(opt.vmbus_vtl2_vsock_path.as_deref())?;
1289
1290    if let Some(path) = &opt.openhcl_dump_path {
1291        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1292        task.detach();
1293        vmbus_devices.push((openhcl_vtl, resource));
1294    }
1295
1296    #[cfg(guest_arch = "aarch64")]
1297    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1298        openvmm_defs::config::Aarch64TopologyConfig {
1299            // TODO: allow this to be configured from the command line
1300            gic_config: None,
1301            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1302        },
1303    );
1304    #[cfg(guest_arch = "x86_64")]
1305    let topology_arch =
1306        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1307            apic_id_offset: opt.apic_id_offset,
1308            x2apic: opt.x2apic,
1309        });
1310
1311    let with_isolation = if let Some(isolation) = &opt.isolation {
1312        // TODO: For now, isolation is only supported with VTL2.
1313        if !opt.vtl2 {
1314            anyhow::bail!("isolation is only currently supported with vtl2");
1315        }
1316
        // TODO: Alias map support is not yet implemented with isolation.
1318        if !opt.no_alias_map {
1319            anyhow::bail!("alias map not supported with isolation");
1320        }
1321
1322        match isolation {
1323            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1324        }
1325    } else {
1326        None
1327    };
1328
1329    if with_hv {
1330        let (shutdown_send, shutdown_recv) = mesh::channel();
1331        resources.shutdown_ic = Some(shutdown_send);
1332        let (kvp_send, kvp_recv) = mesh::channel();
1333        resources.kvp_ic = Some(kvp_send);
1334        vmbus_devices.extend(
1335            [
1336                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1337                    recv: shutdown_recv,
1338                }
1339                .into_resource(),
1340                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1341                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1342            ]
1343            .map(|r| (DeviceVtl::Vtl0, r)),
1344        );
1345    }
1346
1347    if let Some(hive_path) = &opt.imc {
1348        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1349        vmbus_devices.push((
1350            DeviceVtl::Vtl0,
1351            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1352        ));
1353    }
1354
1355    let mut virtio_devices = Vec::new();
1356    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1357        let bus = match bus {
1358            VirtioBusCli::Auto => {
1359                // Use VPCI when possible (currently only on Windows and macOS due
1360                // to KVM backend limitations).
1361                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1362                    None
1363                } else {
1364                    Some(VirtioBus::Pci)
1365                }
1366            }
1367            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1368            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1369            VirtioBusCli::Vpci => None,
1370        };
1371        if let Some(bus) = bus {
1372            virtio_devices.push((bus, resource));
1373        } else {
1374            vpci_devices.push(VpciDeviceConfig {
1375                vtl: DeviceVtl::Vtl0,
1376                instance_id: Guid::new_random(),
1377                resource: VirtioPciDeviceHandle(resource).into_resource(),
1378            });
1379        }
1380    };
1381
1382    for cli_cfg in &opt.virtio_net {
1383        if cli_cfg.underhill {
1384            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1385        }
1386        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1387        let resource = virtio_resources::net::VirtioNetHandle {
1388            max_queues: vport.max_queues,
1389            mac_address: vport.mac_address,
1390            endpoint: vport.endpoint,
1391        }
1392        .into_resource();
1393        if let Some(pcie_port) = &cli_cfg.pcie_port {
1394            pcie_devices.push(PcieDeviceConfig {
1395                port_name: pcie_port.clone(),
1396                resource: VirtioPciDeviceHandle(resource).into_resource(),
1397            });
1398        } else {
1399            add_virtio_device(VirtioBusCli::Auto, resource);
1400        }
1401    }
1402
1403    for args in &opt.virtio_fs {
1404        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1405            tag: args.tag.clone(),
1406            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1407                root_path: args.path.clone(),
1408                mount_options: args.options.clone(),
1409            },
1410        }
1411        .into_resource();
1412        if let Some(pcie_port) = &args.pcie_port {
1413            pcie_devices.push(PcieDeviceConfig {
1414                port_name: pcie_port.clone(),
1415                resource: VirtioPciDeviceHandle(resource).into_resource(),
1416            });
1417        } else {
1418            add_virtio_device(opt.virtio_fs_bus, resource);
1419        }
1420    }
1421
1422    for args in &opt.virtio_fs_shmem {
1423        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1424            tag: args.tag.clone(),
1425            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1426                root_path: args.path.clone(),
1427            },
1428        }
1429        .into_resource();
1430        if let Some(pcie_port) = &args.pcie_port {
1431            pcie_devices.push(PcieDeviceConfig {
1432                port_name: pcie_port.clone(),
1433                resource: VirtioPciDeviceHandle(resource).into_resource(),
1434            });
1435        } else {
1436            add_virtio_device(opt.virtio_fs_bus, resource);
1437        }
1438    }
1439
1440    for args in &opt.virtio_9p {
1441        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1442            tag: args.tag.clone(),
1443            root_path: args.path.clone(),
1444            debug: opt.virtio_9p_debug,
1445        }
1446        .into_resource();
1447        if let Some(pcie_port) = &args.pcie_port {
1448            pcie_devices.push(PcieDeviceConfig {
1449                port_name: pcie_port.clone(),
1450                resource: VirtioPciDeviceHandle(resource).into_resource(),
1451            });
1452        } else {
1453            add_virtio_device(VirtioBusCli::Auto, resource);
1454        }
1455    }
1456
1457    if let Some(pmem_args) = &opt.virtio_pmem {
1458        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1459            path: pmem_args.path.clone(),
1460        }
1461        .into_resource();
1462        if let Some(pcie_port) = &pmem_args.pcie_port {
1463            pcie_devices.push(PcieDeviceConfig {
1464                port_name: pcie_port.clone(),
1465                resource: VirtioPciDeviceHandle(resource).into_resource(),
1466            });
1467        } else {
1468            add_virtio_device(VirtioBusCli::Auto, resource);
1469        }
1470    }
1471
1472    if opt.virtio_rng {
1473        let resource: Resource<VirtioDeviceHandle> =
1474            virtio_resources::rng::VirtioRngHandle.into_resource();
1475        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1476            pcie_devices.push(PcieDeviceConfig {
1477                port_name: pcie_port.clone(),
1478                resource: VirtioPciDeviceHandle(resource).into_resource(),
1479            });
1480        } else {
1481            add_virtio_device(opt.virtio_rng_bus, resource);
1482        }
1483    }
1484
1485    if let Some(backend) = virtio_console_backend {
1486        let resource: Resource<VirtioDeviceHandle> =
1487            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1488        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1489            pcie_devices.push(PcieDeviceConfig {
1490                port_name: pcie_port.clone(),
1491                resource: VirtioPciDeviceHandle(resource).into_resource(),
1492            });
1493        } else {
1494            add_virtio_device(VirtioBusCli::Auto, resource);
1495        }
1496    }
1497
1498    // Handle --vhost-user arguments.
1499    #[cfg(target_os = "linux")]
1500    for vhost_cli in &opt.vhost_user {
1501        let stream =
1502            unix_socket::UnixStream::connect(&vhost_cli.socket_path).with_context(|| {
1503                format!(
1504                    "failed to connect to vhost-user socket: {}",
1505                    vhost_cli.socket_path
1506                )
1507            })?;
1508
1509        use crate::cli_args::VhostUserDeviceTypeCli;
1510        let resource: Resource<VirtioDeviceHandle> = match vhost_cli.device_type {
1511            VhostUserDeviceTypeCli::Fs {
1512                ref tag,
1513                num_queues,
1514                queue_size,
1515            } => virtio_resources::vhost_user::VhostUserFsHandle {
1516                socket: stream.into(),
1517                tag: tag.clone(),
1518                num_queues,
1519                queue_size,
1520            }
1521            .into_resource(),
1522            VhostUserDeviceTypeCli::Blk {
1523                num_queues,
1524                queue_size,
1525            } => virtio_resources::vhost_user::VhostUserBlkHandle {
1526                socket: stream.into(),
1527                num_queues,
1528                queue_size,
1529            }
1530            .into_resource(),
1531            VhostUserDeviceTypeCli::Other {
1532                device_id,
1533                ref queue_sizes,
1534            } => virtio_resources::vhost_user::VhostUserGenericHandle {
1535                socket: stream.into(),
1536                device_id,
1537                queue_sizes: queue_sizes.clone(),
1538            }
1539            .into_resource(),
1540        };
1541        if let Some(pcie_port) = &vhost_cli.pcie_port {
1542            pcie_devices.push(PcieDeviceConfig {
1543                port_name: pcie_port.clone(),
1544                resource: VirtioPciDeviceHandle(resource).into_resource(),
1545            });
1546        } else {
1547            add_virtio_device(VirtioBusCli::Auto, resource);
1548        }
1549    }
1550
1551    if let Some(vsock_path) = &opt.virtio_vsock_path {
1552        let listener = vsock_listener(Some(vsock_path))?.unwrap();
1553        add_virtio_device(
1554            VirtioBusCli::Auto,
1555            virtio_resources::vsock::VirtioVsockHandle {
1556                // The guest CID does not matter since the UDS relay does not use it. It just needs
1557                // to be some non-reserved value for the guest to use.
1558                guest_cid: 0x3,
1559                base_path: vsock_path.clone(),
1560                listener,
1561            }
1562            .into_resource(),
1563        );
1564    }
1565
1566    let mut cfg = Config {
1567        chipset,
1568        load_mode,
1569        floppy_disks,
1570        pcie_root_complexes,
1571        #[cfg(target_os = "linux")]
1572        pcie_devices: {
1573            let mut devs = pcie_devices;
1574            devs.extend(vfio_pcie_devices);
1575            devs
1576        },
1577        #[cfg(not(target_os = "linux"))]
1578        pcie_devices,
1579        pcie_switches,
1580        vpci_devices,
1581        ide_disks: Vec::new(),
1582        memory: MemoryConfig {
1583            mem_size: opt.memory,
1584            mmio_gaps,
1585            prefetch_memory: opt.prefetch,
1586            private_memory: opt.private_memory,
1587            transparent_hugepages: opt.thp,
1588            pci_ecam_gaps,
1589            pci_mmio_gaps,
1590        },
1591        processor_topology: ProcessorTopologyConfig {
1592            proc_count: opt.processors,
1593            vps_per_socket: opt.vps_per_socket,
1594            enable_smt: match opt.smt {
1595                cli_args::SmtConfigCli::Auto => None,
1596                cli_args::SmtConfigCli::Force => Some(true),
1597                cli_args::SmtConfigCli::Off => Some(false),
1598            },
1599            arch: Some(topology_arch),
1600        },
1601        hypervisor: HypervisorConfig {
1602            with_hv,
1603            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1604                vtl0_alias_map: !opt.no_alias_map,
1605                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1606                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1607                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1608                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1609                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1610                        Some(LateMapVtl0MemoryPolicy::InjectException)
1611                    }
1612                },
1613            }),
1614            with_isolation,
1615        },
1616        #[cfg(windows)]
1617        kernel_vmnics,
1618        input: mesh::Receiver::new(),
1619        framebuffer,
1620        vga_firmware,
1621        vtl2_gfx: opt.vtl2_gfx,
1622        virtio_devices,
1623        vmbus: with_hv.then_some(VmbusConfig {
1624            vsock_listener: vtl0_vsock_listener,
1625            vsock_path: opt.vmbus_vsock_path.clone(),
1626            vtl2_redirect: opt.vmbus_redirect,
1627            vmbus_max_version: opt.vmbus_max_version,
1628            #[cfg(windows)]
1629            vmbusproxy_handle,
1630        }),
1631        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1632            vsock_listener: vtl2_vsock_listener,
1633            vsock_path: opt.vmbus_vtl2_vsock_path.clone(),
1634            ..Default::default()
1635        }),
1636        vmbus_devices,
1637        chipset_devices,
1638        pci_chipset_devices,
1639        chipset_capabilities: capabilities,
1640        #[cfg(windows)]
1641        vpci_resources,
1642        vmgs,
1643        secure_boot_enabled: opt.secure_boot,
1644        custom_uefi_vars,
1645        firmware_event_send: None,
1646        debugger_rpc: None,
1647        generation_id_recv: None,
1648        rtc_delta_milliseconds: 0,
1649        automatic_guest_reset: !opt.halt_on_reset,
1650        efi_diagnostics_log_level: {
1651            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1652                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1653                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1654                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1655            }
1656        },
1657    };
1658
1659    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1660    Ok((cfg, resources))
1661}
1662
/// Gets the terminal to use for externally launched console windows.
///
/// Checks `OPENVMM_TERM` first, then falls back to the legacy `HVLITE_TERM`
/// environment variable. Returns `None` when neither is set.
pub(crate) fn openvmm_terminal_app() -> Option<PathBuf> {
    for var in ["OPENVMM_TERM", "HVLITE_TERM"] {
        if let Some(value) = std::env::var_os(var) {
            return Some(value.into());
        }
    }
    None
}
1669
/// Best-effort removal of `path`, but only after confirming it is a Unix
/// socket, so an unrelated file that happens to share the name is never
/// deleted.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let is_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let is_socket = match path.metadata() {
        Ok(meta) => std::os::unix::fs::FileTypeExt::is_socket(&meta.file_type()),
        Err(_) => false,
    };

    if is_socket {
        // Removal failures are deliberately ignored; this is best-effort
        // cleanup of a stale socket file.
        let _ = std::fs::remove_file(path);
    }
}
1683
/// Switch ID used for DirectIO NICs when the CLI does not specify one. This is
/// the well-known GUID of the Hyper-V "Default Switch" network.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1686
/// Creates a new port with a random port ID on the vmswitch identified by
/// `switch_id`, returning both the config-level port ID and the kernel switch
/// port handle (which must be kept alive for the port to remain usable).
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    let id = vmswitch::kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };
    // Open the network only to verify the switch exists (for a better error
    // message); the handle is dropped immediately.
    let _ = vmswitch::hcn::Network::open(&id.switch)
        .with_context(|| format!("could not find switch {}", id.switch))?;

    let port = vmswitch::kernel::SwitchPort::new(&id).context("failed to create switch port")?;

    // Re-wrap the IDs in the config-level type for the caller.
    let id = openvmm_defs::config::SwitchPortId {
        switch: id.switch,
        port: id.port,
    };
    Ok((id, port))
}
1709
/// Builds a [`NicConfig`] from one NIC description given on the command line.
///
/// `index` is a running counter used to derive a stable per-NIC instance ID;
/// it is incremented for each NIC parsed. `resources` collects host-side
/// objects that must stay alive alongside the VM (currently only Windows
/// switch ports).
fn parse_endpoint(
    cli_cfg: &NicConfigCli,
    index: &mut usize,
    resources: &mut VmResources,
) -> anyhow::Result<NicConfig> {
    // `resources` is only used in the Windows `Dio` branch below; this keeps
    // non-Windows builds free of unused-variable warnings.
    let _ = resources;
    let endpoint = match &cli_cfg.endpoint {
        EndpointConfigCli::Consomme { cidr } => {
            net_backend_resources::consomme::ConsommeHandle { cidr: cidr.clone() }.into_resource()
        }
        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
        EndpointConfigCli::Dio { id } => {
            #[cfg(windows)]
            {
                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
                // Keep the kernel port handle alive for the VM's lifetime.
                resources.switch_ports.push(port);
                net_backend_resources::dio::WindowsDirectIoHandle {
                    switch_port_id: net_backend_resources::dio::SwitchPortId {
                        switch: port_id.switch,
                        port: port_id.port,
                    },
                }
                .into_resource()
            }

            #[cfg(not(windows))]
            {
                let _ = id;
                bail!("cannot use dio on non-windows platforms")
            }
        }
        EndpointConfigCli::Tap { name } => {
            #[cfg(target_os = "linux")]
            {
                let fd = net_tap::tap::open_tap(name)
                    .with_context(|| format!("failed to open TAP device '{name}'"))?;
                net_backend_resources::tap::TapHandle { fd }.into_resource()
            }

            #[cfg(not(target_os = "linux"))]
            {
                let _ = name;
                bail!("TAP backend is only supported on Linux")
            }
        }
    };

    // Pick a random MAC address (in the Microsoft 00:15:5D range; only the
    // last three bytes are randomized).
    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
    getrandom::fill(&mut mac_address[3..]).expect("rng failure");

    // Pick a fixed instance ID based on the index.
    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
    let instance_id = Guid {
        data1: *index as u32,
        ..BASE_INSTANCE_ID
    };
    *index += 1;

    Ok(NicConfig {
        vtl: cli_cfg.vtl,
        instance_id,
        endpoint,
        mac_address: mac_address.into(),
        max_queues: cli_cfg.max_queues,
        pcie_port: cli_cfg.pcie_port.clone(),
    })
}
1778
/// Parsed configuration for a single guest NIC (see [`parse_endpoint`]).
#[derive(Debug)]
struct NicConfig {
    /// VTL the device is offered to.
    vtl: DeviceVtl,
    /// Stable instance ID derived from the NIC's parse-order index.
    instance_id: Guid,
    /// Randomly generated MAC address (00:15:5D prefix).
    mac_address: MacAddress,
    /// Backend endpoint resource for the NIC's data path.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional cap on the number of queues.
    max_queues: Option<u16>,
    /// Optional PCIe port identifier from the CLI; not consumed by
    /// [`Self::into_netvsp_handle`].
    pcie_port: Option<String>,
}
1788
1789impl NicConfig {
1790    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1791        (
1792            self.vtl,
1793            netvsp_resources::NetvspHandle {
1794                instance_id: self.instance_id,
1795                mac_address: self.mac_address,
1796                endpoint: self.endpoint,
1797                max_queues: self.max_queues,
1798            }
1799            .into_resource(),
1800        )
1801    }
1802}
1803
/// Either a single disk layer or a complete disk resource, accumulated while
/// flattening a [`DiskCliKind`] description in [`disk_open_inner`].
enum LayerOrDisk {
    /// One layer of a layered disk.
    Layer(DiskLayerDescription),
    /// A complete disk resource.
    Disk(Resource<DiskHandleKind>),
}
1808
1809async fn disk_open(
1810    disk_cli: &DiskCliKind,
1811    read_only: bool,
1812) -> anyhow::Result<Resource<DiskHandleKind>> {
1813    let mut layers = Vec::new();
1814    disk_open_inner(disk_cli, read_only, &mut layers).await?;
1815    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1816        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1817            unreachable!()
1818        };
1819        Ok(disk)
1820    } else {
1821        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1822            layers: layers
1823                .into_iter()
1824                .map(|layer| match layer {
1825                    LayerOrDisk::Layer(layer) => layer,
1826                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1827                        layer: DiskLayerHandle(disk).into_resource(),
1828                        read_cache: false,
1829                        write_through: false,
1830                    },
1831                })
1832                .collect(),
1833        }))
1834    }
1835}
1836
/// Recursively expands `disk_cli` into `layers`, appending either disk layers
/// or complete disk resources (see [`LayerOrDisk`]); [`disk_open`] flattens
/// the result into a single disk resource.
///
/// Diff-style kinds (`MemoryDiff`, `SqliteDiff`) force their inner disk open
/// read-only, since writes land in the layer pushed above it.
///
/// Returns a boxed future because the function calls itself for nested disk
/// descriptions.
fn disk_open_inner<'a>(
    disk_cli: &'a DiskCliKind,
    read_only: bool,
    layers: &'a mut Vec<LayerOrDisk>,
) -> futures::future::BoxFuture<'a, anyhow::Result<()>> {
    Box::pin(async move {
        // Shorthand constructors for the two `LayerOrDisk` variants.
        fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
            LayerOrDisk::Layer(layer.into_resource().into())
        }
        fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
            LayerOrDisk::Disk(disk.into_resource())
        }
        match disk_cli {
            &DiskCliKind::Memory(len) => {
                layers.push(layer(RamDiskLayerHandle {
                    len: Some(len),
                    sector_size: None,
                }));
            }
            DiskCliKind::File {
                path,
                create_with_len,
                direct,
            } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
                create_disk_type(
                    path,
                    *size,
                    OpenDiskOptions {
                        // A freshly created disk is always opened writable.
                        read_only: false,
                        direct: *direct,
                    },
                )
                .with_context(|| format!("failed to create {}", path.display()))?
            } else {
                open_disk_type(
                    path,
                    OpenDiskOptions {
                        read_only,
                        direct: *direct,
                    },
                )
                .await
                .with_context(|| format!("failed to open {}", path.display()))?
            })),
            DiskCliKind::Blob { kind, url } => {
                layers.push(disk(disk_backend_resources::BlobDiskHandle {
                    url: url.to_owned(),
                    format: match kind {
                        cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
                        cli_args::BlobKind::Vhd1 => {
                            disk_backend_resources::BlobDiskFormat::FixedVhd1
                        }
                    },
                }))
            }
            DiskCliKind::MemoryDiff(inner) => {
                // Writable RAM layer on top; the inner disk is opened
                // read-only since all writes land in the RAM layer.
                layers.push(layer(RamDiskLayerHandle {
                    len: None,
                    sector_size: None,
                }));
                disk_open_inner(inner, true, layers).await?;
            }
            DiskCliKind::PersistentReservationsWrapper(inner) => {
                layers.push(disk(disk_backend_resources::DiskWithReservationsHandle(
                    disk_open(inner, read_only).await?,
                )))
            }
            DiskCliKind::DelayDiskWrapper {
                delay_ms,
                disk: inner,
            } => layers.push(disk(DelayDiskHandle {
                delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
                disk: disk_open(inner, read_only).await?,
            })),
            DiskCliKind::Crypt {
                disk: inner,
                cipher,
                key_file,
            } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
                disk: disk_open(inner, read_only).await?,
                cipher: match cipher {
                    cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
                },
                key: fs_err::read(key_file).context("failed to read key file")?,
            })),
            DiskCliKind::Sqlite {
                path,
                create_with_len,
            } => {
                // FUTURE: this code should be responsible for opening
                // file-handle(s) itself, and passing them into sqlite via a custom
                // vfs. For now though - simply check if the file exists or not, and
                // perform early validation of filesystem-level create options.
                match (create_with_len.is_some(), path.exists()) {
                    (true, true) => anyhow::bail!(
                        "cannot create new sqlite disk at {} - file already exists",
                        path.display()
                    ),
                    (false, false) => anyhow::bail!(
                        "cannot open sqlite disk at {} - file not found",
                        path.display()
                    ),
                    _ => {}
                }

                layers.push(layer(SqliteDiskLayerHandle {
                    dbhd_path: path.display().to_string(),
                    format_dbhd: create_with_len.map(|len| {
                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                            logically_read_only: false,
                            len: Some(len),
                        }
                    }),
                }));
            }
            DiskCliKind::SqliteDiff { path, create, disk } => {
                // FUTURE: this code should be responsible for opening
                // file-handle(s) itself, and passing them into sqlite via a custom
                // vfs. For now though - simply check if the file exists or not, and
                // perform early validation of filesystem-level create options.
                match (create, path.exists()) {
                    (true, true) => anyhow::bail!(
                        "cannot create new sqlite disk at {} - file already exists",
                        path.display()
                    ),
                    (false, false) => anyhow::bail!(
                        "cannot open sqlite disk at {} - file not found",
                        path.display()
                    ),
                    _ => {}
                }

                layers.push(layer(SqliteDiskLayerHandle {
                    dbhd_path: path.display().to_string(),
                    format_dbhd: create.then_some(
                        disk_backend_resources::layer::SqliteDiskLayerFormatParams {
                            logically_read_only: false,
                            len: None,
                        },
                    ),
                }));
                // The sqlite layer above absorbs writes; the base is read-only.
                disk_open_inner(disk, true, layers).await?;
            }
            DiskCliKind::AutoCacheSqlite {
                cache_path,
                key,
                disk,
            } => {
                // Cache layer is marked `read_cache` so reads populate it;
                // then recurse to open the disk being cached.
                layers.push(LayerOrDisk::Layer(DiskLayerDescription {
                    read_cache: true,
                    write_through: false,
                    layer: SqliteAutoCacheDiskLayerHandle {
                        cache_path: cache_path.clone(),
                        cache_key: key.clone(),
                    }
                    .into_resource(),
                }));
                disk_open_inner(disk, read_only, layers).await?;
            }
        }
        Ok(())
    })
}
2000
/// Get the system page size.
pub(crate) fn system_page_size() -> u32 {
    // Page sizes comfortably fit in a u32, so the narrowing cast is fine.
    sparse_mmap::SparseMapping::page_size() as u32
}
2005
/// The guest architecture string, derived from the compile-time `guest_arch` cfg.
///
/// Any build that is not `guest_arch = "x86_64"` is assumed to target aarch64.
pub(crate) const GUEST_ARCH: &str = if cfg!(guest_arch = "x86_64") {
    "x86_64"
} else {
    "aarch64"
};
2012
2013/// Open a snapshot directory and validate it against the current VM config.
2014/// Returns the shared memory fd (from memory.bin) and the saved device state.
2015fn prepare_snapshot_restore(
2016    snapshot_dir: &Path,
2017    opt: &Options,
2018) -> anyhow::Result<(
2019    openvmm_defs::worker::SharedMemoryFd,
2020    mesh::payload::message::ProtobufMessage,
2021)> {
2022    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2023
2024    // Validate manifest against current VM config.
2025    openvmm_helpers::snapshot::validate_manifest(
2026        &manifest,
2027        GUEST_ARCH,
2028        opt.memory,
2029        opt.processors,
2030        system_page_size(),
2031    )?;
2032
2033    // Open memory.bin (existing file, no create, no resize).
2034    let memory_file = fs_err::OpenOptions::new()
2035        .read(true)
2036        .write(true)
2037        .open(snapshot_dir.join("memory.bin"))?;
2038
2039    // Validate file size matches expected memory size.
2040    let file_size = memory_file.metadata()?.len();
2041    if file_size != manifest.memory_size_bytes {
2042        anyhow::bail!(
2043            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2044            manifest.memory_size_bytes,
2045        );
2046    }
2047
2048    let shared_memory_fd =
2049        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2050
2051    // Reconstruct ProtobufMessage from the saved state bytes.
2052    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2053    // back to ProtobufMessage.
2054    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2055        .context("failed to decode saved state from snapshot")?;
2056
2057    Ok((shared_memory_fd, state_msg))
2058}
2059
/// Main body of the control process: initializes tracing, runs as a mesh
/// worker host if this process was launched as one, then handles the one-shot
/// CLI modes (saved-state proto dump, console relay, ttrpc/grpc server) before
/// entering the interactive control loop.
///
/// `pidfile_path` is populated once a pidfile has been written so the caller
/// can remove it on exit.
fn do_main(pidfile_path: &mut Option<PathBuf>) -> anyhow::Result<()> {
    #[cfg(windows)]
    pal::windows::disable_hard_error_dialog();

    tracing_init::enable_tracing()?;

    // Try to run as a worker host.
    // On success the worker runs to completion and then exits the process (does
    // not return). Any worker host setup errors are returned and bubbled up.
    meshworker::run_vmm_mesh_host()?;

    let opt = Options::parse();
    if let Some(path) = &opt.write_saved_state_proto {
        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
            .write_to_path(path)
            .context("failed to write protobuf descriptors")?;
        return Ok(());
    }

    if let Some(ref path) = opt.pidfile {
        std::fs::write(path, format!("{}\n", std::process::id()))
            .context("failed to write pidfile")?;
        *pidfile_path = Some(path.clone());
    }

    if let Some(path) = opt.relay_console_path {
        let console_title = opt.relay_console_title.unwrap_or_default();
        return console_relay::relay_console(&path, console_title.as_str());
    }

    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
        return block_on(async {
            // Remove any stale socket left behind by a previous run.
            let _ = std::fs::remove_file(path);
            let listener =
                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;

            let transport = if opt.ttrpc.is_some() {
                ttrpc::RpcTransport::Ttrpc
            } else {
                ttrpc::RpcTransport::Grpc
            };

            // This is a local launch
            let mut handle =
                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
                    listener,
                    transport,
                })
                .await?;

            tracing::info!(%transport, path = %path.display(), "listening");

            // Signal the parent process that the server is ready.
            pal::close_stdout().context("failed to close stdout")?;

            handle.join().await?;

            Ok(())
        });
    }

    DefaultPool::run_with(async |driver| run_control(&driver, opt).await)
}
2124
2125fn new_hvsock_service_id(port: u32) -> Guid {
2126    // This GUID is an embedding of the AF_VSOCK port into an
2127    // AF_HYPERV service ID.
2128    Guid {
2129        data1: port,
2130        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2131    }
2132}
2133
2134async fn run_control(driver: &DefaultDriver, opt: Options) -> anyhow::Result<()> {
2135    let mut mesh = Some(VmmMesh::new(&driver, opt.single_process)?);
2136    let result = run_control_inner(driver, &mut mesh, opt).await;
2137    // If setup failed before the mesh was handed to the controller, shut it
2138    // down so the child host process exits cleanly without noisy logs.
2139    if let Some(mesh) = mesh {
2140        mesh.shutdown().await;
2141    }
2142    result
2143}
2144
/// Builds the VM configuration from the command line, spawns the supporting
/// workers (VNC, gdbstub) and the VM worker itself, hands the mesh and worker
/// handles to the [`vm_controller::VmController`], and runs the interactive
/// REPL until it exits.
///
/// `mesh_slot` is taken (left as `None`) once the controller assumes ownership
/// of the mesh; the caller shuts the mesh down if it is still present on
/// return.
async fn run_control_inner(
    driver: &DefaultDriver,
    mesh_slot: &mut Option<VmmMesh>,
    opt: Options,
) -> anyhow::Result<()> {
    let mesh = mesh_slot.as_ref().unwrap();
    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;

    // spin up the VNC worker if graphics or VNC access were requested
    let mut vnc_worker = None;
    if opt.gfx || opt.vnc {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;

        let input_send = vm_config.input.sender();
        let framebuffer = resources
            .framebuffer_access
            .take()
            .expect("synth video enabled");

        let vnc_host = mesh
            .make_host("vnc", None)
            .await
            .context("spawning vnc process failed")?;

        vnc_worker = Some(
            vnc_host
                .launch_worker(
                    vnc_worker_defs::VNC_WORKER_TCP,
                    VncParameters {
                        listener,
                        framebuffer,
                        input_send,
                    },
                )
                .await?,
        )
    }

    // spin up the debug worker
    let gdb_worker = if let Some(port) = opt.gdb {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
            .with_context(|| format!("binding to gdb port {}", port))?;

        let (req_tx, req_rx) = mesh::channel();
        vm_config.debugger_rpc = Some(req_rx);

        let gdb_host = mesh
            .make_host("gdb", None)
            .await
            .context("spawning gdbstub process failed")?;

        Some(
            gdb_host
                .launch_worker(
                    debug_worker_defs::DEBUGGER_WORKER,
                    debug_worker_defs::DebuggerParameters {
                        listener,
                        req_chan: req_tx,
                        vp_count: vm_config.processor_topology.proc_count,
                        target_arch: if cfg!(guest_arch = "x86_64") {
                            debug_worker_defs::TargetArch::X86_64
                        } else {
                            debug_worker_defs::TargetArch::Aarch64
                        },
                    },
                )
                .await
                .context("failed to launch gdbstub worker")?,
        )
    } else {
        None
    };

    // spin up the VM
    let (vm_rpc, rpc_recv) = mesh::channel();
    let (notify_send, notify_recv) = mesh::channel();
    let vm_worker = {
        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;

        // When restoring from a snapshot, the guest memory and device state
        // come from the snapshot directory; otherwise memory may optionally be
        // backed by a file.
        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
            (Some(fd), Some(state_msg))
        } else {
            let shared_memory = opt
                .memory_backing_file
                .as_ref()
                .map(|path| {
                    openvmm_helpers::shared_memory::open_memory_backing_file(path, opt.memory)
                })
                .transpose()?;
            (shared_memory, None)
        };

        let params = VmWorkerParameters {
            hypervisor: match &opt.hypervisor {
                Some(name) => openvmm_helpers::hypervisor::hypervisor_resource(name)?,
                None => openvmm_helpers::hypervisor::choose_hypervisor()?,
            },
            cfg: vm_config,
            saved_state,
            shared_memory,
            rpc: rpc_recv,
            notify: notify_send,
        };
        vm_host
            .launch_worker(VM_WORKER, params)
            .await
            .context("failed to launch vm worker")?
    };

    if opt.restore_snapshot.is_some() {
        tracing::info!("restoring VM from snapshot");
    }

    if !opt.paused {
        vm_rpc.call(VmRpc::Resume, ()).await?;
    }

    // Diagnostics client for the guest/paravisor; connections are made via
    // [`DiagDialer`] over hvsock.
    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
        driver.clone(),
        DiagDialer {
            driver: driver.clone(),
            vm_rpc: vm_rpc.clone(),
            openhcl_vtl: if opt.vtl2 {
                DeviceVtl::Vtl2
            } else {
                DeviceVtl::Vtl0
            },
        },
    ));

    let diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());

    // Create channels between the REPL and VmController.
    let (vm_controller_send, vm_controller_recv) = mesh::channel();
    let (vm_controller_event_send, vm_controller_event_recv) = mesh::channel();

    let has_vtl2 = resources.vtl2_settings.is_some();

    // Build the VmController with exclusive resources.
    let controller = vm_controller::VmController {
        mesh: mesh_slot.take().unwrap(),
        vm_worker,
        vnc_worker,
        gdb_worker,
        diag_inspector: Some(diag_inspector),
        vtl2_settings: resources.vtl2_settings,
        ged_rpc: resources.ged_rpc.clone(),
        vm_rpc: vm_rpc.clone(),
        paravisor_diag: Some(paravisor_diag),
        igvm_path: opt.igvm.clone(),
        memory_backing_file: opt.memory_backing_file.clone(),
        memory: opt.memory,
        processors: opt.processors,
        log_file: opt.log_file.clone(),
    };

    // Spawn the VmController as a task.
    let controller_task = driver.spawn(
        "vm-controller",
        controller.run(vm_controller_recv, vm_controller_event_send, notify_recv),
    );

    // Run the REPL with shareable resources.
    let repl_result = repl::run_repl(
        driver,
        repl::ReplResources {
            vm_rpc,
            vm_controller: vm_controller_send,
            vm_controller_events: vm_controller_event_recv,
            scsi_rpc: resources.scsi_rpc,
            nvme_vtl2_rpc: resources.nvme_vtl2_rpc,
            shutdown_ic: resources.shutdown_ic,
            kvp_ic: resources.kvp_ic,
            console_in: resources.console_in,
            has_vtl2,
        },
    )
    .await;

    // Wait for the controller task to finish (it stops the VM worker and
    // shuts down the mesh).
    controller_task.await;

    repl_result
}
2331
/// [`mesh_rpc::client::Dial`] implementation that connects to the guest's
/// diagnostics endpoint by asking the VM worker to establish an hvsock
/// connection.
struct DiagDialer {
    driver: DefaultDriver,
    vm_rpc: mesh::Sender<VmRpc>,
    /// The VTL whose diagnostics endpoint is dialed (VTL2 when running with a
    /// paravisor, VTL0 otherwise).
    openhcl_vtl: DeviceVtl,
}
2337
impl mesh_rpc::client::Dial for DiagDialer {
    type Stream = PolledSocket<unix_socket::UnixStream>;

    async fn dial(&mut self) -> io::Result<Self::Stream> {
        // Ask the VM worker to connect an hvsock socket for vsock port 1 in
        // the target VTL, giving up after a 2-second timeout.
        let service_id = new_hvsock_service_id(1);
        let socket = self
            .vm_rpc
            .call_failable(
                VmRpc::ConnectHvsock,
                (
                    CancelContext::new().with_timeout(Duration::from_secs(2)),
                    service_id,
                    self.openhcl_vtl,
                ),
            )
            .await
            .map_err(io::Error::other)?;

        // Wrap the raw socket for async I/O on this dialer's driver.
        PolledSocket::new(&self.driver, socket)
    }
}
2359
/// An object that implements [`InspectMut`] by sending an inspect request over
/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
/// the response back into the inspect tree.
///
/// This also caches the TTRPC connection to the guest so that only the first
/// inspect request has to wait for the connection to be established.
///
/// The wrapped [`DiagInspectorInner`] tracks whether the background forwarding
/// task has been started yet.
pub(crate) struct DiagInspector(DiagInspectorInner);
2367
/// State for [`DiagInspector`], transitioning from `NotStarted` to `Started`
/// on the first inspect request.
enum DiagInspectorInner {
    /// No request has been made yet; holds what is needed to spawn the
    /// forwarding task.
    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
    /// The background task forwarding inspect requests is running.
    Started {
        send: mesh::Sender<inspect::Deferred>,
        // Held only to keep the task alive.
        _task: Task<()>,
    },
    /// Transient placeholder used while swapping `NotStarted` out; never
    /// observed outside [`DiagInspector::start`].
    Invalid,
}
2376
impl DiagInspector {
    /// Creates a new inspector. No task is spawned and no connection is made
    /// until the first inspect request arrives.
    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
        Self(DiagInspectorInner::NotStarted(driver, diag_client))
    }

    /// Returns the sender for the forwarding task, spawning the task on first
    /// use.
    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
        loop {
            match self.0 {
                DiagInspectorInner::NotStarted { .. } => {
                    // Move the driver and client out by value, leaving
                    // `Invalid` in place momentarily; it is overwritten with
                    // `Started` below before the loop re-examines the state.
                    let DiagInspectorInner::NotStarted(driver, client) =
                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
                    else {
                        unreachable!()
                    };
                    let (send, recv) = mesh::channel();
                    let task = driver.clone().spawn("diag-inspect", async move {
                        Self::run(&client, recv).await
                    });

                    self.0 = DiagInspectorInner::Started { send, _task: task };
                }
                DiagInspectorInner::Started { ref send, .. } => break send,
                DiagInspectorInner::Invalid => unreachable!(),
            }
        }
    }

    /// Forwards deferred inspect requests to the guest via `diag_client`,
    /// completing each one with the resulting node (or a failure node).
    async fn run(
        diag_client: &diag_client::DiagClient,
        mut recv: mesh::Receiver<inspect::Deferred>,
    ) {
        while let Some(deferred) = recv.next().await {
            let info = deferred.external_request();
            let result = match info.request_type {
                inspect::ExternalRequestType::Inspect { depth } => {
                    if depth == 0 {
                        // Depth exhausted; no need to round-trip to the guest.
                        Ok(inspect::Node::Unevaluated)
                    } else {
                        // TODO: Support taking timeouts from the command line
                        diag_client
                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
                            .await
                    }
                }
                inspect::ExternalRequestType::Update { value } => {
                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
                }
            };
            deferred.complete_external(
                result.unwrap_or_else(|err| {
                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
                }),
                inspect::SensitivityLevel::Unspecified,
            )
        }
    }
}
2434
impl InspectMut for DiagInspector {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        // Defer the request to the background task, starting it on first use.
        self.start().send(req.defer());
    }
}