openvmm_entry/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! This module implements the interactive control process and the entry point
5//! for the worker process.
6
7#![expect(missing_docs)]
8#![cfg_attr(not(test), forbid(unsafe_code))]
9
10mod cli_args;
11mod crash_dump;
12mod kvp;
13mod meshworker;
14mod serial_io;
15mod storage_builder;
16mod tracing_init;
17mod ttrpc;
18
19// `pub` so that the missing_docs warning fires for options without
20// documentation.
21pub use cli_args::Options;
22use console_relay::ConsoleLaunchOptions;
23
24use crate::cli_args::SecureBootTemplateCli;
25use anyhow::Context;
26use anyhow::bail;
27use chipset_resources::battery::HostBatteryUpdate;
28use clap::CommandFactory;
29use clap::FromArgMatches;
30use clap::Parser;
31use cli_args::DiskCliKind;
32use cli_args::EfiDiagnosticsLogLevelCli;
33use cli_args::EndpointConfigCli;
34use cli_args::NicConfigCli;
35use cli_args::ProvisionVmgs;
36use cli_args::SerialConfigCli;
37use cli_args::UefiConsoleModeCli;
38use cli_args::VirtioBusCli;
39use cli_args::VmgsCli;
40use crash_dump::spawn_dump_handler;
41use disk_backend_resources::DelayDiskHandle;
42use disk_backend_resources::DiskLayerDescription;
43use disk_backend_resources::layer::DiskLayerHandle;
44use disk_backend_resources::layer::RamDiskLayerHandle;
45use disk_backend_resources::layer::SqliteAutoCacheDiskLayerHandle;
46use disk_backend_resources::layer::SqliteDiskLayerHandle;
47use floppy_resources::FloppyDiskConfig;
48use framebuffer::FRAMEBUFFER_SIZE;
49use framebuffer::FramebufferAccess;
50use futures::AsyncReadExt;
51use futures::AsyncWrite;
52use futures::AsyncWriteExt;
53use futures::FutureExt;
54use futures::StreamExt;
55use futures::executor::block_on;
56use futures::io::AllowStdIo;
57use futures_concurrency::stream::Merge;
58use gdma_resources::GdmaDeviceHandle;
59use gdma_resources::VportDefinition;
60use get_resources::ged::GuestServicingFlags;
61use guid::Guid;
62use input_core::MultiplexedInputHandle;
63use inspect::InspectMut;
64use inspect::InspectionBuilder;
65use io::Read;
66use memory_range::MemoryRange;
67use mesh::CancelContext;
68use mesh::CellUpdater;
69use mesh::error::RemoteError;
70use mesh::rpc::Rpc;
71use mesh::rpc::RpcError;
72use mesh::rpc::RpcSend;
73use mesh_worker::WorkerEvent;
74use mesh_worker::WorkerHandle;
75use meshworker::VmmMesh;
76use net_backend_resources::mac_address::MacAddress;
77use nvme_resources::NamespaceDefinition;
78use nvme_resources::NvmeControllerRequest;
79use openvmm_defs::config::Config;
80use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64;
81use openvmm_defs::config::DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2;
82use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86;
83use openvmm_defs::config::DEFAULT_MMIO_GAPS_X86_WITH_VTL2;
84use openvmm_defs::config::DEFAULT_PCAT_BOOT_ORDER;
85use openvmm_defs::config::DeviceVtl;
86use openvmm_defs::config::EfiDiagnosticsLogLevelType;
87use openvmm_defs::config::HypervisorConfig;
88use openvmm_defs::config::LateMapVtl0MemoryPolicy;
89use openvmm_defs::config::LoadMode;
90use openvmm_defs::config::MemoryConfig;
91use openvmm_defs::config::PcieDeviceConfig;
92use openvmm_defs::config::PcieRootComplexConfig;
93use openvmm_defs::config::PcieRootPortConfig;
94use openvmm_defs::config::PcieSwitchConfig;
95use openvmm_defs::config::ProcessorTopologyConfig;
96use openvmm_defs::config::SerialInformation;
97use openvmm_defs::config::VirtioBus;
98use openvmm_defs::config::VmbusConfig;
99use openvmm_defs::config::VpciDeviceConfig;
100use openvmm_defs::config::Vtl2BaseAddressType;
101use openvmm_defs::config::Vtl2Config;
102use openvmm_defs::rpc::PulseSaveRestoreError;
103use openvmm_defs::rpc::VmRpc;
104use openvmm_defs::worker::VM_WORKER;
105use openvmm_defs::worker::VmWorkerParameters;
106use openvmm_helpers::disk::create_disk_type;
107use openvmm_helpers::disk::open_disk_type;
108use pal_async::DefaultDriver;
109use pal_async::DefaultPool;
110use pal_async::socket::PolledSocket;
111use pal_async::task::Spawn;
112use pal_async::task::Task;
113use pal_async::timer::PolledTimer;
114use scsidisk_resources::SimpleScsiDiskHandle;
115use scsidisk_resources::SimpleScsiDvdHandle;
116use serial_16550_resources::ComPort;
117use serial_core::resources::DisconnectedSerialBackendHandle;
118use sparse_mmap::alloc_shared_memory;
119use std::cell::RefCell;
120use std::collections::BTreeMap;
121use std::fmt::Write as _;
122use std::future::pending;
123use std::io;
124#[cfg(unix)]
125use std::io::IsTerminal;
126use std::io::Write;
127use std::net::TcpListener;
128use std::path::Path;
129use std::path::PathBuf;
130use std::pin::pin;
131use std::sync::Arc;
132use std::thread;
133use std::time::Duration;
134use std::time::Instant;
135use storvsp_resources::ScsiControllerRequest;
136use storvsp_resources::ScsiDeviceAndPath;
137use storvsp_resources::ScsiPath;
138use tpm_resources::TpmDeviceHandle;
139use tpm_resources::TpmRegisterLayout;
140use tracing_helpers::AnyhowValueExt;
141use uidevices_resources::SynthKeyboardHandle;
142use uidevices_resources::SynthMouseHandle;
143use uidevices_resources::SynthVideoHandle;
144use video_core::SharedFramebufferHandle;
145use virtio_resources::VirtioPciDeviceHandle;
146use vm_manifest_builder::BaseChipsetType;
147use vm_manifest_builder::MachineArch;
148use vm_manifest_builder::VmChipsetResult;
149use vm_manifest_builder::VmManifestBuilder;
150use vm_resource::IntoResource;
151use vm_resource::Resource;
152use vm_resource::kind::DiskHandleKind;
153use vm_resource::kind::DiskLayerHandleKind;
154use vm_resource::kind::NetEndpointHandleKind;
155use vm_resource::kind::VirtioDeviceHandle;
156use vm_resource::kind::VmbusDeviceHandleKind;
157use vmbus_serial_resources::VmbusSerialDeviceHandle;
158use vmbus_serial_resources::VmbusSerialPort;
159use vmcore::non_volatile_store::resources::EphemeralNonVolatileStoreHandle;
160use vmgs_resources::GuestStateEncryptionPolicy;
161use vmgs_resources::VmgsDisk;
162use vmgs_resources::VmgsFileHandle;
163use vmgs_resources::VmgsResource;
164use vmotherboard::ChipsetDeviceHandle;
165use vnc_worker_defs::VncParameters;
166
167pub fn openvmm_main() {
168    // Save the current state of the terminal so we can restore it back to
169    // normal before exiting.
170    #[cfg(unix)]
171    let orig_termios = io::stderr().is_terminal().then(term::get_termios);
172
173    let exit_code = match do_main() {
174        Ok(_) => 0,
175        Err(err) => {
176            eprintln!("fatal error: {:?}", err);
177            1
178        }
179    };
180
181    // Restore the terminal to its initial state.
182    #[cfg(unix)]
183    if let Some(orig_termios) = orig_termios {
184        term::set_termios(orig_termios);
185    }
186
187    // Terminate the process immediately without graceful shutdown of DLLs or
188    // C++ destructors or anything like that. This is all unnecessary and saves
189    // time on Windows.
190    //
191    // Do flush stdout, though, since there may be buffered data.
192    let _ = io::stdout().flush();
193    pal::process::terminate(exit_code);
194}
195
/// Side resources produced while building a VM configuration; returned
/// alongside the `Config` by `vm_config_from_command_line`.
///
/// Every field starts out empty (`Default`) and is filled in only when the
/// corresponding feature is configured.
#[derive(Default)]
struct VmResources {
    /// Write half of the serial connection that was set up as the console
    /// (`SerialConfigCli::Console`), if any.
    console_in: Option<Box<dyn AsyncWrite + Send + Unpin>>,
    /// Framebuffer access handle, if any.
    // NOTE(review): populated outside the visible part of this file.
    framebuffer_access: Option<FramebufferAccess>,
    /// Sender for `ShutdownRpc` requests, if present.
    shutdown_ic: Option<mesh::Sender<hyperv_ic_resources::shutdown::ShutdownRpc>>,
    /// Sender for `KvpConnectRpc` requests, if present.
    kvp_ic: Option<mesh::Sender<hyperv_ic_resources::kvp::KvpConnectRpc>>,
    /// Sender for `ScsiControllerRequest`s, if present.
    scsi_rpc: Option<mesh::Sender<ScsiControllerRequest>>,
    /// Sender for `NvmeControllerRequest`s, if present.
    // NOTE(review): presumably targets the NVMe controller offered to VTL2 —
    // confirm at the site where this is assigned.
    nvme_vtl2_rpc: Option<mesh::Sender<NvmeControllerRequest>>,
    /// Sender for GED requests (`GuestEmulationRequest`); used by
    /// `modify_vtl2_settings` to send `ModifyVtl2Settings`.
    ged_rpc: Option<mesh::Sender<get_resources::ged::GuestEmulationRequest>>,
    /// Cached VTL2 settings. `modify_vtl2_settings` edits a copy and stores it
    /// back here once the update has been sent successfully.
    vtl2_settings: Option<vtl2_settings_proto::Vtl2Settings>,
    /// Switch ports created for the `opt.kernel_vmnic` entries (Windows only).
    // NOTE(review): presumably held here to keep the ports alive — confirm.
    #[cfg(windows)]
    switch_ports: Vec<vmswitch::kernel::SwitchPort>,
}
209
210impl VmResources {
211    /// Modify the cached VTL2 settings and send them to OpenHCL via the GED.
212    ///
213    /// This follows the same pattern as petri's `modify_vtl2_settings`: the cache
214    /// is modified locally, then the entire settings are sent to OpenHCL.
215    async fn modify_vtl2_settings(
216        &mut self,
217        f: impl FnOnce(&mut vtl2_settings_proto::Vtl2Settings),
218    ) -> anyhow::Result<()> {
219        let mut settings_copy = self
220            .vtl2_settings
221            .clone()
222            .context("vtl2 settings not configured")?;
223
224        f(&mut settings_copy);
225
226        let ged_rpc = self.ged_rpc.as_ref().context("no GED configured")?;
227
228        ged_rpc
229            .call_failable(
230                get_resources::ged::GuestEmulationRequest::ModifyVtl2Settings,
231                prost::Message::encode_to_vec(&settings_copy),
232            )
233            .await?;
234
235        // Settings successfully applied, update our cache
236        self.vtl2_settings = Some(settings_copy);
237        Ok(())
238    }
239
240    /// Add a VTL0 SCSI LUN backed by a VTL2 storage device.
241    ///
242    /// This modifies the VTL2 settings to add a new LUN to the specified SCSI controller,
243    /// backed by the given VTL2 device (NVMe namespace or SCSI disk).
244    async fn add_vtl0_scsi_disk(
245        &mut self,
246        controller_guid: Guid,
247        lun: u32,
248        device_type: vtl2_settings_proto::physical_device::DeviceType,
249        device_path: Guid,
250        sub_device_path: u32,
251    ) -> anyhow::Result<()> {
252        let mut not_found = false;
253        self.modify_vtl2_settings(|settings| {
254            let dynamic = settings.dynamic.get_or_insert_with(Default::default);
255
256            // Find the SCSI controller, bail out if not found (we can't create new controllers at runtime)
257            let scsi_controller = dynamic.storage_controllers.iter_mut().find(|c| {
258                c.instance_id == controller_guid.to_string()
259                    && c.protocol
260                        == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
261            });
262
263            let Some(scsi_controller) = scsi_controller else {
264                not_found = true;
265                return;
266            };
267
268            // Add the LUN backed by the VTL2 storage device. If the LUN exists already, UH will reject the settings
269            scsi_controller.luns.push(vtl2_settings_proto::Lun {
270                location: lun,
271                device_id: Guid::new_random().to_string(),
272                vendor_id: "OpenVMM".to_string(),
273                product_id: "Disk".to_string(),
274                product_revision_level: "1.0".to_string(),
275                serial_number: "0".to_string(),
276                model_number: "1".to_string(),
277                physical_devices: Some(vtl2_settings_proto::PhysicalDevices {
278                    r#type: vtl2_settings_proto::physical_devices::BackingType::Single.into(),
279                    device: Some(vtl2_settings_proto::PhysicalDevice {
280                        device_type: device_type.into(),
281                        device_path: device_path.to_string(),
282                        sub_device_path,
283                    }),
284                    devices: Vec::new(),
285                }),
286                is_dvd: false,
287                ..Default::default()
288            });
289        })
290        .await?;
291
292        if not_found {
293            anyhow::bail!("SCSI controller {} not found", controller_guid);
294        }
295        Ok(())
296    }
297
298    /// Remove a VTL0 SCSI LUN.
299    ///
300    /// This modifies the VTL2 settings to remove a LUN from the specified SCSI controller.
301    async fn remove_vtl0_scsi_disk(
302        &mut self,
303        controller_guid: Guid,
304        lun: u32,
305    ) -> anyhow::Result<()> {
306        self.modify_vtl2_settings(|settings| {
307            let dynamic = settings.dynamic.as_mut();
308            if let Some(dynamic) = dynamic {
309                // Find the SCSI controller
310                if let Some(scsi_controller) = dynamic.storage_controllers.iter_mut().find(|c| {
311                    c.instance_id == controller_guid.to_string()
312                        && c.protocol
313                            == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
314                }) {
315                    // Remove the LUN
316                    scsi_controller.luns.retain(|l| l.location != lun);
317                }
318            }
319        })
320        .await
321    }
322
323    /// Find and remove a VTL0 SCSI LUN backed by a specific NVMe namespace.
324    ///
325    /// Returns the LUN number that was removed, or None if no matching LUN was found.
326    async fn remove_vtl0_scsi_disk_by_nvme_nsid(
327        &mut self,
328        controller_guid: Guid,
329        nvme_controller_guid: Guid,
330        nsid: u32,
331    ) -> anyhow::Result<Option<u32>> {
332        let mut removed_lun = None;
333        self.modify_vtl2_settings(|settings| {
334            let dynamic = settings.dynamic.as_mut();
335            if let Some(dynamic) = dynamic {
336                // Find the SCSI controller
337                if let Some(scsi_controller) = dynamic.storage_controllers.iter_mut().find(|c| {
338                    c.instance_id == controller_guid.to_string()
339                        && c.protocol
340                            == vtl2_settings_proto::storage_controller::StorageProtocol::Scsi as i32
341                }) {
342                    // Find and remove the LUN backed by this NVMe namespace
343                    let nvme_controller_str = nvme_controller_guid.to_string();
344                    scsi_controller.luns.retain(|l| {
345                        let dominated_by_nsid = l.physical_devices.as_ref().is_some_and(|pd| {
346                            pd.device.as_ref().is_some_and(|d| {
347                                d.device_type
348                                    == vtl2_settings_proto::physical_device::DeviceType::Nvme as i32
349                                    && d.device_path == nvme_controller_str
350                                    && d.sub_device_path == nsid
351                            })
352                        });
353                        if dominated_by_nsid {
354                            removed_lun = Some(l.location);
355                            false // Remove this LUN
356                        } else {
357                            true // Keep this LUN
358                        }
359                    });
360                }
361            }
362        })
363        .await?;
364        Ok(removed_lun)
365    }
366}
367
/// Records which serial port has claimed the console, so that a second
/// `SerialConfigCli::Console` port can be rejected.
struct ConsoleState<'a> {
    /// Device name of the port that claimed the console (e.g. `"ttyS0"`);
    /// reported in the "console already set by" error.
    device: &'a str,
    /// Write half of the console's serial connection.
    input: Box<dyn AsyncWrite + Unpin + Send>,
}
372
373/// Build a flat list of switches with their parent port assignments.
374///
375/// This function converts hierarchical CLI switch definitions into a flat list
376/// where each switch specifies its parent port directly.
377fn build_switch_list(all_switches: &[cli_args::GenericPcieSwitchCli]) -> Vec<PcieSwitchConfig> {
378    all_switches
379        .iter()
380        .map(|switch_cli| PcieSwitchConfig {
381            name: switch_cli.name.clone(),
382            num_downstream_ports: switch_cli.num_downstream_ports,
383            parent_port: switch_cli.port_name.clone(),
384            hotplug: switch_cli.hotplug,
385        })
386        .collect()
387}
388
389async fn vm_config_from_command_line(
390    spawner: impl Spawn,
391    mesh: &VmmMesh,
392    opt: &Options,
393) -> anyhow::Result<(Config, VmResources)> {
394    let (_, serial_driver) = DefaultPool::spawn_on_thread("serial");
395    // Ensure the serial driver stays alive with no tasks.
396    serial_driver.spawn("leak", pending::<()>()).detach();
397
398    let openhcl_vtl = if opt.vtl2 {
399        DeviceVtl::Vtl2
400    } else {
401        DeviceVtl::Vtl0
402    };
403
404    let console_state: RefCell<Option<ConsoleState<'_>>> = RefCell::new(None);
405    let setup_serial = |name: &str, cli_cfg, device| -> anyhow::Result<_> {
406        Ok(match cli_cfg {
407            SerialConfigCli::Console => {
408                if let Some(console_state) = console_state.borrow().as_ref() {
409                    bail!("console already set by {}", console_state.device);
410                }
411                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
412                let (serial_read, serial_write) = AsyncReadExt::split(serial);
413                *console_state.borrow_mut() = Some(ConsoleState {
414                    device,
415                    input: Box::new(serial_write),
416                });
417                thread::Builder::new()
418                    .name(name.to_owned())
419                    .spawn(move || {
420                        let _ = block_on(futures::io::copy(
421                            serial_read,
422                            &mut AllowStdIo::new(term::raw_stdout()),
423                        ));
424                    })
425                    .unwrap();
426                Some(config)
427            }
428            SerialConfigCli::Stderr => {
429                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
430                thread::Builder::new()
431                    .name(name.to_owned())
432                    .spawn(move || {
433                        let _ = block_on(futures::io::copy(
434                            serial,
435                            &mut AllowStdIo::new(term::raw_stderr()),
436                        ));
437                    })
438                    .unwrap();
439                Some(config)
440            }
441            SerialConfigCli::File(path) => {
442                let (config, serial) = serial_io::anonymous_serial_pair(&serial_driver)?;
443                let file = fs_err::File::create(path).context("failed to create file")?;
444
445                thread::Builder::new()
446                    .name(name.to_owned())
447                    .spawn(move || {
448                        let _ = block_on(futures::io::copy(serial, &mut AllowStdIo::new(file)));
449                    })
450                    .unwrap();
451                Some(config)
452            }
453            SerialConfigCli::None => None,
454            SerialConfigCli::Pipe(path) => {
455                Some(serial_io::bind_serial(&path).context("failed to bind serial")?)
456            }
457            SerialConfigCli::Tcp(addr) => {
458                Some(serial_io::bind_tcp_serial(&addr).context("failed to bind serial")?)
459            }
460            SerialConfigCli::NewConsole(app, window_title) => {
461                let path = console_relay::random_console_path();
462                let config =
463                    serial_io::bind_serial(&path).context("failed to bind console serial")?;
464                let window_title =
465                    window_title.unwrap_or_else(|| name.to_uppercase() + " [OpenVMM]");
466
467                console_relay::launch_console(
468                    app.or_else(openvmm_terminal_app).as_deref(),
469                    &path,
470                    ConsoleLaunchOptions {
471                        window_title: Some(window_title),
472                    },
473                )
474                .context("failed to launch console")?;
475
476                Some(config)
477            }
478        })
479    };
480
481    let mut vmbus_devices = Vec::new();
482
483    let serial0_cfg = setup_serial(
484        "com1",
485        opt.com1.clone().unwrap_or(SerialConfigCli::Console),
486        if cfg!(guest_arch = "x86_64") {
487            "ttyS0"
488        } else {
489            "ttyAMA0"
490        },
491    )?;
492    let serial1_cfg = setup_serial(
493        "com2",
494        opt.com2.clone().unwrap_or(SerialConfigCli::None),
495        if cfg!(guest_arch = "x86_64") {
496            "ttyS1"
497        } else {
498            "ttyAMA1"
499        },
500    )?;
501    let serial2_cfg = setup_serial(
502        "com3",
503        opt.com3.clone().unwrap_or(SerialConfigCli::None),
504        if cfg!(guest_arch = "x86_64") {
505            "ttyS2"
506        } else {
507            "ttyAMA2"
508        },
509    )?;
510    let serial3_cfg = setup_serial(
511        "com4",
512        opt.com4.clone().unwrap_or(SerialConfigCli::None),
513        if cfg!(guest_arch = "x86_64") {
514            "ttyS3"
515        } else {
516            "ttyAMA3"
517        },
518    )?;
519    let with_vmbus_com1_serial = if let Some(vmbus_com1_cfg) = setup_serial(
520        "vmbus_com1",
521        opt.vmbus_com1_serial
522            .clone()
523            .unwrap_or(SerialConfigCli::None),
524        "vmbus_com1",
525    )? {
526        vmbus_devices.push((
527            openhcl_vtl,
528            VmbusSerialDeviceHandle {
529                port: VmbusSerialPort::Com1,
530                backend: vmbus_com1_cfg,
531            }
532            .into_resource(),
533        ));
534        true
535    } else {
536        false
537    };
538    let with_vmbus_com2_serial = if let Some(vmbus_com2_cfg) = setup_serial(
539        "vmbus_com2",
540        opt.vmbus_com2_serial
541            .clone()
542            .unwrap_or(SerialConfigCli::None),
543        "vmbus_com2",
544    )? {
545        vmbus_devices.push((
546            openhcl_vtl,
547            VmbusSerialDeviceHandle {
548                port: VmbusSerialPort::Com2,
549                backend: vmbus_com2_cfg,
550            }
551            .into_resource(),
552        ));
553        true
554    } else {
555        false
556    };
557    let debugcon_cfg = setup_serial(
558        "debugcon",
559        opt.debugcon
560            .clone()
561            .map(|cfg| cfg.serial)
562            .unwrap_or(SerialConfigCli::None),
563        "debugcon",
564    )?;
565
566    let virtio_console_backend = if let Some(serial_cfg) = opt.virtio_console.clone() {
567        setup_serial("virtio-console", serial_cfg, "hvc0")?
568    } else {
569        None
570    };
571
572    let mut resources = VmResources::default();
573    let mut console_str = "";
574    if let Some(ConsoleState { device, input }) = console_state.into_inner() {
575        resources.console_in = Some(input);
576        console_str = device;
577    }
578
579    if opt.shared_memory {
580        tracing::warn!("--shared-memory/-M flag has no effect and will be removed");
581    }
582
583    const MAX_PROCESSOR_COUNT: u32 = 1024;
584
585    if opt.processors == 0 || opt.processors > MAX_PROCESSOR_COUNT {
586        bail!("invalid proc count: {}", opt.processors);
587    }
588
589    // Total SCSI channel count should not exceed the processor count
590    // (at most, one channel per VP).
591    if opt.scsi_sub_channels > (MAX_PROCESSOR_COUNT - 1) as u16 {
592        bail!(
593            "invalid SCSI sub-channel count: requested {}, max {}",
594            opt.scsi_sub_channels,
595            MAX_PROCESSOR_COUNT - 1
596        );
597    }
598
599    let with_get = opt.get || (opt.vtl2 && !opt.no_get);
600
601    let mut storage = storage_builder::StorageBuilder::new(with_get.then_some(openhcl_vtl));
602    for &cli_args::DiskCli {
603        vtl,
604        ref kind,
605        read_only,
606        is_dvd,
607        underhill,
608        ref pcie_port,
609    } in &opt.disk
610    {
611        if pcie_port.is_some() {
612            anyhow::bail!("`--disk` is incompatible with PCIe");
613        }
614
615        storage.add(
616            vtl,
617            underhill,
618            storage_builder::DiskLocation::Scsi(None),
619            kind,
620            is_dvd,
621            read_only,
622        )?;
623    }
624
625    for &cli_args::IdeDiskCli {
626        ref kind,
627        read_only,
628        channel,
629        device,
630        is_dvd,
631    } in &opt.ide
632    {
633        storage.add(
634            DeviceVtl::Vtl0,
635            None,
636            storage_builder::DiskLocation::Ide(channel, device),
637            kind,
638            is_dvd,
639            read_only,
640        )?;
641    }
642
643    for &cli_args::DiskCli {
644        vtl,
645        ref kind,
646        read_only,
647        is_dvd,
648        underhill,
649        ref pcie_port,
650    } in &opt.nvme
651    {
652        storage.add(
653            vtl,
654            underhill,
655            storage_builder::DiskLocation::Nvme(None, pcie_port.clone()),
656            kind,
657            is_dvd,
658            read_only,
659        )?;
660    }
661
662    for &cli_args::DiskCli {
663        vtl,
664        ref kind,
665        read_only,
666        is_dvd,
667        ref underhill,
668        ref pcie_port,
669    } in &opt.virtio_blk
670    {
671        if underhill.is_some() {
672            anyhow::bail!("underhill not supported with virtio-blk");
673        }
674        storage.add(
675            vtl,
676            None,
677            storage_builder::DiskLocation::VirtioBlk(pcie_port.clone()),
678            kind,
679            is_dvd,
680            read_only,
681        )?;
682    }
683
684    let floppy_disks: Vec<_> = opt
685        .floppy
686        .iter()
687        .map(|disk| -> anyhow::Result<_> {
688            let &cli_args::FloppyDiskCli {
689                ref kind,
690                read_only,
691            } = disk;
692            Ok(FloppyDiskConfig {
693                disk_type: disk_open(kind, read_only)?,
694                read_only,
695            })
696        })
697        .collect::<Result<Vec<_>, _>>()?;
698
699    let mut vpci_mana_nics = [(); 3].map(|()| None);
700    let mut pcie_mana_nics = BTreeMap::<String, GdmaDeviceHandle>::new();
701    let mut underhill_nics = Vec::new();
702    let mut vpci_devices = Vec::new();
703
704    let mut nic_index = 0;
705    for cli_cfg in &opt.net {
706        if cli_cfg.pcie_port.is_some() {
707            anyhow::bail!("`--net` does not support PCIe");
708        }
709        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
710        if cli_cfg.underhill {
711            if !opt.no_alias_map {
712                anyhow::bail!("must specify --no-alias-map to offer NICs to VTL2");
713            }
714            let mana = vpci_mana_nics[openhcl_vtl as usize].get_or_insert_with(|| {
715                let vpci_instance_id = Guid::new_random();
716                underhill_nics.push(vtl2_settings_proto::NicDeviceLegacy {
717                    instance_id: vpci_instance_id.to_string(),
718                    subordinate_instance_id: None,
719                    max_sub_channels: None,
720                });
721                (vpci_instance_id, GdmaDeviceHandle { vports: Vec::new() })
722            });
723            mana.1.vports.push(VportDefinition {
724                mac_address: vport.mac_address,
725                endpoint: vport.endpoint,
726            });
727        } else {
728            vmbus_devices.push(vport.into_netvsp_handle());
729        }
730    }
731
732    if opt.nic {
733        let nic_config = parse_endpoint(
734            &NicConfigCli {
735                vtl: DeviceVtl::Vtl0,
736                endpoint: EndpointConfigCli::Consomme { cidr: None },
737                max_queues: None,
738                underhill: false,
739                pcie_port: None,
740            },
741            &mut nic_index,
742            &mut resources,
743        )?;
744        vmbus_devices.push(nic_config.into_netvsp_handle());
745    }
746
747    if opt.mcr {
748        tracing::info!("Instantiating MCR controller");
749
750        // Arbitrary but constant instance ID to be consistent across boots.
751        const MCR_INSTANCE_ID: Guid = guid::guid!("07effd8f-7501-426c-a947-d8345f39113d");
752
753        vpci_devices.push(VpciDeviceConfig {
754            vtl: DeviceVtl::Vtl0,
755            instance_id: MCR_INSTANCE_ID,
756            resource: mcr_resources::McrControllerHandle {
757                instance_id: MCR_INSTANCE_ID,
758            }
759            .into_resource(),
760        });
761    }
762
763    // Build initial PCIe devices list from CLI options. Storage devices
764    // (e.g., NVMe controllers on PCIe ports) are added later by storage_builder.
765    let mut pcie_devices = Vec::new();
766    for (index, cli_cfg) in opt.pcie_remote.iter().enumerate() {
767        tracing::info!(
768            port_name = %cli_cfg.port_name,
769            socket_addr = ?cli_cfg.socket_addr,
770            "instantiating PCIe remote device"
771        );
772
773        // Generate a deterministic instance ID based on index
774        const PCIE_REMOTE_BASE_INSTANCE_ID: Guid =
775            guid::guid!("28ed784d-c059-429f-9d9a-46bea02562c0");
776        let instance_id = Guid {
777            data1: index as u32,
778            ..PCIE_REMOTE_BASE_INSTANCE_ID
779        };
780
781        pcie_devices.push(PcieDeviceConfig {
782            port_name: cli_cfg.port_name.clone(),
783            resource: pcie_remote_resources::PcieRemoteHandle {
784                instance_id,
785                socket_addr: cli_cfg.socket_addr.clone(),
786                hu: cli_cfg.hu,
787                controller: cli_cfg.controller,
788            }
789            .into_resource(),
790        });
791    }
792
793    #[cfg(windows)]
794    let mut kernel_vmnics = Vec::new();
795    #[cfg(windows)]
796    for (index, switch_id) in opt.kernel_vmnic.iter().enumerate() {
797        // Pick a random MAC address.
798        let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
799        getrandom::fill(&mut mac_address[3..]).expect("rng failure");
800
801        // Pick a fixed instance ID based on the index.
802        const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-435d-11ee-9f59-00155d5016fc");
803        let instance_id = Guid {
804            data1: index as u32,
805            ..BASE_INSTANCE_ID
806        };
807
808        let switch_id = if switch_id == "default" {
809            DEFAULT_SWITCH
810        } else {
811            switch_id
812        };
813        let (port_id, port) = new_switch_port(switch_id)?;
814        resources.switch_ports.push(port);
815
816        kernel_vmnics.push(openvmm_defs::config::KernelVmNicConfig {
817            instance_id,
818            mac_address: mac_address.into(),
819            switch_port_id: port_id,
820        });
821    }
822
823    for vport in &opt.mana {
824        let vport = parse_endpoint(vport, &mut nic_index, &mut resources)?;
825        let vport_array = match (vport.vtl as usize, vport.pcie_port) {
826            (vtl, None) => {
827                &mut vpci_mana_nics[vtl]
828                    .get_or_insert_with(|| {
829                        (Guid::new_random(), GdmaDeviceHandle { vports: Vec::new() })
830                    })
831                    .1
832                    .vports
833            }
834            (0, Some(pcie_port)) => {
835                &mut pcie_mana_nics
836                    .entry(pcie_port)
837                    .or_insert(GdmaDeviceHandle { vports: Vec::new() })
838                    .vports
839            }
840            _ => anyhow::bail!("PCIe NICs only supported to VTL0"),
841        };
842        vport_array.push(VportDefinition {
843            mac_address: vport.mac_address,
844            endpoint: vport.endpoint,
845        });
846    }
847
848    vpci_devices.extend(
849        vpci_mana_nics
850            .into_iter()
851            .enumerate()
852            .filter_map(|(vtl, nic)| {
853                nic.map(|(instance_id, handle)| VpciDeviceConfig {
854                    vtl: match vtl {
855                        0 => DeviceVtl::Vtl0,
856                        1 => DeviceVtl::Vtl1,
857                        2 => DeviceVtl::Vtl2,
858                        _ => unreachable!(),
859                    },
860                    instance_id,
861                    resource: handle.into_resource(),
862                })
863            }),
864    );
865
866    pcie_devices.extend(
867        pcie_mana_nics
868            .into_iter()
869            .map(|(pcie_port, handle)| PcieDeviceConfig {
870                port_name: pcie_port,
871                resource: handle.into_resource(),
872            }),
873    );
874
875    // If VTL2 is enabled, and we are not in VTL2 self allocate mode, provide an
876    // mmio gap for VTL2.
877    let use_vtl2_gap = opt.vtl2
878        && !matches!(
879            opt.igvm_vtl2_relocation_type,
880            Vtl2BaseAddressType::Vtl2Allocate { .. },
881        );
882
883    #[cfg(guest_arch = "aarch64")]
884    let arch = MachineArch::Aarch64;
885    #[cfg(guest_arch = "x86_64")]
886    let arch = MachineArch::X86_64;
887
888    let mmio_gaps: Vec<MemoryRange> = match (use_vtl2_gap, arch) {
889        (true, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86_WITH_VTL2.into(),
890        (true, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64_WITH_VTL2.into(),
891        (false, MachineArch::X86_64) => DEFAULT_MMIO_GAPS_X86.into(),
892        (false, MachineArch::Aarch64) => DEFAULT_MMIO_GAPS_AARCH64.into(),
893    };
894
895    let mut pci_ecam_gaps = Vec::new();
896    let mut pci_mmio_gaps = Vec::new();
897
898    let mut low_mmio_start = mmio_gaps.first().context("expected mmio gap")?.start();
899    let mut high_mmio_end = mmio_gaps.last().context("expected second mmio gap")?.end();
900
901    let mut pcie_root_complexes = Vec::new();
902    for (i, rc_cli) in opt.pcie_root_complex.iter().enumerate() {
903        let ports = opt
904            .pcie_root_port
905            .iter()
906            .filter(|port_cli| port_cli.root_complex_name == rc_cli.name)
907            .map(|port_cli| PcieRootPortConfig {
908                name: port_cli.name.clone(),
909                hotplug: port_cli.hotplug,
910            })
911            .collect();
912
913        const ONE_MB: u64 = 1024 * 1024;
914        let low_mmio_size = (rc_cli.low_mmio as u64).next_multiple_of(ONE_MB);
915        let high_mmio_size = rc_cli
916            .high_mmio
917            .checked_next_multiple_of(ONE_MB)
918            .context("high mmio rounding error")?;
919        let ecam_size = (((rc_cli.end_bus - rc_cli.start_bus) as u64) + 1) * 256 * 4096;
920
921        let low_pci_mmio_start = low_mmio_start
922            .checked_sub(low_mmio_size)
923            .context("pci low mmio underflow")?;
924        let ecam_start = low_pci_mmio_start
925            .checked_sub(ecam_size)
926            .context("pci ecam underflow")?;
927        low_mmio_start = ecam_start;
928        high_mmio_end = high_mmio_end
929            .checked_add(high_mmio_size)
930            .context("pci high mmio overflow")?;
931
932        let ecam_range = MemoryRange::new(ecam_start..ecam_start + ecam_size);
933        let low_mmio = MemoryRange::new(low_pci_mmio_start..low_pci_mmio_start + low_mmio_size);
934        let high_mmio = MemoryRange::new(high_mmio_end - high_mmio_size..high_mmio_end);
935
936        pci_ecam_gaps.push(ecam_range);
937        pci_mmio_gaps.push(low_mmio);
938        pci_mmio_gaps.push(high_mmio);
939
940        pcie_root_complexes.push(PcieRootComplexConfig {
941            index: i as u32,
942            name: rc_cli.name.clone(),
943            segment: rc_cli.segment,
944            start_bus: rc_cli.start_bus,
945            end_bus: rc_cli.end_bus,
946            ecam_range,
947            low_mmio,
948            high_mmio,
949            ports,
950        });
951    }
952
953    pci_ecam_gaps.sort();
954    pci_mmio_gaps.sort();
955
956    let pcie_switches = build_switch_list(&opt.pcie_switch);
957
958    #[cfg(windows)]
959    let vpci_resources: Vec<_> = opt
960        .device
961        .iter()
962        .map(|path| -> anyhow::Result<_> {
963            Ok(virt_whp::device::DeviceHandle(
964                whp::VpciResource::new(
965                    None,
966                    Default::default(),
967                    &whp::VpciResourceDescriptor::Sriov(path, 0, 0),
968                )
969                .with_context(|| format!("opening PCI device {}", path))?,
970            ))
971        })
972        .collect::<Result<_, _>>()?;
973
974    // Create a vmbusproxy handle if needed by any devices.
975    #[cfg(windows)]
976    let vmbusproxy_handle = if !kernel_vmnics.is_empty() {
977        Some(vmbus_proxy::ProxyHandle::new().context("failed to open vmbusproxy handle")?)
978    } else {
979        None
980    };
981
982    let framebuffer = if opt.gfx || opt.vtl2_gfx || opt.vnc || opt.pcat {
983        let vram = alloc_shared_memory(FRAMEBUFFER_SIZE, "vram")?;
984        let (fb, fba) =
985            framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, 0).context("creating framebuffer")?;
986        resources.framebuffer_access = Some(fba);
987        Some(fb)
988    } else {
989        None
990    };
991
992    let load_mode;
993    let with_hv;
994
995    let any_serial_configured = serial0_cfg.is_some()
996        || serial1_cfg.is_some()
997        || serial2_cfg.is_some()
998        || serial3_cfg.is_some();
999
1000    let has_com3 = serial2_cfg.is_some();
1001
1002    let mut chipset = VmManifestBuilder::new(
1003        if opt.igvm.is_some() {
1004            BaseChipsetType::HclHost
1005        } else if opt.pcat {
1006            BaseChipsetType::HypervGen1
1007        } else if opt.uefi {
1008            BaseChipsetType::HypervGen2Uefi
1009        } else if opt.hv {
1010            BaseChipsetType::HyperVGen2LinuxDirect
1011        } else {
1012            BaseChipsetType::UnenlightenedLinuxDirect
1013        },
1014        arch,
1015    );
1016
1017    if framebuffer.is_some() {
1018        chipset = chipset.with_framebuffer();
1019    }
1020    if opt.guest_watchdog {
1021        chipset = chipset.with_guest_watchdog();
1022    }
1023    if any_serial_configured {
1024        chipset = chipset.with_serial([serial0_cfg, serial1_cfg, serial2_cfg, serial3_cfg]);
1025    }
1026    if opt.battery {
1027        let (tx, rx) = mesh::channel();
1028        tx.send(HostBatteryUpdate::default_present());
1029        chipset = chipset.with_battery(rx);
1030    }
1031    if let Some(cfg) = &opt.debugcon {
1032        chipset = chipset.with_debugcon(
1033            debugcon_cfg.unwrap_or_else(|| DisconnectedSerialBackendHandle.into_resource()),
1034            cfg.port,
1035        );
1036    }
1037
1038    // TODO: load from VMGS file if it exists
1039    let bios_guid = Guid::new_random();
1040
1041    let VmChipsetResult {
1042        chipset,
1043        mut chipset_devices,
1044    } = chipset
1045        .build()
1046        .context("failed to build chipset configuration")?;
1047
1048    if opt.restore_snapshot.is_some() {
1049        // Snapshot restore: skip firmware loading entirely. Device state and
1050        // memory come from the snapshot directory.
1051        load_mode = LoadMode::None;
1052        with_hv = true;
1053    } else if let Some(path) = &opt.igvm {
1054        let file = fs_err::File::open(path)
1055            .context("failed to open igvm file")?
1056            .into();
1057        let cmdline = opt.cmdline.join(" ");
1058        with_hv = true;
1059
1060        load_mode = LoadMode::Igvm {
1061            file,
1062            cmdline,
1063            vtl2_base_address: opt.igvm_vtl2_relocation_type,
1064            com_serial: has_com3.then(|| SerialInformation {
1065                io_port: ComPort::Com3.io_port(),
1066                irq: ComPort::Com3.irq().into(),
1067            }),
1068        };
1069    } else if opt.pcat {
1070        // Emit a nice error early instead of complaining about missing firmware.
1071        if arch != MachineArch::X86_64 {
1072            anyhow::bail!("pcat not supported on this architecture");
1073        }
1074        with_hv = true;
1075
1076        let firmware = openvmm_pcat_locator::find_pcat_bios(opt.pcat_firmware.as_deref())?;
1077        load_mode = LoadMode::Pcat {
1078            firmware,
1079            boot_order: opt
1080                .pcat_boot_order
1081                .map(|x| x.0)
1082                .unwrap_or(DEFAULT_PCAT_BOOT_ORDER),
1083        };
1084    } else if opt.uefi {
1085        use openvmm_defs::config::UefiConsoleMode;
1086
1087        with_hv = true;
1088
1089        let firmware = fs_err::File::open(
1090            (opt.uefi_firmware.0)
1091                .as_ref()
1092                .context("must provide uefi firmware when booting with uefi")?,
1093        )
1094        .context("failed to open uefi firmware")?;
1095
1096        // TODO: It would be better to default memory protections to on, but currently Linux does not boot via UEFI due to what
1097        //       appears to be a GRUB memory protection fault. Memory protections are therefore only enabled if configured.
1098        load_mode = LoadMode::Uefi {
1099            firmware: firmware.into(),
1100            enable_debugging: opt.uefi_debug,
1101            enable_memory_protections: opt.uefi_enable_memory_protections,
1102            disable_frontpage: opt.disable_frontpage,
1103            enable_tpm: opt.tpm,
1104            enable_battery: opt.battery,
1105            enable_serial: any_serial_configured,
1106            enable_vpci_boot: false,
1107            uefi_console_mode: opt.uefi_console_mode.map(|m| match m {
1108                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1109                UefiConsoleModeCli::Com1 => UefiConsoleMode::Com1,
1110                UefiConsoleModeCli::Com2 => UefiConsoleMode::Com2,
1111                UefiConsoleModeCli::None => UefiConsoleMode::None,
1112            }),
1113            default_boot_always_attempt: opt.default_boot_always_attempt,
1114            bios_guid,
1115        };
1116    } else {
1117        // Linux Direct
1118        let mut cmdline = "panic=-1 debug".to_string();
1119
1120        with_hv = opt.hv;
1121        if with_hv && opt.pcie_root_complex.is_empty() {
1122            cmdline += " pci=off";
1123        }
1124
1125        if !console_str.is_empty() {
1126            let _ = write!(&mut cmdline, " console={}", console_str);
1127        }
1128        if opt.gfx {
1129            cmdline += " console=tty";
1130        }
1131        for extra in &opt.cmdline {
1132            let _ = write!(&mut cmdline, " {}", extra);
1133        }
1134
1135        let kernel = fs_err::File::open(
1136            (opt.kernel.0)
1137                .as_ref()
1138                .context("must provide kernel when booting with linux direct")?,
1139        )
1140        .context("failed to open kernel")?;
1141        let initrd = (opt.initrd.0)
1142            .as_ref()
1143            .map(fs_err::File::open)
1144            .transpose()
1145            .context("failed to open initrd")?;
1146
1147        let custom_dsdt = match &opt.custom_dsdt {
1148            Some(path) => {
1149                let mut v = Vec::new();
1150                fs_err::File::open(path)
1151                    .context("failed to open custom dsdt")?
1152                    .read_to_end(&mut v)
1153                    .context("failed to read custom dsdt")?;
1154                Some(v)
1155            }
1156            None => None,
1157        };
1158
1159        load_mode = LoadMode::Linux {
1160            kernel: kernel.into(),
1161            initrd: initrd.map(Into::into),
1162            cmdline,
1163            custom_dsdt,
1164            enable_serial: any_serial_configured,
1165        };
1166    }
1167
1168    let mut vmgs = Some(if let Some(VmgsCli { kind, provision }) = &opt.vmgs {
1169        let disk = VmgsDisk {
1170            disk: disk_open(kind, false).context("failed to open vmgs disk")?,
1171            encryption_policy: if opt.test_gsp_by_id {
1172                GuestStateEncryptionPolicy::GspById(true)
1173            } else {
1174                GuestStateEncryptionPolicy::None(true)
1175            },
1176        };
1177        match provision {
1178            ProvisionVmgs::OnEmpty => VmgsResource::Disk(disk),
1179            ProvisionVmgs::OnFailure => VmgsResource::ReprovisionOnFailure(disk),
1180            ProvisionVmgs::True => VmgsResource::Reprovision(disk),
1181        }
1182    } else {
1183        VmgsResource::Ephemeral
1184    });
1185
1186    if with_get && with_hv {
1187        let vtl2_settings = vtl2_settings_proto::Vtl2Settings {
1188            version: vtl2_settings_proto::vtl2_settings_base::Version::V1.into(),
1189            fixed: Some(Default::default()),
1190            dynamic: Some(vtl2_settings_proto::Vtl2SettingsDynamic {
1191                storage_controllers: storage.build_underhill(opt.vmbus_redirect),
1192                nic_devices: underhill_nics,
1193            }),
1194            namespace_settings: Vec::default(),
1195        };
1196
1197        // Cache the VTL2 settings for later modification via the interactive console.
1198        resources.vtl2_settings = Some(vtl2_settings.clone());
1199
1200        let (send, guest_request_recv) = mesh::channel();
1201        resources.ged_rpc = Some(send);
1202
1203        let vmgs = vmgs.take().unwrap();
1204
1205        vmbus_devices.extend([
1206            (
1207                openhcl_vtl,
1208                get_resources::gel::GuestEmulationLogHandle.into_resource(),
1209            ),
1210            (
1211                openhcl_vtl,
1212                get_resources::ged::GuestEmulationDeviceHandle {
1213                    firmware: if opt.pcat {
1214                        get_resources::ged::GuestFirmwareConfig::Pcat {
1215                            boot_order: opt
1216                                .pcat_boot_order
1217                                .map_or(DEFAULT_PCAT_BOOT_ORDER, |x| x.0)
1218                                .map(|x| match x {
1219                                    openvmm_defs::config::PcatBootDevice::Floppy => {
1220                                        get_resources::ged::PcatBootDevice::Floppy
1221                                    }
1222                                    openvmm_defs::config::PcatBootDevice::HardDrive => {
1223                                        get_resources::ged::PcatBootDevice::HardDrive
1224                                    }
1225                                    openvmm_defs::config::PcatBootDevice::Optical => {
1226                                        get_resources::ged::PcatBootDevice::Optical
1227                                    }
1228                                    openvmm_defs::config::PcatBootDevice::Network => {
1229                                        get_resources::ged::PcatBootDevice::Network
1230                                    }
1231                                }),
1232                        }
1233                    } else {
1234                        use get_resources::ged::UefiConsoleMode;
1235
1236                        get_resources::ged::GuestFirmwareConfig::Uefi {
1237                            enable_vpci_boot: storage.has_vtl0_nvme(),
1238                            firmware_debug: opt.uefi_debug,
1239                            disable_frontpage: opt.disable_frontpage,
1240                            console_mode: match opt.uefi_console_mode.unwrap_or(UefiConsoleModeCli::Default) {
1241                                UefiConsoleModeCli::Default => UefiConsoleMode::Default,
1242                                UefiConsoleModeCli::Com1 => UefiConsoleMode::COM1,
1243                                UefiConsoleModeCli::Com2 => UefiConsoleMode::COM2,
1244                                UefiConsoleModeCli::None => UefiConsoleMode::None,
1245                            },
1246                            default_boot_always_attempt: opt.default_boot_always_attempt,
1247                        }
1248                    },
1249                    com1: with_vmbus_com1_serial,
1250                    com2: with_vmbus_com2_serial,
1251                    serial_tx_only: opt.serial_tx_only,
1252                    vtl2_settings: Some(prost::Message::encode_to_vec(&vtl2_settings)),
1253                    vmbus_redirection: opt.vmbus_redirect,
1254                    vmgs,
1255                    framebuffer: opt
1256                        .vtl2_gfx
1257                        .then(|| SharedFramebufferHandle.into_resource()),
1258                    guest_request_recv,
1259                    enable_tpm: opt.tpm,
1260                    firmware_event_send: None,
1261                    secure_boot_enabled: opt.secure_boot,
1262                    secure_boot_template: match opt.secure_boot_template {
1263                        Some(SecureBootTemplateCli::Windows) => {
1264                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftWindows
1265                        },
1266                        Some(SecureBootTemplateCli::UefiCa) => {
1267                            get_resources::ged::GuestSecureBootTemplateType::MicrosoftUefiCertificateAuthority
1268                        }
1269                        None => {
1270                            get_resources::ged::GuestSecureBootTemplateType::None
1271                        },
1272                    },
1273                    enable_battery: opt.battery,
1274                    no_persistent_secrets: true,
1275                    igvm_attest_test_config: None,
1276                    test_gsp_by_id: opt.test_gsp_by_id,
1277                    efi_diagnostics_log_level: {
1278                        match opt.efi_diagnostics_log_level.unwrap_or_default() {
1279                            EfiDiagnosticsLogLevelCli::Default => get_resources::ged::EfiDiagnosticsLogLevelType::Default,
1280                            EfiDiagnosticsLogLevelCli::Info => get_resources::ged::EfiDiagnosticsLogLevelType::Info,
1281                            EfiDiagnosticsLogLevelCli::Full => get_resources::ged::EfiDiagnosticsLogLevelType::Full,
1282                        }
1283                    },
1284                    hv_sint_enabled: false,
1285                }
1286                .into_resource(),
1287            ),
1288        ]);
1289    }
1290
1291    if opt.tpm && !opt.vtl2 {
1292        let register_layout = if cfg!(guest_arch = "x86_64") {
1293            TpmRegisterLayout::IoPort
1294        } else {
1295            TpmRegisterLayout::Mmio
1296        };
1297
1298        let (ppi_store, nvram_store) = if opt.vmgs.is_some() {
1299            (
1300                VmgsFileHandle::new(vmgs_format::FileId::TPM_PPI, true).into_resource(),
1301                VmgsFileHandle::new(vmgs_format::FileId::TPM_NVRAM, true).into_resource(),
1302            )
1303        } else {
1304            (
1305                EphemeralNonVolatileStoreHandle.into_resource(),
1306                EphemeralNonVolatileStoreHandle.into_resource(),
1307            )
1308        };
1309
1310        chipset_devices.push(ChipsetDeviceHandle {
1311            name: "tpm".to_string(),
1312            resource: chipset_device_worker_defs::RemoteChipsetDeviceHandle {
1313                device: TpmDeviceHandle {
1314                    ppi_store,
1315                    nvram_store,
1316                    nvram_size: None,
1317                    refresh_tpm_seeds: false,
1318                    ak_cert_type: tpm_resources::TpmAkCertTypeResource::None,
1319                    register_layout,
1320                    guest_secret_key: None,
1321                    logger: None,
1322                    is_confidential_vm: false,
1323                    bios_guid,
1324                }
1325                .into_resource(),
1326                worker_host: mesh.make_host("tpm", None).await?,
1327            }
1328            .into_resource(),
1329        });
1330    }
1331
1332    let custom_uefi_vars = {
1333        use firmware_uefi_custom_vars::CustomVars;
1334
1335        // load base vars from specified template, or use an empty set of base
1336        // vars if none was specified.
1337        let base_vars = match opt.secure_boot_template {
1338            Some(template) => match (arch, template) {
1339                (MachineArch::X86_64, SecureBootTemplateCli::Windows) => {
1340                    hyperv_secure_boot_templates::x64::microsoft_windows()
1341                }
1342                (MachineArch::X86_64, SecureBootTemplateCli::UefiCa) => {
1343                    hyperv_secure_boot_templates::x64::microsoft_uefi_ca()
1344                }
1345                (MachineArch::Aarch64, SecureBootTemplateCli::Windows) => {
1346                    hyperv_secure_boot_templates::aarch64::microsoft_windows()
1347                }
1348                (MachineArch::Aarch64, SecureBootTemplateCli::UefiCa) => {
1349                    hyperv_secure_boot_templates::aarch64::microsoft_uefi_ca()
1350                }
1351            },
1352            None => CustomVars::default(),
1353        };
1354
1355        // TODO: fallback to VMGS read if no command line flag was given
1356
1357        let custom_uefi_json_data = match &opt.custom_uefi_json {
1358            Some(file) => Some(fs_err::read(file).context("opening custom uefi json file")?),
1359            None => None,
1360        };
1361
1362        // obtain the final custom uefi vars by applying the delta onto the base vars
1363        match custom_uefi_json_data {
1364            Some(data) => {
1365                let delta = hyperv_uefi_custom_vars_json::load_delta_from_json(&data)?;
1366                base_vars.apply_delta(delta)?
1367            }
1368            None => base_vars,
1369        }
1370    };
1371
1372    let vga_firmware = if opt.pcat {
1373        Some(openvmm_pcat_locator::find_svga_bios(
1374            opt.vga_firmware.as_deref(),
1375        )?)
1376    } else {
1377        None
1378    };
1379
1380    if opt.gfx {
1381        vmbus_devices.extend([
1382            (
1383                DeviceVtl::Vtl0,
1384                SynthVideoHandle {
1385                    framebuffer: SharedFramebufferHandle.into_resource(),
1386                }
1387                .into_resource(),
1388            ),
1389            (
1390                DeviceVtl::Vtl0,
1391                SynthKeyboardHandle {
1392                    source: MultiplexedInputHandle {
1393                        // Save 0 for PS/2
1394                        elevation: 1,
1395                    }
1396                    .into_resource(),
1397                }
1398                .into_resource(),
1399            ),
1400            (
1401                DeviceVtl::Vtl0,
1402                SynthMouseHandle {
1403                    source: MultiplexedInputHandle {
1404                        // Save 0 for PS/2
1405                        elevation: 1,
1406                    }
1407                    .into_resource(),
1408                }
1409                .into_resource(),
1410            ),
1411        ]);
1412    }
1413
1414    let vsock_listener = |path: Option<&str>| -> anyhow::Result<_> {
1415        if let Some(path) = path {
1416            cleanup_socket(path.as_ref());
1417            let listener = unix_socket::UnixListener::bind(path)
1418                .with_context(|| format!("failed to bind to hybrid vsock path: {}", path))?;
1419            Ok(Some(listener))
1420        } else {
1421            Ok(None)
1422        }
1423    };
1424
1425    let vtl0_vsock_listener = vsock_listener(opt.vsock_path.as_deref())?;
1426    let vtl2_vsock_listener = vsock_listener(opt.vtl2_vsock_path.as_deref())?;
1427
1428    if let Some(path) = &opt.openhcl_dump_path {
1429        let (resource, task) = spawn_dump_handler(&spawner, path.clone(), None);
1430        task.detach();
1431        vmbus_devices.push((openhcl_vtl, resource));
1432    }
1433
1434    #[cfg(guest_arch = "aarch64")]
1435    let topology_arch = openvmm_defs::config::ArchTopologyConfig::Aarch64(
1436        openvmm_defs::config::Aarch64TopologyConfig {
1437            // TODO: allow this to be configured from the command line
1438            gic_config: None,
1439            pmu_gsiv: openvmm_defs::config::PmuGsivConfig::Platform,
1440        },
1441    );
1442    #[cfg(guest_arch = "x86_64")]
1443    let topology_arch =
1444        openvmm_defs::config::ArchTopologyConfig::X86(openvmm_defs::config::X86TopologyConfig {
1445            apic_id_offset: opt.apic_id_offset,
1446            x2apic: opt.x2apic,
1447        });
1448
1449    let with_isolation = if let Some(isolation) = &opt.isolation {
1450        // TODO: For now, isolation is only supported with VTL2.
1451        if !opt.vtl2 {
1452            anyhow::bail!("isolation is only currently supported with vtl2");
1453        }
1454
1455        // TODO: Alias map support is not yet implemented with isolation.
1456        if !opt.no_alias_map {
1457            anyhow::bail!("alias map not supported with isolation");
1458        }
1459
1460        match isolation {
1461            cli_args::IsolationCli::Vbs => Some(openvmm_defs::config::IsolationType::Vbs),
1462        }
1463    } else {
1464        None
1465    };
1466
1467    if with_hv {
1468        let (shutdown_send, shutdown_recv) = mesh::channel();
1469        resources.shutdown_ic = Some(shutdown_send);
1470        let (kvp_send, kvp_recv) = mesh::channel();
1471        resources.kvp_ic = Some(kvp_send);
1472        vmbus_devices.extend(
1473            [
1474                hyperv_ic_resources::shutdown::ShutdownIcHandle {
1475                    recv: shutdown_recv,
1476                }
1477                .into_resource(),
1478                hyperv_ic_resources::kvp::KvpIcHandle { recv: kvp_recv }.into_resource(),
1479                hyperv_ic_resources::timesync::TimesyncIcHandle.into_resource(),
1480            ]
1481            .map(|r| (DeviceVtl::Vtl0, r)),
1482        );
1483    }
1484
1485    if let Some(hive_path) = &opt.imc {
1486        let file = fs_err::File::open(hive_path).context("failed to open imc hive")?;
1487        vmbus_devices.push((
1488            DeviceVtl::Vtl0,
1489            vmbfs_resources::VmbfsImcDeviceHandle { file: file.into() }.into_resource(),
1490        ));
1491    }
1492
1493    let mut virtio_devices = Vec::new();
1494    let mut add_virtio_device = |bus, resource: Resource<VirtioDeviceHandle>| {
1495        let bus = match bus {
1496            VirtioBusCli::Auto => {
1497                // Use VPCI when possible (currently only on Windows and macOS due
1498                // to KVM backend limitations).
1499                if with_hv && (cfg!(windows) || cfg!(target_os = "macos")) {
1500                    None
1501                } else {
1502                    Some(VirtioBus::Pci)
1503                }
1504            }
1505            VirtioBusCli::Mmio => Some(VirtioBus::Mmio),
1506            VirtioBusCli::Pci => Some(VirtioBus::Pci),
1507            VirtioBusCli::Vpci => None,
1508        };
1509        if let Some(bus) = bus {
1510            virtio_devices.push((bus, resource));
1511        } else {
1512            vpci_devices.push(VpciDeviceConfig {
1513                vtl: DeviceVtl::Vtl0,
1514                instance_id: Guid::new_random(),
1515                resource: VirtioPciDeviceHandle(resource).into_resource(),
1516            });
1517        }
1518    };
1519
1520    for cli_cfg in &opt.virtio_net {
1521        if cli_cfg.underhill {
1522            anyhow::bail!("use --net uh:[...] to add underhill NICs")
1523        }
1524        let vport = parse_endpoint(cli_cfg, &mut nic_index, &mut resources)?;
1525        let resource = virtio_resources::net::VirtioNetHandle {
1526            max_queues: vport.max_queues,
1527            mac_address: vport.mac_address,
1528            endpoint: vport.endpoint,
1529        }
1530        .into_resource();
1531        if let Some(pcie_port) = &cli_cfg.pcie_port {
1532            pcie_devices.push(PcieDeviceConfig {
1533                port_name: pcie_port.clone(),
1534                resource: VirtioPciDeviceHandle(resource).into_resource(),
1535            });
1536        } else {
1537            add_virtio_device(VirtioBusCli::Auto, resource);
1538        }
1539    }
1540
1541    for args in &opt.virtio_fs {
1542        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1543            tag: args.tag.clone(),
1544            fs: virtio_resources::fs::VirtioFsBackend::HostFs {
1545                root_path: args.path.clone(),
1546                mount_options: args.options.clone(),
1547            },
1548        }
1549        .into_resource();
1550        if let Some(pcie_port) = &args.pcie_port {
1551            pcie_devices.push(PcieDeviceConfig {
1552                port_name: pcie_port.clone(),
1553                resource: VirtioPciDeviceHandle(resource).into_resource(),
1554            });
1555        } else {
1556            add_virtio_device(opt.virtio_fs_bus, resource);
1557        }
1558    }
1559
1560    for args in &opt.virtio_fs_shmem {
1561        let resource: Resource<VirtioDeviceHandle> = virtio_resources::fs::VirtioFsHandle {
1562            tag: args.tag.clone(),
1563            fs: virtio_resources::fs::VirtioFsBackend::SectionFs {
1564                root_path: args.path.clone(),
1565            },
1566        }
1567        .into_resource();
1568        if let Some(pcie_port) = &args.pcie_port {
1569            pcie_devices.push(PcieDeviceConfig {
1570                port_name: pcie_port.clone(),
1571                resource: VirtioPciDeviceHandle(resource).into_resource(),
1572            });
1573        } else {
1574            add_virtio_device(opt.virtio_fs_bus, resource);
1575        }
1576    }
1577
1578    for args in &opt.virtio_9p {
1579        let resource: Resource<VirtioDeviceHandle> = virtio_resources::p9::VirtioPlan9Handle {
1580            tag: args.tag.clone(),
1581            root_path: args.path.clone(),
1582            debug: opt.virtio_9p_debug,
1583        }
1584        .into_resource();
1585        if let Some(pcie_port) = &args.pcie_port {
1586            pcie_devices.push(PcieDeviceConfig {
1587                port_name: pcie_port.clone(),
1588                resource: VirtioPciDeviceHandle(resource).into_resource(),
1589            });
1590        } else {
1591            add_virtio_device(VirtioBusCli::Auto, resource);
1592        }
1593    }
1594
1595    if let Some(pmem_args) = &opt.virtio_pmem {
1596        let resource: Resource<VirtioDeviceHandle> = virtio_resources::pmem::VirtioPmemHandle {
1597            path: pmem_args.path.clone(),
1598        }
1599        .into_resource();
1600        if let Some(pcie_port) = &pmem_args.pcie_port {
1601            pcie_devices.push(PcieDeviceConfig {
1602                port_name: pcie_port.clone(),
1603                resource: VirtioPciDeviceHandle(resource).into_resource(),
1604            });
1605        } else {
1606            add_virtio_device(VirtioBusCli::Auto, resource);
1607        }
1608    }
1609
1610    if opt.virtio_rng {
1611        let resource: Resource<VirtioDeviceHandle> =
1612            virtio_resources::rng::VirtioRngHandle.into_resource();
1613        if let Some(pcie_port) = &opt.virtio_rng_pcie_port {
1614            pcie_devices.push(PcieDeviceConfig {
1615                port_name: pcie_port.clone(),
1616                resource: VirtioPciDeviceHandle(resource).into_resource(),
1617            });
1618        } else {
1619            add_virtio_device(opt.virtio_rng_bus, resource);
1620        }
1621    }
1622
1623    if let Some(backend) = virtio_console_backend {
1624        let resource: Resource<VirtioDeviceHandle> =
1625            virtio_resources::console::VirtioConsoleHandle { backend }.into_resource();
1626        if let Some(pcie_port) = &opt.virtio_console_pcie_port {
1627            pcie_devices.push(PcieDeviceConfig {
1628                port_name: pcie_port.clone(),
1629                resource: VirtioPciDeviceHandle(resource).into_resource(),
1630            });
1631        } else {
1632            add_virtio_device(VirtioBusCli::Auto, resource);
1633        }
1634    }
1635
1636    let mut cfg = Config {
1637        chipset,
1638        load_mode,
1639        floppy_disks,
1640        pcie_root_complexes,
1641        pcie_devices,
1642        pcie_switches,
1643        vpci_devices,
1644        ide_disks: Vec::new(),
1645        memory: MemoryConfig {
1646            mem_size: opt.memory,
1647            mmio_gaps,
1648            prefetch_memory: opt.prefetch,
1649            private_memory: opt.private_memory,
1650            transparent_hugepages: opt.thp,
1651            pci_ecam_gaps,
1652            pci_mmio_gaps,
1653        },
1654        processor_topology: ProcessorTopologyConfig {
1655            proc_count: opt.processors,
1656            vps_per_socket: opt.vps_per_socket,
1657            enable_smt: match opt.smt {
1658                cli_args::SmtConfigCli::Auto => None,
1659                cli_args::SmtConfigCli::Force => Some(true),
1660                cli_args::SmtConfigCli::Off => Some(false),
1661            },
1662            arch: Some(topology_arch),
1663        },
1664        hypervisor: HypervisorConfig {
1665            with_hv,
1666            with_vtl2: opt.vtl2.then_some(Vtl2Config {
1667                vtl0_alias_map: !opt.no_alias_map,
1668                late_map_vtl0_memory: match opt.late_map_vtl0_policy {
1669                    cli_args::Vtl0LateMapPolicyCli::Off => None,
1670                    cli_args::Vtl0LateMapPolicyCli::Log => Some(LateMapVtl0MemoryPolicy::Log),
1671                    cli_args::Vtl0LateMapPolicyCli::Halt => Some(LateMapVtl0MemoryPolicy::Halt),
1672                    cli_args::Vtl0LateMapPolicyCli::Exception => {
1673                        Some(LateMapVtl0MemoryPolicy::InjectException)
1674                    }
1675                },
1676            }),
1677            with_isolation,
1678            user_mode_hv_enlightenments: opt.no_enlightenments,
1679            user_mode_apic: opt.user_mode_apic,
1680        },
1681        #[cfg(windows)]
1682        kernel_vmnics,
1683        input: mesh::Receiver::new(),
1684        framebuffer,
1685        vga_firmware,
1686        vtl2_gfx: opt.vtl2_gfx,
1687        virtio_devices,
1688        vmbus: with_hv.then_some(VmbusConfig {
1689            vsock_listener: vtl0_vsock_listener,
1690            vsock_path: opt.vsock_path.clone(),
1691            vtl2_redirect: opt.vmbus_redirect,
1692            vmbus_max_version: opt.vmbus_max_version,
1693            #[cfg(windows)]
1694            vmbusproxy_handle,
1695        }),
1696        vtl2_vmbus: (with_hv && opt.vtl2).then_some(VmbusConfig {
1697            vsock_listener: vtl2_vsock_listener,
1698            vsock_path: opt.vtl2_vsock_path.clone(),
1699            ..Default::default()
1700        }),
1701        vmbus_devices,
1702        chipset_devices,
1703        #[cfg(windows)]
1704        vpci_resources,
1705        vmgs,
1706        secure_boot_enabled: opt.secure_boot,
1707        custom_uefi_vars,
1708        firmware_event_send: None,
1709        debugger_rpc: None,
1710        generation_id_recv: None,
1711        rtc_delta_milliseconds: 0,
1712        automatic_guest_reset: !opt.halt_on_reset,
1713        efi_diagnostics_log_level: {
1714            match opt.efi_diagnostics_log_level.unwrap_or_default() {
1715                EfiDiagnosticsLogLevelCli::Default => EfiDiagnosticsLogLevelType::Default,
1716                EfiDiagnosticsLogLevelCli::Info => EfiDiagnosticsLogLevelType::Info,
1717                EfiDiagnosticsLogLevelCli::Full => EfiDiagnosticsLogLevelType::Full,
1718            }
1719        },
1720    };
1721
1722    storage.build_config(&mut cfg, &mut resources, opt.scsi_sub_channels)?;
1723    Ok((cfg, resources))
1724}
1725
/// Returns the terminal program to use for externally launched console
/// windows, if one is configured in the environment.
///
/// `OPENVMM_TERM` takes precedence; `HVLITE_TERM` is consulted only when the
/// former is unset.
fn openvmm_terminal_app() -> Option<PathBuf> {
    // Listed in priority order: the first variable that is set wins.
    const TERM_VARS: [&str; 2] = ["OPENVMM_TERM", "HVLITE_TERM"];

    TERM_VARS
        .iter()
        .find_map(|name| std::env::var_os(name))
        .map(PathBuf::from)
}
1732
// Best-effort removal of `path`, performed only when the path is confirmed to
// be a Unix socket. Any failure (to inspect or to remove) is ignored.
fn cleanup_socket(path: &Path) {
    #[cfg(windows)]
    let confirmed_socket = pal::windows::fs::is_unix_socket(path).unwrap_or(false);
    #[cfg(not(windows))]
    let confirmed_socket = {
        use std::os::unix::fs::FileTypeExt;
        // A path whose metadata cannot be read is treated as "not a socket".
        path.metadata().is_ok_and(|meta| meta.file_type().is_socket())
    };

    if !confirmed_socket {
        return;
    }
    let _ = std::fs::remove_file(path);
}
1746
/// Switch ID used for a `dio` endpoint when the command line does not name
/// one (see `parse_endpoint`). It is parsed as a switch ID by
/// `new_switch_port`.
#[cfg(windows)]
const DEFAULT_SWITCH: &str = "C08CB7B8-9B3C-408E-8E30-5E16A3AEB444";
1749
/// Creates a new port, with a random port ID, on the switch named by
/// `switch_id`.
///
/// Returns the port's ID in `openvmm_defs` form together with the kernel
/// switch port object.
///
/// # Errors
///
/// Fails if `switch_id` does not parse, if the switch cannot be opened, or if
/// the port cannot be created.
#[cfg(windows)]
fn new_switch_port(
    switch_id: &str,
) -> anyhow::Result<(
    openvmm_defs::config::SwitchPortId,
    vmswitch::kernel::SwitchPort,
)> {
    use vmswitch::kernel;

    let kernel_id = kernel::SwitchPortId {
        switch: switch_id.parse().context("invalid switch id")?,
        port: Guid::new_random(),
    };

    // The network is opened only to check that the switch can be found; the
    // returned value is discarded immediately.
    let _ = vmswitch::hcn::Network::open(&kernel_id.switch)
        .with_context(|| format!("could not find switch {}", kernel_id.switch))?;

    let port = kernel::SwitchPort::new(&kernel_id).context("failed to create switch port")?;

    let config_id = openvmm_defs::config::SwitchPortId {
        switch: kernel_id.switch,
        port: kernel_id.port,
    };
    Ok((config_id, port))
}
1772
1773fn parse_endpoint(
1774    cli_cfg: &NicConfigCli,
1775    index: &mut usize,
1776    resources: &mut VmResources,
1777) -> anyhow::Result<NicConfig> {
1778    let _ = resources;
1779    let endpoint = match &cli_cfg.endpoint {
1780        EndpointConfigCli::Consomme { cidr } => {
1781            net_backend_resources::consomme::ConsommeHandle { cidr: cidr.clone() }.into_resource()
1782        }
1783        EndpointConfigCli::None => net_backend_resources::null::NullHandle.into_resource(),
1784        EndpointConfigCli::Dio { id } => {
1785            #[cfg(windows)]
1786            {
1787                let (port_id, port) = new_switch_port(id.as_deref().unwrap_or(DEFAULT_SWITCH))?;
1788                resources.switch_ports.push(port);
1789                net_backend_resources::dio::WindowsDirectIoHandle {
1790                    switch_port_id: net_backend_resources::dio::SwitchPortId {
1791                        switch: port_id.switch,
1792                        port: port_id.port,
1793                    },
1794                }
1795                .into_resource()
1796            }
1797
1798            #[cfg(not(windows))]
1799            {
1800                let _ = id;
1801                bail!("cannot use dio on non-windows platforms")
1802            }
1803        }
1804        EndpointConfigCli::Tap { name } => {
1805            net_backend_resources::tap::TapHandle { name: name.clone() }.into_resource()
1806        }
1807    };
1808
1809    // Pick a random MAC address.
1810    let mut mac_address = [0x00, 0x15, 0x5D, 0, 0, 0];
1811    getrandom::fill(&mut mac_address[3..]).expect("rng failure");
1812
1813    // Pick a fixed instance ID based on the index.
1814    const BASE_INSTANCE_ID: Guid = guid::guid!("00000000-da43-11ed-936a-00155d6db52f");
1815    let instance_id = Guid {
1816        data1: *index as u32,
1817        ..BASE_INSTANCE_ID
1818    };
1819    *index += 1;
1820
1821    Ok(NicConfig {
1822        vtl: cli_cfg.vtl,
1823        instance_id,
1824        endpoint,
1825        mac_address: mac_address.into(),
1826        max_queues: cli_cfg.max_queues,
1827        pcie_port: cli_cfg.pcie_port.clone(),
1828    })
1829}
1830
/// A NIC as resolved from the command line by `parse_endpoint`.
#[derive(Debug)]
struct NicConfig {
    /// The VTL requested for the NIC on the command line.
    vtl: DeviceVtl,
    /// Instance ID derived from a fixed base GUID and the NIC's index.
    instance_id: Guid,
    /// MAC address; `parse_endpoint` generates it with a fixed three-byte
    /// prefix and three random bytes.
    mac_address: MacAddress,
    /// The network backend resource for this NIC.
    endpoint: Resource<NetEndpointHandleKind>,
    /// Optional queue limit, copied from the command line.
    max_queues: Option<u16>,
    /// Optional PCIe port name, copied from the command line.
    pcie_port: Option<String>,
}
1840
1841impl NicConfig {
1842    fn into_netvsp_handle(self) -> (DeviceVtl, Resource<VmbusDeviceHandleKind>) {
1843        (
1844            self.vtl,
1845            netvsp_resources::NetvspHandle {
1846                instance_id: self.instance_id,
1847                mac_address: self.mac_address,
1848                endpoint: self.endpoint,
1849                max_queues: self.max_queues,
1850            }
1851            .into_resource(),
1852        )
1853    }
1854}
1855
/// One entry produced by `disk_open_inner`: either a layer description or a
/// complete disk resource. `disk_open` wraps `Disk` entries into layers when
/// more than one entry (or a lone layer) was produced.
enum LayerOrDisk {
    /// A disk layer, including its caching/write-through settings.
    Layer(DiskLayerDescription),
    /// A fully formed disk resource.
    Disk(Resource<DiskHandleKind>),
}
1860
1861fn disk_open(disk_cli: &DiskCliKind, read_only: bool) -> anyhow::Result<Resource<DiskHandleKind>> {
1862    let mut layers = Vec::new();
1863    disk_open_inner(disk_cli, read_only, &mut layers)?;
1864    if layers.len() == 1 && matches!(layers[0], LayerOrDisk::Disk(_)) {
1865        let LayerOrDisk::Disk(disk) = layers.pop().unwrap() else {
1866            unreachable!()
1867        };
1868        Ok(disk)
1869    } else {
1870        Ok(Resource::new(disk_backend_resources::LayeredDiskHandle {
1871            layers: layers
1872                .into_iter()
1873                .map(|layer| match layer {
1874                    LayerOrDisk::Layer(layer) => layer,
1875                    LayerOrDisk::Disk(disk) => DiskLayerDescription {
1876                        layer: DiskLayerHandle(disk).into_resource(),
1877                        read_cache: false,
1878                        write_through: false,
1879                    },
1880                })
1881                .collect(),
1882        }))
1883    }
1884}
1885
1886fn disk_open_inner(
1887    disk_cli: &DiskCliKind,
1888    read_only: bool,
1889    layers: &mut Vec<LayerOrDisk>,
1890) -> anyhow::Result<()> {
1891    fn layer<T: IntoResource<DiskLayerHandleKind>>(layer: T) -> LayerOrDisk {
1892        LayerOrDisk::Layer(layer.into_resource().into())
1893    }
1894    fn disk<T: IntoResource<DiskHandleKind>>(disk: T) -> LayerOrDisk {
1895        LayerOrDisk::Disk(disk.into_resource())
1896    }
1897    match disk_cli {
1898        &DiskCliKind::Memory(len) => {
1899            layers.push(layer(RamDiskLayerHandle {
1900                len: Some(len),
1901                sector_size: None,
1902            }));
1903        }
1904        DiskCliKind::File {
1905            path,
1906            create_with_len,
1907        } => layers.push(LayerOrDisk::Disk(if let Some(size) = create_with_len {
1908            create_disk_type(path, *size)
1909                .with_context(|| format!("failed to create {}", path.display()))?
1910        } else {
1911            open_disk_type(path, read_only)
1912                .with_context(|| format!("failed to open {}", path.display()))?
1913        })),
1914        DiskCliKind::Blob { kind, url } => {
1915            layers.push(disk(disk_backend_resources::BlobDiskHandle {
1916                url: url.to_owned(),
1917                format: match kind {
1918                    cli_args::BlobKind::Flat => disk_backend_resources::BlobDiskFormat::Flat,
1919                    cli_args::BlobKind::Vhd1 => disk_backend_resources::BlobDiskFormat::FixedVhd1,
1920                },
1921            }))
1922        }
1923        DiskCliKind::MemoryDiff(inner) => {
1924            layers.push(layer(RamDiskLayerHandle {
1925                len: None,
1926                sector_size: None,
1927            }));
1928            disk_open_inner(inner, true, layers)?;
1929        }
1930        DiskCliKind::PersistentReservationsWrapper(inner) => layers.push(disk(
1931            disk_backend_resources::DiskWithReservationsHandle(disk_open(inner, read_only)?),
1932        )),
1933        DiskCliKind::DelayDiskWrapper {
1934            delay_ms,
1935            disk: inner,
1936        } => layers.push(disk(DelayDiskHandle {
1937            delay: CellUpdater::new(Duration::from_millis(*delay_ms)).cell(),
1938            disk: disk_open(inner, read_only)?,
1939        })),
1940        DiskCliKind::Crypt {
1941            disk: inner,
1942            cipher,
1943            key_file,
1944        } => layers.push(disk(disk_crypt_resources::DiskCryptHandle {
1945            disk: disk_open(inner, read_only)?,
1946            cipher: match cipher {
1947                cli_args::DiskCipher::XtsAes256 => disk_crypt_resources::Cipher::XtsAes256,
1948            },
1949            key: fs_err::read(key_file).context("failed to read key file")?,
1950        })),
1951        DiskCliKind::Sqlite {
1952            path,
1953            create_with_len,
1954        } => {
1955            // FUTURE: this code should be responsible for opening
1956            // file-handle(s) itself, and passing them into sqlite via a custom
1957            // vfs. For now though - simply check if the file exists or not, and
1958            // perform early validation of filesystem-level create options.
1959            match (create_with_len.is_some(), path.exists()) {
1960                (true, true) => anyhow::bail!(
1961                    "cannot create new sqlite disk at {} - file already exists",
1962                    path.display()
1963                ),
1964                (false, false) => anyhow::bail!(
1965                    "cannot open sqlite disk at {} - file not found",
1966                    path.display()
1967                ),
1968                _ => {}
1969            }
1970
1971            layers.push(layer(SqliteDiskLayerHandle {
1972                dbhd_path: path.display().to_string(),
1973                format_dbhd: create_with_len.map(|len| {
1974                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
1975                        logically_read_only: false,
1976                        len: Some(len),
1977                    }
1978                }),
1979            }));
1980        }
1981        DiskCliKind::SqliteDiff { path, create, disk } => {
1982            // FUTURE: this code should be responsible for opening
1983            // file-handle(s) itself, and passing them into sqlite via a custom
1984            // vfs. For now though - simply check if the file exists or not, and
1985            // perform early validation of filesystem-level create options.
1986            match (create, path.exists()) {
1987                (true, true) => anyhow::bail!(
1988                    "cannot create new sqlite disk at {} - file already exists",
1989                    path.display()
1990                ),
1991                (false, false) => anyhow::bail!(
1992                    "cannot open sqlite disk at {} - file not found",
1993                    path.display()
1994                ),
1995                _ => {}
1996            }
1997
1998            layers.push(layer(SqliteDiskLayerHandle {
1999                dbhd_path: path.display().to_string(),
2000                format_dbhd: create.then_some(
2001                    disk_backend_resources::layer::SqliteDiskLayerFormatParams {
2002                        logically_read_only: false,
2003                        len: None,
2004                    },
2005                ),
2006            }));
2007            disk_open_inner(disk, true, layers)?;
2008        }
2009        DiskCliKind::AutoCacheSqlite {
2010            cache_path,
2011            key,
2012            disk,
2013        } => {
2014            layers.push(LayerOrDisk::Layer(DiskLayerDescription {
2015                read_cache: true,
2016                write_through: false,
2017                layer: SqliteAutoCacheDiskLayerHandle {
2018                    cache_path: cache_path.clone(),
2019                    cache_key: key.clone(),
2020                }
2021                .into_resource(),
2022            }));
2023            disk_open_inner(disk, read_only, layers)?;
2024        }
2025    }
2026    Ok(())
2027}
2028
2029/// Get the system page size.
2030fn system_page_size() -> u32 {
2031    sparse_mmap::SparseMapping::page_size() as u32
2032}
2033
/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when that is the configured guest
/// architecture, `"aarch64"` in every other case.
#[cfg(guest_arch = "x86_64")]
const GUEST_ARCH: &str = "x86_64";
/// The guest architecture string, selected at compile time from the
/// `guest_arch` cfg: `"x86_64"` when that is the configured guest
/// architecture, `"aarch64"` in every other case.
#[cfg(not(guest_arch = "x86_64"))]
const GUEST_ARCH: &str = "aarch64";
2040
2041/// Open a snapshot directory and validate it against the current VM config.
2042/// Returns the shared memory fd (from memory.bin) and the saved device state.
2043fn prepare_snapshot_restore(
2044    snapshot_dir: &Path,
2045    opt: &Options,
2046) -> anyhow::Result<(
2047    openvmm_defs::worker::SharedMemoryFd,
2048    mesh::payload::message::ProtobufMessage,
2049)> {
2050    let (manifest, state_bytes) = openvmm_helpers::snapshot::read_snapshot(snapshot_dir)?;
2051
2052    // Validate manifest against current VM config.
2053    openvmm_helpers::snapshot::validate_manifest(
2054        &manifest,
2055        GUEST_ARCH,
2056        opt.memory,
2057        opt.processors,
2058        system_page_size(),
2059    )?;
2060
2061    // Open memory.bin (existing file, no create, no resize).
2062    let memory_file = fs_err::OpenOptions::new()
2063        .read(true)
2064        .write(true)
2065        .open(snapshot_dir.join("memory.bin"))?;
2066
2067    // Validate file size matches expected memory size.
2068    let file_size = memory_file.metadata()?.len();
2069    if file_size != manifest.memory_size_bytes {
2070        anyhow::bail!(
2071            "memory.bin size ({file_size} bytes) doesn't match manifest ({} bytes)",
2072            manifest.memory_size_bytes,
2073        );
2074    }
2075
2076    let shared_memory_fd =
2077        openvmm_helpers::shared_memory::file_to_shared_memory_fd(memory_file.into())?;
2078
2079    // Reconstruct ProtobufMessage from the saved state bytes.
2080    // The save side wrote mesh::payload::encode(ProtobufMessage), so we decode
2081    // back to ProtobufMessage.
2082    let state_msg: mesh::payload::message::ProtobufMessage = mesh::payload::decode(&state_bytes)
2083        .context("failed to decode saved state from snapshot")?;
2084
2085    Ok((shared_memory_fd, state_msg))
2086}
2087
2088/// Save a VM snapshot to the given directory.
2089///
2090/// Pauses the VM, saves device state, fsyncs the memory backing file,
2091/// and writes the snapshot directory. The VM remains paused after this
2092/// call — resuming would corrupt the snapshot.
2093async fn save_snapshot(
2094    vm_rpc: &mesh::Sender<VmRpc>,
2095    opt: &Options,
2096    dir: &Path,
2097) -> anyhow::Result<()> {
2098    let memory_file_path = opt
2099        .memory_backing_file
2100        .as_ref()
2101        .context("save-snapshot requires --memory-backing-file")?;
2102
2103    // Pause the VM.
2104    vm_rpc
2105        .call(VmRpc::Pause, ())
2106        .await
2107        .context("failed to pause VM")?;
2108
2109    // Get device state via existing VmRpc::Save.
2110    let saved_state_msg = vm_rpc
2111        .call_failable(VmRpc::Save, ())
2112        .await
2113        .context("failed to save state")?;
2114
2115    // Serialize the ProtobufMessage to bytes for writing to disk.
2116    let saved_state_bytes = mesh::payload::encode(saved_state_msg);
2117
2118    // Fsync the memory backing file.
2119    let memory_file = fs_err::File::open(memory_file_path)?;
2120    memory_file
2121        .sync_all()
2122        .context("failed to fsync memory backing file")?;
2123
2124    // Build manifest.
2125    let manifest = openvmm_helpers::snapshot::SnapshotManifest {
2126        version: openvmm_helpers::snapshot::MANIFEST_VERSION,
2127        created_at: std::time::SystemTime::now().into(),
2128        openvmm_version: env!("CARGO_PKG_VERSION").to_string(),
2129        memory_size_bytes: opt.memory,
2130        vp_count: opt.processors,
2131        page_size: system_page_size(),
2132        architecture: GUEST_ARCH.to_string(),
2133    };
2134
2135    // Write snapshot directory.
2136    openvmm_helpers::snapshot::write_snapshot(
2137        dir,
2138        &manifest,
2139        &saved_state_bytes,
2140        memory_file_path,
2141    )?;
2142
2143    // VM stays paused. Do NOT resume.
2144    Ok(())
2145}
2146
2147fn do_main() -> anyhow::Result<()> {
2148    #[cfg(windows)]
2149    pal::windows::disable_hard_error_dialog();
2150
2151    tracing_init::enable_tracing()?;
2152
2153    // Try to run as a worker host.
2154    // On success the worker runs to completion and then exits the process (does
2155    // not return). Any worker host setup errors are return and bubbled up.
2156    meshworker::run_vmm_mesh_host()?;
2157
2158    let opt = Options::parse();
2159    if let Some(path) = &opt.write_saved_state_proto {
2160        mesh::payload::protofile::DescriptorWriter::new(vmcore::save_restore::saved_state_roots())
2161            .write_to_path(path)
2162            .context("failed to write protobuf descriptors")?;
2163        return Ok(());
2164    }
2165
2166    if let Some(path) = opt.relay_console_path {
2167        let console_title = opt.relay_console_title.unwrap_or_default();
2168        return console_relay::relay_console(&path, console_title.as_str());
2169    }
2170
2171    #[cfg(any(feature = "grpc", feature = "ttrpc"))]
2172    if let Some(path) = opt.ttrpc.as_ref().or(opt.grpc.as_ref()) {
2173        return block_on(async {
2174            let _ = std::fs::remove_file(path);
2175            let listener =
2176                unix_socket::UnixListener::bind(path).context("failed to bind to socket")?;
2177
2178            let transport = if opt.ttrpc.is_some() {
2179                ttrpc::RpcTransport::Ttrpc
2180            } else {
2181                ttrpc::RpcTransport::Grpc
2182            };
2183
2184            // This is a local launch
2185            let mut handle =
2186                mesh_worker::launch_local_worker::<ttrpc::TtrpcWorker>(ttrpc::Parameters {
2187                    listener,
2188                    transport,
2189                })
2190                .await?;
2191
2192            tracing::info!(%transport, path = %path.display(), "listening");
2193
2194            // Signal the the parent process that the server is ready.
2195            pal::close_stdout().context("failed to close stdout")?;
2196
2197            handle.join().await?;
2198
2199            Ok(())
2200        });
2201    }
2202
2203    DefaultPool::run_with(async |driver| {
2204        let mesh = VmmMesh::new(&driver, opt.single_process)?;
2205        let result = run_control(&driver, &mesh, opt).await;
2206        mesh.shutdown().await;
2207        result
2208    })
2209}
2210
/// Parses `s` as a `u64`, honoring an optional radix prefix.
///
/// `0x`/`0X` selects hexadecimal, `0o`/`0O` octal and `0b`/`0B` binary; input
/// without one of these prefixes is parsed as decimal. On failure the
/// integer-parse error is returned as a string.
fn maybe_with_radix_u64(s: &str) -> Result<u64, String> {
    const PREFIXES: [(&str, u32); 6] = [
        ("0x", 16),
        ("0X", 16),
        ("0o", 8),
        ("0O", 8),
        ("0b", 2),
        ("0B", 2),
    ];

    // At most one prefix can match; fall back to the whole string as decimal.
    let (digits, radix) = PREFIXES
        .iter()
        .find_map(|&(prefix, radix)| s.strip_prefix(prefix).map(|rest| (rest, radix)))
        .unwrap_or((s, 10));

    u64::from_str_radix(digits, radix).map_err(|err| err.to_string())
}
2224
// Commands parsed from a single input line by `CommandParser::parse`.
//
// NOTE: the `///` comments on the variants and fields below are consumed by
// the clap derive (presumably as the per-command help text), so they are part
// of the program's output. Review notes in this enum deliberately use `//`.
#[derive(Parser)]
#[clap(
    name = "openvmm",
    disable_help_flag = true,
    disable_version_flag = true,
    no_binary_name = true,
    help_template("{subcommands}")
)]
enum InteractiveCommand {
    /// Restart the VM worker (experimental).
    ///
    /// This restarts the VM worker while preserving state.
    #[clap(visible_alias = "R")]
    Restart,

    /// Inject an NMI.
    #[clap(visible_alias = "n")]
    Nmi,

    /// Pause the VM.
    #[clap(visible_alias = "p")]
    Pause,

    /// Resume the VM.
    #[clap(visible_alias = "r")]
    Resume,

    /// Save a snapshot to a directory (requires --memory-backing-file).
    #[clap(visible_alias = "snap")]
    SaveSnapshot {
        /// Directory to write the snapshot to.
        dir: PathBuf,
    },

    /// Do a pulsed save restore (pause, save, reset, restore, resume) to the VM.
    #[clap(visible_alias = "psr")]
    PulseSaveRestore,

    /// Schedule a pulsed save restore (pause, save, reset, restore, resume) to the VM.
    #[clap(visible_alias = "spsr")]
    SchedulePulseSaveRestore {
        /// The interval between pulse save restore operations in seconds.
        /// None or 0 means any previous scheduled pulse save restores will be cleared.
        interval: Option<u64>,
    },

    /// Hot add a disk to the VTL0 guest.
    #[clap(visible_alias = "d")]
    AddDisk {
        // `--ro`: presumably attaches the disk read-only — TODO confirm
        // against the handler.
        #[clap(long = "ro")]
        read_only: bool,
        // `--dvd`: presumably exposes the disk as a DVD device — TODO confirm.
        #[clap(long = "dvd")]
        is_dvd: bool,
        // `target`/`path`/`lun` each fall back to the type's default value
        // (`default_value_t`) when omitted; presumably they form the disk's
        // address on the controller — TODO confirm.
        #[clap(long, default_value_t)]
        target: u8,
        #[clap(long, default_value_t)]
        path: u8,
        #[clap(long, default_value_t)]
        lun: u8,
        // `--ram`: presumably the size of a RAM-backed disk, by analogy with
        // `AddNvmeNs::ram` — TODO confirm units.
        #[clap(long)]
        ram: Option<u64>,
        // Optional positional argument: presumably the backing file path.
        file_path: Option<PathBuf>,
    },

    /// Hot remove a disk from the VTL0 guest.
    #[clap(visible_alias = "D")]
    RmDisk {
        // Unlike `AddDisk`, these have no default and so must all be given.
        #[clap(long)]
        target: u8,
        #[clap(long)]
        path: u8,
        #[clap(long)]
        lun: u8,
    },

    /// Manage VTL2 settings (storage controllers, NICs exposed to VTL0).
    #[clap(subcommand)]
    Vtl2Settings(Vtl2SettingsCommand),

    /// Hot add an NVMe namespace to VTL2, and optionally to VTL0.
    AddNvmeNs {
        #[clap(long = "ro")]
        read_only: bool,
        /// The namespace ID.
        #[clap(long)]
        nsid: u32,
        /// Create a RAM-backed namespace of the specified size in bytes.
        #[clap(long)]
        ram: Option<u64>,
        /// Path to a file to use as the backing store.
        file_path: Option<PathBuf>,
        /// Also expose this namespace to VTL0 via VTL2 settings as a SCSI disk
        /// with the specified LUN number.
        #[clap(long)]
        vtl0_lun: Option<u32>,
    },

    /// Hot remove an NVMe namespace from VTL2.
    RmNvmeNs {
        /// The namespace ID to remove.
        #[clap(long)]
        nsid: u32,
        /// Also remove the VTL0 SCSI disk backed by this namespace.
        #[clap(long)]
        vtl0: bool,
    },

    /// Inspect program state.
    #[clap(visible_alias = "x")]
    Inspect {
        /// Enumerate state recursively.
        #[clap(short, long)]
        recursive: bool,
        /// The recursive depth limit.
        #[clap(short, long, requires("recursive"))]
        limit: Option<usize>,
        /// Target the paravisor.
        #[clap(short = 'v', long)]
        paravisor: bool,
        /// The element path to inspect.
        element: Option<String>,
        /// Update the path with a new value.
        #[clap(short, long, conflicts_with("recursive"))]
        update: Option<String>,
    },

    /// Restart the VNC worker.
    #[clap(visible_alias = "V")]
    RestartVnc,

    /// Start an hvsocket terminal window.
    #[clap(visible_alias = "v")]
    Hvsock {
        /// the terminal emulator to run (defaults to conhost.exe or xterm)
        #[clap(short, long)]
        term: Option<PathBuf>,
        /// the vsock port to connect to
        port: u32,
    },

    /// Quit the program.
    #[clap(visible_alias = "q")]
    Quit,

    /// Write input to the VM console.
    ///
    /// This will write each input parameter to the console's associated serial
    /// port, separated by spaces.
    #[clap(visible_alias = "i")]
    Input { data: Vec<String> },

    /// Switch to input mode.
    ///
    /// Once in input mode, Ctrl-Q returns to command mode.
    #[clap(visible_alias = "I")]
    InputMode,

    /// Reset the VM.
    Reset,

    /// Send a request to the VM to shut it down.
    Shutdown {
        /// Reboot the VM instead of powering it off.
        #[clap(long, short = 'r')]
        reboot: bool,
        /// Hibernate the VM instead of powering it off.
        // `-h` is available here because the help flag is disabled on the
        // top-level command (`disable_help_flag = true`).
        #[clap(long, short = 'h', conflicts_with = "reboot")]
        hibernate: bool,
        /// Tell the guest to force the power state transition.
        #[clap(long, short = 'f')]
        force: bool,
    },

    /// Clears the current halt condition, resuming the VPs if the VM is
    /// running.
    #[clap(visible_alias = "ch")]
    ClearHalt,

    /// Update the image in VTL2.
    ServiceVtl2 {
        /// Just restart the user-mode paravisor process, not the full
        /// firmware.
        #[clap(long, short = 'u')]
        user_mode_only: bool,
        /// The path to the new IGVM file. If missing, use the originally
        /// configured path.
        #[clap(long, conflicts_with("user_mode_only"))]
        igvm: Option<PathBuf>,
        /// Enable keepalive when servicing VTL2 devices.
        /// Default is `true`.
        // NOTE(review): as a plain `bool` flag this presumably evaluates to
        // `false` when the flag is omitted, which would contradict the help
        // text above — confirm the intended default.
        #[clap(long, short = 'n', default_missing_value = "true")]
        nvme_keepalive: bool,
        /// Enable keepalive when servicing VTL2 devices.
        /// Default is `false`.
        #[clap(long)]
        mana_keepalive: bool,
    },

    /// Read guest memory
    ReadMemory {
        /// Guest physical address to start at.
        // Parsed by `maybe_with_radix_u64`, so `0x`/`0o`/`0b` prefixes are
        // accepted.
        #[clap(value_parser=maybe_with_radix_u64)]
        gpa: u64,
        /// How many bytes to dump.
        #[clap(value_parser=maybe_with_radix_u64)]
        size: u64,
        /// File to save the data to. If omitted,
        /// the data will be presented as a hex dump.
        #[clap(long, short = 'f')]
        file: Option<PathBuf>,
    },

    /// Write guest memory
    WriteMemory {
        /// Guest physical address to start at
        #[clap(value_parser=maybe_with_radix_u64)]
        gpa: u64,
        /// Hex string encoding data, with no `0x` radix.
        /// If omitted, the source file must be specified.
        hex: Option<String>,
        /// File to write the data from.
        #[clap(long, short = 'f')]
        file: Option<PathBuf>,
    },

    /// Inject an artificial panic into OpenVMM
    Panic,

    /// Use KVP to interact with the guest.
    Kvp(kvp::KvpCommand),
}
2456
/// Subcommands for managing VTL2 settings.
#[derive(clap::Subcommand)]
enum Vtl2SettingsCommand {
    /// Show the current VTL2 settings.
    Show,

    /// Add a SCSI disk to VTL0 backed by a VTL2 storage device.
    ///
    /// The backing device can be either a VTL2 NVMe namespace or a VTL2 SCSI disk.
    AddScsiDisk {
        /// The VTL0 SCSI controller instance ID (GUID). Defaults to the standard
        /// OpenVMM VTL0 SCSI instance.
        #[clap(long)]
        controller: Option<String>,
        /// The SCSI LUN to expose to VTL0.
        #[clap(long)]
        lun: u32,
        // The two backing options below each declare `conflicts_with` and
        // `required_unless_present` on the other, so exactly one of them must
        // be supplied.
        /// The backing VTL2 NVMe namespace ID.
        #[clap(
            long,
            conflicts_with = "backing_scsi_lun",
            required_unless_present = "backing_scsi_lun"
        )]
        backing_nvme_nsid: Option<u32>,
        /// The backing VTL2 SCSI LUN.
        #[clap(
            long,
            conflicts_with = "backing_nvme_nsid",
            required_unless_present = "backing_nvme_nsid"
        )]
        backing_scsi_lun: Option<u32>,
    },

    /// Remove a SCSI disk from VTL0.
    RmScsiDisk {
        /// The SCSI controller instance ID (GUID). Defaults to the standard
        /// OpenVMM VTL0 SCSI instance.
        #[clap(long)]
        controller: Option<String>,
        /// The SCSI LUN to remove.
        #[clap(long)]
        lun: u32,
    },
}
2501
2502struct CommandParser {
2503    app: clap::Command,
2504}
2505
2506impl CommandParser {
2507    fn new() -> Self {
2508        // Update the help template for each subcommand.
2509        let mut app = InteractiveCommand::command();
2510        for sc in app.get_subcommands_mut() {
2511            *sc = sc
2512                .clone()
2513                .help_template("{about-with-newline}\n{usage-heading}\n    {usage}\n\n{all-args}");
2514        }
2515        Self { app }
2516    }
2517
2518    fn parse(&mut self, line: &str) -> clap::error::Result<InteractiveCommand> {
2519        let args = shell_words::split(line)
2520            .map_err(|err| self.app.error(clap::error::ErrorKind::ValueValidation, err))?;
2521        let matches = self.app.try_get_matches_from_mut(args)?;
2522        InteractiveCommand::from_arg_matches(&matches).map_err(|err| err.format(&mut self.app))
2523    }
2524}
2525
2526fn new_hvsock_service_id(port: u32) -> Guid {
2527    // This GUID is an embedding of the AF_VSOCK port into an
2528    // AF_HYPERV service ID.
2529    Guid {
2530        data1: port,
2531        .."00000000-facb-11e6-bd58-64006a7986d3".parse().unwrap()
2532    }
2533}
2534
2535async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> anyhow::Result<()> {
2536    let (mut vm_config, mut resources) = vm_config_from_command_line(driver, mesh, &opt).await?;
2537
2538    let mut vnc_worker = None;
2539    if opt.gfx || opt.vnc {
2540        let listener = TcpListener::bind(format!("127.0.0.1:{}", opt.vnc_port))
2541            .with_context(|| format!("binding to VNC port {}", opt.vnc_port))?;
2542
2543        let input_send = vm_config.input.sender();
2544        let framebuffer = resources
2545            .framebuffer_access
2546            .take()
2547            .expect("synth video enabled");
2548
2549        let vnc_host = mesh
2550            .make_host("vnc", None)
2551            .await
2552            .context("spawning vnc process failed")?;
2553
2554        vnc_worker = Some(
2555            vnc_host
2556                .launch_worker(
2557                    vnc_worker_defs::VNC_WORKER_TCP,
2558                    VncParameters {
2559                        listener,
2560                        framebuffer,
2561                        input_send,
2562                    },
2563                )
2564                .await?,
2565        )
2566    }
2567
2568    // spin up the debug worker
2569    let gdb_worker = if let Some(port) = opt.gdb {
2570        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
2571            .with_context(|| format!("binding to gdb port {}", port))?;
2572
2573        let (req_tx, req_rx) = mesh::channel();
2574        vm_config.debugger_rpc = Some(req_rx);
2575
2576        let gdb_host = mesh
2577            .make_host("gdb", None)
2578            .await
2579            .context("spawning gdbstub process failed")?;
2580
2581        Some(
2582            gdb_host
2583                .launch_worker(
2584                    debug_worker_defs::DEBUGGER_WORKER,
2585                    debug_worker_defs::DebuggerParameters {
2586                        listener,
2587                        req_chan: req_tx,
2588                        vp_count: vm_config.processor_topology.proc_count,
2589                        target_arch: if cfg!(guest_arch = "x86_64") {
2590                            debug_worker_defs::TargetArch::X86_64
2591                        } else {
2592                            debug_worker_defs::TargetArch::Aarch64
2593                        },
2594                    },
2595                )
2596                .await
2597                .context("failed to launch gdbstub worker")?,
2598        )
2599    } else {
2600        None
2601    };
2602
2603    // spin up the VM
2604    let (vm_rpc, rpc_recv) = mesh::channel();
2605    let (notify_send, notify_recv) = mesh::channel();
2606    let mut vm_worker = {
2607        let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
2608
2609        let (shared_memory, saved_state) = if let Some(snapshot_dir) = &opt.restore_snapshot {
2610            let (fd, state_msg) = prepare_snapshot_restore(snapshot_dir, &opt)?;
2611            (Some(fd), Some(state_msg))
2612        } else {
2613            let shared_memory = opt
2614                .memory_backing_file
2615                .as_ref()
2616                .map(|path| {
2617                    openvmm_helpers::shared_memory::open_memory_backing_file(path, opt.memory)
2618                })
2619                .transpose()?;
2620            (shared_memory, None)
2621        };
2622
2623        let params = VmWorkerParameters {
2624            hypervisor: opt.hypervisor,
2625            cfg: vm_config,
2626            saved_state,
2627            shared_memory,
2628            rpc: rpc_recv,
2629            notify: notify_send,
2630        };
2631        vm_host
2632            .launch_worker(VM_WORKER, params)
2633            .await
2634            .context("failed to launch vm worker")?
2635    };
2636
2637    if opt.restore_snapshot.is_some() {
2638        tracing::info!("restoring VM from snapshot");
2639    }
2640
2641    if !opt.paused {
2642        vm_rpc.call(VmRpc::Resume, ()).await?;
2643    }
2644
2645    let paravisor_diag = Arc::new(diag_client::DiagClient::from_dialer(
2646        driver.clone(),
2647        DiagDialer {
2648            driver: driver.clone(),
2649            vm_rpc: vm_rpc.clone(),
2650            openhcl_vtl: if opt.vtl2 {
2651                DeviceVtl::Vtl2
2652            } else {
2653                DeviceVtl::Vtl0
2654            },
2655        },
2656    ));
2657
2658    let mut diag_inspector = DiagInspector::new(driver.clone(), paravisor_diag.clone());
2659
2660    let (console_command_send, console_command_recv) = mesh::channel();
2661    let (inspect_completion_engine_send, inspect_completion_engine_recv) = mesh::channel();
2662
2663    let mut console_in = resources.console_in.take();
2664    thread::Builder::new()
2665        .name("stdio-thread".to_string())
2666        .spawn(move || {
2667            // install panic hook to restore cooked terminal (linux)
2668            #[cfg(unix)]
2669            if io::stderr().is_terminal() {
2670                term::revert_terminal_on_panic()
2671            }
2672
2673            let mut rl = rustyline::Editor::<
2674                interactive_console::OpenvmmRustylineEditor,
2675                rustyline::history::FileHistory,
2676            >::with_config(
2677                rustyline::Config::builder()
2678                    .completion_type(rustyline::CompletionType::List)
2679                    .build(),
2680            )
2681            .unwrap();
2682
2683            rl.set_helper(Some(interactive_console::OpenvmmRustylineEditor {
2684                openvmm_inspect_req: Arc::new(inspect_completion_engine_send),
2685            }));
2686
2687            let history_file = {
2688                const HISTORY_FILE: &str = ".openvmm_history";
2689
2690                // using a `None` to kick off the `.or()` chain in order to make
2691                // it a bit easier to visually inspect the fallback chain.
2692                let history_folder = None
2693                    .or_else(dirs::state_dir)
2694                    .or_else(dirs::data_local_dir)
2695                    .map(|path| path.join("openvmm"));
2696
2697                if let Some(history_folder) = history_folder {
2698                    if let Err(err) = std::fs::create_dir_all(&history_folder) {
2699                        tracing::warn!(
2700                            error = &err as &dyn std::error::Error,
2701                            "could not create directory: {}",
2702                            history_folder.display()
2703                        )
2704                    }
2705
2706                    Some(history_folder.join(HISTORY_FILE))
2707                } else {
2708                    None
2709                }
2710            };
2711
2712            if let Some(history_file) = &history_file {
2713                tracing::info!("restoring history from {}", history_file.display());
2714                if rl.load_history(history_file).is_err() {
2715                    tracing::info!("could not find existing {}", history_file.display());
2716                }
2717            }
2718
2719            // Enable Ctrl-Backspace to delete the current word.
2720            rl.bind_sequence(
2721                rustyline::KeyEvent::new('\x08', rustyline::Modifiers::CTRL),
2722                rustyline::Cmd::Kill(rustyline::Movement::BackwardWord(1, rustyline::Word::Emacs)),
2723            );
2724
2725            let mut parser = CommandParser::new();
2726
2727            let mut stdin = io::stdin();
2728            loop {
2729                // Raw console text until Ctrl-Q.
2730                term::set_raw_console(true).expect("failed to set raw console mode");
2731
2732                if let Some(input) = console_in.as_mut() {
2733                    let mut buf = [0; 32];
2734                    loop {
2735                        let n = stdin.read(&mut buf).unwrap();
2736                        let mut b = &buf[..n];
2737                        let stop = if let Some(ctrlq) = b.iter().position(|x| *x == 0x11) {
2738                            b = &b[..ctrlq];
2739                            true
2740                        } else {
2741                            false
2742                        };
2743                        block_on(input.as_mut().write_all(b)).expect("BUGBUG");
2744                        if stop {
2745                            break;
2746                        }
2747                    }
2748                }
2749
2750                term::set_raw_console(false).expect("failed to set raw console mode");
2751
2752                loop {
2753                    let line = rl.readline("openvmm> ");
2754                    if line.is_err() {
2755                        break;
2756                    }
2757                    let line = line.unwrap();
2758                    let trimmed = line.trim();
2759                    if trimmed.is_empty() {
2760                        continue;
2761                    }
2762                    if let Err(err) = rl.add_history_entry(&line) {
2763                        tracing::warn!(
2764                            err = &err as &dyn std::error::Error,
2765                            "error adding to .openvmm_history"
2766                        )
2767                    }
2768
2769                    match parser.parse(trimmed) {
2770                        Ok(cmd) => match cmd {
2771                            InteractiveCommand::Input { data } => {
2772                                let mut data = data.join(" ");
2773                                data.push('\n');
2774                                if let Some(input) = console_in.as_mut() {
2775                                    block_on(input.write_all(data.as_bytes())).expect("BUGBUG");
2776                                }
2777                            }
2778                            InteractiveCommand::InputMode => break,
2779                            cmd => {
2780                                // Send the command to the main thread for processing.
2781                                let (processing_done_send, processing_done_recv) =
2782                                    mesh::oneshot::<()>();
2783                                console_command_send.send((cmd, processing_done_send));
2784                                let _ = block_on(processing_done_recv);
2785                            }
2786                        },
2787                        Err(err) => {
2788                            err.print().unwrap();
2789                        }
2790                    }
2791
2792                    if let Some(history_file) = &history_file {
2793                        rl.append_history(history_file).unwrap();
2794                    }
2795                }
2796            }
2797        })
2798        .unwrap();
2799
2800    let mut state_change_task = None::<Task<Result<StateChange, RpcError>>>;
2801    let mut pulse_save_restore_interval: Option<Duration> = None;
2802    let mut pending_shutdown = None;
2803    let mut snapshot_saved = false;
2804
2805    enum StateChange {
2806        Pause(bool),
2807        Resume(bool),
2808        Reset(Result<(), RemoteError>),
2809        PulseSaveRestore(Result<(), PulseSaveRestoreError>),
2810        ServiceVtl2(anyhow::Result<Duration>),
2811    }
2812
2813    enum Event {
2814        Command((InteractiveCommand, mesh::OneshotSender<()>)),
2815        InspectRequestFromCompletionEngine(
2816            (InspectTarget, String, mesh::OneshotSender<inspect::Node>),
2817        ),
2818        Quit,
2819        Halt(vmm_core_defs::HaltReason),
2820        PulseSaveRestore,
2821        Worker(WorkerEvent),
2822        VncWorker(WorkerEvent),
2823        StateChange(Result<StateChange, RpcError>),
2824        ShutdownResult(Result<hyperv_ic_resources::shutdown::ShutdownResult, RpcError>),
2825    }
2826
2827    let mut console_command_recv = console_command_recv
2828        .map(Event::Command)
2829        .chain(futures::stream::repeat_with(|| Event::Quit));
2830
2831    let mut notify_recv = notify_recv.map(Event::Halt);
2832
2833    let mut inspect_completion_engine_recv =
2834        inspect_completion_engine_recv.map(Event::InspectRequestFromCompletionEngine);
2835
2836    let mut quit = false;
2837    loop {
2838        let event = {
2839            let pulse_save_restore = pin!(async {
2840                match pulse_save_restore_interval {
2841                    Some(wait) => {
2842                        PolledTimer::new(driver).sleep(wait).await;
2843                        Event::PulseSaveRestore
2844                    }
2845                    None => pending().await,
2846                }
2847            });
2848
2849            let vm = (&mut vm_worker).map(Event::Worker);
2850            let vnc = futures::stream::iter(vnc_worker.as_mut())
2851                .flatten()
2852                .map(Event::VncWorker);
2853            let change = futures::stream::iter(state_change_task.as_mut().map(|x| x.into_stream()))
2854                .flatten()
2855                .map(Event::StateChange);
2856            let shutdown = pin!(async {
2857                if let Some(s) = &mut pending_shutdown {
2858                    Event::ShutdownResult(s.await)
2859                } else {
2860                    pending().await
2861                }
2862            });
2863
2864            (
2865                &mut console_command_recv,
2866                &mut inspect_completion_engine_recv,
2867                &mut notify_recv,
2868                pulse_save_restore.into_stream(),
2869                vm,
2870                vnc,
2871                change,
2872                shutdown.into_stream(),
2873            )
2874                .merge()
2875                .next()
2876                .await
2877                .unwrap()
2878        };
2879
2880        let (cmd, _processing_done_send) = match event {
2881            Event::Command(message) => message,
2882            Event::InspectRequestFromCompletionEngine((vtl, path, res)) => {
2883                let mut inspection =
2884                    InspectionBuilder::new(&path)
2885                        .depth(Some(1))
2886                        .inspect(inspect_obj(
2887                            vtl,
2888                            mesh,
2889                            &vm_worker,
2890                            vnc_worker.as_ref(),
2891                            gdb_worker.as_ref(),
2892                            &mut diag_inspector,
2893                        ));
2894                let _ = CancelContext::new()
2895                    .with_timeout(Duration::from_secs(1))
2896                    .until_cancelled(inspection.resolve())
2897                    .await;
2898
2899                let node = inspection.results();
2900                res.send(node);
2901                continue;
2902            }
2903            Event::Quit => break,
2904            Event::Halt(reason) => {
2905                tracing::info!(?reason, "guest halted");
2906                continue;
2907            }
2908            Event::PulseSaveRestore => {
2909                vm_rpc.call(VmRpc::PulseSaveRestore, ()).await??;
2910                continue;
2911            }
2912            Event::Worker(event) => {
2913                match event {
2914                    WorkerEvent::Stopped => {
2915                        if quit {
2916                            tracing::info!("vm stopped");
2917                        } else {
2918                            tracing::error!("vm worker unexpectedly stopped");
2919                        }
2920                        break;
2921                    }
2922                    WorkerEvent::Failed(err) => {
2923                        tracing::error!(error = &err as &dyn std::error::Error, "vm worker failed");
2924                        break;
2925                    }
2926                    WorkerEvent::RestartFailed(err) => {
2927                        tracing::error!(
2928                            error = &err as &dyn std::error::Error,
2929                            "vm worker restart failed"
2930                        );
2931                    }
2932                    WorkerEvent::Started => {
2933                        tracing::info!("vm worker restarted");
2934                    }
2935                }
2936                continue;
2937            }
2938            Event::VncWorker(event) => {
2939                match event {
2940                    WorkerEvent::Stopped => tracing::error!("vnc unexpectedly stopped"),
2941                    WorkerEvent::Failed(err) => {
2942                        tracing::error!(
2943                            error = &err as &dyn std::error::Error,
2944                            "vnc worker failed"
2945                        );
2946                    }
2947                    WorkerEvent::RestartFailed(err) => {
2948                        tracing::error!(
2949                            error = &err as &dyn std::error::Error,
2950                            "vnc worker restart failed"
2951                        );
2952                    }
2953                    WorkerEvent::Started => {
2954                        tracing::info!("vnc worker restarted");
2955                    }
2956                }
2957                continue;
2958            }
2959            Event::StateChange(r) => {
2960                match r {
2961                    Ok(sc) => match sc {
2962                        StateChange::Pause(success) => {
2963                            if success {
2964                                tracing::info!("pause complete");
2965                            } else {
2966                                tracing::warn!("already paused");
2967                            }
2968                        }
2969                        StateChange::Resume(success) => {
2970                            if success {
2971                                tracing::info!("resumed complete");
2972                            } else {
2973                                tracing::warn!("already running");
2974                            }
2975                        }
2976                        StateChange::Reset(r) => match r {
2977                            Ok(()) => tracing::info!("reset complete"),
2978                            Err(err) => tracing::error!(
2979                                error = &err as &dyn std::error::Error,
2980                                "reset failed"
2981                            ),
2982                        },
2983                        StateChange::PulseSaveRestore(r) => match r {
2984                            Ok(()) => tracing::info!("pulse save/restore complete"),
2985                            Err(err) => tracing::error!(
2986                                error = &err as &dyn std::error::Error,
2987                                "pulse save/restore failed"
2988                            ),
2989                        },
2990                        StateChange::ServiceVtl2(r) => match r {
2991                            Ok(dur) => {
2992                                tracing::info!(
2993                                    duration = dur.as_millis() as i64,
2994                                    "vtl2 servicing complete"
2995                                )
2996                            }
2997                            Err(err) => tracing::error!(
2998                                error = err.as_ref() as &dyn std::error::Error,
2999                                "vtl2 servicing failed"
3000                            ),
3001                        },
3002                    },
3003                    Err(err) => {
3004                        tracing::error!(
3005                            error = &err as &dyn std::error::Error,
3006                            "communication failure during state change"
3007                        );
3008                    }
3009                }
3010                state_change_task = None;
3011                continue;
3012            }
3013            Event::ShutdownResult(r) => {
3014                match r {
3015                    Ok(r) => match r {
3016                        hyperv_ic_resources::shutdown::ShutdownResult::Ok => {
3017                            tracing::info!("shutdown initiated");
3018                        }
3019                        hyperv_ic_resources::shutdown::ShutdownResult::NotReady => {
3020                            tracing::error!("shutdown ic not ready");
3021                        }
3022                        hyperv_ic_resources::shutdown::ShutdownResult::AlreadyInProgress => {
3023                            tracing::error!("shutdown already in progress");
3024                        }
3025                        hyperv_ic_resources::shutdown::ShutdownResult::Failed(hr) => {
3026                            tracing::error!("shutdown failed with error code {hr:#x}");
3027                        }
3028                    },
3029                    Err(err) => {
3030                        tracing::error!(
3031                            error = &err as &dyn std::error::Error,
3032                            "communication failure during shutdown"
3033                        );
3034                    }
3035                }
3036                pending_shutdown = None;
3037                continue;
3038            }
3039        };
3040
3041        fn inspect_obj<'a>(
3042            target: InspectTarget,
3043            mesh: &'a VmmMesh,
3044            vm_worker: &'a WorkerHandle,
3045            vnc_worker: Option<&'a WorkerHandle>,
3046            gdb_worker: Option<&'a WorkerHandle>,
3047            diag_inspector: &'a mut DiagInspector,
3048        ) -> impl 'a + InspectMut {
3049            inspect::adhoc_mut(move |req| match target {
3050                InspectTarget::Host => {
3051                    let mut resp = req.respond();
3052                    resp.field("mesh", mesh)
3053                        .field("vm", vm_worker)
3054                        .field("vnc", vnc_worker)
3055                        .field("gdb", gdb_worker);
3056                }
3057                InspectTarget::Paravisor => {
3058                    diag_inspector.inspect_mut(req);
3059                }
3060            })
3061        }
3062
3063        fn state_change<U: 'static + Send>(
3064            driver: impl Spawn,
3065            vm_rpc: &mesh::Sender<VmRpc>,
3066            state_change_task: &mut Option<Task<Result<StateChange, RpcError>>>,
3067            f: impl FnOnce(Rpc<(), U>) -> VmRpc,
3068            g: impl FnOnce(U) -> StateChange + 'static + Send,
3069        ) {
3070            if state_change_task.is_some() {
3071                tracing::error!("state change already in progress");
3072            } else {
3073                let rpc = vm_rpc.call(f, ());
3074                *state_change_task =
3075                    Some(driver.spawn("state-change", async move { Ok(g(rpc.await?)) }));
3076            }
3077        }
3078
3079        match cmd {
3080            InteractiveCommand::Panic => {
3081                panic!("injected panic")
3082            }
3083            InteractiveCommand::Restart => {
3084                // create a new host process
3085                let vm_host = mesh.make_host("vm", opt.log_file.clone()).await?;
3086
3087                vm_worker.restart(&vm_host);
3088            }
3089            InteractiveCommand::Pause => {
3090                state_change(
3091                    driver,
3092                    &vm_rpc,
3093                    &mut state_change_task,
3094                    VmRpc::Pause,
3095                    StateChange::Pause,
3096                );
3097            }
3098            InteractiveCommand::Resume => {
3099                if snapshot_saved {
3100                    eprintln!(
3101                        "error: cannot resume after snapshot save — resuming would corrupt the snapshot. Use 'shutdown' to exit."
3102                    );
3103                } else {
3104                    state_change(
3105                        driver,
3106                        &vm_rpc,
3107                        &mut state_change_task,
3108                        VmRpc::Resume,
3109                        StateChange::Resume,
3110                    );
3111                }
3112            }
3113            InteractiveCommand::Reset => {
3114                state_change(
3115                    driver,
3116                    &vm_rpc,
3117                    &mut state_change_task,
3118                    VmRpc::Reset,
3119                    StateChange::Reset,
3120                );
3121            }
3122            InteractiveCommand::SaveSnapshot { dir } => {
3123                match save_snapshot(&vm_rpc, &opt, &dir).await {
3124                    Ok(()) => {
3125                        snapshot_saved = true;
3126                        tracing::info!(
3127                            dir = %dir.display(),
3128                            "snapshot saved; VM is paused. \
3129                             Resume is blocked to prevent snapshot corruption. \
3130                             Use 'shutdown' to exit."
3131                        );
3132                    }
3133                    Err(err) => {
3134                        eprintln!("error: save-snapshot failed: {err:#}");
3135                    }
3136                }
3137            }
3138            InteractiveCommand::PulseSaveRestore => {
3139                state_change(
3140                    driver,
3141                    &vm_rpc,
3142                    &mut state_change_task,
3143                    VmRpc::PulseSaveRestore,
3144                    StateChange::PulseSaveRestore,
3145                );
3146            }
3147            InteractiveCommand::SchedulePulseSaveRestore { interval } => {
3148                pulse_save_restore_interval = match interval {
3149                    Some(seconds) if seconds != 0 => Some(Duration::from_secs(seconds)),
3150                    _ => {
3151                        // Treat None and 0 seconds as do not perform scheduled pulse save restores anymore.
3152                        None
3153                    }
3154                }
3155            }
3156            InteractiveCommand::Shutdown {
3157                reboot,
3158                hibernate,
3159                force,
3160            } => {
3161                if pending_shutdown.is_some() {
3162                    println!("shutdown already in progress");
3163                } else if let Some(ic) = &resources.shutdown_ic {
3164                    let params = hyperv_ic_resources::shutdown::ShutdownParams {
3165                        shutdown_type: if hibernate {
3166                            hyperv_ic_resources::shutdown::ShutdownType::Hibernate
3167                        } else if reboot {
3168                            hyperv_ic_resources::shutdown::ShutdownType::Reboot
3169                        } else {
3170                            hyperv_ic_resources::shutdown::ShutdownType::PowerOff
3171                        },
3172                        force,
3173                    };
3174                    pending_shutdown =
3175                        Some(ic.call(hyperv_ic_resources::shutdown::ShutdownRpc::Shutdown, params));
3176                } else {
3177                    println!("no shutdown ic configured");
3178                }
3179            }
3180            InteractiveCommand::Nmi => {
3181                let _ = vm_rpc.call(VmRpc::Nmi, 0).await;
3182            }
3183            InteractiveCommand::ClearHalt => {
3184                vm_rpc.call(VmRpc::ClearHalt, ()).await.ok();
3185            }
3186            InteractiveCommand::AddDisk {
3187                read_only,
3188                target,
3189                path,
3190                lun,
3191                ram,
3192                file_path,
3193                is_dvd,
3194            } => {
3195                let action = async {
3196                    let scsi = resources.scsi_rpc.as_ref().context("no scsi controller")?;
3197                    let disk_type = match ram {
3198                        None => {
3199                            let path = file_path.context("no filename passed")?;
3200                            open_disk_type(path.as_ref(), read_only)
3201                                .with_context(|| format!("failed to open {}", path.display()))?
3202                        }
3203                        Some(size) => {
3204                            Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer(
3205                                RamDiskLayerHandle {
3206                                    len: Some(size),
3207                                    sector_size: None,
3208                                },
3209                            ))
3210                        }
3211                    };
3212
3213                    let device = if is_dvd {
3214                        SimpleScsiDvdHandle {
3215                            media: Some(disk_type),
3216                            requests: None,
3217                        }
3218                        .into_resource()
3219                    } else {
3220                        SimpleScsiDiskHandle {
3221                            disk: disk_type,
3222                            read_only,
3223                            parameters: Default::default(),
3224                        }
3225                        .into_resource()
3226                    };
3227
3228                    let cfg = ScsiDeviceAndPath {
3229                        path: ScsiPath { path, target, lun },
3230                        device,
3231                    };
3232
3233                    scsi.call_failable(ScsiControllerRequest::AddDevice, cfg)
3234                        .await?;
3235
3236                    anyhow::Result::<_>::Ok(())
3237                };
3238
3239                if let Err(error) = action.await {
3240                    tracing::error!(error = error.as_error(), "error adding disk")
3241                }
3242            }
3243            InteractiveCommand::RmDisk { target, path, lun } => {
3244                let action = async {
3245                    let scsi = resources.scsi_rpc.as_ref().context("no scsi controller")?;
3246                    scsi.call_failable(
3247                        ScsiControllerRequest::RemoveDevice,
3248                        ScsiPath { target, path, lun },
3249                    )
3250                    .await?;
3251                    anyhow::Ok(())
3252                };
3253
3254                if let Err(error) = action.await {
3255                    tracing::error!(error = error.as_error(), "error removing disk")
3256                }
3257            }
3258            InteractiveCommand::Vtl2Settings(cmd) => {
3259                if resources.vtl2_settings.is_none() {
3260                    eprintln!("error: no VTL2 settings (not running with VTL2?)");
3261                    continue;
3262                }
3263                let action = async {
3264                    match cmd {
3265                        Vtl2SettingsCommand::Show => {
3266                            let settings = resources.vtl2_settings.as_ref().unwrap();
3267                            println!("{:#?}", settings);
3268                        }
3269                        Vtl2SettingsCommand::AddScsiDisk {
3270                            controller,
3271                            lun,
3272                            backing_nvme_nsid,
3273                            backing_scsi_lun,
3274                        } => {
3275                            // Determine the backing device type and path
3276                            let (device_type, device_path, sub_device_path) = match (
3277                                backing_nvme_nsid,
3278                                backing_scsi_lun,
3279                            ) {
3280                                (Some(nsid), None) => (
3281                                    vtl2_settings_proto::physical_device::DeviceType::Nvme,
3282                                    storage_builder::NVME_VTL2_INSTANCE_ID,
3283                                    nsid,
3284                                ),
3285                                (None, Some(scsi_lun)) => (
3286                                    vtl2_settings_proto::physical_device::DeviceType::Vscsi,
3287                                    storage_builder::SCSI_VTL2_INSTANCE_ID,
3288                                    scsi_lun,
3289                                ),
3290                                (Some(_), Some(_)) => {
3291                                    anyhow::bail!(
3292                                        "can't specify both --backing-nvme-nsid and --backing-scsi-lun"
3293                                    );
3294                                }
3295                                (None, None) => {
3296                                    anyhow::bail!(
3297                                        "must specify either --backing-nvme-nsid or --backing-scsi-lun"
3298                                    );
3299                                }
3300                            };
3301
3302                            // Default to the standard OpenVMM VTL0 SCSI instance
3303                            let controller_guid = controller
3304                                .map(|s| s.parse())
3305                                .transpose()
3306                                .context("invalid controller GUID")?
3307                                .unwrap_or(storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE);
3308
3309                            resources
3310                                .add_vtl0_scsi_disk(
3311                                    controller_guid,
3312                                    lun,
3313                                    device_type,
3314                                    device_path,
3315                                    sub_device_path,
3316                                )
3317                                .await?;
3318
3319                            let backing_desc = if backing_nvme_nsid.is_some() {
3320                                format!("nvme_nsid={}", sub_device_path)
3321                            } else {
3322                                format!("scsi_lun={}", sub_device_path)
3323                            };
3324                            println!(
3325                                "Added VTL0 SCSI disk: controller={}, lun={}, backing={}",
3326                                controller_guid, lun, backing_desc
3327                            );
3328                        }
3329                        Vtl2SettingsCommand::RmScsiDisk { controller, lun } => {
3330                            // Default to the standard OpenVMM VTL0 SCSI instance
3331                            let controller_guid = controller
3332                                .map(|s| s.parse())
3333                                .transpose()
3334                                .context("invalid controller GUID")?
3335                                .unwrap_or(storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE);
3336
3337                            resources
3338                                .remove_vtl0_scsi_disk(controller_guid, lun)
3339                                .await?;
3340
3341                            println!(
3342                                "Removed VTL0 SCSI disk: controller={}, lun={}",
3343                                controller_guid, lun
3344                            );
3345                        }
3346                    }
3347                    anyhow::Ok(())
3348                };
3349
3350                if let Err(error) = action.await {
3351                    eprintln!("error: {}", error);
3352                }
3353            }
3354            InteractiveCommand::AddNvmeNs {
3355                read_only,
3356                nsid,
3357                ram,
3358                file_path,
3359                vtl0_lun,
3360            } => {
3361                if resources.vtl2_settings.is_none() {
3362                    eprintln!("error: add-nvme-ns requires --vtl2 mode");
3363                    continue;
3364                }
3365                let action = async {
3366                    let nvme = resources
3367                        .nvme_vtl2_rpc
3368                        .as_ref()
3369                        .context("no vtl2 nvme controller")?;
3370                    let disk_type = match (ram, file_path) {
3371                        (None, Some(path)) => open_disk_type(path.as_ref(), read_only)
3372                            .with_context(|| format!("failed to open {}", path.display()))?,
3373                        (Some(size), None) => {
3374                            Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer(
3375                                RamDiskLayerHandle {
3376                                    len: Some(size),
3377                                    sector_size: None,
3378                                },
3379                            ))
3380                        }
3381                        (None, None) => {
3382                            anyhow::bail!("must specify either file path or --ram");
3383                        }
3384                        (Some(_), Some(_)) => {
3385                            anyhow::bail!("cannot specify both file path and --ram");
3386                        }
3387                    };
3388
3389                    let ns = NamespaceDefinition {
3390                        nsid,
3391                        read_only,
3392                        disk: disk_type,
3393                    };
3394
3395                    nvme.call_failable(NvmeControllerRequest::AddNamespace, ns)
3396                        .await?;
3397                    println!("Added namespace {}", nsid);
3398
3399                    // If --vtl0-lun was specified, add a SCSI disk to VTL0 backed by the NVMe namespace
3400                    if let Some(lun) = vtl0_lun {
3401                        resources
3402                            .add_vtl0_scsi_disk(
3403                                storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE,
3404                                lun,
3405                                vtl2_settings_proto::physical_device::DeviceType::Nvme,
3406                                storage_builder::NVME_VTL2_INSTANCE_ID,
3407                                nsid,
3408                            )
3409                            .await?;
3410                        println!("Exposed namespace {} to VTL0 as SCSI lun={}", nsid, lun);
3411                    }
3412
3413                    Ok(())
3414                };
3415
3416                if let Err(error) = action.await {
3417                    eprintln!("error adding nvme namespace: {}", error);
3418                }
3419            }
3420            InteractiveCommand::RmNvmeNs { nsid, vtl0 } => {
3421                if resources.vtl2_settings.is_none() {
3422                    eprintln!("error: rm-nvme-ns requires --vtl2 mode");
3423                    continue;
3424                }
3425                let action = async {
3426                    // If --vtl0 was specified, find and remove the SCSI disk backed by this namespace
3427                    if vtl0 {
3428                        let removed_lun = resources
3429                            .remove_vtl0_scsi_disk_by_nvme_nsid(
3430                                storage_builder::UNDERHILL_VTL0_SCSI_INSTANCE,
3431                                storage_builder::NVME_VTL2_INSTANCE_ID,
3432                                nsid,
3433                            )
3434                            .await?;
3435                        if let Some(lun) = removed_lun {
3436                            println!("Removed VTL0 SCSI lun={}", lun);
3437                        } else {
3438                            println!("No VTL0 SCSI disk found backed by NVMe nsid={}", nsid);
3439                        }
3440                    }
3441
3442                    let nvme = resources
3443                        .nvme_vtl2_rpc
3444                        .as_ref()
3445                        .context("no vtl2 nvme controller")?;
3446                    nvme.call_failable(NvmeControllerRequest::RemoveNamespace, nsid)
3447                        .await?;
3448                    println!("Removed NVMe namespace {}", nsid);
3449                    anyhow::Ok(())
3450                };
3451
3452                if let Err(error) = action.await {
3453                    eprintln!("error removing nvme namespace: {}", error);
3454                }
3455            }
3456            InteractiveCommand::Inspect {
3457                recursive,
3458                limit,
3459                paravisor,
3460                element,
3461                update,
3462            } => {
3463                let obj = inspect_obj(
3464                    if paravisor {
3465                        InspectTarget::Paravisor
3466                    } else {
3467                        InspectTarget::Host
3468                    },
3469                    mesh,
3470                    &vm_worker,
3471                    vnc_worker.as_ref(),
3472                    gdb_worker.as_ref(),
3473                    &mut diag_inspector,
3474                );
3475
3476                if let Some(value) = update {
3477                    let Some(element) = element else {
3478                        anyhow::bail!("must provide element for update")
3479                    };
3480
3481                    let value = async {
3482                        let update = inspect::update(&element, &value, obj);
3483                        let value = CancelContext::new()
3484                            .with_timeout(Duration::from_secs(1))
3485                            .until_cancelled(update)
3486                            .await??;
3487                        anyhow::Ok(value)
3488                    }
3489                    .await;
3490                    match value {
3491                        Ok(node) => match &node.kind {
3492                            inspect::ValueKind::String(s) => println!("{s}"),
3493                            _ => println!("{:#}", node),
3494                        },
3495                        Err(err) => println!("error: {:#}", err),
3496                    }
3497                } else {
3498                    let element = element.unwrap_or_default();
3499                    let depth = if recursive { limit } else { Some(0) };
3500                    let node = async {
3501                        let mut inspection =
3502                            InspectionBuilder::new(&element).depth(depth).inspect(obj);
3503                        let _ = CancelContext::new()
3504                            .with_timeout(Duration::from_secs(1))
3505                            .until_cancelled(inspection.resolve())
3506                            .await;
3507                        inspection.results()
3508                    }
3509                    .await;
3510
3511                    println!("{:#}", node);
3512                }
3513            }
3514            InteractiveCommand::RestartVnc => {
3515                if let Some(vnc) = &mut vnc_worker {
3516                    let action = async {
3517                        let vnc_host = mesh
3518                            .make_host("vnc", None)
3519                            .await
3520                            .context("spawning vnc process failed")?;
3521
3522                        vnc.restart(&vnc_host);
3523                        anyhow::Result::<_>::Ok(())
3524                    };
3525
3526                    if let Err(error) = action.await {
3527                        eprintln!("error: {}", error);
3528                    }
3529                } else {
3530                    eprintln!("ERROR: no VNC server running");
3531                }
3532            }
3533            InteractiveCommand::Hvsock { term, port } => {
3534                let vm_rpc = &vm_rpc;
3535                let action = async || {
3536                    let service_id = new_hvsock_service_id(port);
3537                    let socket = vm_rpc
3538                        .call_failable(
3539                            VmRpc::ConnectHvsock,
3540                            (
3541                                CancelContext::new().with_timeout(Duration::from_secs(2)),
3542                                service_id,
3543                                DeviceVtl::Vtl0,
3544                            ),
3545                        )
3546                        .await?;
3547                    let socket = PolledSocket::new(driver, socket)?;
3548                    let mut console = console_relay::Console::new(
3549                        driver.clone(),
3550                        term.or_else(openvmm_terminal_app).as_deref(),
3551                        Some(ConsoleLaunchOptions {
3552                            window_title: Some(format!("HVSock{} [OpenVMM]", port)),
3553                        }),
3554                    )?;
3555                    driver
3556                        .spawn("console-relay", async move { console.relay(socket).await })
3557                        .detach();
3558                    anyhow::Result::<_>::Ok(())
3559                };
3560
3561                if let Err(error) = (action)().await {
3562                    eprintln!("error: {}", error);
3563                }
3564            }
3565            InteractiveCommand::ServiceVtl2 {
3566                user_mode_only,
3567                igvm,
3568                mana_keepalive,
3569                nvme_keepalive,
3570            } => {
3571                let paravisor_diag = paravisor_diag.clone();
3572                let vm_rpc = vm_rpc.clone();
3573                let igvm = igvm.or_else(|| opt.igvm.clone());
3574                let ged_rpc = resources.ged_rpc.clone();
3575                let r = async move {
3576                    let start;
3577                    if user_mode_only {
3578                        start = Instant::now();
3579                        paravisor_diag.restart().await?;
3580                    } else {
3581                        let path = igvm.context("no igvm file loaded")?;
3582                        let file = fs_err::File::open(path)?;
3583                        start = Instant::now();
3584                        openvmm_helpers::underhill::save_underhill(
3585                            &vm_rpc,
3586                            ged_rpc.as_ref().context("no GED")?,
3587                            GuestServicingFlags {
3588                                nvme_keepalive,
3589                                mana_keepalive,
3590                            },
3591                            file.into(),
3592                        )
3593                        .await?;
3594                        openvmm_helpers::underhill::restore_underhill(
3595                            &vm_rpc,
3596                            ged_rpc.as_ref().context("no GED")?,
3597                        )
3598                        .await?;
3599                    }
3600                    let end = Instant::now();
3601                    Ok(end - start)
3602                }
3603                .map(|r| Ok(StateChange::ServiceVtl2(r)));
3604                if state_change_task.is_some() {
3605                    tracing::error!("state change already in progress");
3606                } else {
3607                    state_change_task = Some(driver.spawn("state-change", r));
3608                }
3609            }
3610            InteractiveCommand::Quit => {
3611                tracing::info!("quitting");
3612                // Work around the detached SCSI task holding up worker stop.
3613                // TODO: Fix the underlying bug
3614                resources.scsi_rpc = None;
3615                resources.nvme_vtl2_rpc = None;
3616
3617                vm_worker.stop();
3618                quit = true;
3619            }
3620            InteractiveCommand::ReadMemory { gpa, size, file } => {
3621                let size = size as usize;
3622                let data = vm_rpc.call(VmRpc::ReadMemory, (gpa, size)).await?;
3623
3624                match data {
3625                    Ok(bytes) => {
3626                        if let Some(file) = file {
3627                            if let Err(err) = fs_err::write(file, bytes) {
3628                                eprintln!("error: {err:?}");
3629                            }
3630                        } else {
3631                            let width = 16;
3632                            let show_ascii = true;
3633
3634                            let mut dump = String::new();
3635                            for (i, chunk) in bytes.chunks(width).enumerate() {
3636                                let hex_part: Vec<String> =
3637                                    chunk.iter().map(|byte| format!("{:02x}", byte)).collect();
3638                                let hex_line = hex_part.join(" ");
3639
3640                                if show_ascii {
3641                                    let ascii_part: String = chunk
3642                                        .iter()
3643                                        .map(|&byte| {
3644                                            if byte.is_ascii_graphic() || byte == b' ' {
3645                                                byte as char
3646                                            } else {
3647                                                '.'
3648                                            }
3649                                        })
3650                                        .collect();
3651                                    dump.push_str(&format!(
3652                                        "{:04x}: {:<width$}  {}\n",
3653                                        i * width,
3654                                        hex_line,
3655                                        ascii_part,
3656                                        width = width * 3 - 1
3657                                    ));
3658                                } else {
3659                                    dump.push_str(&format!("{:04x}: {}\n", i * width, hex_line));
3660                                }
3661                            }
3662
3663                            println!("{dump}");
3664                        }
3665                    }
3666                    Err(err) => {
3667                        eprintln!("error: {err:?}");
3668                    }
3669                }
3670            }
3671            InteractiveCommand::WriteMemory { gpa, hex, file } => {
3672                if hex.is_some() == file.is_some() {
3673                    eprintln!("error: either path to the file or the hex string must be specified");
3674                    continue;
3675                }
3676
3677                let data = if let Some(file) = file {
3678                    let data = fs_err::read(file);
3679                    match data {
3680                        Ok(data) => data,
3681                        Err(err) => {
3682                            eprintln!("error: {err:?}");
3683                            continue;
3684                        }
3685                    }
3686                } else if let Some(hex) = hex {
3687                    if hex.len() & 1 != 0 {
3688                        eprintln!(
3689                            "error: expected even number of hex digits (2 hex digits per byte)"
3690                        );
3691                        continue;
3692                    }
3693                    let data: Result<Vec<u8>, String> = (0..hex.len())
3694                        .step_by(2)
3695                        .map(|i| {
3696                            u8::from_str_radix(&hex[i..i + 2], 16).map_err(|e| {
3697                                format!("invalid hex character at position {}: {}", i, e)
3698                            })
3699                        })
3700                        .collect();
3701
3702                    match data {
3703                        Ok(data) => data,
3704                        Err(err) => {
3705                            eprintln!("error: {err}");
3706                            continue;
3707                        }
3708                    }
3709                } else {
3710                    unreachable!();
3711                };
3712
3713                if data.is_empty() {
3714                    eprintln!("error: no data to write");
3715                    continue;
3716                }
3717
3718                if let Err(err) = vm_rpc.call(VmRpc::WriteMemory, (gpa, data)).await? {
3719                    eprintln!("error: {err:?}");
3720                }
3721            }
3722            InteractiveCommand::Kvp(command) => {
3723                let Some(kvp) = &resources.kvp_ic else {
3724                    eprintln!("error: no kvp ic configured");
3725                    continue;
3726                };
3727                if let Err(err) = kvp::handle_kvp(kvp, command).await {
3728                    eprintln!("error: {err:#}");
3729                }
3730            }
3731            InteractiveCommand::Input { .. } | InteractiveCommand::InputMode => unreachable!(),
3732        }
3733    }
3734
3735    vm_worker.stop();
3736    vm_worker.join().await?;
3737    Ok(())
3738}
3739
3740struct DiagDialer {
3741    driver: DefaultDriver,
3742    vm_rpc: mesh::Sender<VmRpc>,
3743    openhcl_vtl: DeviceVtl,
3744}
3745
3746impl mesh_rpc::client::Dial for DiagDialer {
3747    type Stream = PolledSocket<unix_socket::UnixStream>;
3748
3749    async fn dial(&mut self) -> io::Result<Self::Stream> {
3750        let service_id = new_hvsock_service_id(1);
3751        let socket = self
3752            .vm_rpc
3753            .call_failable(
3754                VmRpc::ConnectHvsock,
3755                (
3756                    CancelContext::new().with_timeout(Duration::from_secs(2)),
3757                    service_id,
3758                    self.openhcl_vtl,
3759                ),
3760            )
3761            .await
3762            .map_err(io::Error::other)?;
3763
3764        PolledSocket::new(&self.driver, socket)
3765    }
3766}
3767
3768/// An object that implements [`InspectMut`] by sending an inspect request over
3769/// TTRPC to the guest (typically the paravisor running in VTL2), then stitching
3770/// the response back into the inspect tree.
3771///
3772/// This also caches the TTRPC connection to the guest so that only the first
3773/// inspect request has to wait for the connection to be established.
3774pub struct DiagInspector(DiagInspectorInner);
3775
3776enum DiagInspectorInner {
3777    NotStarted(DefaultDriver, Arc<diag_client::DiagClient>),
3778    Started {
3779        send: mesh::Sender<inspect::Deferred>,
3780        _task: Task<()>,
3781    },
3782    Invalid,
3783}
3784
3785impl DiagInspector {
3786    pub fn new(driver: DefaultDriver, diag_client: Arc<diag_client::DiagClient>) -> Self {
3787        Self(DiagInspectorInner::NotStarted(driver, diag_client))
3788    }
3789
3790    fn start(&mut self) -> &mesh::Sender<inspect::Deferred> {
3791        loop {
3792            match self.0 {
3793                DiagInspectorInner::NotStarted { .. } => {
3794                    let DiagInspectorInner::NotStarted(driver, client) =
3795                        std::mem::replace(&mut self.0, DiagInspectorInner::Invalid)
3796                    else {
3797                        unreachable!()
3798                    };
3799                    let (send, recv) = mesh::channel();
3800                    let task = driver.clone().spawn("diag-inspect", async move {
3801                        Self::run(&client, recv).await
3802                    });
3803
3804                    self.0 = DiagInspectorInner::Started { send, _task: task };
3805                }
3806                DiagInspectorInner::Started { ref send, .. } => break send,
3807                DiagInspectorInner::Invalid => unreachable!(),
3808            }
3809        }
3810    }
3811
3812    async fn run(
3813        diag_client: &diag_client::DiagClient,
3814        mut recv: mesh::Receiver<inspect::Deferred>,
3815    ) {
3816        while let Some(deferred) = recv.next().await {
3817            let info = deferred.external_request();
3818            let result = match info.request_type {
3819                inspect::ExternalRequestType::Inspect { depth } => {
3820                    if depth == 0 {
3821                        Ok(inspect::Node::Unevaluated)
3822                    } else {
3823                        // TODO: Support taking timeouts from the command line
3824                        diag_client
3825                            .inspect(info.path, Some(depth - 1), Some(Duration::from_secs(1)))
3826                            .await
3827                    }
3828                }
3829                inspect::ExternalRequestType::Update { value } => {
3830                    (diag_client.update(info.path, value).await).map(inspect::Node::Value)
3831                }
3832            };
3833            deferred.complete_external(
3834                result.unwrap_or_else(|err| {
3835                    inspect::Node::Failed(inspect::Error::Mesh(format!("{err:#}")))
3836                }),
3837                inspect::SensitivityLevel::Unspecified,
3838            )
3839        }
3840    }
3841}
3842
3843impl InspectMut for DiagInspector {
3844    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
3845        self.start().send(req.defer());
3846    }
3847}
3848
/// Selects which inspect target a request is directed at.
enum InspectTarget {
    /// Chosen when the `paravisor` option is not set.
    Host,
    /// Chosen when the `paravisor` option is set.
    Paravisor,
}
3853
3854mod interactive_console {
3855    use super::InteractiveCommand;
3856    use rustyline::Helper;
3857    use rustyline::Highlighter;
3858    use rustyline::Hinter;
3859    use rustyline::Validator;
3860
3861    #[derive(Helper, Highlighter, Hinter, Validator)]
3862    pub(crate) struct OpenvmmRustylineEditor {
3863        pub openvmm_inspect_req: std::sync::Arc<
3864            mesh::Sender<(
3865                super::InspectTarget,
3866                String,
3867                mesh::OneshotSender<inspect::Node>,
3868            )>,
3869        >,
3870    }
3871
3872    impl rustyline::completion::Completer for OpenvmmRustylineEditor {
3873        type Candidate = String;
3874
3875        fn complete(
3876            &self,
3877            line: &str,
3878            pos: usize,
3879            _ctx: &rustyline::Context<'_>,
3880        ) -> rustyline::Result<(usize, Vec<Self::Candidate>)> {
3881            let Ok(cmd) = shell_words::split(line) else {
3882                return Ok((0, Vec::with_capacity(0)));
3883            };
3884
3885            let completions = futures::executor::block_on(
3886                clap_dyn_complete::Complete {
3887                    cmd,
3888                    raw: Some(line.into()),
3889                    position: Some(pos),
3890                }
3891                .generate_completions::<InteractiveCommand>(None, self),
3892            );
3893
3894            let pos_from_end = {
3895                let line = line.chars().take(pos).collect::<String>();
3896
3897                let trailing_ws = line.len() - line.trim_end().len();
3898
3899                if trailing_ws > 0 {
3900                    line.len() - trailing_ws + 1 // +1 for the space
3901                } else {
3902                    let last_word = shell_words::split(&line)
3903                        .unwrap_or_default()
3904                        .last()
3905                        .cloned()
3906                        .unwrap_or_default();
3907
3908                    line.len() - last_word.len()
3909                }
3910            };
3911
3912            Ok((pos_from_end, completions))
3913        }
3914    }
3915
3916    impl clap_dyn_complete::CustomCompleterFactory for &OpenvmmRustylineEditor {
3917        type CustomCompleter = OpenvmmComplete;
3918        async fn build(&self, _ctx: &clap_dyn_complete::RootCtx<'_>) -> Self::CustomCompleter {
3919            OpenvmmComplete {
3920                openvmm_inspect_req: self.openvmm_inspect_req.clone(),
3921            }
3922        }
3923    }
3924
3925    pub struct OpenvmmComplete {
3926        openvmm_inspect_req: std::sync::Arc<
3927            mesh::Sender<(
3928                super::InspectTarget,
3929                String,
3930                mesh::OneshotSender<inspect::Node>,
3931            )>,
3932        >,
3933    }
3934
3935    impl clap_dyn_complete::CustomCompleter for OpenvmmComplete {
3936        async fn complete(
3937            &self,
3938            ctx: &clap_dyn_complete::RootCtx<'_>,
3939            subcommand_path: &[&str],
3940            arg_id: &str,
3941        ) -> Vec<String> {
3942            match (subcommand_path, arg_id) {
3943                (["openvmm", "inspect"], "element") => {
3944                    let on_error = vec!["failed/to/connect".into()];
3945
3946                    let (parent_path, to_complete) = (ctx.to_complete)
3947                        .rsplit_once('/')
3948                        .unwrap_or(("", ctx.to_complete));
3949
3950                    let node = {
3951                        let paravisor = {
3952                            let raw_arg = ctx
3953                                .matches
3954                                .subcommand()
3955                                .unwrap()
3956                                .1
3957                                .get_one::<String>("paravisor")
3958                                .map(|x| x.as_str())
3959                                .unwrap_or_default();
3960                            raw_arg == "true"
3961                        };
3962
3963                        let (tx, rx) = mesh::oneshot();
3964                        self.openvmm_inspect_req.send((
3965                            if paravisor {
3966                                super::InspectTarget::Paravisor
3967                            } else {
3968                                super::InspectTarget::Host
3969                            },
3970                            parent_path.to_owned(),
3971                            tx,
3972                        ));
3973                        let Ok(node) = rx.await else {
3974                            return on_error;
3975                        };
3976
3977                        node
3978                    };
3979
3980                    let mut completions = Vec::new();
3981
3982                    if let inspect::Node::Dir(dir) = node {
3983                        for entry in dir {
3984                            if entry.name.starts_with(to_complete) {
3985                                if parent_path.is_empty() {
3986                                    completions.push(format!("{}/", entry.name))
3987                                } else {
3988                                    completions.push(format!(
3989                                        "{}/{}{}",
3990                                        parent_path,
3991                                        entry.name,
3992                                        if matches!(entry.node, inspect::Node::Dir(..)) {
3993                                            "/"
3994                                        } else {
3995                                            ""
3996                                        }
3997                                    ))
3998                                }
3999                            }
4000                        }
4001                    } else {
4002                        return on_error;
4003                    }
4004
4005                    completions
4006                }
4007                _ => Vec::new(),
4008            }
4009        }
4010    }
4011}