Skip to main content

openvmm_defs/
config.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Configuration for the VM worker.
5
6use guid::Guid;
7use input_core::InputData;
8use memory_range::MemoryRange;
9use mesh::MeshPayload;
10use mesh::payload::Protobuf;
11use net_backend_resources::mac_address::MacAddress;
12use openvmm_pcat_locator::RomFileLocation;
13use std::fs::File;
14use vm_resource::Resource;
15use vm_resource::kind::PciDeviceHandleKind;
16use vm_resource::kind::VirtioDeviceHandle;
17use vm_resource::kind::VmbusDeviceHandleKind;
18use vmgs_resources::VmgsResource;
19use vmotherboard::ChipsetDeviceHandle;
20use vmotherboard::LegacyPciChipsetDeviceHandle;
21use vmotherboard::options::BaseChipsetManifest;
22use vmotherboard::options::VmChipsetCapabilities;
23
/// Top-level configuration for the VM worker.
///
/// This is a `MeshPayload`; several fields carry mesh channel endpoints
/// (`mesh::Receiver` / `mesh::Sender`) rather than plain data.
#[derive(MeshPayload, Debug)]
pub struct Config {
    /// What to load into the guest and how; see [`LoadMode`].
    pub load_mode: LoadMode,
    /// Floppy disk configurations.
    pub floppy_disks: Vec<floppy_resources::FloppyDiskConfig>,
    /// IDE device configurations.
    pub ide_disks: Vec<ide_resources::IdeDeviceConfig>,
    /// PCIe root complexes; see [`PcieRootComplexConfig`].
    pub pcie_root_complexes: Vec<PcieRootComplexConfig>,
    /// PCIe devices, each attached to a port by name; see
    /// [`PcieDeviceConfig`].
    pub pcie_devices: Vec<PcieDeviceConfig>,
    /// PCIe switches, each attached to a parent port by name; see
    /// [`PcieSwitchConfig`].
    pub pcie_switches: Vec<PcieSwitchConfig>,
    /// Generic-initiator declarations; see [`PcieGenericInitiatorConfig`].
    pub pcie_generic_initiators: Vec<PcieGenericInitiatorConfig>,
    /// VPCI devices; see [`VpciDeviceConfig`].
    pub vpci_devices: Vec<VpciDeviceConfig>,
    /// Virtual NUMA topology for the VM.
    pub numa: NumaTopology,
    /// Processor topology; see [`ProcessorTopologyConfig`].
    pub processor_topology: ProcessorTopologyConfig,
    /// Hypervisor-related settings; see [`HypervisorConfig`].
    pub hypervisor: HypervisorConfig,
    /// Base chipset manifest.
    pub chipset: BaseChipsetManifest,
    /// VMBus configuration, if any.
    pub vmbus: Option<VmbusConfig>,
    /// A second, optional VMBus configuration — presumably the instance
    /// serving VTL2; confirm against the worker.
    pub vtl2_vmbus: Option<VmbusConfig>,
    /// Kernel-mode VM NIC configurations (Windows builds only).
    #[cfg(windows)]
    pub kernel_vmnics: Vec<KernelVmNicConfig>,
    /// Receiving end of the channel carrying `InputData`.
    pub input: mesh::Receiver<InputData>,
    /// Optional framebuffer.
    pub framebuffer: Option<framebuffer::Framebuffer>,
    /// Optional location of the VGA firmware ROM.
    pub vga_firmware: Option<RomFileLocation>,
    // NOTE(review): presumably routes graphics to VTL2 — confirm with the
    // consumer of this flag.
    pub vtl2_gfx: bool,
    /// Virtio devices, each paired with the bus type it is exposed on.
    pub virtio_devices: Vec<(VirtioBus, Resource<VirtioDeviceHandle>)>,
    /// WHP device handles (Windows builds only).
    #[cfg(windows)]
    pub vpci_resources: Vec<virt_whp::device::DeviceHandle>,
    /// Optional VMGS resource.
    pub vmgs: Option<VmgsResource>,
    // NOTE(review): presumably only meaningful for UEFI boot — confirm.
    pub secure_boot_enabled: bool,
    /// Custom UEFI variables.
    pub custom_uefi_vars: firmware_uefi_custom_vars::CustomVars,
    // TODO: move FirmwareEvent somewhere not GED-specific.
    /// Optional sending end of a channel carrying firmware events.
    pub firmware_event_send: Option<mesh::Sender<get_resources::ged::FirmwareEvent>>,
    /// Optional receiving end of a channel carrying debugger requests.
    pub debugger_rpc: Option<mesh::Receiver<vmm_core_defs::debug_rpc::DebugRequest>>,
    /// VMBus devices, each paired with the VTL it is assigned to.
    pub vmbus_devices: Vec<(DeviceVtl, Resource<VmbusDeviceHandleKind>)>,
    /// Chipset device handles.
    pub chipset_devices: Vec<ChipsetDeviceHandle>,
    /// Legacy PCI chipset device handles.
    pub pci_chipset_devices: Vec<LegacyPciChipsetDeviceHandle>,
    /// Optional ISA DMA controller resource.
    pub isa_dma_controller: Option<Resource<vm_resource::kind::IsaDmaControllerHandleKind>>,
    /// Chipset capabilities.
    pub chipset_capabilities: VmChipsetCapabilities,
    /// Memory layout sizing for the layout engine. Determines chipset MMIO
    /// range sizes; addresses are allocated dynamically by the resolver.
    pub layout: vmm_core_defs::LayoutConfig,
    // This is used for testing. TODO: resourcify, and also store this in VMGS.
    /// RTC delta in milliseconds (per the field name).
    pub rtc_delta_milliseconds: i64,
    /// Allow the guest to reset without notifying the client.
    pub automatic_guest_reset: bool,
    /// EFI diagnostics log level; see [`EfiDiagnosticsLogLevelType`].
    pub efi_diagnostics_log_level: EfiDiagnosticsLogLevelType,
}
69
/// Default base address of the GIC distributor.
pub const DEFAULT_GIC_DISTRIBUTOR_BASE: u64 = 0xFFFF_0000;
/// Default base address of the GIC redistributors.
///
/// The KVM in-kernel vGICv3 requires the distributor and redistributor bases
/// be 64KiB aligned. The value is chosen at compile time by target OS: the
/// Linux value (`0xEFFF_0000`) is 64 KiB aligned, while the value used for
/// other targets (`0xEFFE_E000`) is not.
pub const DEFAULT_GIC_REDISTRIBUTORS_BASE: u64 = if cfg!(target_os = "linux") {
    0xEFFF_0000
} else {
    0xEFFE_E000
};

/// Base address of the GIC v2m MSI frame. Must not overlap GIC dist/redist,
/// serial UARTs, or VMBus MMIO. Matches the Hyper-V convention.
///
/// With [`GIC_V2M_MSI_FRAME_SIZE`], the frame occupies
/// `0xEFFE_8000..0xEFFE_9000`.
pub const DEFAULT_GIC_V2M_MSI_FRAME_BASE: u64 = 0xEFFE_8000;
/// Size of the v2m MSI frame (one 4 KiB page is the architectural minimum).
pub const GIC_V2M_MSI_FRAME_SIZE: u64 = 0x1000;

/// Base address of the GICv3 ITS MMIO region. Must be 64 KiB aligned,
/// below the v2m frame address, and not overlap other devices.
/// The region extends from this base to base + GIC_ITS_SIZE (128 KiB),
/// i.e. `0xEFFC_0000..0xEFFE_0000`, which ends below
/// [`DEFAULT_GIC_V2M_MSI_FRAME_BASE`].
pub const DEFAULT_GIC_ITS_BASE: u64 = 0xEFFC_0000;
/// Size of the ITS MMIO region (control frame + translation frame, 2×64 KiB).
pub const GIC_ITS_SIZE: u64 = 0x2_0000;

/// Default virtual timer PPI (GIC INTID). PPI 4 = INTID 16 + 4 = 20.
/// This is the EL1 virtual timer interrupt used across Hyper-V, KVM, and HVF.
pub const DEFAULT_VIRT_TIMER_PPI: u32 = 20;

/// Default total number of GIC interrupts (SGIs + PPIs + SPIs).
/// Must satisfy KVM constraints: 64 <= n <= 1023, multiple of 32.
/// 992 = 31 × 32 is the largest valid value.
pub const DEFAULT_GIC_NR_IRQS: u32 = 992;

/// Default VMBus PPI (GIC INTID). PPI 2 = INTID 16 + 2 = 18.
pub const DEFAULT_VMBUS_PPI: u32 = 18;
102
/// How firmware tables are presented to the guest in Linux direct boot.
///
/// On x86, `DeviceTree` is not supported and will be rejected. On aarch64,
/// this selects between a full device tree or an ACPI boot path.
///
/// Carried by [`LoadMode::Linux`] as its `boot_mode` field.
#[derive(MeshPayload, Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinuxDirectBootMode {
    /// Full device tree with all devices described in DT nodes (aarch64 only).
    DeviceTree,
    /// ACPI tables for device discovery. On aarch64, this also synthesizes
    /// an EFI system table so the kernel enters its ACPI code path. On x86,
    /// ACPI tables are always provided via the zero page.
    Acpi,
}
116
/// Selects what is loaded into the guest and carries the parameters for that
/// load path.
#[derive(MeshPayload, Debug)]
pub enum LoadMode {
    /// Linux direct boot.
    Linux {
        /// Kernel image file.
        kernel: File,
        /// Optional initrd file.
        initrd: Option<File>,
        /// Kernel command line.
        cmdline: String,
        // NOTE(review): presumably exposes a serial device to the kernel —
        // confirm with the loader.
        enable_serial: bool,
        /// Optional raw DSDT bytes — presumably used in place of the
        /// generated DSDT; confirm with the loader.
        custom_dsdt: Option<Vec<u8>>,
        /// How firmware tables are presented to the guest; see
        /// [`LinuxDirectBootMode`].
        boot_mode: LinuxDirectBootMode,
    },
    /// Boot via a UEFI firmware image.
    ///
    /// NOTE(review): the `enable_*` / `disable_*` flags are firmware feature
    /// toggles whose exact effect is defined by the loader and firmware, not
    /// by this file.
    Uefi {
        /// UEFI firmware image file.
        firmware: File,
        enable_debugging: bool,
        enable_memory_protections: bool,
        disable_frontpage: bool,
        enable_tpm: bool,
        enable_battery: bool,
        enable_serial: bool,
        enable_vpci_boot: bool,
        /// Optional console mode; see [`UefiConsoleMode`].
        uefi_console_mode: Option<UefiConsoleMode>,
        default_boot_always_attempt: bool,
        /// BIOS GUID.
        bios_guid: Guid,
        enable_vmbus: bool,
        force_dma_bounce: bool,
    },
    /// Boot via PCAT firmware.
    Pcat {
        /// Location of the firmware ROM.
        firmware: RomFileLocation,
        /// Boot device order; see [`DEFAULT_PCAT_BOOT_ORDER`] for the default.
        boot_order: [PcatBootDevice; 4],
    },
    /// Load an IGVM file.
    Igvm {
        /// The IGVM file.
        file: File,
        /// Command line.
        cmdline: String,
        /// How the VTL2 base address is chosen; see [`Vtl2BaseAddressType`].
        vtl2_base_address: Vtl2BaseAddressType,
        /// Optional serial port description; see [`SerialInformation`].
        com_serial: Option<SerialInformation>,
    },
    /// No load mode is configured.
    None,
}
154
/// Describes a serial port by I/O port and IRQ.
///
/// Used as the `com_serial` field of [`LoadMode::Igvm`].
#[derive(Debug, Clone, Copy, MeshPayload)]
pub struct SerialInformation {
    /// I/O port of the serial device.
    pub io_port: u16,
    /// IRQ of the serial device.
    pub irq: u32,
}
160
/// Different types to specify the base address for the VTL2 region of the IGVM
/// file.
#[derive(Debug, Clone, Copy, MeshPayload)]
pub enum Vtl2BaseAddressType {
    /// Use the addresses specified in the file. The IGVM file does not need to
    /// support relocations.
    File,
    /// Put VTL2 at the specified address. The IGVM file must support
    /// relocations.
    Absolute(u64),
    /// Use the specified range in the supplied MemoryLayout, as the caller has
    /// created a specific range for VTL2. The IGVM file must support
    /// relocations.
    ///
    /// An optional size may be specified to override the size describing VTL2
    /// provided in the IGVM file. It must be larger than the IGVM file provided
    /// size.
    MemoryLayout { size: Option<u64> },
    /// Tell VTL2 to allocate its own memory. This will load the file at
    /// the base address specified in the file, and the host will tell VTL2 the
    /// size of memory to allocate for itself.
    ///
    /// An optional size may be specified to override the size describing VTL2
    /// provided in the IGVM file. It must be larger than the IGVM file provided
    /// size.
    Vtl2Allocate { size: Option<u64> },
}
188
/// Specifies a PCIe MMIO BAR window, either by size (the resolver allocates) or
/// by a fixed location. Fixed locations exist for assigned-device, IOMMU, and
/// physical-topology compatibility.
///
/// Used for both the `low_mmio` and `high_mmio` windows of
/// [`PcieRootComplexConfig`].
#[derive(Debug, MeshPayload)]
pub enum PcieMmioRangeConfig {
    /// Dynamically allocate a range of the given size.
    Dynamic {
        /// Size of the range in bytes.
        size: u64,
    },
    /// Use the specified fixed memory range.
    Fixed(MemoryRange),
}
202
/// CXL settings for a PCIe root complex.
///
/// Used as the optional `cxl` field of [`PcieRootComplexConfig`].
#[derive(Debug, MeshPayload)]
pub struct RootComplexCxlConfig {
    /// HDM window size in bytes for this CXL root complex.
    pub hdm_size: u64,
    /// CFMWS HDM window restrictions bitmask.
    pub hdm_window_restrictions: u16,
}
210
/// Configuration for a single PCIe root complex.
#[derive(Debug, MeshPayload)]
pub struct PcieRootComplexConfig {
    /// Index of this root complex.
    pub index: u32,
    /// Name of this root complex.
    pub name: String,
    /// PCI segment number.
    pub segment: u16,
    /// First bus number of this root complex's bus range.
    pub start_bus: u8,
    /// Last bus number of this root complex's bus range — presumably
    /// inclusive; confirm against the consumer.
    pub end_bus: u8,
    /// Low MMIO window; see [`PcieMmioRangeConfig`].
    pub low_mmio: PcieMmioRangeConfig,
    /// High MMIO window; see [`PcieMmioRangeConfig`].
    pub high_mmio: PcieMmioRangeConfig,
    /// Ports of this root complex; see [`PciePortConfig`].
    pub ports: Vec<PciePortConfig>,
    /// Optional CXL configuration for root-complex CXL mode.
    pub cxl: Option<RootComplexCxlConfig>,
    /// Optional IOMMU for this root complex.
    pub iommu: Option<PcieIommuConfig>,
    /// NUMA node affinity for this root complex. Used to generate `_PXM` in
    /// the ACPI SSDT so the guest OS sees correct NUMA locality for devices
    /// under this root complex.
    pub vnode: Option<u32>,
    /// When true, treat non-zero BAR values found during probing as pinned
    /// addresses. Used for P2P DMA with GPA = HPA.
    pub preserve_bars: bool,
}
233
/// Configuration for a single PCIe port — either a root-complex root port or a
/// switch downstream port.
///
/// Appears in both [`PcieRootComplexConfig`] and [`PcieSwitchConfig`] as
/// their `ports` lists.
#[derive(Debug, MeshPayload)]
pub struct PciePortConfig {
    /// Port name used for topology wiring and lookup.
    pub name: String,
    /// The device/function (`device << 3 | function`) to place this port at on
    /// its bus. With this encoding the low three bits hold the function
    /// number and the upper five bits hold the device number.
    ///
    /// When `None`, the port is assigned the lowest available devfn. Ports are
    /// assigned in order, so an explicit devfn that collides with a
    /// previously-assigned port (including one assigned automatically) is an
    /// error. Honored for both root-complex root ports and switch downstream
    /// ports.
    pub devfn: Option<u8>,
    /// Enables PCIe hotplug capabilities for this port.
    pub hotplug: bool,
    /// Optional ACS capability bitmask to expose on this port.
    pub acs_capabilities_supported: Option<u16>,
    /// Marks this port as CXL-capable.
    ///
    /// Runtime port construction derives required BAR/subregion layout from
    /// this flag (currently CXL component registers for BAR0).
    pub cxl: bool,
}
259
/// Configuration for a PCIe switch.
#[derive(Debug, MeshPayload)]
pub struct PcieSwitchConfig {
    /// Name of this switch.
    pub name: String,
    /// Name of the port this switch is attached to — presumably matched
    /// against [`PciePortConfig::name`]; confirm against the topology code.
    pub parent_port: String,
    /// The downstream ports of this switch.
    pub ports: Vec<PciePortConfig>,
}
267
/// Declares that the device directly behind a named PCIe port (a root port or
/// a switch downstream port) is a generic initiator (GI) for the given NUMA
/// node. Used to generate an SRAT Generic Initiator Affinity structure so the
/// guest attaches the device's memory to that (typically CPU-less) proximity
/// domain.
///
/// The port is resolved against the live topology by port name after switch
/// downstream ports have been enumerated, so it can target devices that sit
/// behind a switch.
#[derive(Debug, MeshPayload)]
pub struct PcieGenericInitiatorConfig {
    /// Name of the PCIe port (root port or switch downstream port) behind
    /// which the generic-initiator device resides.
    pub port_name: String,
    /// NUMA node the device is a generic initiator for.
    ///
    /// NOTE(review): presumably an index into [`NumaTopology::nodes`] —
    /// confirm against the SRAT generation code.
    pub node: u32,
}
285
/// A PCIe device and the port it is attached to.
#[derive(Debug, MeshPayload)]
pub struct PcieDeviceConfig {
    /// Name of the PCIe port the device is attached to.
    pub port_name: String,
    /// Resource handle describing the PCI device.
    pub resource: Resource<PciDeviceHandleKind>,
}
291
/// Configuration for a single VPCI device.
#[derive(Debug, MeshPayload)]
pub struct VpciDeviceConfig {
    /// The VTL the device is assigned to.
    pub vtl: DeviceVtl,
    /// The ID of the device. Vpci devices are identified by a portion of `data2` and `data3` of the
    /// instance ID, which is used to generate the guest-visible device ID.
    pub instance_id: Guid,
    /// Resource handle describing the PCI device.
    pub resource: Resource<PciDeviceHandleKind>,
    /// NUMA node affinity for this VPCI device.
    pub vnode: Option<u32>,
}
302
/// Processor topology settings for the VM.
#[derive(Debug, Protobuf)]
pub struct ProcessorTopologyConfig {
    /// Number of processors.
    pub proc_count: u32,
    /// Number of VPs per socket. [`VpAssignment::FromTopology`] derives a
    /// VP's socket ID as `vp_index / vps_per_socket`.
    ///
    /// NOTE(review): the behavior when `None` is not visible in this file.
    pub vps_per_socket: Option<u32>,
    /// Whether SMT is enabled.
    ///
    /// NOTE(review): the behavior when `None` is not visible in this file.
    pub enable_smt: Option<bool>,
    /// Optional architecture-specific topology settings.
    pub arch: Option<ArchTopologyConfig>,
}
310
/// x86-specific topology settings.
#[derive(Debug, Protobuf, Default, Clone)]
pub struct X86TopologyConfig {
    /// APIC ID offset (per the field name).
    // NOTE(review): how the offset is applied is not visible in this file.
    pub apic_id_offset: u32,
    /// x2APIC support policy; see [`X2ApicConfig`].
    pub x2apic: X2ApicConfig,
}
316
/// x2APIC support policy. The default is [`X2ApicConfig::Auto`].
#[derive(Debug, Default, Copy, Clone, Protobuf)]
pub enum X2ApicConfig {
    /// Support the X2APIC if recommended by the hypervisor or if needed by the
    /// topology configuration.
    #[default]
    Auto,
    /// Support the X2APIC, and automatically enable it if needed to address all
    /// processors.
    Supported,
    /// Do not support the X2APIC.
    Unsupported,
    /// Support and enable the X2APIC.
    Enabled,
}
331
/// Selects the GSIV used for the PMU, or disables the PMU. The default is
/// [`PmuGsivConfig::Platform`].
#[derive(Debug, Protobuf, Default, Clone)]
pub enum PmuGsivConfig {
    /// Use the hypervisor's platform GSIV value for the PMU.
    #[default]
    Platform,
    /// Use the specified GSIV value for the PMU.
    Gsiv(u32),
    /// Disable the PMU.
    Disabled,
}
342
/// MSI controller selection for aarch64 PCIe interrupt delivery.
///
/// The default is [`GicMsiConfig::Auto`].
#[derive(Debug, Protobuf, Default, Clone)]
pub enum GicMsiConfig {
    /// Automatically select the best available MSI controller:
    /// ITS when the hypervisor supports it, otherwise GICv2m.
    #[default]
    Auto,
    /// Force GICv3 ITS for MSI delivery via LPIs.
    Its,
    /// Force GICv2m for MSI delivery via SPIs.
    V2m {
        /// Number of SPIs to reserve for PCIe MSIs. Defaults to a
        /// platform-specific value when `None`.
        spi_count: Option<u32>,
    },
}
359
/// IOMMU configuration for a single PCIe root complex.
///
/// Used as the optional `iommu` field of [`PcieRootComplexConfig`].
#[derive(Debug, MeshPayload, Clone)]
pub enum PcieIommuConfig {
    /// AMD IOMMU (AMD-Vi) for x86_64 guests.
    AmdVi,
    /// Arm SMMUv3 for aarch64 guests.
    Smmu,
    /// Intel VT-d for x86_64 guests.
    IntelVtd,
}
370
/// aarch64-specific topology settings.
#[derive(Debug, Protobuf, Default, Clone)]
pub struct Aarch64TopologyConfig {
    /// Optional GIC configuration; see [`GicConfig`].
    ///
    /// NOTE(review): the behavior when `None` is not visible in this file.
    pub gic_config: Option<GicConfig>,
    /// PMU GSIV selection; see [`PmuGsivConfig`].
    pub pmu_gsiv: PmuGsivConfig,
    /// MSI controller selection; see [`GicMsiConfig`].
    pub gic_msi: GicMsiConfig,
}
377
/// GIC configuration for the virtual machine.
///
/// The variant selects the GIC version. `None` inner config means use
/// defaults for that version's addresses.
#[derive(Debug, Protobuf, Clone)]
pub enum GicConfig {
    /// GICv2 with optional address overrides; see [`GicV2Config`].
    V2(Option<GicV2Config>),
    /// GICv3 with optional address overrides; see [`GicV3Config`].
    V3(Option<GicV3Config>),
}
389
/// GICv2-specific address configuration.
#[derive(Debug, Protobuf, Clone)]
pub struct GicV2Config {
    /// Base address of the GIC distributor.
    pub gic_distributor_base: u64,
    /// Base address of the GIC CPU interface.
    pub cpu_interface_base: u64,
}
396
/// GICv3-specific address configuration.
#[derive(Debug, Protobuf, Clone)]
pub struct GicV3Config {
    /// Base address of the GIC distributor.
    pub gic_distributor_base: u64,
    /// Base address of the GIC redistributors.
    pub gic_redistributors_base: u64,
}
403
/// Architecture-specific topology settings.
///
/// Used as the optional `arch` field of [`ProcessorTopologyConfig`].
#[derive(Debug, Protobuf, Clone)]
pub enum ArchTopologyConfig {
    /// x86 settings; see [`X86TopologyConfig`].
    X86(X86TopologyConfig),
    /// aarch64 settings; see [`Aarch64TopologyConfig`].
    Aarch64(Aarch64TopologyConfig),
}
409
/// Per-node memory allocation configuration.
///
/// Used as the optional `mem` field of [`NumaNode`].
#[derive(Debug, Clone, Copy, MeshPayload)]
pub struct MemoryConfig {
    /// Memory size — presumably in bytes; confirm against the allocator.
    pub mem_size: u64,
    // NOTE(review): the four flags below are allocation options whose exact
    // effect is defined by the memory backing code, not by this file.
    pub prefetch_memory: bool,
    pub private_memory: bool,
    pub transparent_hugepages: bool,
    pub hugepages: bool,
    /// Optional hugepage size — presumably in bytes and only meaningful when
    /// `hugepages` is set; confirm.
    pub hugepage_size: Option<u64>,
    /// Host physical NUMA node to bind this allocation to (Linux:
    /// `mbind(MPOL_BIND)`). `None` means OS default placement.
    pub host_numa_node: Option<u32>,
}
423
/// Virtual NUMA topology for the VM.
#[derive(Debug, MeshPayload)]
pub struct NumaTopology {
    /// NUMA nodes. The vnode ID is the index into this vector.
    pub nodes: Vec<NumaNode>,
    /// Inter-node distances for the SLIT. If empty, defaults are used
    /// (10 for self, 20 for cross-node). See [`NumaDistance`].
    pub distances: Vec<NumaDistance>,
}
433
/// A single virtual NUMA node.
///
/// A node's ID is its index in [`NumaTopology::nodes`].
#[derive(Debug, MeshPayload)]
pub struct NumaNode {
    /// Memory allocation for this node. `None` means a CPU-only or
    /// device-only node.
    pub mem: Option<MemoryConfig>,
    /// VP assignment for this node; see [`VpAssignment`].
    pub vps: VpAssignment,
}
443
/// How VPs are assigned to a NUMA node.
///
/// Used as the `vps` field of [`NumaNode`].
#[derive(Debug, MeshPayload)]
pub enum VpAssignment {
    /// Assign VPs to nodes by round-robining sockets over the CPU-bearing
    /// nodes only: a VP with socket ID `vp_index / vps_per_socket` belongs to
    /// the `(vp_index / vps_per_socket) % num_cpu_nodes`-th `FromTopology`
    /// node. `vps_per_socket` comes from `ProcessorTopologyConfig`;
    /// `num_cpu_nodes` is the number of `FromTopology` nodes, so `Empty`
    /// (CPU-less) nodes are skipped and do not affect the distribution.
    FromTopology,
    /// Explicit VP indices assigned to this node.
    Explicit(Vec<u32>),
    /// A CPU-less node: no VPs are assigned to it. Unlike `Explicit`, this
    /// may be combined with `FromTopology` nodes, so a memory- or
    /// device-only node can be declared without forcing every other node to
    /// spell out its VP set.
    Empty,
}
462
/// An inter-node distance entry for the ACPI SLIT.
///
/// Entries are listed in [`NumaTopology::distances`].
#[derive(Debug, MeshPayload)]
pub struct NumaDistance {
    /// Source node index.
    pub src: u32,
    /// Destination node index.
    pub dst: u32,
    /// Distance value (10 = local, 20 = default cross-node, 255 = unreachable).
    pub distance: u8,
}
473
/// VMBus configuration.
///
/// Used for both the `vmbus` and `vtl2_vmbus` fields of [`Config`].
#[derive(Debug, MeshPayload, Default)]
pub struct VmbusConfig {
    /// Optional Unix listener for vsock.
    pub vsock_listener: Option<unix_socket::UnixListener>,
    /// Optional vsock path.
    // NOTE(review): presumably the filesystem path associated with
    // `vsock_listener` — confirm against the vsock relay code.
    pub vsock_path: Option<String>,
    /// Optional maximum VMBus version.
    // NOTE(review): the version encoding is not visible in this file.
    pub vmbus_max_version: Option<u32>,
    /// Optional VMBus proxy handle (Windows builds only).
    #[cfg(windows)]
    pub vmbusproxy_handle: Option<vmbus_proxy::ProxyHandle>,
    // NOTE(review): presumably enables redirecting VMBus to VTL2 — confirm.
    pub vtl2_redirect: bool,
}
483
/// Hypervisor-related settings for the VM.
#[derive(Debug, MeshPayload, Default)]
pub struct HypervisorConfig {
    // NOTE(review): presumably enables the Hyper-V guest interface — confirm
    // against the partition setup code.
    pub with_hv: bool,
    /// Optional VTL2 configuration; see [`Vtl2Config`].
    pub with_vtl2: Option<Vtl2Config>,
    /// Optional isolation type for the partition; see [`IsolationType`].
    pub with_isolation: Option<IsolationType>,
}
490
/// Configuration for a kernel-mode VM NIC.
///
/// [`Config`] only carries these on Windows builds (`kernel_vmnics` is
/// `#[cfg(windows)]`), although this type itself is not gated.
#[derive(Debug, MeshPayload)]
pub struct KernelVmNicConfig {
    /// Instance ID of the NIC.
    pub instance_id: Guid,
    /// MAC address of the NIC.
    pub mac_address: MacAddress,
    /// Switch and port the NIC is connected to; see [`SwitchPortId`].
    pub switch_port_id: SwitchPortId,
}
497
/// Identifies a port on a switch by a pair of GUIDs.
#[derive(Clone, Debug, MeshPayload)]
pub struct SwitchPortId {
    /// GUID of the switch.
    pub switch: Guid,
    /// GUID of the port.
    pub port: Guid,
}
503
/// Default PCAT boot order: optical, hard drive, network, then floppy.
///
/// Has the same shape as the `boot_order` field of [`LoadMode::Pcat`].
pub const DEFAULT_PCAT_BOOT_ORDER: [PcatBootDevice; 4] = [
    PcatBootDevice::Optical,
    PcatBootDevice::HardDrive,
    PcatBootDevice::Network,
    PcatBootDevice::Floppy,
];
510
511#[derive(MeshPayload, Debug, Clone, Copy, PartialEq)]
512pub enum PcatBootDevice {
513    Floppy,
514    HardDrive,
515    Optical,
516    Network,
517}
518
/// The bus type a virtio device is exposed on.
///
/// Paired with each device resource in [`Config::virtio_devices`].
#[derive(Eq, PartialEq, Debug, Copy, Clone, MeshPayload)]
pub enum VirtioBus {
    /// Virtio over MMIO.
    Mmio,
    /// Virtio over PCI.
    Pci,
}
524
/// Policy for the partition when mapping VTL0 memory late.
///
/// Converts into `virt::LateMapVtl0MemoryPolicy` variant-for-variant via
/// `From`.
#[derive(Eq, PartialEq, Debug, Copy, Clone, MeshPayload)]
pub enum LateMapVtl0MemoryPolicy {
    /// Halt execution of the VP if VTL0 memory is accessed.
    Halt,
    /// Log the error but emulate the access with the instruction emulator.
    Log,
    /// Inject an exception into the guest.
    InjectException,
}
535
536impl From<LateMapVtl0MemoryPolicy> for virt::LateMapVtl0MemoryPolicy {
537    fn from(value: LateMapVtl0MemoryPolicy) -> Self {
538        match value {
539            LateMapVtl0MemoryPolicy::Halt => virt::LateMapVtl0MemoryPolicy::Halt,
540            LateMapVtl0MemoryPolicy::Log => virt::LateMapVtl0MemoryPolicy::Log,
541            LateMapVtl0MemoryPolicy::InjectException => {
542                virt::LateMapVtl0MemoryPolicy::InjectException
543            }
544        }
545    }
546}
547
/// Configuration for VTL2.
///
/// NOTE: This is distinct from `virt::Vtl2Config` to keep an abstraction
/// between the virt crate and this crate. Users should not be specifying
/// virt crate configuration directly.
#[derive(Debug, Clone, MeshPayload)]
pub struct Vtl2Config {
    /// Enable the VTL0 alias map. This maps VTL0's view of memory in VTL2 at
    /// the highest legal physical address bit.
    pub vtl0_alias_map: bool,
    /// If set, map VTL0 memory late after VTL2 has started. The current
    /// heuristic is to defer mapping VTL0 memory until the first
    /// `HvModifyVtlProtectionMask` hypercall is made.
    ///
    /// The contained [`LateMapVtl0MemoryPolicy`] is the policy for the
    /// partition while VTL0 memory is mapped late.
    pub late_map_vtl0_memory: Option<LateMapVtl0MemoryPolicy>,
}
563
/// Isolation type for a partition.
///
/// Converts into `virt::IsolationType` via `From`.
#[derive(Eq, PartialEq, Debug, Copy, Clone, MeshPayload)]
pub enum IsolationType {
    /// VBS isolation.
    Vbs,
}
569
570impl From<IsolationType> for virt::IsolationType {
571    fn from(value: IsolationType) -> Self {
572        match value {
573            IsolationType::Vbs => Self::Vbs,
574        }
575    }
576}
577
/// Which VTL to assign a particular device to.
#[derive(Copy, Clone, Debug, PartialEq, Eq, MeshPayload)]
pub enum DeviceVtl {
    /// Assign the device to VTL0.
    Vtl0,
    /// Assign the device to VTL1.
    Vtl1,
    /// Assign the device to VTL2.
    Vtl2,
}
585
/// UEFI console mode selection.
///
/// Used as the optional `uefi_console_mode` field of [`LoadMode::Uefi`].
// NOTE(review): the variant descriptions below are inferred from the variant
// names; confirm against the UEFI firmware configuration code.
#[derive(Copy, Clone, Debug, MeshPayload)]
pub enum UefiConsoleMode {
    /// Presumably the firmware's default console.
    Default,
    /// Presumably the COM1 serial port.
    Com1,
    /// Presumably the COM2 serial port.
    Com2,
    /// Presumably no console.
    None,
}
593
/// EFI diagnostics log level selection.
///
/// Used as the `efi_diagnostics_log_level` field of [`Config`]. The default
/// is [`EfiDiagnosticsLogLevelType::Default`].
#[derive(Copy, Clone, Debug, MeshPayload, Default)]
pub enum EfiDiagnosticsLogLevelType {
    /// Default log level
    #[default]
    Default,
    /// Include INFO logs
    Info,
    /// All logs
    Full,
}
603}