1use crate::emuplat;
5use crate::partition::BindHvliteVp;
6use crate::partition::HvlitePartition;
7use crate::vmgs_non_volatile_store::HvLiteVmgsNonVolatileStore;
8use crate::worker::rom::RomBuilder;
9use acpi::dsdt;
10use anyhow::Context;
11use cfg_if::cfg_if;
12use chipset_device_resources::IRQ_LINE_SET;
13use chipset_resources::LEGACY_CHIPSET_PCI_BUS_NAME;
14use debug_ptr::DebugPtr;
15use disk_backend::Disk;
16use disk_backend::resolve::ResolveDiskParameters;
17use firmware_uefi::LogLevel;
18use firmware_uefi::UefiCommandSet;
19use floppy_resources::FloppyDiskConfig;
20use futures::FutureExt;
21use futures::StreamExt;
22use futures::executor::block_on;
23use futures::future::try_join_all;
24use futures_concurrency::prelude::*;
25use guestmem::GuestMemory;
26use hvdef::HV_PAGE_SIZE;
27use hvdef::Vtl;
28use hypervisor_resources::HypervisorKind;
29use ide_resources::GuestMedia;
30use ide_resources::IdeDeviceConfig;
31use igvm::IgvmFile;
32use input_core::InputData;
33use input_core::MultiplexedInputHandle;
34use inspect::Inspect;
35use local_clock::LocalClockDelta;
36use membacking::GuestMemoryBuilder;
37use membacking::GuestMemoryManager;
38use membacking::SharedMemoryBacking;
39use memory_range::MemoryRange;
40use mesh::MeshPayload;
41use mesh::error::RemoteError;
42use mesh::payload::Protobuf;
43use mesh::payload::message::ProtobufMessage;
44use mesh_worker::Worker;
45use mesh_worker::WorkerId;
46use mesh_worker::WorkerRpc;
47use missing_dev::MissingDevManifest;
48use openvmm_defs::config::Aarch64TopologyConfig;
49use openvmm_defs::config::ArchTopologyConfig;
50use openvmm_defs::config::Config;
51use openvmm_defs::config::DeviceVtl;
52use openvmm_defs::config::EfiDiagnosticsLogLevelType;
53use openvmm_defs::config::GicConfig;
54use openvmm_defs::config::HypervisorConfig;
55use openvmm_defs::config::LoadMode;
56use openvmm_defs::config::MemoryConfig;
57use openvmm_defs::config::PcieDeviceConfig;
58use openvmm_defs::config::PcieRootComplexConfig;
59use openvmm_defs::config::PcieSwitchConfig;
60use openvmm_defs::config::PmuGsivConfig;
61use openvmm_defs::config::ProcessorTopologyConfig;
62use openvmm_defs::config::VirtioBus;
63use openvmm_defs::config::VmbusConfig;
64use openvmm_defs::config::VpciDeviceConfig;
65use openvmm_defs::config::Vtl2BaseAddressType;
66use openvmm_defs::config::Vtl2Config;
67use openvmm_defs::config::X2ApicConfig;
68use openvmm_defs::config::X86TopologyConfig;
69use openvmm_defs::rpc::PulseSaveRestoreError;
70use openvmm_defs::rpc::VmRpc;
71use openvmm_defs::worker::VM_WORKER;
72use openvmm_defs::worker::VmWorkerParameters;
73use openvmm_pcat_locator::RomFileLocation;
74use pal_async::DefaultDriver;
75use pal_async::DefaultPool;
76use pal_async::local::block_with_io;
77use pal_async::task::Spawn;
78use pal_async::task::Task;
79use pci_core::PciInterruptPin;
80use pcie::root::GenericPcieRootComplex;
81use pcie::root::GenericPcieRootPortDefinition;
82use pcie::switch::GenericPcieSwitch;
83use scsi_core::ResolveScsiDeviceHandleParams;
84use scsidisk::SimpleScsiDisk;
85use scsidisk::atapi_scsi::AtapiScsiDisk;
86use serial_16550_resources::ComPort;
87use state_unit::SavedStateUnit;
88use state_unit::SpawnedUnit;
89use state_unit::StateUnits;
90use std::fs::File;
91use std::sync::Arc;
92use std::thread;
93use std::thread::JoinHandle;
94use storvsp::ScsiControllerDisk;
95use virt::ProtoPartition;
96use virt::VpIndex;
97use virtio::PciInterruptModel;
98use virtio::VirtioMmioDevice;
99use virtio::VirtioPciDevice;
100use virtio::resolve::VirtioResolveInput;
101use vm_loader::initial_regs::initial_regs;
102use vm_resource::Resource;
103use vm_resource::ResourceResolver;
104use vm_resource::kind::DiskHandleKind;
105use vm_resource::kind::KeyboardInputHandleKind;
106use vm_resource::kind::MouseInputHandleKind;
107use vm_resource::kind::VirtioDeviceHandle;
108use vm_resource::kind::VmbusDeviceHandleKind;
109use vm_topology::memory::MemoryLayout;
110use vm_topology::pcie::PcieHostBridge;
111use vm_topology::processor::ArchTopology;
112use vm_topology::processor::ProcessorTopology;
113use vm_topology::processor::TopologyBuilder;
114use vm_topology::processor::aarch64::Aarch64Topology;
115use vm_topology::processor::aarch64::GicVersion;
116use vm_topology::processor::x86::X86Topology;
117use vmbus_channel::channel::VmbusDevice;
118use vmbus_server::HvsockRelayChannel;
119use vmbus_server::VmbusServer;
120use vmbus_server::hvsock::HvsockRelay;
121use vmcore::save_restore::SavedStateRoot;
122use vmcore::vm_task::VmTaskDriverSource;
123use vmcore::vm_task::thread::ThreadDriverBackend;
124use vmcore::vmtime::VmTime;
125use vmcore::vmtime::VmTimeKeeper;
126use vmcore::vmtime::VmTimeSource;
127use vmgs_resources::GuestStateEncryptionPolicy;
128use vmgs_resources::VmgsResource;
129use vmm_core::acpi_builder::AcpiTablesBuilder;
130use vmm_core::input_distributor::InputDistributor;
131use vmm_core::partition_unit::Halt;
132use vmm_core::partition_unit::PartitionUnit;
133use vmm_core::partition_unit::PartitionUnitParams;
134use vmm_core::partition_unit::block_on_vp;
135use vmm_core::vmbus_unit::ChannelUnit;
136use vmm_core::vmbus_unit::VmbusServerHandle;
137use vmm_core::vmbus_unit::offer_channel_unit;
138use vmm_core::vmbus_unit::offer_vmbus_device_handle_unit;
139use vmm_core_defs::HaltReason;
140use vmotherboard::BaseChipsetBuilder;
141use vmotherboard::BaseChipsetBuilderOutput;
142use vmotherboard::ChipsetDeviceHandle;
143use vmotherboard::ChipsetDevices;
144use vmotherboard::LegacyPciChipsetDeviceHandle;
145use vmotherboard::options::BaseChipsetDevices;
146use vmotherboard::options::BaseChipsetFoundation;
147use vmotherboard::options::BaseChipsetManifest;
148use vmotherboard::options::VmChipsetCapabilities;
149#[cfg(all(windows, feature = "virt_whp"))]
150use vpci::bus::VpciBus;
151use watchdog_core::platform::BaseWatchdogPlatform;
152use watchdog_core::platform::WatchdogCallback;
153use watchdog_core::platform::WatchdogPlatform;
154
/// I/O port base of the ACPI power-management (PM) register block.
const PM_BASE: u16 = 0x400;
/// Interrupt line used for ACPI system events (conventionally IRQ 9 on x86).
const SYSTEM_IRQ_ACPI: u32 = 9;

/// I/O port for the watchdog device described by the ACPI WDAT table
/// (see `watchdog_core` imports above; confirm against the WDAT builder).
const WDAT_PORT: u16 = 0x30;
159
160pub fn new_device_thread() -> (JoinHandle<()>, DefaultDriver) {
162 DefaultPool::spawn_on_thread("basic_device_thread")
163}
164
165impl Manifest {
166 fn from_config(config: Config) -> Self {
167 Self {
168 load_mode: config.load_mode,
169 floppy_disks: config.floppy_disks,
170 ide_disks: config.ide_disks,
171 pcie_root_complexes: config.pcie_root_complexes,
172 pcie_devices: config.pcie_devices,
173 pcie_switches: config.pcie_switches,
174 vpci_devices: config.vpci_devices,
175 hypervisor: config.hypervisor,
176 memory: config.memory,
177 processor_topology: config.processor_topology,
178 chipset: config.chipset,
179 #[cfg(windows)]
180 kernel_vmnics: config.kernel_vmnics,
181 input: config.input,
182 framebuffer: config.framebuffer,
183 vga_firmware: config.vga_firmware,
184 vtl2_gfx: config.vtl2_gfx,
185 virtio_devices: config.virtio_devices,
186 vmbus: config.vmbus,
187 vtl2_vmbus: config.vtl2_vmbus,
188 #[cfg(all(windows, feature = "virt_whp"))]
189 vpci_resources: config.vpci_resources,
190 vmgs: config.vmgs,
191 secure_boot_enabled: config.secure_boot_enabled,
192 custom_uefi_vars: config.custom_uefi_vars,
193 firmware_event_send: config.firmware_event_send,
194 debugger_rpc: config.debugger_rpc,
195 vmbus_devices: config.vmbus_devices,
196 chipset_devices: config.chipset_devices,
197 pci_chipset_devices: config.pci_chipset_devices,
198 chipset_capabilities: config.chipset_capabilities,
199 generation_id_recv: config.generation_id_recv,
200 rtc_delta_milliseconds: config.rtc_delta_milliseconds,
201 automatic_guest_reset: config.automatic_guest_reset,
202 efi_diagnostics_log_level: match config.efi_diagnostics_log_level {
203 EfiDiagnosticsLogLevelType::Default => LogLevel::make_default(),
204 EfiDiagnosticsLogLevelType::Info => LogLevel::make_info(),
205 EfiDiagnosticsLogLevelType::Full => LogLevel::make_full(),
206 },
207 }
208 }
209}
210
/// Worker-internal VM configuration, produced from the public [`Config`] by
/// [`Manifest::from_config`].
///
/// Derives `MeshPayload` so it can be transferred across process boundaries
/// (e.g. when the worker is restarted via [`RestartState`]).
#[derive(MeshPayload)]
pub struct Manifest {
    // How the guest firmware/OS is loaded (UEFI, PCAT, IGVM, ...).
    load_mode: LoadMode,
    // Storage devices.
    floppy_disks: Vec<FloppyDiskConfig>,
    ide_disks: Vec<IdeDeviceConfig>,
    // PCIe topology and devices.
    pcie_root_complexes: Vec<PcieRootComplexConfig>,
    pcie_devices: Vec<PcieDeviceConfig>,
    pcie_switches: Vec<PcieSwitchConfig>,
    vpci_devices: Vec<VpciDeviceConfig>,
    memory: MemoryConfig,
    processor_topology: ProcessorTopologyConfig,
    hypervisor: HypervisorConfig,
    chipset: BaseChipsetManifest,
    #[cfg(windows)]
    kernel_vmnics: Vec<openvmm_defs::config::KernelVmNicConfig>,
    // Keyboard/mouse input events arriving from the client.
    input: mesh::Receiver<InputData>,
    framebuffer: Option<framebuffer::Framebuffer>,
    vga_firmware: Option<RomFileLocation>,
    vtl2_gfx: bool,
    virtio_devices: Vec<(VirtioBus, Resource<VirtioDeviceHandle>)>,
    // Per-VTL vmbus configuration; `None` disables that vmbus server.
    vmbus: Option<VmbusConfig>,
    vtl2_vmbus: Option<VmbusConfig>,
    #[cfg(all(windows, feature = "virt_whp"))]
    vpci_resources: Vec<virt_whp::device::DeviceHandle>,
    // Guest state store (VMGS) backing, if any.
    vmgs: Option<VmgsResource>,
    secure_boot_enabled: bool,
    custom_uefi_vars: firmware_uefi_custom_vars::CustomVars,
    firmware_event_send: Option<mesh::Sender<get_resources::ged::FirmwareEvent>>,
    debugger_rpc: Option<mesh::Receiver<vmm_core_defs::debug_rpc::DebugRequest>>,
    vmbus_devices: Vec<(DeviceVtl, Resource<VmbusDeviceHandleKind>)>,
    chipset_devices: Vec<ChipsetDeviceHandle>,
    pci_chipset_devices: Vec<LegacyPciChipsetDeviceHandle>,
    chipset_capabilities: VmChipsetCapabilities,
    // Source of generation-id updates (16-byte values), if provided.
    generation_id_recv: Option<mesh::Receiver<[u8; 16]>>,
    rtc_delta_milliseconds: i64,
    automatic_guest_reset: bool,
    efi_diagnostics_log_level: LogLevel,
}
253
/// Root saved-state payload for the whole VM.
#[derive(Protobuf, SavedStateRoot)]
#[mesh(package = "openvmm")]
pub struct SavedState {
    /// Saved state for each state unit in the VM.
    #[mesh(1)]
    pub units: Vec<SavedStateUnit>,
}
260
261async fn open_simple_disk(
262 resolver: &ResourceResolver,
263 disk_type: Resource<DiskHandleKind>,
264 read_only: bool,
265 driver_source: &VmTaskDriverSource,
266) -> anyhow::Result<Disk> {
267 let disk = resolver
268 .resolve(
269 disk_type,
270 ResolveDiskParameters {
271 read_only,
272 driver_source,
273 },
274 )
275 .await?;
276 Ok(disk.0)
277}
278
/// State handed to a fresh worker when the VM worker is restarted in place
/// (see [`Worker::restart`] on [`VmWorker`]).
#[derive(MeshPayload)]
pub struct RestartState {
    hypervisor: Resource<HypervisorKind>,
    manifest: Manifest,
    // Whether the VM was running at restart time, so the new worker can
    // resume it automatically.
    running: bool,
    saved_state: SavedState,
    shared_memory: Option<SharedMemoryBacking>,
    rpc: mesh::Receiver<VmRpc>,
    notify: mesh::Sender<HaltReason>,
}
289
// Records a pointer to the most recently loaded VM. Only stored here (see
// `VmWorker::new`/`restart`); `DebugPtr` presumably makes it discoverable
// from a debugger — see the `debug_ptr` crate.
static LOADED_VM: DebugPtr<LoadedVm> = DebugPtr::new();
293
/// The mesh worker hosting a single VM.
pub struct VmWorker {
    // The fully loaded VM.
    vm: LoadedVm,
    // VM-level RPC requests from the client.
    rpc: mesh::Receiver<VmRpc>,
    // Thread hosting the basic-device driver; joined when `run` returns.
    device_thread: JoinHandle<()>,
}
300
impl Worker for VmWorker {
    type Parameters = VmWorkerParameters;
    type State = RestartState;
    const ID: WorkerId<Self::Parameters> = VM_WORKER;

    /// Creates a new VM worker: spawns the device thread, resolves the
    /// hypervisor backend, builds the partition and memory
    /// ([`InitializedVm`]), then loads devices and (optionally) saved state.
    fn new(parameters: Self::Parameters) -> anyhow::Result<Self> {
        let (device_thread, device_driver) = new_device_thread();

        let manifest = Manifest::from_config(parameters.cfg);

        // No async executor exists yet, so resolution is done with block_on.
        let hypervisor = block_on(ResourceResolver::new().resolve(parameters.hypervisor, ()))
            .context("failed to resolve hypervisor backend")?;

        // Reconstitute the shared memory backing from the transferred fd,
        // if the VM's memory is backed by an existing shared mapping.
        let shared_memory = parameters
            .shared_memory
            .map(|fd| SharedMemoryBacking::from_mappable(fd.into()));

        let vm = block_on(InitializedVm::new(
            VmTaskDriverSource::new(ThreadDriverBackend::new(device_driver)),
            hypervisor.0,
            manifest,
            shared_memory,
        ))?;
        // Saved state arrives as an encoded message; decode it if present.
        let saved_state = parameters
            .saved_state
            .map(|m| m.parse())
            .transpose()
            .context("failed to decode saved state")?;

        // `load` performs IO, so run it under an IO-capable executor.
        let vm = block_with_io(|_| vm.load(saved_state, parameters.notify))?;

        // Publish the VM pointer for debugger discovery.
        LOADED_VM.store(&vm);

        Ok(Self {
            vm,
            rpc: parameters.rpc,
            device_thread,
        })
    }

    /// Reconstructs the worker from [`RestartState`] (e.g. after a worker
    /// process restart), restoring saved state and resuming the VM if it was
    /// running before the restart.
    fn restart(state: Self::State) -> anyhow::Result<Self> {
        let RestartState {
            hypervisor,
            manifest,
            running,
            saved_state,
            shared_memory,
            rpc,
            notify,
        } = state;
        let (device_thread, device_driver) = new_device_thread();

        let hypervisor = block_on(ResourceResolver::new().resolve(hypervisor, ()))
            .context("failed to resolve hypervisor backend")?;

        let vm = block_on(InitializedVm::new(
            VmTaskDriverSource::new(ThreadDriverBackend::new(device_driver)),
            hypervisor.0,
            manifest,
            shared_memory,
        ))?;
        pal_async::local::block_on(async {
            // Restart always restores from the transferred saved state.
            let mut vm = vm.load(Some(saved_state), notify).await?;

            LOADED_VM.store(&vm);

            if running {
                vm.resume().await;
            }
            Ok(Self {
                vm,
                rpc,
                device_thread,
            })
        })
    }

    /// Runs the VM until it exits, then joins the device thread.
    fn run(self, worker_rpc: mesh::Receiver<WorkerRpc<Self::State>>) -> anyhow::Result<()> {
        DefaultPool::run_with(async |driver| {
            // Rebind the closure parameter so it can be borrowed below.
            let driver = driver;
            self.vm.run(&driver, self.rpc, worker_rpc).await
        });
        self.device_thread.join().unwrap();
        Ok(())
    }
}
387
/// A VM whose partition, processors, and memory have been created, but whose
/// devices have not yet been instantiated; `load` turns this into a
/// [`LoadedVm`].
pub(crate) struct InitializedVm {
    partition: Arc<dyn HvlitePartition>,
    // One binder per virtual processor, consumed when VPs are started.
    vps: Vec<Box<dyn BindHvliteVp>>,
    vmtime_keeper: VmTimeKeeper,
    vmtime_source: VmTimeSource,
    memory_manager: GuestMemoryManager,
    gm: GuestMemory,
    // The remaining manifest, consumed during `load`.
    cfg: Manifest,
    mem_layout: MemoryLayout,
    processor_topology: ProcessorTopology,
    // Parsed IGVM file, present only when loading via `LoadMode::Igvm`.
    igvm_file: Option<IgvmFile>,
    driver_source: VmTaskDriverSource,
}
403
/// Builds a concrete [`ProcessorTopology`] for architecture `T` from a
/// processor topology config, consulting the platform for defaults.
trait BuildTopology<T: ArchTopology + Inspect> {
    fn to_topology(
        &self,
        platform_info: &virt::PlatformInfo,
    ) -> anyhow::Result<ProcessorTopology<T>>;
}
410
/// Inverse of [`BuildTopology`]: recovers a [`ProcessorTopologyConfig`]
/// from an effective processor topology.
trait ExtractTopologyConfig {
    fn to_config(&self) -> ProcessorTopologyConfig;
}
414
415impl ExtractTopologyConfig for ProcessorTopology<X86Topology> {
416 fn to_config(&self) -> ProcessorTopologyConfig {
417 ProcessorTopologyConfig {
418 proc_count: self.vp_count(),
419 vps_per_socket: Some(self.reserved_vps_per_socket()),
420 enable_smt: Some(self.smt_enabled()),
421 arch: Some(ArchTopologyConfig::X86(X86TopologyConfig {
422 apic_id_offset: self.vp_arch(VpIndex::BSP).apic_id,
423 x2apic: match self.apic_mode() {
424 vm_topology::processor::x86::ApicMode::XApic => X2ApicConfig::Unsupported,
425 vm_topology::processor::x86::ApicMode::X2ApicSupported => {
426 X2ApicConfig::Supported
427 }
428 vm_topology::processor::x86::ApicMode::X2ApicEnabled => X2ApicConfig::Enabled,
429 },
430 })),
431 }
432 }
433}
434
#[cfg(guest_arch = "x86_64")]
impl BuildTopology<X86Topology> for ProcessorTopologyConfig {
    /// Builds the x86 processor topology, starting from the host topology
    /// and applying any overrides present in this config.
    fn to_topology(
        &self,
        _platform_info: &virt::PlatformInfo,
    ) -> anyhow::Result<ProcessorTopology<X86Topology>> {
        use vm_topology::processor::x86::X2ApicState;

        let arch = match &self.arch {
            Some(ArchTopologyConfig::X86(arch)) => arch.clone(),
            None => Default::default(),
            _ => anyhow::bail!("invalid architecture config"),
        };

        let mut builder = TopologyBuilder::from_host_topology()?;
        builder.apic_id_offset(arch.apic_id_offset);
        if let Some(smt) = self.enable_smt {
            builder.smt_enabled(smt);
        }
        if let Some(count) = self.vps_per_socket {
            builder.vps_per_socket(count);
        }
        // `Auto` resolves to "supported but not forced on".
        builder.x2apic(match arch.x2apic {
            X2ApicConfig::Auto | X2ApicConfig::Supported => X2ApicState::Supported,
            X2ApicConfig::Unsupported => X2ApicState::Unsupported,
            X2ApicConfig::Enabled => X2ApicState::Enabled,
        });
        Ok(builder.build(self.proc_count)?)
    }
}
469
470impl ExtractTopologyConfig for ProcessorTopology<Aarch64Topology> {
471 fn to_config(&self) -> ProcessorTopologyConfig {
472 ProcessorTopologyConfig {
473 proc_count: self.vp_count(),
474 vps_per_socket: Some(self.reserved_vps_per_socket()),
475 enable_smt: Some(self.smt_enabled()),
476 arch: Some(ArchTopologyConfig::Aarch64(Aarch64TopologyConfig {
477 gic_config: Some(match self.gic_version() {
478 GicVersion::V3 {
479 redistributors_base,
480 } => GicConfig::V3(Some(openvmm_defs::config::GicV3Config {
481 gic_distributor_base: self.gic_distributor_base(),
482 gic_redistributors_base: redistributors_base,
483 })),
484 GicVersion::V2 { cpu_interface_base } => {
485 GicConfig::V2(Some(openvmm_defs::config::GicV2Config {
486 gic_distributor_base: self.gic_distributor_base(),
487 cpu_interface_base,
488 }))
489 }
490 }),
491 pmu_gsiv: match self.pmu_gsiv() {
492 Some(gsiv) => PmuGsivConfig::Gsiv(gsiv),
493 None => PmuGsivConfig::Disabled,
494 },
495 })),
496 }
497 }
498}
499
#[cfg(guest_arch = "aarch64")]
impl BuildTopology<Aarch64Topology> for ProcessorTopologyConfig {
    /// Builds the aarch64 processor topology, filling in GIC, GICv2m, and
    /// PMU defaults for anything this config leaves unspecified.
    fn to_topology(
        &self,
        platform_info: &virt::PlatformInfo,
    ) -> anyhow::Result<ProcessorTopology<Aarch64Topology>> {
        use vm_topology::processor::aarch64::Aarch64PlatformConfig;
        use vm_topology::processor::aarch64::GicV2mInfo;

        let arch = match &self.arch {
            None => Default::default(),
            Some(ArchTopologyConfig::Aarch64(arch)) => arch.clone(),
            _ => anyhow::bail!("invalid architecture config"),
        };
        // The GICv2m MSI frame is always configured, at the default
        // addresses.
        let gic_v2m = Some(GicV2mInfo {
            frame_base: openvmm_defs::config::DEFAULT_GIC_V2M_MSI_FRAME_BASE,
            spi_base: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_BASE,
            spi_count: openvmm_defs::config::DEFAULT_GIC_V2M_SPI_COUNT,
        });
        let pmu_gsiv = match arch.pmu_gsiv {
            PmuGsivConfig::Disabled => None,
            PmuGsivConfig::Gsiv(gsiv) => Some(gsiv),
            // Defer to whatever interrupt the platform reports (may be None).
            PmuGsivConfig::Platform => platform_info.platform_gsiv,
        };

        if pmu_gsiv.is_none() {
            tracing::warn!("PMU GSIV is not set");
        }

        // Resolve distributor base and GIC version, applying defaults where
        // the config is absent or partial.
        let (gic_distributor_base, gic_version) = match &arch.gic_config {
            Some(GicConfig::V3(config)) => {
                let dist = config
                    .as_ref()
                    .map(|c| c.gic_distributor_base)
                    .unwrap_or(openvmm_defs::config::DEFAULT_GIC_DISTRIBUTOR_BASE);
                let redist = config
                    .as_ref()
                    .map(|c| c.gic_redistributors_base)
                    .unwrap_or(openvmm_defs::config::DEFAULT_GIC_REDISTRIBUTORS_BASE);
                (
                    dist,
                    GicVersion::V3 {
                        redistributors_base: redist,
                    },
                )
            }
            Some(GicConfig::V2(config)) => {
                let dist = config
                    .as_ref()
                    .map(|c| c.gic_distributor_base)
                    .unwrap_or(openvmm_defs::config::DEFAULT_GIC_DISTRIBUTOR_BASE);
                // NOTE(review): the V2 CPU interface default reuses the V3
                // redistributor base constant — looks intentional (same
                // address region) but worth confirming.
                let cpu_if = config
                    .as_ref()
                    .map(|c| c.cpu_interface_base)
                    .unwrap_or(openvmm_defs::config::DEFAULT_GIC_REDISTRIBUTORS_BASE);
                (
                    dist,
                    GicVersion::V2 {
                        cpu_interface_base: cpu_if,
                    },
                )
            }
            None => {
                // No GIC config at all: pick the version from platform
                // support, using default addresses for both.
                let dist = openvmm_defs::config::DEFAULT_GIC_DISTRIBUTOR_BASE;
                let second = openvmm_defs::config::DEFAULT_GIC_REDISTRIBUTORS_BASE;
                if platform_info.supports_gic_v3 {
                    (
                        dist,
                        GicVersion::V3 {
                            redistributors_base: second,
                        },
                    )
                } else {
                    (
                        dist,
                        GicVersion::V2 {
                            cpu_interface_base: second,
                        },
                    )
                }
            }
        };

        let platform = Aarch64PlatformConfig {
            gic_distributor_base,
            gic_version,
            gic_v2m,
            pmu_gsiv,
            virt_timer_ppi: openvmm_defs::config::DEFAULT_VIRT_TIMER_PPI,
            gic_nr_irqs: openvmm_defs::config::DEFAULT_GIC_NR_IRQS,
        };

        let mut builder = TopologyBuilder::new_aarch64(platform);
        if let Some(smt) = self.enable_smt {
            builder.smt_enabled(smt);
        }
        if let Some(count) = self.vps_per_socket {
            builder.vps_per_socket(count);
        } else {
            // Default: place all VPs in a single socket.
            builder.vps_per_socket(self.proc_count);
        }
        Ok(builder.build(self.proc_count)?)
    }
}
609
/// A fully constructed VM, with all devices instantiated and state units
/// wired up.
pub(crate) struct LoadedVm {
    // The VM's state units.
    state_units: StateUnits,
    inner: LoadedVmInner,
    // Whether the VM is currently running.
    running: bool,
}
618
/// Everything a loaded VM owns besides its state units.
struct LoadedVmInner {
    driver_source: VmTaskDriverSource,
    resolver: ResourceResolver,
    partition_unit: PartitionUnit,
    partition: Arc<dyn HvlitePartition>,
    chipset_devices: ChipsetDevices,
    // Units held only to keep them alive.
    _vmtime: SpawnedUnit<VmTimeKeeper>,
    _scsi_devices: Vec<SpawnedUnit<ChannelUnit<storvsp::StorageDevice>>>,
    memory_manager: GuestMemoryManager,
    gm: GuestMemory,
    // Per-VTL hvsock relays, when present.
    vtl0_hvsock_relay: Option<HvsockRelay>,
    vtl2_hvsock_relay: Option<HvsockRelay>,
    // Per-VTL vmbus servers, when configured.
    vmbus_server: Option<VmbusServerHandle>,
    vtl2_vmbus_server: Option<VmbusServerHandle>,
    #[cfg(windows)]
    _vmbus_proxy: Option<vmbus_server::ProxyIntegration>,
    #[cfg(windows)]
    _kernel_vmnics: Vec<vmswitch::kernel::KernelVmNic>,
    memory_cfg: MemoryConfig,
    mem_layout: MemoryLayout,
    processor_topology: ProcessorTopology,
    hypervisor_cfg: HypervisorConfig,
    vmbus_redirect: bool,
    vmbus_devices: Vec<SpawnedUnit<ChannelUnit<dyn VmbusDevice>>>,

    input_distributor: SpawnedUnit<InputDistributor>,
    vtl2_framebuffer_gpa_base: Option<u64>,

    chipset_cfg: BaseChipsetManifest,
    chipset_capabilities: VmChipsetCapabilities,
    // The following fields are only consumed on x86_64 builds.
    #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
    virtio_mmio_count: usize,
    #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
    virtio_mmio_irq: u32,
    #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
    pci_legacy_interrupts: Vec<((u8, Option<u8>), u32)>,
    firmware_event_send: Option<mesh::Sender<get_resources::ged::FirmwareEvent>>,

    load_mode: LoadMode,
    // IGVM files for the current boot and the next reset, respectively.
    igvm_file: Option<IgvmFile>,
    next_igvm_file: Option<IgvmFile>,
    _vmgs_task: Option<Task<()>>,
    vmgs_client_inspect_handle: Option<vmgs_broker::VmgsClient>,

    #[cfg(target_os = "linux")]
    vfio_inspect: Option<vfio_assigned_device::manager::VfioManagerClient>,

    // Halt reasons received from the VM, and the channel that forwards
    // notifications on to the client.
    halt_recv: mesh::Receiver<HaltReason>,
    client_notify_send: mesh::Sender<HaltReason>,
    automatic_guest_reset: bool,
    pcie_host_bridges: Vec<PcieHostBridge>,
    pcie_root_complexes: Vec<Arc<closeable_mutex::CloseableMutex<GenericPcieRootComplex>>>,
    // Hot-pluggable PCIe devices: (name, device unit, erased device).
    pcie_hotplug_devices: Vec<(
        String,
        vmotherboard::DynamicDeviceUnit,
        Arc<closeable_mutex::CloseableMutex<chipset_device_resources::ErasedChipsetDevice>>,
    )>,
}
683
/// Translates the openvmm-level VTL2 config into the `virt` crate's VTL2
/// config.
///
/// Returns `Ok(None)` when VTL2 is not configured. Late-mapped VTL0 memory
/// requires loading from an IGVM file, since the allowed ranges are derived
/// from that file's VTL2 memory layout.
fn convert_vtl2_config(
    vtl2_cfg: Option<&Vtl2Config>,
    load_mode: &LoadMode,
    igvm_file: Option<&IgvmFile>,
) -> anyhow::Result<Option<virt::Vtl2Config>> {
    let vtl2_cfg = match vtl2_cfg {
        Some(cfg) => cfg,
        None => return Ok(None),
    };

    let late_map_vtl0_memory = match vtl2_cfg.late_map_vtl0_memory {
        Some(policy) => {
            use super::vm_loaders::igvm::vtl2_memory_info;
            use virt::LateMapVtl0AllowedRanges;
            let igvm_file = igvm_file.context("vtl2 configured but not loading from igvm")?;

            let allowed_ranges = if let LoadMode::Igvm {
                vtl2_base_address, ..
            } = load_mode
            {
                let range = vtl2_memory_info(igvm_file).context("invalid igvm file")?;
                match vtl2_base_address {
                    Vtl2BaseAddressType::File => {
                        // Use the VTL2 range exactly as laid out in the file.
                        LateMapVtl0AllowedRanges::Ranges(vec![range])
                    }
                    Vtl2BaseAddressType::Absolute(base) => {
                        // Rebasing VTL2 to an absolute address requires the
                        // file to support relocations.
                        if !crate::worker::vm_loaders::igvm::supports_relocations(igvm_file) {
                            anyhow::bail!(
                                "vtl2 base address is absolute but igvm file does not support relocations"
                            );
                        }

                        // Same length as the file's range, rebased to `base`.
                        LateMapVtl0AllowedRanges::Ranges(vec![MemoryRange::new(
                            *base..(*base + range.len()),
                        )])
                    }
                    Vtl2BaseAddressType::MemoryLayout { .. } => {
                        LateMapVtl0AllowedRanges::MemoryLayout
                    }
                    Vtl2BaseAddressType::Vtl2Allocate { .. } => {
                        anyhow::bail!(
                            "late map vtl0 memory is not supported when VTL2 is doing self allocation of ram"
                        );
                    }
                }
            } else {
                anyhow::bail!("vtl2 configured but not loading from igvm");
            };

            Some(virt::LateMapVtl0MemoryConfig {
                allowed_ranges,
                policy: policy.into(),
            })
        }
        None => None,
    };

    let config = virt::Vtl2Config {
        late_map_vtl0_memory,
    };

    Ok(Some(config))
}
753
754impl InitializedVm {
755 async fn new(
757 driver_source: VmTaskDriverSource,
758 create_vm: crate::hypervisor_backend::CreateVmFn,
759 cfg: Manifest,
760 shared_memory: Option<SharedMemoryBacking>,
761 ) -> anyhow::Result<Self> {
762 create_vm(driver_source, cfg, shared_memory).await
763 }
764
765 pub(crate) async fn new_with_hypervisor<P, H>(
771 driver_source: VmTaskDriverSource,
772 hypervisor: &mut H,
773 platform_info: virt::PlatformInfo,
774 cfg: Manifest,
775 shared_memory: Option<SharedMemoryBacking>,
776 ) -> anyhow::Result<Self>
777 where
778 H: virt::Hypervisor<Partition = P>,
779 P: 'static + HvlitePartition,
780 {
781 tracing::info!(mem_size = cfg.memory.mem_size, "guest RAM config");
782
783 let vmtime_keeper = VmTimeKeeper::new(&driver_source.simple(), VmTime::from_100ns(0));
784 let vmtime_source = vmtime_keeper
785 .builder()
786 .build(&driver_source.simple())
787 .await
788 .unwrap();
789
790 let igvm_file = if let LoadMode::Igvm { file, .. } = &cfg.load_mode {
792 let igvm_file = super::vm_loaders::igvm::read_igvm_file(file)
793 .context("reading igvm file failed")?;
794 Some(igvm_file)
795 } else {
796 None
797 };
798
799 let hv_config = if cfg.hypervisor.with_hv {
800 cfg_if::cfg_if! {
801 if #[cfg(all(windows, feature = "virt_whp"))] {
802 let allow_device_assignment = !cfg.vpci_resources.is_empty();
803 } else {
804 let allow_device_assignment = false;
805 }
806 }
807
808 Some(virt::HvConfig {
809 allow_device_assignment,
810 vtl2: convert_vtl2_config(
811 cfg.hypervisor.with_vtl2.as_ref(),
812 &cfg.load_mode,
813 igvm_file.as_ref(),
814 )?,
815 })
816 } else {
817 None
818 };
819
820 let processor_topology = cfg.processor_topology.to_topology(&platform_info)?;
821
822 let proto = hypervisor
823 .new_partition(virt::ProtoPartitionConfig {
824 processor_topology: &processor_topology,
825 hv_config,
826 vmtime: &vmtime_source,
827 isolation: cfg
828 .hypervisor
829 .with_isolation
830 .map(|typ| typ.into())
831 .unwrap_or(virt::IsolationType::None),
832 })
833 .context("failed to create the prototype partition")?;
834
835 let physical_address_size = proto.max_physical_address_size();
836
837 let vtl2_range = if let LoadMode::Igvm {
839 vtl2_base_address, ..
840 } = &cfg.load_mode
841 {
842 match vtl2_base_address {
843 Vtl2BaseAddressType::File
844 | Vtl2BaseAddressType::Absolute(_)
845 | Vtl2BaseAddressType::Vtl2Allocate { .. } => None,
846 Vtl2BaseAddressType::MemoryLayout { size } => {
847 let vtl2_range = super::vm_loaders::igvm::vtl2_memory_range(
848 physical_address_size,
849 cfg.memory.mem_size,
850 &cfg.memory.mmio_gaps,
851 &cfg.memory.pci_ecam_gaps,
852 &cfg.memory.pci_mmio_gaps,
853 igvm_file
854 .as_ref()
855 .expect("igvm file should be already parsed"),
856 *size,
857 )
858 .context("unable to determine vtl2 memory range")?;
859 tracing::info!(?vtl2_range, "vtl2 memory range selected");
860
861 Some(vtl2_range)
862 }
863 }
864 } else {
865 None
866 };
867
868 let mem_layout = if let Some(ref sizes) = cfg.memory.numa_mem_sizes {
870 let total: u64 = sizes
877 .iter()
878 .copied()
879 .try_fold(0u64, |acc, s| acc.checked_add(s))
880 .context("numa memory sizes overflow")?;
881 anyhow::ensure!(
882 total == cfg.memory.mem_size,
883 "numa_mem_sizes total ({total:#x}) does not match mem_size ({:#x})",
884 cfg.memory.mem_size
885 );
886
887 MemoryLayout::new_with_numa(
888 sizes,
889 &cfg.memory.mmio_gaps,
890 &cfg.memory.pci_ecam_gaps,
891 &cfg.memory.pci_mmio_gaps,
892 vtl2_range,
893 )
894 } else {
895 MemoryLayout::new(
896 cfg.memory.mem_size,
897 &cfg.memory.mmio_gaps,
898 &cfg.memory.pci_ecam_gaps,
899 &cfg.memory.pci_mmio_gaps,
900 vtl2_range,
901 )
902 }
903 .context("invalid memory configuration")?;
904
905 if mem_layout.end_of_layout() > 1 << physical_address_size {
906 anyhow::bail!(
907 "memory layout ends at {:#x}, which exceeds the address with of {} bits",
908 mem_layout.end_of_layout(),
909 physical_address_size
910 );
911 }
912
913 let vtl0_alias_map = cfg.hypervisor.with_vtl2.as_ref().and_then(|cfg| {
918 cfg.vtl0_alias_map
919 .then_some(1 << (physical_address_size - 1))
920 });
921
922 if let Some(size) = cfg.memory.hugepage_size
923 && !cfg.memory.hugepages
924 {
925 anyhow::bail!("hugepage_size={size} requires hugepages=on");
926 }
927
928 let mut memory_builder = GuestMemoryBuilder::new();
929 memory_builder = memory_builder
930 .existing_backing(shared_memory)
931 .vtl0_alias_map(vtl0_alias_map)
932 .prefetch_ram(cfg.memory.prefetch_memory)
933 .private_memory(cfg.memory.private_memory)
934 .transparent_hugepages(cfg.memory.transparent_hugepages)
935 .x86_legacy_support(
936 matches!(cfg.load_mode, LoadMode::Pcat { .. }) || cfg.chipset.with_hyperv_vga,
937 );
938 if cfg.memory.hugepages {
939 memory_builder = memory_builder.hugepages(cfg.memory.hugepage_size);
940 }
941
942 #[cfg(all(windows, feature = "virt_whp"))]
943 if !cfg.vpci_resources.is_empty() {
944 memory_builder = memory_builder.pin_mappings(true);
945 }
946
947 cfg_if! {
948 if #[cfg(windows)] {
949 let vtl2_memory_process = if cfg.hypervisor.with_vtl2.is_some() {
950 let process = pal::windows::process::empty_process()
952 .context("could not launch a memory process for VTL2")?;
953 Some(Box::new(process) as _)
954 } else {
955 None
956 };
957 } else {
958 let vtl2_memory_process = None;
959 }
960 }
961
962 let mut memory_manager = memory_builder
963 .build(&mem_layout)
964 .await
965 .context("failed to build guest memory")?;
966
967 let gm = memory_manager
968 .client()
969 .guest_memory()
970 .await
971 .context("failed to get guest memory")?;
972 let mut cpuid = Vec::new();
973
974 if cfg.hypervisor.with_hv {
976 let confidential_vmbus = false;
977 let extended_ioapic_rte = !matches!(cfg.load_mode, LoadMode::Pcat { .. });
979 cpuid.extend(vmm_core::cpuid::hyperv_cpuid_leaves(
980 extended_ioapic_rte,
981 confidential_vmbus,
982 ));
983 }
984
985 let (partition, vps) = proto
986 .build(virt::PartitionConfig {
987 mem_layout: &mem_layout,
988 guest_memory: &gm,
989 cpuid: &cpuid,
990 vtl0_alias_map,
991 })
992 .context("failed to create the partition")?;
993
994 let vps = vps.into_iter().map(|vp| Box::new(vp) as _).collect();
995
996 let partition = Arc::new(partition);
997
998 memory_manager
999 .attach_partition(Vtl::Vtl0, &partition.memory_mapper(Vtl::Vtl0), None)
1000 .await
1001 .context("failed to attach memory to the partition")?;
1002
1003 if cfg.hypervisor.with_vtl2.is_some() {
1004 memory_manager
1005 .attach_partition(
1006 Vtl::Vtl2,
1007 &partition.memory_mapper(Vtl::Vtl2),
1008 vtl2_memory_process,
1009 )
1010 .await
1011 .context("failed to attach memory to VTL2")?;
1012 }
1013
1014 Ok(Self {
1015 partition,
1016 vps,
1017 vmtime_keeper,
1018 vmtime_source,
1019 memory_manager,
1020 gm,
1021 cfg,
1022 mem_layout,
1023 processor_topology,
1024 igvm_file,
1025 driver_source,
1026 })
1027 }
1028
1029 async fn load(
1034 self,
1035 saved_state: Option<SavedState>,
1036 client_notify_send: mesh::Sender<HaltReason>,
1037 ) -> Result<LoadedVm, anyhow::Error> {
1038 use vmotherboard::options::dev;
1039
1040 let Self {
1041 partition,
1042 vps,
1043 vmtime_keeper,
1044 vmtime_source,
1045 memory_manager,
1046 gm,
1047 cfg,
1048 mem_layout,
1049 processor_topology,
1050 igvm_file,
1051 driver_source,
1052 } = self;
1053
1054 let mut resolver = ResourceResolver::new();
1055
1056 resolver.add_async_resolver(
1057 chipset_device_worker::resolver::RemoteChipsetDeviceResolver(
1058 OpenVmmRemoteDynamicResolvers {},
1059 ),
1060 );
1061
1062 if cfg.hypervisor.with_hv {
1064 if let Some(ref_time) = partition.reference_time_source() {
1065 resolver.add_resolver(ref_time);
1066 }
1067 }
1068
1069 if cfg
1070 .vmgs
1071 .as_ref()
1072 .is_some_and(|x| !matches!(x.encryption_policy(), GuestStateEncryptionPolicy::None(_)))
1073 {
1074 unimplemented!("guest state encryption not supported on openvmm");
1075 }
1076
1077 let vmgs = match cfg.vmgs {
1078 Some(VmgsResource::Disk(disk)) => Some(
1079 vmgs::Vmgs::try_open(
1080 open_simple_disk(&resolver, disk.disk, false, &driver_source).await?,
1081 None,
1082 true,
1083 false,
1084 )
1085 .await
1086 .context("failed to open vmgs file")?,
1087 ),
1088 Some(VmgsResource::ReprovisionOnFailure(disk)) => Some(
1089 vmgs::Vmgs::try_open(
1090 open_simple_disk(&resolver, disk.disk, false, &driver_source).await?,
1091 None,
1092 true,
1093 true,
1094 )
1095 .await
1096 .context("failed to open vmgs file")?,
1097 ),
1098 Some(VmgsResource::Reprovision(disk)) => Some(
1099 vmgs::Vmgs::request_format(
1100 open_simple_disk(&resolver, disk.disk, false, &driver_source).await?,
1101 None,
1102 )
1103 .await
1104 .context("failed to format vmgs file")?,
1105 ),
1106 Some(VmgsResource::Ephemeral) => None,
1107 None => None,
1109 };
1110
1111 let (vmgs_client, vmgs_task) = if let Some(vmgs) = vmgs {
1112 let (vmgs_client, vmgs_task) =
1113 vmgs_broker::spawn_vmgs_broker(driver_source.builder().build("vmgs_broker"), vmgs);
1114 resolver.add_resolver(vmgs_client.clone());
1115 (Some(vmgs_client), Some(vmgs_task))
1116 } else {
1117 (None, None)
1118 };
1119
1120 let vmgs_client_inspect_handle = vmgs_client.clone();
1128 let vmgs_client: Option<&dyn HvLiteVmgsNonVolatileStore> =
1129 vmgs_client.as_ref().map(|x| x as _);
1130
1131 let (halt_vps, halt_request_recv) = Halt::new();
1132 let halt_vps = Arc::new(halt_vps);
1133
1134 resolver.add_resolver(vmm_core::platform_resolvers::HaltResolver(halt_vps.clone()));
1135
1136 let generation_id_recv = cfg.generation_id_recv.unwrap_or_else(|| mesh::channel().1);
1137
1138 let logger = Box::new(emuplat::firmware::MeshLogger::new(
1139 cfg.firmware_event_send.clone(),
1140 ));
1141
1142 let mapper = memory_manager.device_memory_mapper();
1143
1144 #[cfg_attr(not(guest_arch = "x86_64"), expect(unused_mut))]
1145 let mut deps_hyperv_firmware_pcat = None;
1146 let mut deps_hyperv_firmware_uefi = None;
1147 match &cfg.load_mode {
1148 LoadMode::Uefi { .. } => {
1149 let (watchdog_send, watchdog_recv) = mesh::channel();
1150 deps_hyperv_firmware_uefi = Some(dev::HyperVFirmwareUefi {
1151 config: firmware_uefi::UefiConfig {
1152 custom_uefi_vars: cfg.custom_uefi_vars,
1153 secure_boot: cfg.secure_boot_enabled,
1154 initial_generation_id: {
1155 let mut generation_id = [0; 16];
1156 getrandom::fill(&mut generation_id).expect("rng failure");
1157 generation_id
1158 },
1159 use_mmio: cfg!(not(guest_arch = "x86_64")),
1160 command_set: if cfg!(guest_arch = "x86_64") {
1161 UefiCommandSet::X64
1162 } else {
1163 UefiCommandSet::Aarch64
1164 },
1165 diagnostics_log_level: cfg.efi_diagnostics_log_level,
1166 },
1167 logger,
1168 nvram_storage: {
1169 use hcl_compat_uefi_nvram_storage::HclCompatNvram;
1170 use uefi_nvram_storage::in_memory::InMemoryNvram;
1171 use vmm_core::emuplat::hcl_compat_uefi_nvram_storage::VmgsStorageBackendAdapter;
1172
1173 match vmgs_client {
1174 Some(vmgs) => Box::new(HclCompatNvram::new(
1175 VmgsStorageBackendAdapter(
1176 vmgs.as_non_volatile_store(vmgs::FileId::BIOS_NVRAM, true)
1177 .context("failed to instantiate UEFI NVRAM store")?,
1178 ),
1179 None,
1180 )),
1181 None => Box::new(InMemoryNvram::new()),
1182 }
1183 },
1184 generation_id_recv,
1185 watchdog_platform: {
1186 use vmcore::non_volatile_store::EphemeralNonVolatileStore;
1187
1188 let store = EphemeralNonVolatileStore::new_boxed();
1190
1191 let mut base_watchdog_platform = BaseWatchdogPlatform::new(store).await?;
1193
1194 #[cfg(guest_arch = "x86_64")]
1196 let watchdog_callback = WatchdogTimeoutNmi {
1197 partition: partition.clone(),
1198 watchdog_send: Some(watchdog_send),
1199 };
1200
1201 #[cfg(guest_arch = "aarch64")]
1203 let watchdog_callback = WatchdogTimeoutReset {
1204 halt_vps: halt_vps.clone(),
1205 watchdog_send: Some(watchdog_send),
1206 };
1207
1208 base_watchdog_platform.add_callback(Box::new(watchdog_callback));
1210
1211 Box::new(base_watchdog_platform)
1212 },
1213 watchdog_recv,
1214 vsm_config: None,
1215 time_source: Box::new(local_clock::SystemTimeClock::new(
1217 LocalClockDelta::from_millis(cfg.rtc_delta_milliseconds),
1218 )),
1219 })
1220 }
1221 #[cfg(guest_arch = "x86_64")]
1222 LoadMode::Pcat {
1223 firmware,
1224 boot_order,
1225 } => {
1226 tracing::debug!(?firmware, "Loading BIOS firmware.");
1227 let rom_builder = RomBuilder::new("bios".into(), Box::new(mapper.clone()));
1228 let rom = rom_builder.build_from_file_location(firmware)?;
1229 let halt_vps = halt_vps.clone();
1231 deps_hyperv_firmware_pcat = Some(dev::HyperVFirmwarePcat {
1232 logger,
1233 generation_id_recv,
1234 rom: Some(Box::new(rom)),
1235 replay_mtrrs: Box::new(move || halt_vps.replay_mtrrs()),
1236 config: {
1237 let acpi_tables_builder = AcpiTablesBuilder {
1238 processor_topology: &processor_topology,
1239 mem_layout: &mem_layout,
1240 cache_topology: None,
1241 pcie_host_bridges: &Vec::new(),
1242 arch: vmm_core::acpi_builder::AcpiArchConfig::X86 {
1243 with_ioapic: cfg.chipset.with_generic_ioapic,
1244 with_pic: cfg.chipset_capabilities.with_pic,
1245 with_pit: cfg.chipset_capabilities.with_pit,
1246 with_psp: cfg.chipset.with_generic_psp,
1247 pm_base: PM_BASE,
1248 acpi_irq: SYSTEM_IRQ_ACPI,
1249 },
1250 };
1251 let srat = acpi_tables_builder.build_srat();
1252 firmware_pcat::config::PcatBiosConfig {
1253 processor_topology: processor_topology.clone(),
1254 mem_layout: mem_layout.clone(),
1255 srat,
1256
1257 hibernation_enabled: false,
1258 initial_generation_id: {
1259 let mut generation_id = [0; 16];
1260 getrandom::fill(&mut generation_id).expect("rng failure");
1261 generation_id
1262 },
1263 boot_order: {
1264 use firmware_pcat::config::BootDevice;
1265 use firmware_pcat::config::BootDeviceStatus;
1266 use openvmm_defs::config::PcatBootDevice;
1267 boot_order.map(|dev| BootDeviceStatus {
1268 kind: match dev {
1269 PcatBootDevice::Floppy => BootDevice::Floppy,
1270 PcatBootDevice::HardDrive => BootDevice::HardDrive,
1271 PcatBootDevice::Optical => BootDevice::Optical,
1272 PcatBootDevice::Network => BootDevice::Network,
1273 },
1274 attached: true,
1276 })
1277 },
1278 num_lock_enabled: false,
1279 smbios: firmware_pcat::config::SmbiosConstants {
1281 bios_guid: guid::Guid {
1282 data1: 0xC4066C45,
1283 data2: 0x503D,
1284 data3: 0x40E8,
1285 data4: [0xB1, 0x5C, 0x31, 0x26, 0x4E, 0x5F, 0xE1, 0xD9],
1286 },
1287 system_serial_number: "9583-9572-9874-4843-7295-1653-92".into(),
1288 base_board_serial_number: "9583-9572-9874-4843-7295-1653-92".into(),
1289 chassis_serial_number: "9583-9572-9874-4843-7295-1653-92".into(),
1290 chassis_asset_tag: "9583-9572-9874-4843-7295-1653-92".into(),
1291 bios_lock_string: "00000000000000000000000000000000".into(),
1292 processor_manufacturer: b"\0".to_vec(),
1293 processor_version: b"\0".to_vec(),
1294 cpu_info_bundle: None,
1295 },
1296 }
1297 },
1298 })
1299 }
1300 _ => {}
1301 };
1302
1303 let vtl2_framebuffer_gpa_base = if cfg.vtl2_gfx {
1304 let len = cfg
1307 .framebuffer
1308 .as_ref()
1309 .context("no framebuffer configured")?
1310 .len();
1311 let mut gpa = mem_layout.end_of_ram();
1312 for mmio in mem_layout.mmio() {
1313 if gpa < mmio.end() && mmio.start() < gpa + len as u64 {
1314 gpa = mmio.end();
1315 }
1316 }
1317 tracing::debug!("Vtl2 framebuffer gpa base: {:#x}", gpa);
1318 Some(gpa)
1319 } else {
1320 None
1321 };
1322
1323 let state_units = StateUnits::new();
1324
1325 let vmtime = state_units
1326 .add("vmtime")
1327 .spawn(driver_source.simple(), {
1328 |recv| {
1329 let mut vmtime = vmtime_keeper;
1330 async move {
1331 vmm_core::vmtime_unit::run_vmtime(&mut vmtime, recv).await;
1332 vmtime
1333 }
1334 }
1335 })
1336 .unwrap();
1337
1338 let mut input_distributor = InputDistributor::new(cfg.input);
1339 resolver.add_async_resolver::<KeyboardInputHandleKind, _, MultiplexedInputHandle, _>(
1340 input_distributor.client().clone(),
1341 );
1342 resolver.add_async_resolver::<MouseInputHandleKind, _, MultiplexedInputHandle, _>(
1343 input_distributor.client().clone(),
1344 );
1345
1346 let input_distributor = state_units
1347 .add("input")
1348 .spawn(driver_source.simple(), async |mut recv| {
1349 input_distributor.run(&mut recv).await;
1350 input_distributor
1351 })
1352 .unwrap();
1353
1354 let mut pci_legacy_interrupts = Vec::new();
1355
1356 let mut ide_drives = [[None, None], [None, None]];
1357 let mut storvsp_ide_disks = Vec::new();
1358 if cfg.chipset.with_hyperv_ide {
1359 pci_legacy_interrupts.push(((7, None), 14));
1360 pci_legacy_interrupts.push(((7, None), 15));
1361
1362 for disk_cfg in cfg.ide_disks {
1363 let path = disk_cfg.path;
1364 let media = match disk_cfg.guest_media {
1365 GuestMedia::Dvd(disk_type) => {
1366 let dvd = resolver
1367 .resolve(
1368 disk_type,
1369 ResolveScsiDeviceHandleParams {
1370 driver_source: &driver_source,
1371 },
1372 )
1373 .await
1374 .context("failed to open IDE DVD")?;
1375
1376 let scsi_disk = Arc::new(AtapiScsiDisk::new(dvd.0));
1377 ide::DriveMedia::optical_disk(scsi_disk.clone())
1378 }
1379 GuestMedia::Disk {
1380 disk_type,
1381 read_only,
1382 disk_parameters,
1383 } => {
1384 let disk =
1385 open_simple_disk(&resolver, disk_type, read_only, &driver_source)
1386 .await
1387 .context("failed to open IDE disk")?;
1388
1389 let scsi_disk = ScsiControllerDisk::new(Arc::new(SimpleScsiDisk::new(
1391 disk.clone(),
1392 disk_parameters.unwrap_or_default(),
1393 )));
1394 storvsp_ide_disks.push((path, scsi_disk));
1395 ide::DriveMedia::hard_disk(disk.clone())
1396 }
1397 };
1398
1399 let old_media = ide_drives
1400 .get_mut(path.channel as usize)
1401 .context("invalid ide channel")?
1402 .get_mut(path.drive as usize)
1403 .context("invalid ide device")?
1404 .replace(media);
1405
1406 if old_media.is_some() {
1407 anyhow::bail!(
1408 "ide drive {}:{} is already in use",
1409 path.channel,
1410 path.drive
1411 );
1412 }
1413 }
1414 }
1415
1416 let deps_hyperv_guest_watchdog = if cfg.chipset.with_hyperv_guest_watchdog {
1417 Some(dev::HyperVGuestWatchdogDeps {
1418 port_base: WDAT_PORT,
1419 watchdog_platform: {
1420 use vmcore::non_volatile_store::EphemeralNonVolatileStore;
1421
1422 let store = match vmgs_client {
1423 Some(vmgs) => vmgs
1424 .as_non_volatile_store(vmgs::FileId::GUEST_WATCHDOG, false)
1425 .context("failed to instantiate guest watchdog store")?,
1426 None => EphemeralNonVolatileStore::new_boxed(),
1427 };
1428
1429 let mut base_watchdog_platform = BaseWatchdogPlatform::new(store).await?;
1431
1432 let watchdog_callback = WatchdogTimeoutReset {
1434 halt_vps: halt_vps.clone(),
1435 watchdog_send: None, };
1438
1439 base_watchdog_platform.add_callback(Box::new(watchdog_callback));
1441
1442 Box::new(base_watchdog_platform)
1443 },
1444 })
1445 } else {
1446 None
1447 };
1448
1449 let initial_rtc_cmos = if matches!(cfg.load_mode, LoadMode::Pcat { .. }) {
1450 Some(firmware_pcat::default_cmos_values(&mem_layout))
1451 } else {
1452 None
1453 };
1454
1455 let deps_generic_cmos_rtc = (cfg.chipset.with_generic_cmos_rtc).then(|| {
1456 let time_source = Box::new(local_clock::SystemTimeClock::new(
1459 LocalClockDelta::from_millis(cfg.rtc_delta_milliseconds),
1460 ));
1461 dev::GenericCmosRtcDeps {
1462 irq: 8,
1463 time_source,
1464 century_reg_idx: 0x32, initial_cmos: initial_rtc_cmos,
1466 }
1467 });
1468
1469 #[cfg(guest_arch = "x86_64")]
1470 let deps_generic_ioapic =
1471 (cfg.chipset.with_generic_ioapic).then(|| dev::GenericIoApicDeps {
1472 num_entries: virt::irqcon::IRQ_LINES as u8,
1473 routing: Box::new(vmm_core::emuplat::ioapic::IoApicRouting(
1474 partition.clone().ioapic_routing(),
1475 )),
1476 });
1477
1478 #[cfg(guest_arch = "aarch64")]
1479 let deps_generic_ioapic = if cfg.chipset.with_generic_ioapic {
1480 anyhow::bail!("ioapic not supported on this architecture");
1481 } else {
1482 None
1483 };
1484
1485 let deps_generic_isa_dma =
1486 (cfg.chipset.with_generic_isa_dma).then_some(dev::GenericIsaDmaDeps {});
1487
1488 let mut primary_disk_drive = floppy::DriveRibbon::None;
1489 let mut secondary_disk_drive = floppy::DriveRibbon::None;
1490 if cfg.chipset.with_winbond_super_io_and_floppy_full {
1491 let mut pri_drives = Vec::new();
1492 let mut sec_drives = Vec::new();
1493 for (index, disk_cfg) in cfg.floppy_disks.into_iter().enumerate() {
1494 let FloppyDiskConfig {
1495 disk_type,
1496 read_only,
1497 } = disk_cfg;
1498
1499 let disk = open_simple_disk(&resolver, disk_type, read_only, &driver_source)
1500 .await
1501 .context("failed to open floppy disk")?;
1502 tracing::trace!("floppy opened based on config into DriveRibbon");
1503
1504 if index == 0 {
1505 pri_drives.push(disk);
1506 } else if index == 1 {
1507 sec_drives.push(disk)
1508 } else {
1509 tracing::error!("more than 2 floppy controllers are not supported");
1510 break;
1511 }
1512 }
1513
1514 primary_disk_drive = floppy::DriveRibbon::from_vec(pri_drives)?;
1515 secondary_disk_drive = floppy::DriveRibbon::from_vec(sec_drives)?;
1516 }
1517
1518 let (deps_generic_isa_floppy, deps_winbond_super_io_and_floppy_full) = match (
1521 cfg.chipset.with_generic_isa_floppy,
1522 cfg.chipset.with_winbond_super_io_and_floppy_full,
1523 ) {
1524 (true, true) => anyhow::bail!("cannot have both generic and winbond floppy"),
1525 (true, false) => {
1526 if !matches!(secondary_disk_drive, floppy::DriveRibbon::None) {
1527 anyhow::bail!("more than 1 generic floppy controller is not supported")
1528 }
1529
1530 (
1531 Some(dev::GenericIsaFloppyDeps {
1534 irq: 6,
1535 dma_channel: 2,
1536 pio_base: 0x3f0,
1537 drives: primary_disk_drive,
1538 }),
1539 None,
1540 )
1541 }
1542 (false, true) => (
1543 None,
1544 Some(dev::WinbondSuperIoAndFloppyFullDeps {
1545 primary_disk_drive,
1546 secondary_disk_drive,
1547 }),
1548 ),
1549 (false, false) => (None, None),
1550 };
1551
1552 let pci_bus_id_generic = vmotherboard::BusId::new("generic");
1553 let pci_bus_id_piix4 = vmotherboard::BusId::new(LEGACY_CHIPSET_PCI_BUS_NAME);
1554
1555 let deps_generic_pci_bus =
1556 (cfg.chipset.with_generic_pci_bus).then_some(dev::GenericPciBusDeps {
1557 bus_id: pci_bus_id_generic.clone(),
1558 pio_addr: pci_bus::standard_x86_io_ports::ADDR_START,
1559 pio_data: pci_bus::standard_x86_io_ports::DATA_START,
1560 });
1561
1562 let deps_generic_psp = (cfg.chipset.with_generic_psp).then_some(dev::GenericPspDeps {});
1563
1564 let deps_hyperv_framebuffer =
1565 (cfg.chipset.with_hyperv_framebuffer).then(|| dev::HyperVFramebufferDeps {
1566 fb_mapper: Box::new(mapper.clone()),
1567 fb: cfg.framebuffer.unwrap(),
1568 vtl2_framebuffer_gpa_base,
1569 });
1570
1571 let deps_hyperv_power_management =
1572 (cfg.chipset.with_hyperv_power_management).then_some(dev::HyperVPowerManagementDeps {
1573 acpi_irq: SYSTEM_IRQ_ACPI,
1574 pio_base: PM_BASE,
1575 pm_timer_assist: None,
1576 });
1577
1578 let deps_hyperv_vga = if cfg.chipset.with_hyperv_vga {
1579 let vga_firmware = cfg.vga_firmware.as_ref().context("no VGA BIOS file")?;
1580 let rom_builder = RomBuilder::new("vga".into(), Box::new(mapper.clone()));
1581 let rom = rom_builder.build_from_file_location(vga_firmware)?;
1582
1583 Some(dev::HyperVVgaDeps {
1584 attached_to: pci_bus_id_piix4.clone(),
1585 rom: Some(Box::new(rom)),
1586 })
1587 } else {
1588 None
1589 };
1590
1591 let deps_i440bx_host_pci_bridge =
1592 (cfg.chipset.with_i440bx_host_pci_bridge).then(|| dev::I440BxHostPciBridgeDeps {
1593 attached_to: pci_bus_id_piix4.clone(),
1594 adjust_gpa_range: Box::new(
1595 emuplat::i440bx_host_pci_bridge::ManageRamGpaRange::new(
1596 memory_manager.ram_visibility_control(),
1597 ),
1598 ),
1599 });
1600
1601 let deps_piix4_pci_bus = (cfg.chipset.with_piix4_pci_bus).then(|| dev::Piix4PciBusDeps {
1602 bus_id: pci_bus_id_piix4.clone(),
1603 });
1604
1605 let deps_piix4_cmos_rtc = (cfg.chipset.with_piix4_cmos_rtc).then(|| {
1606 let time_source = Box::new(local_clock::SystemTimeClock::new(
1609 LocalClockDelta::from_millis(cfg.rtc_delta_milliseconds),
1610 ));
1611 dev::Piix4CmosRtcDeps {
1612 time_source,
1613 initial_cmos: initial_rtc_cmos,
1614 enlightened_interrupts: true, }
1616 });
1617
1618 let [primary_channel_drives, secondary_channel_drives] = ide_drives;
1619 let deps_hyperv_ide = (cfg.chipset.with_hyperv_ide).then_some(dev::HyperVIdeDeps {
1620 attached_to: pci_bus_id_piix4.clone(),
1621 primary_channel_drives,
1622 secondary_channel_drives,
1623 });
1624
1625 let deps_piix4_power_management =
1626 (cfg.chipset.with_piix4_power_management).then_some(dev::Piix4PowerManagementDeps {
1627 attached_to: pci_bus_id_piix4.clone(),
1628 pm_timer_assist: None,
1629 });
1630
1631 let base_chipset_devices = {
1632 BaseChipsetDevices {
1633 deps_generic_cmos_rtc,
1634 deps_generic_ioapic,
1635 deps_generic_isa_dma,
1636 deps_generic_isa_floppy,
1637 deps_generic_pci_bus,
1638 deps_generic_psp,
1639 deps_hyperv_firmware_pcat,
1640 deps_hyperv_firmware_uefi,
1641 deps_hyperv_framebuffer,
1642 deps_hyperv_guest_watchdog,
1643 deps_hyperv_ide,
1644 deps_hyperv_power_management,
1645 deps_hyperv_vga,
1646 deps_i440bx_host_pci_bridge,
1647 deps_piix4_cmos_rtc,
1648 deps_piix4_pci_bus,
1649 deps_piix4_power_management,
1650 deps_underhill_vga_proxy: None,
1651 deps_winbond_super_io_and_floppy_stub: None,
1652 deps_winbond_super_io_and_floppy_full,
1653 }
1654 };
1655
1656 let BaseChipsetBuilderOutput {
1657 chipset_builder,
1658 device_interfaces: base_chipset_device_interfaces,
1659 } = BaseChipsetBuilder::new(
1660 BaseChipsetFoundation {
1661 is_restoring: false,
1662 untrusted_dma_memory: gm.clone(),
1663 trusted_vtl0_dma_memory: gm.clone(),
1669 power_event_handler: halt_vps.clone(),
1670 debug_event_handler: halt_vps.clone(),
1671 vmtime: &vmtime_source,
1672 vmtime_unit: vmtime.handle(),
1673 doorbell_registration: partition.clone().into_doorbell_registration(Vtl::Vtl0),
1674 },
1675 base_chipset_devices,
1676 )
1677 .with_expected_manifest(cfg.chipset.clone())
1678 .with_device_handles(cfg.chipset_devices)
1679 .with_pci_device_handles(cfg.pci_chipset_devices)
1680 .with_trace_unknown_pio(true) .build(&driver_source, &state_units, &resolver)
1682 .await?;
1683
1684 if cfg.chipset.with_generic_pci_bus {
1685 chipset_builder
1690 .arc_mutex_device("fake-bus-root")
1691 .on_pci_bus(pci_bus_id_generic.clone())
1692 .add(|services| {
1693 missing_dev::MissingDev::from_manifest(
1694 MissingDevManifest::new().claim_pci((0, 0, 0), 0x8086, 0x7111),
1695 &mut services.register_mmio(),
1696 &mut services.register_pio(),
1697 )
1698 })?;
1699 }
1700
1701 #[cfg(guest_arch = "aarch64")]
1703 chipset_builder.add_external_line_target(
1704 IRQ_LINE_SET,
1705 0..=vmm_core::emuplat::gic::SPI_RANGE.end() - vmm_core::emuplat::gic::SPI_RANGE.start(),
1706 *vmm_core::emuplat::gic::SPI_RANGE.start(),
1707 "gic",
1708 Arc::new(vmm_core::emuplat::gic::GicInterruptTarget::new(
1709 partition.clone().control_gic(Vtl::Vtl0),
1710 )),
1711 );
1712
1713 #[cfg(guest_arch = "x86_64")]
1715 chipset_builder.add_external_line_target(
1716 chipset_device_resources::BSP_LINT_LINE_SET,
1717 0..=1,
1718 0,
1719 "bsp",
1720 partition.clone().into_lint_target(Vtl::Vtl0),
1721 );
1722
1723 if let Some(framebuffer) = base_chipset_device_interfaces.framebuffer_local_control {
1724 resolver.add_resolver(framebuffer);
1725 }
1726
1727 let pci_inta_line = {
1728 const PCI_LEGACY_INTA_IRQ: u32 = 11;
1729 const PCI_INTA_IRQ: u32 = 16;
1730 if cfg.chipset.with_i440bx_host_pci_bridge {
1731 Some(PCI_LEGACY_INTA_IRQ)
1733 } else if cfg.chipset.with_generic_pci_bus {
1734 if cfg.chipset_capabilities.with_pic {
1738 Some(PCI_LEGACY_INTA_IRQ)
1739 } else {
1740 Some(PCI_INTA_IRQ)
1741 }
1742 } else {
1743 None
1744 }
1745 };
1746
1747 let mut scsi_devices = Vec::new();
1748 let mut vtl0_hvsock_relay = None;
1749 #[cfg(windows)]
1750 let mut vmbus_proxy = None;
1751 #[cfg(windows)]
1752 let mut kernel_vmnics = Vec::new();
1753 let mut vmbus_server = None;
1754 let mut vtl2_vmbus_server = None;
1755 let mut vtl2_hvsock_relay = None;
1756 let mut vmbus_redirect = false;
1757
1758 let (pcie_host_bridges, pcie_root_complexes) = {
1761 let mut pcie_host_bridges = Vec::new();
1762 let mut pcie_root_complexes = Vec::new();
1763
1764 for rc in cfg.pcie_root_complexes {
1765 let device_name = format!("pcie-root:{}", rc.name);
1766 let msi_conn = pci_core::msi::MsiConnection::new();
1767 let root_complex =
1768 chipset_builder
1769 .arc_mutex_device(device_name)
1770 .add(|services| {
1771 let root_port_definitions = rc
1772 .ports
1773 .into_iter()
1774 .map(|rp_cfg| GenericPcieRootPortDefinition {
1775 name: rp_cfg.name.into(),
1776 hotplug: rp_cfg.hotplug,
1777 })
1778 .collect();
1779
1780 GenericPcieRootComplex::new(
1781 &mut services.register_mmio(),
1782 rc.start_bus,
1783 rc.end_bus,
1784 rc.ecam_range,
1785 root_port_definitions,
1786 msi_conn.target(),
1787 )
1788 })?;
1789
1790 if let Some(signal_msi) = partition.as_signal_msi(Vtl::Vtl0) {
1791 msi_conn.connect(signal_msi);
1792 }
1793
1794 pcie_host_bridges.push(PcieHostBridge {
1795 index: rc.index,
1796 segment: rc.segment,
1797 start_bus: rc.start_bus,
1798 end_bus: rc.end_bus,
1799 ecam_range: rc.ecam_range,
1800 low_mmio: rc.low_mmio,
1801 high_mmio: rc.high_mmio,
1802 });
1803
1804 pcie_root_complexes.push(root_complex.clone());
1805
1806 let bus_id = vmotherboard::BusId::new(&rc.name);
1807 chipset_builder.register_weak_mutex_pcie_enumerator(bus_id, Box::new(root_complex));
1808 }
1809
1810 (pcie_host_bridges, pcie_root_complexes)
1811 };
1812
1813 for switch in cfg.pcie_switches {
1814 let device_name = format!("pcie-switch:{}", switch.name);
1815 let switch_device = chipset_builder
1816 .arc_mutex_device(device_name)
1817 .on_pcie_port(vmotherboard::BusId::new(&switch.parent_port))
1818 .add(|_services| {
1819 let definition = pcie::switch::GenericPcieSwitchDefinition {
1820 name: switch.name.clone().into(),
1821 downstream_port_count: switch.num_downstream_ports,
1822 hotplug: switch.hotplug,
1823 };
1824 GenericPcieSwitch::new(definition)
1825 })?;
1826
1827 let bus_id = vmotherboard::BusId::new(&switch.name);
1828 chipset_builder.register_weak_mutex_pcie_enumerator(bus_id, Box::new(switch_device));
1829 }
1830
1831 #[cfg(target_os = "linux")]
1834 let vfio_inspect = {
1835 let vfio_resolver = vfio_assigned_device::resolver::VfioDeviceResolver::new(
1836 driver_source.builder().build("vfio-container-mgr"),
1837 memory_manager.dma_mapper_client(),
1838 );
1839 let handle = vfio_resolver.inspect_handle();
1840 resolver.add_async_resolver::<
1841 vm_resource::kind::PciDeviceHandleKind,
1842 _,
1843 vfio_assigned_device_resources::VfioDeviceHandle,
1844 _,
1845 >(vfio_resolver);
1846 Some(handle)
1847 };
1848
1849 try_join_all(cfg.pcie_devices.into_iter().map(|dev_cfg| {
1851 let chipset_builder = &chipset_builder;
1852 let driver_source = &driver_source;
1853 let resolver = &resolver;
1854 let gm = &gm;
1855 let partition = &partition;
1856 let mapper = &mapper;
1857 async move {
1858 vmm_core::device_builder::build_pcie_device(
1859 chipset_builder,
1860 dev_cfg.port_name.into(),
1861 driver_source,
1862 resolver,
1863 gm,
1864 dev_cfg.resource,
1865 partition.clone().into_doorbell_registration(Vtl::Vtl0),
1866 Some(mapper),
1867 partition.as_signal_msi(Vtl::Vtl0),
1868 partition.irqfd(),
1869 )
1870 .await
1871 }
1872 }))
1873 .await?;
1874
1875 if let Some(vmbus_cfg) = cfg.vmbus {
1876 if !cfg.hypervisor.with_hv {
1877 anyhow::bail!("vmbus required hypervisor enlightements");
1878 }
1879
1880 let synic = partition.synic();
1881
1882 vmbus_redirect = vmbus_cfg.vtl2_redirect;
1883 let hvsock_channel = HvsockRelayChannel::new();
1884
1885 let (vtl2_vmbus, vtl2_request_send) = if let Some(vtl2_vmbus_cfg) = cfg.vtl2_vmbus {
1886 let (server_request_send, server_request_recv) = mesh::channel();
1887 let vtl2_hvsock_channel = HvsockRelayChannel::new();
1888
1889 let vmbus_driver = driver_source.simple();
1890 let vtl2_vmbus =
1891 VmbusServer::builder(vmbus_driver.clone(), synic.clone(), gm.clone())
1892 .vtl(Vtl::Vtl2)
1893 .max_version(
1894 vtl2_vmbus_cfg
1895 .vmbus_max_version
1896 .map(vmbus_core::MaxVersionInfo::new),
1897 )
1898 .hvsock_notify(Some(vtl2_hvsock_channel.server_half))
1899 .external_requests(Some(server_request_recv))
1900 .enable_mnf(true)
1901 .build()
1902 .context("failed to create VTL2 vmbus server")?;
1903
1904 let vtl2_vmbus = VmbusServerHandle::new(
1905 &vmbus_driver,
1906 state_units.add("vtl2_vmbus"),
1907 vtl2_vmbus,
1908 )
1909 .context("failed to add vmbus state unit")?;
1910
1911 let relay = HvsockRelay::new(
1912 vmbus_driver,
1913 vtl2_vmbus.control().clone(),
1914 vtl2_hvsock_channel.relay_half,
1915 vtl2_vmbus_cfg.vsock_path.map(Into::into),
1916 vtl2_vmbus_cfg.vsock_listener,
1917 )
1918 .context("failed to create vtl2 hvsock relay")?;
1919
1920 vtl2_hvsock_relay = Some(relay);
1921
1922 (Some(vtl2_vmbus), Some(server_request_send))
1923 } else {
1924 (None, None)
1925 };
1926
1927 let vmbus_driver = driver_source.simple();
1928 let vmbus = VmbusServer::builder(vmbus_driver.clone(), synic.clone(), gm.clone())
1929 .hvsock_notify(Some(hvsock_channel.server_half))
1930 .external_server(vtl2_request_send)
1931 .use_message_redirect(vmbus_cfg.vtl2_redirect)
1932 .max_version(
1933 vmbus_cfg
1934 .vmbus_max_version
1935 .map(vmbus_core::MaxVersionInfo::new),
1936 )
1937 .delay_max_version(matches!(cfg.load_mode, LoadMode::Uefi { .. }))
1938 .enable_mnf(true)
1939 .build()
1940 .context("failed to create vmbus server")?;
1941
1942 #[cfg(windows)]
1944 if let Some(proxy_handle) = vmbus_cfg.vmbusproxy_handle {
1945 vmbus_proxy =
1946 Some(
1947 vmbus_server::ProxyIntegration::builder(
1948 &vmbus_driver,
1949 proxy_handle,
1950 vmbus_server::ProxyServerInfo::new(vmbus.control()),
1951 )
1952 .vtl2_server(vtl2_vmbus.as_ref().map(|server| {
1953 vmbus_server::ProxyServerInfo::new(server.control().clone())
1954 }))
1955 .memory(Some(&gm))
1956 .build()
1957 .await
1958 .context("failed to start the vmbus proxy")?,
1959 )
1960 }
1961
1962 let vmbus = VmbusServerHandle::new(&vmbus_driver, state_units.add("vmbus"), vmbus)
1963 .context("failed to add vmbus state unit")?;
1964
1965 let relay = HvsockRelay::new(
1966 vmbus_driver,
1967 vmbus.control().clone(),
1968 hvsock_channel.relay_half,
1969 vmbus_cfg.vsock_path.map(Into::into),
1970 vmbus_cfg.vsock_listener,
1971 )
1972 .context("failed to create hvsock relay")?;
1973
1974 vtl0_hvsock_relay = Some(relay);
1975 vmbus_server = Some(vmbus);
1976 vtl2_vmbus_server = vtl2_vmbus;
1977 }
1978
/// Derives the naming/identity tuple for a directly-assigned VPCI device:
/// `(vpci bus device name, chipset device name, instance GUID, device id)`.
///
/// When `instance_id` is `None`, a fresh random GUID is minted.
#[cfg(all(windows, feature = "virt_whp"))]
fn make_ids(
    name: &str,
    instance_id: Option<guid::Guid>,
) -> (String, String, guid::Guid, u64) {
    // Prefer the caller-supplied instance ID; otherwise generate one.
    let id = match instance_id {
        Some(existing) => existing,
        None => guid::Guid::new_random(),
    };
    // Pack `data2` into bits 16..32 and `data3` (with the low 3 bits masked
    // off by 0xfff8) into the low word to form the 64-bit device id.
    let dev_id = ((id.data2 as u64) << 16) | ((id.data3 as u64) & 0xfff8);
    let vpci_bus_name = format!("vpci:{id}");
    let chipset_dev_name = format!("{name}:vpci-{id}");
    (vpci_bus_name, chipset_dev_name, id, dev_id)
}
1991
1992 {
1994 const DEFAULT_IO_QUEUE_DEPTH: u32 = 256;
1996 if let Some(vmbus) = &vmbus_server {
1997 for (path, scsi_disk) in storvsp_ide_disks {
1998 scsi_devices.push(
1999 offer_channel_unit(
2000 &driver_source.simple(),
2001 &state_units,
2002 vmbus,
2003 storvsp::StorageDevice::build_ide(
2004 &driver_source,
2005 path.channel,
2006 path.drive,
2007 scsi_disk,
2008 DEFAULT_IO_QUEUE_DEPTH,
2009 ),
2010 )
2011 .await?,
2012 );
2013 }
2014 }
2015
2016 #[cfg(windows)]
2017 for nic_config in cfg.kernel_vmnics {
2018 let mut nic = vmswitch::kernel::KernelVmNic::new(
2019 &guid::Guid::new_random(),
2020 "nic",
2021 "nic",
2022 nic_config.mac_address.into(),
2023 &nic_config.instance_id,
2024 vmbus_proxy
2025 .as_ref()
2026 .context("missing vmbusproxy handle")?
2027 .handle(),
2028 )
2029 .context("failed to create a kernel vmnic")?;
2030
2031 nic.connect(&vmswitch::kernel::SwitchPortId {
2032 switch: nic_config.switch_port_id.switch,
2033 port: nic_config.switch_port_id.port,
2034 })
2035 .context("failed to connect kernel vmnic")?;
2036
2037 nic.resume().context("failed to resume the kernel vmnic")?;
2038 kernel_vmnics.push(nic);
2039 }
2040
2041 if partition.supports_virtual_devices() {
2042 for dev_cfg in cfg.vpci_devices {
2043 let vmbus = match dev_cfg.vtl {
2044 DeviceVtl::Vtl0 => vmbus_server.as_ref().context("vmbus not enabled")?,
2045 DeviceVtl::Vtl1 => anyhow::bail!("not supported"),
2046 DeviceVtl::Vtl2 => vtl2_vmbus_server
2047 .as_ref()
2048 .context("VTL2 vmbus not enabled")?,
2049 };
2050
2051 let vtl = match dev_cfg.vtl {
2052 DeviceVtl::Vtl0 => Vtl::Vtl0,
2053 DeviceVtl::Vtl1 => Vtl::Vtl1,
2054 DeviceVtl::Vtl2 => Vtl::Vtl2,
2055 };
2056
2057 vmm_core::device_builder::build_vpci_device(
2058 &driver_source,
2059 &resolver,
2060 &gm,
2061 vmbus.control(),
2062 dev_cfg.instance_id,
2063 dev_cfg.resource,
2064 &chipset_builder,
2065 partition.clone().into_doorbell_registration(vtl),
2066 Some(&mapper),
2067 |device_id| {
2068 let hv_device = partition.new_virtual_device(
2069 match dev_cfg.vtl {
2070 DeviceVtl::Vtl0 => Vtl::Vtl0,
2071 DeviceVtl::Vtl1 => Vtl::Vtl1,
2072 DeviceVtl::Vtl2 => Vtl::Vtl2,
2073 },
2074 device_id,
2075 )?;
2076 Ok((
2077 hv_device.clone().target(),
2078 hv_device.clone().interrupt_mapper(),
2079 ))
2080 },
2081 None,
2082 )
2083 .await?;
2084 }
2085
2086 #[cfg(all(windows, feature = "virt_whp"))]
2087 for resource in cfg.vpci_resources {
2088 let vmbus = vmbus_server
2089 .as_ref()
2090 .context("vmbus must be enabled to assign devices")?
2091 .control()
2092 .as_ref();
2093
2094 let pd = partition.as_any();
2096 let p = pd.downcast_ref::<virt_whp::WhpPartition>().unwrap();
2097 let (vpci_bus_name, device_name, instance_id, device_id) =
2098 make_ids("assigned-device", None);
2099
2100 let hv_device = Arc::new(
2101 p.new_physical_device(Vtl::Vtl0, device_id, resource.0)
2102 .context("failed to get physical device for assignment")?,
2103 );
2104
2105 let device = chipset_builder
2106 .arc_mutex_device(device_name)
2107 .with_external_pci()
2108 .try_add(|services| {
2109 virt_whp::device::AssignedPciDevice::new(
2110 &mut services.register_mmio(),
2111 hv_device.clone(),
2112 )
2113 })
2114 .context("failed to assign device")?;
2115
2116 chipset_builder
2117 .arc_mutex_device(vpci_bus_name)
2118 .try_add_async(async |services| {
2119 VpciBus::new(
2120 &driver_source,
2121 instance_id,
2122 device,
2123 &mut services.register_mmio(),
2124 vmbus,
2125 crate::partition::VpciDevice::interrupt_mapper(hv_device),
2126 None,
2127 )
2128 .await
2129 })
2130 .await?;
2131 }
2132 }
2133 }
2134
2135 let mut vmbus_devices = Vec::new();
2137 for (vtl, resource) in cfg.vmbus_devices {
2138 let vmbus = match vtl {
2139 DeviceVtl::Vtl0 => vmbus_server
2140 .as_ref()
2141 .context("failed to find vmbus for vtl0"),
2142 DeviceVtl::Vtl1 => anyhow::bail!("vtl1 scsi controllers unsupported"),
2143 DeviceVtl::Vtl2 => vtl2_vmbus_server
2144 .as_ref()
2145 .context("failed to find vmbus for vtl2"),
2146 }
2147 .with_context(|| format!("failed to resolve vmbus resource {}", resource.id()))?;
2148 vmbus_devices.push(
2149 offer_vmbus_device_handle_unit(
2150 &driver_source,
2151 &state_units,
2152 vmbus,
2153 &resolver,
2154 resource,
2155 )
2156 .await?,
2157 );
2158 }
2159
2160 let mut pci_device_number = 10;
2166 if mem_layout.mmio().len() < 2 {
2167 anyhow::bail!("at least two mmio regions are required");
2168 }
2169 let mut virtio_mmio_start = mem_layout.mmio()[1].end();
2170 let mut virtio_mmio_count = 0;
2171
2172 let virtio_mmio_irq = {
2176 const VIRTIO_MMIO_IOAPIC_IRQ: u32 = 17;
2177 const VIRTIO_MMIO_PIC_IRQ: u32 = 5;
2178 if cfg.chipset_capabilities.with_pic {
2179 VIRTIO_MMIO_PIC_IRQ
2180 } else {
2181 VIRTIO_MMIO_IOAPIC_IRQ
2182 }
2183 };
2184 for (bus, device) in cfg.virtio_devices.into_iter() {
2185 let id = device.id().to_string();
2186 let device = resolver
2187 .resolve(
2188 device,
2189 VirtioResolveInput {
2190 driver_source: &driver_source,
2191 },
2192 )
2193 .await?;
2194 match bus {
2195 VirtioBus::Mmio => {
2196 let mmio_start = virtio_mmio_start - 0x1000;
2197 virtio_mmio_start -= 0x1000;
2198 let id = format!("{id}-{mmio_start}");
2199 let gm = gm.clone();
2200 chipset_builder.arc_mutex_device(id).try_add(|services| {
2201 VirtioMmioDevice::new(
2202 device.0,
2203 &driver_source.simple(),
2204 gm,
2205 services.new_line(IRQ_LINE_SET, "interrupt", virtio_mmio_irq),
2206 partition.clone().into_doorbell_registration(Vtl::Vtl0),
2207 mmio_start,
2208 0x1000,
2209 )
2210 })?;
2211 virtio_mmio_count += 1;
2212 }
2213 VirtioBus::Pci => {
2214 let pci_inta_line = pci_inta_line.context("missing PCI INT#A line")?;
2215
2216 let device_number = pci_device_number;
2217 pci_device_number += 1;
2218 pci_legacy_interrupts.push(((device_number, None), pci_inta_line));
2219
2220 let bus = if cfg.chipset.with_piix4_pci_bus {
2221 pci_bus_id_piix4.clone()
2222 } else {
2223 pci_bus_id_generic.clone()
2224 };
2225
2226 chipset_builder
2227 .arc_mutex_device(format!("{id}-pci"))
2228 .with_pci_addr(0, device_number, 0)
2229 .on_pci_bus(bus)
2230 .try_add(|services| {
2231 VirtioPciDevice::new(
2232 device.0,
2233 &driver_source.simple(),
2234 gm.clone(),
2235 PciInterruptModel::IntX(
2236 PciInterruptPin::IntA,
2237 services.new_line(IRQ_LINE_SET, "interrupt", pci_inta_line),
2238 ),
2239 partition.clone().into_doorbell_registration(Vtl::Vtl0),
2240 &mut services.register_mmio(),
2241 Some(&mapper),
2242 )
2243 })?;
2244 }
2245 }
2246 }
2247
2248 assert!(virtio_mmio_start >= mem_layout.mmio()[1].start());
2249
2250 let (chipset, devices) = chipset_builder.build()?;
2251 let (fatal_error_send, _fatal_error_recv) = mesh::channel();
2252 let chipset = vmm_core::vmotherboard_adapter::AdaptedChipset::new(
2253 chipset,
2254 vmm_core::vmotherboard_adapter::FatalErrorPolicy::DebugBreak(fatal_error_send),
2256 );
2257
2258 let (halt_send, halt_recv) = mesh::channel();
2260
2261 let (partition_unit, vp_runners) = PartitionUnit::new(
2262 driver_source.simple(),
2263 state_units
2264 .add("partition")
2265 .depends_on(devices.chipset_unit())
2266 .depends_on(vmtime.handle()),
2267 partition.clone().into_vm_partition(),
2268 PartitionUnitParams {
2269 processor_topology: &processor_topology,
2270 halt_vps,
2271 halt_request_recv,
2272 client_notify_send: halt_send,
2273 vtl_guest_memory: [
2274 Some(&gm),
2275 None,
2276 cfg.hypervisor.with_vtl2.is_some().then_some(&gm),
2277 ],
2278 debugger_rpc: cfg.debugger_rpc,
2279 },
2280 )
2281 .context("failed to create partition unit")?;
2282
2283 try_join_all(vps.into_iter().zip(vp_runners).enumerate().map(
2285 |(vp_index, (mut vp, runner))| {
2286 let partition = partition.clone();
2287 let chipset = chipset.clone();
2288 let (send, recv) = mesh::oneshot();
2289 thread::Builder::new()
2290 .name(format!("vp-{}", vp_index))
2291 .spawn(move || match vp.bind() {
2292 Ok(mut vp) => {
2293 send.send(Ok(()));
2294 block_on_vp(
2295 partition,
2296 VpIndex::new(vp_index as u32),
2297 vp.run(runner, &chipset),
2298 )
2299 }
2300 Err(err) => {
2301 send.send(Err(err));
2302 }
2303 })
2304 .unwrap();
2305
2306 async move {
2307 recv.await
2308 .unwrap()
2309 .with_context(|| format!("failed to bind vp {vp_index}"))
2310 }
2311 },
2312 ))
2313 .await?;
2314
2315 let mut this = LoadedVm {
2316 state_units,
2317 running: false,
2318 inner: LoadedVmInner {
2319 driver_source,
2320 resolver,
2321 partition_unit,
2322 partition,
2323 chipset_devices: devices,
2324 _vmtime: vmtime,
2325 _scsi_devices: scsi_devices,
2326 memory_manager,
2327 gm,
2328 vtl0_hvsock_relay,
2329 vtl2_hvsock_relay,
2330 vmbus_server,
2331 vtl2_vmbus_server,
2332 hypervisor_cfg: cfg.hypervisor,
2333 memory_cfg: cfg.memory,
2334 mem_layout,
2335 processor_topology,
2336 vmbus_redirect,
2337 input_distributor,
2338 vtl2_framebuffer_gpa_base,
2339 #[cfg(windows)]
2340 _vmbus_proxy: vmbus_proxy,
2341 #[cfg(windows)]
2342 _kernel_vmnics: kernel_vmnics,
2343 vmbus_devices,
2344 chipset_cfg: cfg.chipset,
2345 chipset_capabilities: cfg.chipset_capabilities,
2346 firmware_event_send: cfg.firmware_event_send,
2347 load_mode: cfg.load_mode,
2348 virtio_mmio_count,
2349 virtio_mmio_irq,
2350 pci_legacy_interrupts,
2351 igvm_file,
2352 next_igvm_file: None,
2353 _vmgs_task: vmgs_task,
2354 vmgs_client_inspect_handle,
2355 #[cfg(target_os = "linux")]
2356 vfio_inspect,
2357 halt_recv,
2358 client_notify_send,
2359 automatic_guest_reset: cfg.automatic_guest_reset,
2360 pcie_host_bridges,
2361 pcie_root_complexes,
2362 pcie_hotplug_devices: Vec::new(),
2363 },
2364 };
2365
2366 if let Some(saved_state) = saved_state {
2367 this.restore(saved_state)
2368 .await
2369 .context("loadedvm restore failed")?;
2370 } else {
2371 this.inner.load_firmware(false).await?;
2372 }
2373
2374 Ok(this)
2375 }
2376}
2377
2378impl LoadedVmInner {
2379 async fn load_firmware(&mut self, vtl2_only: bool) -> anyhow::Result<()> {
2380 let cache_topology = if cfg!(guest_arch = "aarch64") {
2381 Some(
2382 cache_topology::CacheTopology::from_host()
2383 .context("failed to get cache topology")?,
2384 )
2385 } else {
2386 None
2387 };
2388 let acpi_builder = AcpiTablesBuilder {
2389 processor_topology: &self.processor_topology,
2390 mem_layout: &self.mem_layout,
2391 cache_topology: cache_topology.as_ref(),
2392 pcie_host_bridges: &self.pcie_host_bridges,
2393 #[cfg(guest_arch = "x86_64")]
2394 arch: vmm_core::acpi_builder::AcpiArchConfig::X86 {
2395 with_ioapic: self.chipset_cfg.with_generic_ioapic,
2396 with_psp: self.chipset_cfg.with_generic_psp,
2397 with_pic: self.chipset_capabilities.with_pic,
2398 with_pit: self.chipset_capabilities.with_pit,
2399 pm_base: PM_BASE,
2400 acpi_irq: SYSTEM_IRQ_ACPI,
2401 },
2402 #[cfg(guest_arch = "aarch64")]
2403 arch: vmm_core::acpi_builder::AcpiArchConfig::Aarch64 {
2404 hypervisor_vendor_identity: if self.hypervisor_cfg.with_hv {
2405 u64::from_le_bytes(*b"MsHyperV")
2406 } else {
2407 0
2408 },
2409 virt_timer_ppi: self.processor_topology.virt_timer_ppi(),
2410 },
2411 };
2412
2413 if vtl2_only {
2414 assert!(matches!(self.load_mode, LoadMode::Igvm { .. }));
2415 }
2416
2417 #[cfg_attr(not(guest_arch = "x86_64"), expect(unused_mut))]
2418 let (mut regs, initial_page_vis) = match &self.load_mode {
2419 LoadMode::None => return Ok(()),
2420 #[cfg(guest_arch = "x86_64")]
2421 &LoadMode::Linux {
2422 ref kernel,
2423 ref initrd,
2424 ref cmdline,
2425 enable_serial,
2426 ref custom_dsdt,
2427 boot_mode,
2428 } => {
2429 match boot_mode {
2430 openvmm_defs::config::LinuxDirectBootMode::DeviceTree => {
2431 anyhow::bail!("device tree boot mode is not supported on x86_64");
2432 }
2433 openvmm_defs::config::LinuxDirectBootMode::Acpi => {}
2434 }
2435 let kernel_config = super::vm_loaders::linux::KernelConfig {
2436 kernel,
2437 initrd,
2438 cmdline,
2439 mem_layout: &self.mem_layout,
2440 };
2441 if custom_dsdt.is_none() && self.mem_layout.mmio().len() < 2 {
2442 anyhow::bail!("at least two mmio regions are required");
2443 }
2444 let regs =
2445 super::vm_loaders::linux::load_linux_x86(&kernel_config, &self.gm, |gpa| {
2446 let tables = if let Some(dsdt) = custom_dsdt {
2447 acpi_builder.build_acpi_tables_custom_dsdt(gpa, dsdt)
2448 } else {
2449 acpi_builder.build_acpi_tables(gpa, |mem_layout, dsdt| {
2450 add_devices_to_dsdt_x64(
2451 mem_layout,
2452 dsdt,
2453 &self.chipset_cfg,
2454 enable_serial,
2455 self.virtio_mmio_count,
2456 self.virtio_mmio_irq,
2457 &self.pci_legacy_interrupts,
2458 )
2459 })
2460 };
2461
2462 super::vm_loaders::linux::AcpiTables {
2463 rdsp: tables.rdsp,
2464 tables: tables.tables,
2465 }
2466 })?;
2467
2468 (regs, Vec::new())
2469 }
2470 #[cfg(guest_arch = "aarch64")]
2471 &LoadMode::Linux {
2472 ref kernel,
2473 ref initrd,
2474 ref cmdline,
2475 enable_serial,
2476 custom_dsdt: _,
2477 boot_mode,
2478 } => {
2479 use openvmm_defs::config::LinuxDirectBootMode;
2480
2481 let kernel_config = super::vm_loaders::linux::KernelConfig {
2482 kernel,
2483 initrd,
2484 cmdline,
2485 mem_layout: &self.mem_layout,
2486 };
2487
2488 let with_hv = self.hypervisor_cfg.with_hv;
2489 let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi {
2490 Some(|rsdp_gpa: u64| {
2491 acpi_builder.build_acpi_tables(rsdp_gpa, |mem_layout, dsdt| {
2492 add_devices_to_dsdt_arm64(mem_layout, dsdt, enable_serial, with_hv)
2493 })
2494 })
2495 } else {
2496 None
2497 };
2498
2499 let regs = super::vm_loaders::linux::load_linux_arm64(
2500 &kernel_config,
2501 &self.gm,
2502 enable_serial,
2503 &self.processor_topology,
2504 &self.pcie_host_bridges,
2505 build_acpi,
2506 )?;
2507
2508 (regs, Vec::new())
2509 }
2510 &LoadMode::Uefi {
2511 ref firmware,
2512 enable_debugging,
2513 enable_memory_protections,
2514 disable_frontpage,
2515 enable_tpm,
2516 enable_battery,
2517 enable_serial,
2518 enable_vpci_boot,
2519 uefi_console_mode,
2520 default_boot_always_attempt,
2521 bios_guid,
2522 } => {
2523 let madt = acpi_builder.build_madt();
2524 let srat = acpi_builder.build_srat();
2525 let mcfg = (!self.pcie_host_bridges.is_empty()).then(|| acpi_builder.build_mcfg());
2526 let pptt = cache_topology.is_some().then(|| acpi_builder.build_pptt());
2527 let load_settings = super::vm_loaders::uefi::UefiLoadSettings {
2528 debugging: enable_debugging,
2529 memory_protections: enable_memory_protections,
2530 frontpage: !disable_frontpage,
2531 tpm: enable_tpm,
2532 battery: enable_battery,
2533 guest_watchdog: self.chipset_cfg.with_hyperv_guest_watchdog,
2534 vpci_boot: enable_vpci_boot,
2535 serial: enable_serial,
2536 uefi_console_mode,
2537 default_boot_always_attempt,
2538 bios_guid,
2539 };
2540 let regs = super::vm_loaders::uefi::load_uefi(
2541 firmware,
2542 &self.gm,
2543 &self.processor_topology,
2544 &self.mem_layout,
2545 &self.pcie_host_bridges,
2546 load_settings,
2547 &madt,
2548 &srat,
2549 mcfg.as_deref(),
2550 pptt.as_deref(),
2551 )?;
2552
2553 (regs, Vec::new())
2554 }
2555 #[cfg(guest_arch = "x86_64")]
2556 LoadMode::Pcat { .. } => {
2557 let regs = super::vm_loaders::pcat::load_pcat(&self.gm, &self.mem_layout)?;
2558
2559 (regs, Vec::new())
2560 }
2561 &LoadMode::Igvm {
2562 file: _,
2563 ref cmdline,
2564 vtl2_base_address,
2565 com_serial,
2566 } => {
2567 let madt = acpi_builder.build_madt();
2568 let srat = acpi_builder.build_srat();
2569 const ENTROPY_SIZE: usize = 64;
2570 let mut entropy = [0u8; ENTROPY_SIZE];
2571 getrandom::fill(&mut entropy).unwrap();
2572
2573 let params = crate::worker::vm_loaders::igvm::LoadIgvmParams {
2574 igvm_file: self.igvm_file.as_ref().expect("should be already read"),
2575 gm: &self.gm,
2576 processor_topology: &self.processor_topology,
2577 mem_layout: &self.mem_layout,
2578 cmdline,
2579 acpi_tables: super::vm_loaders::igvm::AcpiTables {
2580 madt: &madt,
2581 srat: &srat,
2582 slit: None,
2583 pptt: None,
2584 },
2585 vtl2_base_address,
2586 vtl2_framebuffer_gpa_base: self.vtl2_framebuffer_gpa_base,
2587 vtl2_only,
2588 with_vmbus_redirect: self.vmbus_redirect,
2589 com_serial,
2590 entropy: Some(&entropy),
2591 };
2592 super::vm_loaders::igvm::load_igvm(params)?
2593 }
2594
2595 #[expect(clippy::allow_attributes)]
2596 #[allow(unreachable_patterns)]
2597 _ => anyhow::bail!("load mode not supported on this platform"),
2598 };
2599
2600 #[cfg(guest_arch = "x86_64")]
2603 if self.hypervisor_cfg.with_vtl2.is_none() {
2604 regs.extend(
2605 loader::common::compute_variable_mtrrs(
2606 &self.mem_layout,
2607 self.partition.caps().physical_address_width,
2608 )
2609 .context("failed to compute variable mtrrs")?,
2610 );
2611 }
2612
2613 if self.hypervisor_cfg.with_isolation.is_some() {
2615 tracing::debug!(?initial_page_vis, "initial_page_vis");
2616 self.partition_unit
2617 .set_initial_page_visibility(initial_page_vis)
2618 .await
2619 .context("failed to set initial page visibility")?;
2620 }
2621
2622 let initial_regs = initial_regs(
2623 ®s,
2624 self.partition.caps(),
2625 &self.processor_topology.vp_arch(VpIndex::BSP),
2626 );
2627
2628 tracing::debug!(?initial_regs, "initial_registers");
2629 self.partition_unit
2630 .set_initial_regs(
2631 if self.hypervisor_cfg.with_vtl2.is_some() {
2632 Vtl::Vtl2
2633 } else {
2634 Vtl::Vtl0
2635 },
2636 initial_regs,
2637 )
2638 .await
2639 .context("failed to set initial register state")?;
2640
2641 Ok(())
2642 }
2643}
2644
impl LoadedVm {
    /// Starts all state units. Returns `true` if the VM transitioned from
    /// paused to running, `false` if it was already running.
    async fn resume(&mut self) -> bool {
        if self.running {
            return false;
        }
        self.state_units.start().await;
        self.running = true;
        true
    }

    /// Stops all state units. Returns `true` if the VM transitioned from
    /// running to paused, `false` if it was already paused.
    async fn pause(&mut self) -> bool {
        if !self.running {
            return false;
        }
        self.state_units.stop().await;
        self.running = false;
        true
    }

    /// Runs the VM's event loop until the worker is told to stop or restart,
    /// or until one of the control channels closes.
    ///
    /// Three event sources are raced each iteration: worker-level RPCs
    /// (stop/restart/inspect), VM-level RPCs (reset, pause/resume, save,
    /// device hot-add/remove, ...), and guest halt notifications.
    pub async fn run(
        mut self,
        driver: &impl Spawn,
        mut rpc_recv: mesh::Receiver<VmRpc>,
        mut worker_rpc: mesh::Receiver<WorkerRpc<RestartState>>,
    ) {
        enum Event {
            WorkerRpc(Result<WorkerRpc<RestartState>, mesh::RecvError>),
            VmRpc(Result<VmRpc, mesh::RecvError>),
            Halt(Result<HaltReason, mesh::RecvError>),
        }

        // Interpose on the worker RPC channel so inspect requests also report
        // the state units; all other RPCs are forwarded unchanged to the main
        // loop below.
        let (worker_rpc_send, worker_rpc_recv) = mesh::channel();
        let _filter_rpc_task = driver.spawn("loaded-vm-worker-rpc-filter", {
            let state_units = self.state_units.inspector();
            async move {
                while let Some(rpc) = worker_rpc.next().await {
                    match rpc {
                        WorkerRpc::Inspect(req) => req.respond(|resp| {
                            resp.merge(&state_units)
                                .merge(inspect::send(&worker_rpc_send, WorkerRpc::Inspect));
                        }),
                        rpc => worker_rpc_send.send(rpc),
                    }
                }
            }
        });
        let mut worker_rpc = worker_rpc_recv;

        loop {
            // Race all three sources; first completed receive wins.
            let event: Event = {
                let a = rpc_recv.recv().map(Event::VmRpc);
                let b = worker_rpc.recv().map(Event::WorkerRpc);
                let c = self.inner.halt_recv.recv().map(Event::Halt);
                (a, b, c).race().await
            };

            match event {
                Event::WorkerRpc(Err(_)) => break,
                Event::WorkerRpc(Ok(message)) => match message {
                    WorkerRpc::Stop => break,
                    WorkerRpc::Restart(rpc) => {
                        // Stop the VM, save its state, and hand everything
                        // needed to rebuild it back to the worker host.
                        let mut stopped = false;
                        let r = async {
                            let shared_memory = self.inner.memory_manager.shared_memory_backing();
                            if shared_memory.is_none() {
                                anyhow::bail!("restart is not supported with --private-memory");
                            }
                            if self.running {
                                self.state_units.stop().await;
                                stopped = true;
                            }
                            let saved_state = self.save().await?;
                            anyhow::Ok((shared_memory, saved_state))
                        }
                        .await;
                        match r {
                            Ok((shared_memory, saved_state)) => {
                                rpc.complete(Ok(self
                                    .serialize(rpc_recv, shared_memory, saved_state)
                                    .await));

                                return;
                            }
                            Err(err) => {
                                // Restart failed: resume the VM if we had
                                // stopped it, then report the error.
                                if stopped {
                                    self.state_units.start().await;
                                }
                                rpc.complete(Err(RemoteError::new(err)));
                            }
                        }
                    }
                    WorkerRpc::Inspect(deferred) => deferred.respond(|resp| {
                        resp.field("memory", &self.inner.memory_manager)
                            .field("memory_layout", &self.inner.mem_layout)
                            .field("resolver", &self.inner.resolver)
                            .field("vmgs", &self.inner.vmgs_client_inspect_handle);
                        #[cfg(target_os = "linux")]
                        resp.field("vfio", &self.inner.vfio_inspect);
                    }),
                },
                Event::VmRpc(Err(_)) => break,
                Event::VmRpc(Ok(message)) => match message {
                    VmRpc::Reset(rpc) => {
                        rpc.handle_failable(async |()| self.reset(true).await).await
                    }
                    VmRpc::ClearHalt(rpc) => {
                        rpc.handle(async |()| self.inner.partition_unit.clear_halt().await)
                            .await
                    }
                    VmRpc::Resume(rpc) => rpc.handle(async |()| self.resume().await).await,
                    VmRpc::Pause(rpc) => rpc.handle(async |()| self.pause().await).await,
                    VmRpc::Save(rpc) => {
                        rpc.handle_failable(async |()| self.save().await.map(ProtobufMessage::new))
                            .await
                    }
                    // Deliver an NMI to the requested VP. Out-of-range VP
                    // indices are silently ignored; only implemented for
                    // x86-64 guests.
                    VmRpc::Nmi(rpc) => rpc.handle_sync(|vpindex| {
                        if vpindex < self.inner.processor_topology.vp_count() {
                            #[cfg(guest_arch = "x86_64")]
                            self.inner.partition.request_msi(
                                Vtl::Vtl0,
                                virt::irqcon::MsiRequest::new_x86(
                                    virt::irqcon::DeliveryMode::NMI,
                                    self.inner
                                        .processor_topology
                                        .vp_arch(VpIndex::new(vpindex))
                                        .apic_id,
                                    false,
                                    0,
                                    false,
                                ),
                            );
                        }
                    }),
                    VmRpc::AddVmbusDevice(rpc) => {
                        rpc.handle_failable(async |(vtl, resource)| {
                            let vmbus = match vtl {
                                DeviceVtl::Vtl0 => self.inner.vmbus_server.as_ref(),
                                DeviceVtl::Vtl1 => None,
                                DeviceVtl::Vtl2 => self.inner.vtl2_vmbus_server.as_ref(),
                            }
                            .context("no vmbus available")?;
                            let device = offer_vmbus_device_handle_unit(
                                &self.inner.driver_source,
                                &self.state_units,
                                vmbus,
                                &self.inner.resolver,
                                resource,
                            )
                            .await?;
                            self.inner.vmbus_devices.push(device);
                            // Bring the newly added unit up to the VM's
                            // current run state.
                            self.state_units.start_stopped_units().await;
                            anyhow::Ok(())
                        })
                        .await
                    }
                    VmRpc::ConnectHvsock(rpc) => {
                        let ((mut ctx, service_id, vtl), response) = rpc.split();
                        if let Some(relay) = self.hvsock_relay(vtl) {
                            // Complete the connection on a spawned task so
                            // this event loop is not blocked on the guest.
                            let fut = relay.connect(&mut ctx, service_id);
                            driver
                                .spawn("vmrpc-hvsock-connect", async move {
                                    response.complete(fut.await.map_err(RemoteError::new))
                                })
                                .detach();
                        } else {
                            response.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "hvsock is not available"
                            ))));
                        }
                    }
                    VmRpc::PulseSaveRestore(rpc) => {
                        rpc.handle(async |()| {
                            if !self.inner.partition.supports_reset() {
                                return Err(PulseSaveRestoreError::ResetNotSupported);
                            }
                            // Preserve the prior run/pause state across the
                            // save/reset/restore cycle.
                            let paused = self.pause().await;
                            self.save_reset_restore().await?;

                            if paused {
                                self.resume().await;
                            }
                            Ok(())
                        })
                        .await
                    }
                    VmRpc::StartReloadIgvm(rpc) => {
                        rpc.handle_failable_sync(|file| self.start_reload_igvm(&file))
                    }
                    VmRpc::CompleteReloadIgvm(rpc) => {
                        rpc.handle_failable(async |complete| {
                            self.complete_reload_igvm(complete).await
                        })
                        .await
                    }
                    VmRpc::ReadMemory(rpc) => {
                        rpc.handle_failable_sync(|(gpa, size)| {
                            let mut bytes = vec![0u8; size];
                            self.inner
                                .gm
                                .read_at(gpa, bytes.as_mut_slice())
                                .map(|_| bytes)
                        });
                    }
                    VmRpc::WriteMemory(rpc) => rpc.handle_failable_sync(|(gpa, bytes)| {
                        self.inner.gm.write_at(gpa, bytes.as_slice())
                    }),
                    VmRpc::UpdateCliParams(rpc) => {
                        rpc.handle_failable_sync(|params| match &mut self.inner.load_mode {
                            LoadMode::Igvm { cmdline, .. } => {
                                *cmdline = params;
                                Ok(())
                            }
                            _ => anyhow::bail!(
                                "Updating command line parameters is only supported for Igvm load mode"
                            ),
                        })
                    }
                    VmRpc::AddPcieDevice(rpc) => {
                        rpc.handle_failable(async |(port_name, resource)| {
                            // Locate the root complex that exposes the named
                            // downstream port.
                            let rc = self
                                .inner
                                .pcie_root_complexes
                                .iter()
                                .find(|rc| {
                                    rc.lock()
                                        .downstream_ports()
                                        .iter()
                                        .any(|(_, name)| name.as_ref() == port_name.as_str())
                                })
                                .ok_or_else(|| {
                                    anyhow::anyhow!(
                                        "port '{}' not found in any root complex",
                                        port_name
                                    )
                                })?;

                            let msi_conn = pci_core::msi::MsiConnection::new();
                            let signal_msi = self.inner.partition.as_signal_msi(Vtl::Vtl0);

                            // Resolve and instantiate the device, registering
                            // its MMIO with the chipset.
                            let (unit, device) = self
                                .inner
                                .chipset_devices
                                .add_dyn_device(
                                    &self.inner.driver_source,
                                    &self.state_units,
                                    format!("pcie-hotplug:{}", port_name),
                                    async |register_mmio| {
                                        self.inner
                                            .resolver
                                            .resolve(
                                                resource,
                                                pci_resources::ResolvePciDeviceHandleParams {
                                                    msi_target: msi_conn.target(),
                                                    register_mmio,
                                                    driver_source: &self.inner.driver_source,
                                                    guest_memory: &self.inner.gm,
                                                    doorbell_registration: self
                                                        .inner
                                                        .partition
                                                        .clone()
                                                        .into_doorbell_registration(Vtl::Vtl0),
                                                    shared_mem_mapper: None,
                                                    irqfd: self.inner.partition.irqfd(),
                                                },
                                            )
                                            .await
                                            .map(|r| r.0)
                                            .map_err(|e| anyhow::anyhow!(e))
                                    },
                                )
                                .await?;

                            if let Some(target) = signal_msi {
                                msi_conn.connect(target);
                            }

                            // The bus holds only a weak reference so the
                            // device can later be torn down on hot-remove.
                            let weak_dev: std::sync::Weak<
                                closeable_mutex::CloseableMutex<dyn chipset_device::ChipsetDevice>,
                            > = Arc::downgrade(
                                &(device.clone()
                                    as Arc<
                                        closeable_mutex::CloseableMutex<
                                            dyn chipset_device::ChipsetDevice,
                                        >,
                                    >),
                            );
                            let bus_device = Box::new(WeakMutexPciBusDevice(weak_dev));

                            self.inner
                                .pcie_hotplug_devices
                                .push((port_name.clone(), unit, device));

                            self.state_units.start_stopped_units().await;

                            // If attaching to the port fails, roll back the
                            // unit that was just added (it was pushed last).
                            if let Err(e) = rc.lock().hotplug_add_device(
                                &port_name,
                                "hotplug-device",
                                bus_device,
                            ) {
                                let (_, unit, _) = self.inner.pcie_hotplug_devices.pop().unwrap();
                                unit.remove().await;
                                return Err(e);
                            }
                            anyhow::Ok(())
                        })
                        .await
                    }
                    VmRpc::RemovePcieDevice(rpc) => {
                        rpc.handle_failable(async |port_name: String| {
                            // Only devices added dynamically via
                            // AddPcieDevice are tracked here and removable.
                            let idx = self
                                .inner
                                .pcie_hotplug_devices
                                .iter()
                                .position(|(name, _, _)| name == &port_name)
                                .ok_or_else(|| {
                                    anyhow::anyhow!(
                                        "no hot-added device on port '{}' (only dynamically added devices can be hot-removed)",
                                        port_name
                                    )
                                })?;

                            let rc = self
                                .inner
                                .pcie_root_complexes
                                .iter()
                                .find(|rc| {
                                    rc.lock()
                                        .downstream_ports()
                                        .iter()
                                        .any(|(_, name)| name.as_ref() == port_name.as_str())
                                })
                                .ok_or_else(|| {
                                    anyhow::anyhow!(
                                        "port '{}' not found in any root complex",
                                        port_name
                                    )
                                })?;

                            // Detach from the bus first, then tear down the
                            // device's state unit.
                            rc.lock().hotplug_remove_device(&port_name)?;

                            let (_, unit, _device) = self.inner.pcie_hotplug_devices.remove(idx);
                            unit.remove().await;

                            anyhow::Ok(())
                        })
                        .await
                    }
                },
                Event::Halt(Err(_)) => break,
                Event::Halt(Ok(reason)) => {
                    // Service guest-initiated resets automatically when
                    // configured; otherwise forward the halt to the client.
                    if matches!(reason, HaltReason::Reset) && self.inner.automatic_guest_reset {
                        tracing::info!("guest-initiated reset");
                        if let Err(err) = self.reset(true).await {
                            tracing::error!(?err, "failed to reset VM");
                            break;
                        }
                    } else {
                        self.inner.client_notify_send.send(reason);
                    }
                }
            }
        }

        // Tear down on exit so the partition and vmbus shut down cleanly.
        self.inner.partition_unit.teardown().await;
        if let Some(vmbus) = self.inner.vmbus_server {
            vmbus.remove().await.shutdown().await;
        }
    }

    /// Reads and stages an IGVM file for a subsequent
    /// [`Self::complete_reload_igvm`].
    fn start_reload_igvm(&mut self, file: &File) -> anyhow::Result<()> {
        // Drop any previously staged file first.
        self.inner.next_igvm_file = None;

        let igvm_file =
            super::vm_loaders::igvm::read_igvm_file(file).context("reading igvm file failed")?;

        self.inner.next_igvm_file = Some(igvm_file);
        Ok(())
    }

    /// Completes (or abandons, when `complete` is false) a previously staged
    /// IGVM reload: stops the VPs, force-resets the VTL2 vmbus, and reloads
    /// the VTL2 firmware from the new file.
    async fn complete_reload_igvm(&mut self, complete: bool) -> anyhow::Result<()> {
        if !complete {
            self.inner.next_igvm_file = None;
            return Ok(());
        }

        let next_igvm_file = self
            .inner
            .next_igvm_file
            .take()
            .context("no staged igvm file")?;

        let vtl2_vmbus = self
            .inner
            .vtl2_vmbus_server
            .as_ref()
            .context("missing vtl2 vmbus")?;

        // Keep the VPs stopped while the firmware is replaced; `stop_vps`
        // is explicitly dropped below once the reload is done.
        let stop_vps = self.inner.partition_unit.temporarily_stop_vps().await;

        vtl2_vmbus
            .control()
            .force_reset()
            .await
            .context("failed to reset vtl2 vmbus")?;

        let _old_igvm_file = self.inner.igvm_file.replace(next_igvm_file);
        self.inner
            .load_firmware(true)
            .await
            .context("failed to reload VTL2 firmware")?;

        drop(stop_vps);
        Ok(())
    }

    /// Returns the hvsock relay for the given VTL, if one exists. VTL1 never
    /// has a relay.
    fn hvsock_relay(&self, vtl: DeviceVtl) -> Option<&HvsockRelay> {
        match vtl {
            DeviceVtl::Vtl0 => self.inner.vtl0_hvsock_relay.as_ref(),
            DeviceVtl::Vtl1 => None,
            DeviceVtl::Vtl2 => self.inner.vtl2_hvsock_relay.as_ref(),
        }
    }

    /// Saves the state of all state units.
    async fn save(&mut self) -> anyhow::Result<SavedState> {
        Ok(SavedState {
            units: self.state_units.save().await?,
        })
    }

    /// Restores all state units from a previously saved state.
    async fn restore(&mut self, state: SavedState) -> anyhow::Result<()> {
        self.state_units.restore(state.units).await?;
        Ok(())
    }

    /// Saves the VM state, resets the VM (without reloading firmware), and
    /// restores the saved state.
    async fn save_reset_restore(&mut self) -> anyhow::Result<()> {
        let state = self.save().await?;
        self.reset(false).await?;
        self.restore(state).await?;
        Ok(())
    }

    /// Tears the VM down and packages what is needed to rebuild it in a new
    /// worker process.
    ///
    /// NOTE: currently incomplete — most manifest fields are stubbed out with
    /// empty/default values and the `hypervisor` field is a `todo!`.
    async fn serialize(
        mut self,
        rpc: mesh::Receiver<VmRpc>,
        shared_memory: Option<SharedMemoryBacking>,
        saved_state: SavedState,
    ) -> RestartState {
        let notify = self.inner.partition_unit.teardown().await;
        let input = self.inner.input_distributor.remove().await.into_inner();

        if let Some(vmbus_server) = self.inner.vmbus_server.take() {
            vmbus_server.remove().await.shutdown().await;
        }

        let manifest = Manifest {
            load_mode: self.inner.load_mode,
            floppy_disks: vec![],
            ide_disks: vec![],
            pcie_root_complexes: vec![],
            pcie_devices: vec![],
            pcie_switches: vec![],
            vpci_devices: vec![],
            memory: self.inner.memory_cfg,
            processor_topology: self.inner.processor_topology.to_config(),
            chipset: self.inner.chipset_cfg,
            vmbus: None,
            vtl2_vmbus: None,
            hypervisor: self.inner.hypervisor_cfg,
            #[cfg(windows)]
            kernel_vmnics: vec![],
            input,
            framebuffer: None,
            vga_firmware: None,
            vtl2_gfx: false,
            virtio_devices: vec![],
            #[cfg(all(windows, feature = "virt_whp"))]
            vpci_resources: vec![],
            vmgs: None,
            secure_boot_enabled: false,
            custom_uefi_vars: Default::default(),
            firmware_event_send: self.inner.firmware_event_send,
            debugger_rpc: None,
            vmbus_devices: vec![],
            chipset_devices: vec![],
            pci_chipset_devices: vec![],
            chipset_capabilities: self.inner.chipset_capabilities,
            generation_id_recv: None,
            rtc_delta_milliseconds: 0,
            automatic_guest_reset: self.inner.automatic_guest_reset,
            efi_diagnostics_log_level: Default::default(),
        };
        #[expect(unreachable_code, reason = "TODO")]
        RestartState {
            manifest,
            running: self.running,
            saved_state,
            shared_memory,
            rpc,
            notify,
            hypervisor: todo!("TODO: RestartState serialization is broken"),
        }
    }

    /// Resets all state units, optionally reloading firmware, while
    /// preserving the prior run/pause state.
    async fn reset(&mut self, reload_firmware: bool) -> anyhow::Result<()> {
        let resume = self.pause().await;

        self.state_units.reset().await?;
        if reload_firmware {
            self.inner.load_firmware(false).await?;
        }

        if resume {
            self.resume().await;
        }
        Ok(())
    }
}
3173
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
/// Adds the x86-64 platform devices (APIC, optional COM UARTs, virtio-mmio
/// devices, the PCI bus or MMIO modules, vmbus, and RTC) to the DSDT.
///
/// `virtio_mmio_count` one-page regions are carved from the top of the high
/// MMIO gap — this must match the placement used when the virtio-mmio
/// devices were instantiated.
fn add_devices_to_dsdt_x64(
    mem_layout: &MemoryLayout,
    dsdt: &mut dsdt::Dsdt,
    cfg: &BaseChipsetManifest,
    serial_uarts: bool,
    virtio_mmio_count: usize,
    virtio_mmio_irq: u32,
    pci_legacy_interrupts: &[((u8, Option<u8>), u32)], ) {
    dsdt.add_apic();

    // Describe the four standard COM ports when serial is enabled.
    if serial_uarts {
        for (name, com_port, ddn, uid) in [
            (b"\\_SB.UAR1", ComPort::Com1, b"COM1", 1),
            (b"\\_SB.UAR2", ComPort::Com2, b"COM2", 2),
            (b"\\_SB.UAR3", ComPort::Com3, b"COM3", 3),
            (b"\\_SB.UAR4", ComPort::Com4, b"COM4", 4),
        ]
        .iter()
        .copied()
        {
            dsdt.add_uart(name, ddn, uid, com_port.io_port(), com_port.irq().into());
        }
    }

    assert!(
        mem_layout.mmio().len() >= 2,
        "the DSDT describes two MMIO regions"
    );
    let low_mmio_gap = mem_layout.mmio()[0];
    let mut high_mmio_space: std::ops::Range<u64> = mem_layout.mmio()[1].into();
    // Each virtio-mmio device claims one page from the top of the high MMIO
    // gap (shrinking the range as we go). All devices share a single
    // level-triggered interrupt line (`virtio_mmio_irq`); _HID "LNRO0005" is
    // the virtio-mmio transport identifier.
    for i in 0..virtio_mmio_count {
        high_mmio_space.end -= HV_PAGE_SIZE;
        let mut device = dsdt::Device::new(format!("\\_SB.VI{i:02}").as_bytes());
        device.add_object(&dsdt::NamedString::new(b"_HID", b"LNRO0005"));
        device.add_object(&dsdt::NamedInteger::new(b"_UID", i as u64));
        let mut crs = dsdt::CurrentResourceSettings::new();
        crs.add_resource(&dsdt::QwordMemory::new(high_mmio_space.end, HV_PAGE_SIZE));
        let mut intr = dsdt::Interrupt::new(virtio_mmio_irq);
        intr.is_edge_triggered = false;
        crs.add_resource(&intr);
        device.add_object(&crs);
        dsdt.add_object(&device);
    }

    // The remainder of the high gap, after the virtio-mmio carve-outs, is
    // handed to the PCI bus or MMIO module.
    let high_mmio_gap = MemoryRange::new(high_mmio_space);

    if cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge {
        dsdt.add_pci(low_mmio_gap, high_mmio_gap, pci_legacy_interrupts);
    } else {
        dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap);
    }

    dsdt.add_vmbus(
        cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge,
        None,
    );
    dsdt.add_rtc();
}
3248
#[cfg(guest_arch = "aarch64")]
/// Adds the aarch64 platform devices to the DSDT: vmbus and MMIO modules
/// when running with the hypervisor interface, plus two PL011 (SBSA) UARTs
/// when serial is enabled.
fn add_devices_to_dsdt_arm64(
    mem_layout: &MemoryLayout,
    dsdt: &mut dsdt::Dsdt,
    enable_serial: bool,
    with_hv: bool,
) {
    // Vmbus interrupt ID (PPI), from the shared configuration default.
    const VMBUS_INTID: u32 = openvmm_defs::config::DEFAULT_VMBUS_PPI;
    // Fixed MMIO placement of the two PL011 serial controllers.
    const PL011_SERIAL0_BASE: u64 = 0xEFFEC000;
    const PL011_SERIAL1_BASE: u64 = 0xEFFEB000;
    const PL011_SERIAL_SIZE: u64 = 0x1000;
    // Interrupt IDs (GSIVs) for the two UARTs.
    const PL011_SERIAL0_GSIV: u32 = 33;
    const PL011_SERIAL1_GSIV: u32 = 34;

    if with_hv {
        assert!(
            mem_layout.mmio().len() >= 2,
            "need at least two MMIO regions"
        );
        let low_mmio_gap = mem_layout.mmio()[0];
        let high_mmio_gap: MemoryRange = mem_layout.mmio()[1];
        dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap);
        dsdt.add_vmbus(false, Some(VMBUS_INTID));
    }

    if enable_serial {
        dsdt.add_sbsa_uart(
            b"\\_SB.UAR0",
            0,
            PL011_SERIAL0_BASE,
            PL011_SERIAL_SIZE,
            PL011_SERIAL0_GSIV,
        );
        dsdt.add_sbsa_uart(
            b"\\_SB.UAR1",
            1,
            PL011_SERIAL1_BASE,
            PL011_SERIAL_SIZE,
            PL011_SERIAL1_GSIV,
        );
    }
}
3300
#[cfg(guest_arch = "x86_64")]
/// Watchdog callback that injects an NMI into the guest on timeout.
struct WatchdogTimeoutNmi {
    // Partition used to deliver the NMI as an MSI request.
    partition: Arc<dyn HvlitePartition>,
    // Optional channel notified after each timeout is handled.
    watchdog_send: Option<mesh::Sender<()>>,
}
3306
#[cfg(guest_arch = "x86_64")]
#[async_trait::async_trait]
impl WatchdogCallback for WatchdogTimeoutNmi {
    /// Delivers an NMI (targeting APIC ID 0) to VTL0 and notifies any
    /// registered listener that the watchdog fired.
    async fn on_timeout(&mut self) {
        let nmi =
            virt::irqcon::MsiRequest::new_x86(virt::irqcon::DeliveryMode::NMI, 0, false, 0, false);
        self.partition.request_msi(Vtl::Vtl0, nmi);

        if let Some(sender) = self.watchdog_send.as_ref() {
            sender.send(());
        }
    }
}
3322
/// Watchdog callback that requests a guest reset (via the halt handler) on
/// timeout.
struct WatchdogTimeoutReset {
    // Halt handler used to signal the reset request.
    halt_vps: Arc<Halt>,
    // Optional channel notified after each timeout is handled.
    watchdog_send: Option<mesh::Sender<()>>,
}
3327
3328#[async_trait::async_trait]
3329impl WatchdogCallback for WatchdogTimeoutReset {
3330 async fn on_timeout(&mut self) {
3331 self.halt_vps.halt(HaltReason::Reset);
3332
3333 if let Some(watchdog_send) = &self.watchdog_send {
3334 watchdog_send.send(());
3335 }
3336 }
3337}
3338
/// OpenVMM's (empty) set of remote dynamic resolvers for the remote chipset
/// device worker; registers nothing extra.
#[derive(MeshPayload, Clone)]
struct OpenVmmRemoteDynamicResolvers {}
3341
impl chipset_device_worker::RemoteDynamicResolvers for OpenVmmRemoteDynamicResolvers {
    // Worker ID under which the remote chipset device worker is registered.
    const WORKER_ID_STR: &str = "openvmm_remote_chipset_worker";

    /// Registers no additional resolvers for OpenVMM.
    async fn register_remote_dynamic_resolvers(
        self,
        _resolver: &mut ResourceResolver,
    ) -> anyhow::Result<()> {
        Ok(())
    }
}
3352
// Register the remote chipset device worker so the mesh worker host can
// launch it by ID.
mesh_worker::register_workers! {
    chipset_device_worker::worker::RemoteChipsetDeviceWorker<OpenVmmRemoteDynamicResolvers>
}
3356
/// Adapter exposing a weakly-held, lockable chipset device as a generic PCI
/// bus device.
///
/// Holding only a `Weak` reference lets the device be torn down (e.g. on
/// PCIe hot-remove) while the bus still holds this entry; config-space
/// accesses after teardown simply return `None`.
struct WeakMutexPciBusDevice(
    std::sync::Weak<closeable_mutex::CloseableMutex<dyn chipset_device::ChipsetDevice>>,
);
3362
3363impl pci_bus::GenericPciBusDevice for WeakMutexPciBusDevice {
3364 fn pci_cfg_read(
3365 &mut self,
3366 offset: u16,
3367 value: &mut u32,
3368 ) -> Option<chipset_device::io::IoResult> {
3369 Some(
3370 self.0
3371 .upgrade()?
3372 .lock()
3373 .supports_pci()?
3374 .pci_cfg_read(offset, value),
3375 )
3376 }
3377
3378 fn pci_cfg_write(&mut self, offset: u16, value: u32) -> Option<chipset_device::io::IoResult> {
3379 Some(
3380 self.0
3381 .upgrade()?
3382 .lock()
3383 .supports_pci()?
3384 .pci_cfg_write(offset, value),
3385 )
3386 }
3387
3388 fn pci_cfg_read_with_routing(
3389 &mut self,
3390 secondary_bus: u8,
3391 target_bus: u8,
3392 function: u8,
3393 offset: u16,
3394 value: &mut u32,
3395 ) -> Option<chipset_device::io::IoResult> {
3396 Some(
3397 self.0
3398 .upgrade()?
3399 .lock()
3400 .supports_pci()?
3401 .pci_cfg_read_with_routing(secondary_bus, target_bus, function, offset, value),
3402 )
3403 }
3404
3405 fn pci_cfg_write_with_routing(
3406 &mut self,
3407 secondary_bus: u8,
3408 target_bus: u8,
3409 function: u8,
3410 offset: u16,
3411 value: u32,
3412 ) -> Option<chipset_device::io::IoResult> {
3413 Some(
3414 self.0
3415 .upgrade()?
3416 .lock()
3417 .supports_pci()?
3418 .pci_cfg_write_with_routing(secondary_bus, target_bus, function, offset, value),
3419 )
3420 }
3421}