1#![cfg(all(guest_is_native, target_os = "linux"))]
9
10mod devmsr;
11
12cfg_if::cfg_if!(
13 if #[cfg(guest_arch = "x86_64")] {
14 mod cvm_cpuid;
15 pub use processor::snp::SnpBacked;
16 pub use processor::tdx::TdxBacked;
17 use crate::processor::HardwareIsolatedBacking;
18 pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
19 use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
20 use bitvec::prelude::BitArray;
21 use bitvec::prelude::Lsb0;
22 use devmsr::MsrDevice;
23 use hv1_emulator::hv::ProcessorVtlHv;
24 use processor::LapicState;
25 use processor::snp::SnpBackedShared;
26 use processor::tdx::TdxBackedShared;
27 use std::arch::x86_64::CpuidResult;
28 use virt::CpuidLeaf;
29 use virt::state::StateElement;
30 use virt::vp::MpState;
31 type IrrBitmap = BitArray<[u32; 8], Lsb0>;
34 } else if #[cfg(guest_arch = "aarch64")] {
35 pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
36 use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
37 }
38);
39
40mod processor;
41pub use processor::Backing;
42pub use processor::UhProcessor;
43
44use anyhow::Context as AnyhowContext;
45use bitfield_struct::bitfield;
46use bitvec::boxed::BitBox;
47use bitvec::vec::BitVec;
48use cvm_tracing::CVM_ALLOWED;
49use guestmem::GuestMemory;
50use guestmem::GuestMemoryBackingError;
51use hcl::GuestVtl;
52use hcl::ioctl::Hcl;
53use hcl::ioctl::SetVsmPartitionConfigError;
54use hv1_emulator::hv::GlobalHv;
55use hv1_emulator::message_queues::MessageQueues;
56use hv1_emulator::synic::GlobalSynic;
57use hv1_emulator::synic::SintProxied;
58use hv1_structs::VtlArray;
59use hvdef::GuestCrashCtl;
60use hvdef::HV_PAGE_SHIFT;
61use hvdef::HV_PAGE_SIZE;
62use hvdef::HV_PAGE_SIZE_USIZE;
63use hvdef::HvError;
64use hvdef::HvMapGpaFlags;
65use hvdef::HvPartitionPrivilege;
66use hvdef::HvRegisterName;
67use hvdef::HvRegisterVsmPartitionConfig;
68use hvdef::HvRegisterVsmPartitionStatus;
69use hvdef::Vtl;
70use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
71use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
72use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
73use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
74use hvdef::hypercall::HostVisibilityType;
75use hvdef::hypercall::HvGuestOsId;
76use hvdef::hypercall::HvInputVtl;
77use hvdef::hypercall::HvInterceptParameters;
78use hvdef::hypercall::HvInterceptType;
79use inspect::Inspect;
80use inspect::InspectMut;
81use memory_range::MemoryRange;
82use pal::unix::affinity;
83use pal::unix::affinity::CpuSet;
84use pal_async::driver::Driver;
85use pal_async::driver::SpawnDriver;
86use pal_uring::IdleControl;
87use parking_lot::Mutex;
88use parking_lot::RwLock;
89use processor::BackingSharedParams;
90use processor::SidecarExitReason;
91use sidecar_client::NewSidecarClientError;
92use std::collections::HashMap;
93use std::ops::RangeInclusive;
94use std::os::fd::AsRawFd;
95use std::sync::Arc;
96use std::sync::Weak;
97use std::sync::atomic::AtomicBool;
98use std::sync::atomic::AtomicU8;
99use std::sync::atomic::AtomicU32;
100use std::sync::atomic::AtomicU64;
101use std::sync::atomic::Ordering;
102use std::task::Waker;
103use thiserror::Error;
104use user_driver::DmaClient;
105use virt::IsolationType;
106use virt::PartitionCapabilities;
107use virt::VpIndex;
108use virt::X86Partition;
109use virt::irqcon::IoApicRouting;
110use virt::irqcon::MsiRequest;
111use virt::x86::apic_software_device::ApicSoftwareDevices;
112use virt_support_apic::LocalApicSet;
113use vm_topology::memory::MemoryLayout;
114use vm_topology::processor::ProcessorTopology;
115use vm_topology::processor::TargetVpInfo;
116use vmcore::monitor::MonitorPage;
117use vmcore::reference_time::GetReferenceTime;
118use vmcore::reference_time::ReferenceTimeResult;
119use vmcore::reference_time::ReferenceTimeSource;
120use vmcore::vmtime::VmTimeSource;
121use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
122use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
123use x86defs::tdx::TdCallResult;
124use zerocopy::FromBytes;
125use zerocopy::FromZeros;
126use zerocopy::Immutable;
127use zerocopy::IntoBytes;
128use zerocopy::KnownLayout;
129
130#[derive(Error, Debug)]
132#[expect(missing_docs)]
133pub enum Error {
134 #[error("hcl error")]
135 Hcl(#[source] hcl::ioctl::Error),
136 #[error("failed to open sidecar client")]
137 Sidecar(#[source] NewSidecarClientError),
138 #[error("failed to install {0:?} intercept: {1:?}")]
139 InstallIntercept(HvInterceptType, HvError),
140 #[error("failed to query hypervisor register {0:#x?}")]
141 Register(HvRegisterName, #[source] HvError),
142 #[error("failed to set vsm partition config register")]
143 VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
144 #[error("failed to create virtual device")]
145 NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
146 #[error("failed to create cpuid tables for cvm")]
147 #[cfg(guest_arch = "x86_64")]
148 CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
149 #[error("failed to update hypercall msr")]
150 UpdateHypercallMsr,
151 #[error("failed to update reference tsc msr")]
152 UpdateReferenceTsc,
153 #[error("failed to map overlay page")]
154 MapOverlay(#[source] std::io::Error),
155 #[error("failed to allocate shared visibility pages for overlay")]
156 AllocateSharedVisOverlay(#[source] anyhow::Error),
157 #[error("failed to open msr device")]
158 OpenMsr(#[source] std::io::Error),
159 #[error("cpuid did not contain valid TSC frequency information")]
160 BadCpuidTsc,
161 #[error("failed to read tsc frequency")]
162 ReadTscFrequency(#[source] std::io::Error),
163 #[error(
164 "tsc frequency mismatch between hypervisor ({hv}) and hardware {hw}, exceeds allowed error {allowed_error}"
165 )]
166 TscFrequencyMismatch {
167 hv: u64,
168 hw: u64,
169 allowed_error: u64,
170 },
171 #[error("failed to set vsm partition config: {0:?}")]
172 FailedToSetL2Ctls(TdCallResult),
173 #[error("debugging is configured but the binary does not have the gdb feature")]
174 InvalidDebugConfiguration,
175 #[error("failed to allocate TLB flush page")]
176 AllocateTlbFlushPage(#[source] anyhow::Error),
177 #[error("host does not support required cpu capabilities")]
178 Capabilities(virt::PartitionCapabilitiesError),
179 #[error("failed to get register")]
180 GetReg(#[source] hcl::ioctl::register::GetRegError),
181 #[error("failed to set register")]
182 SetReg(#[source] hcl::ioctl::register::SetRegError),
183}
184
/// Errors from revoking guest VSM (VTL 1) availability.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::register::SetRegError),
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}
194
/// Public partition handle; shared state lives in [`UhPartitionInner`].
#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    // Per-VTL interrupt targets (VTL0 and VTL1).
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}
204
/// Partition state shared between [`UhPartition`] and all VPs.
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    #[inspect(skip)]
    hcl: Hcl,
    /// Per-VP state, indexed by VP index.
    #[inspect(skip)]
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    // Inspected through `inspect_extra` rather than directly.
    #[inspect(skip)]
    enter_modes: Mutex<EnterModes>,
    /// Copy of `enter_modes`, stored in the `hcl::protocol` encoding
    /// (see `inspect_extra`, which keeps it in sync on update).
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    /// Guest memory accessors, per VTL.
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    /// DMA block backing the monitor page when it was allocated by
    /// `allocate_monitor_page` rather than supplied via `set_monitor_page`.
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    /// Vectors registered by devices; copied into each VP's proxy IRR
    /// filter (see `fill_device_vectors`).
    #[cfg(guest_arch = "x86_64")]
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}
246
/// Backing-specific partition-wide state, keyed by isolation type.
#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}
256
impl BackingShared {
    /// Constructs the shared backing state matching `isolation`:
    /// hypervisor-backed for `None`/`Vbs`, SNP/TDX for hardware isolation.
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                // CVM state is only meaningful for hardware isolation.
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            // SNP/TDX are unreachable on non-x86 builds.
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    /// Returns the CVM partition state, present only for the
    /// hardware-isolated (SNP/TDX) backings.
    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    /// Returns the untrusted synic emulator, if one is configured
    /// (TDX backing only).
    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}
305
/// First and second choice of VTL entry mode (see [`EnterMode`]).
#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}
313
314impl Default for EnterModes {
315 fn default() -> Self {
316 Self {
317 first: EnterMode::Fast,
318 second: EnterMode::IdleToVtl0,
319 }
320 }
321}
322
323impl From<EnterModes> for hcl::protocol::EnterModes {
324 fn from(value: EnterModes) -> Self {
325 Self::new()
326 .with_first(value.first.into())
327 .with_second(value.second.into())
328 }
329}
330
/// A VTL entry mode; mirrors `hcl::protocol::EnterMode` (see the `From`
/// conversion below).
#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}
337
338impl From<EnterMode> for hcl::protocol::EnterMode {
339 fn from(value: EnterMode) -> Self {
340 match value {
341 EnterMode::Fast => Self::FAST,
342 EnterMode::PlayIdle => Self::PLAY_IDLE,
343 EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
344 }
345 }
346}
347
/// Per-VP state tracked once the guest enables VSM (VTL 1).
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    /// Pending VTL0 exception event, if any.
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    /// VTL 1's secure register intercept configuration.
    reg_intercept: SecureRegisterInterceptState,
}
357
#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    /// Initial VTL 1 VP state: no exit event pending and no secure
    /// register intercepts configured.
    fn new() -> Self {
        Self {
            vtl0_exit_pending_event: None,
            reg_intercept: SecureRegisterInterceptState::default(),
        }
    }
}
367
/// Per-VP state for hardware-isolated (CVM) partitions.
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
    /// Allocation handle for the direct overlay pages.
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    /// Exit VTL tracking; initialized to VTL0 in [`Self::new`].
    exit_vtl: GuestVtl,
    /// Per-VTL emulated hypervisor state.
    hv: VtlArray<ProcessorVtlHv, 2>,
    /// Per-VTL emulated local APIC state.
    lapics: VtlArray<LapicState, 2>,
    /// Extra state created once the guest enables VTL 1.
    vtl1: Option<GuestVsmVpState>,
}
384
#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    /// Creates a new CVM VP state.
    ///
    /// Allocates `overlay_pages_required` shared-visibility pages for the
    /// direct overlays and builds per-VTL APIC and hypervisor state.
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        // Both VTLs start from the architectural APIC reset state.
        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            lapic.set_apic_base(apic_base).unwrap();
            // Non-BSP VTL0 VPs wait for SIPI; the BSP and VTL1 VPs start
            // out running.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}
428
/// VTL 1's configuration of intercepts on VTL 0 register accesses.
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    ia32_misc_enable_mask: u64,
}
442
/// Bookkeeping for a proxy-redirected device interrupt vector.
#[derive(Clone, Inspect)]
struct ProxyRedirectVectorInfo {
    device_id: u64,
    /// The vector before redirection.
    original_vector: u32,
}
452
/// Partition-wide state for hardware-isolated (CVM) partitions.
#[derive(Inspect)]
struct UhCvmPartitionState {
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    /// Bitmap of VPs holding the TLB lock, per VTL.
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    /// CVM-specific per-VP state, indexed by VP index.
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    /// Guest memory accessor for shared memory.
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// Emulated local APIC sets, per VTL.
    lapic: VtlArray<LocalApicSet, 2>,
    /// Emulated hypervisor state shared across both VTLs.
    hv: GlobalHv<2>,
    /// Guest VSM (VTL 1) enablement state.
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    /// Allocator for host-visible (shared) DMA pages.
    shared_dma_client: Arc<dyn DmaClient>,
    /// Allocator for guest-private DMA pages.
    private_dma_client: Arc<dyn DmaClient>,
    hide_isolation: bool,
    proxy_interrupt_redirect: bool,
}
482
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    /// Returns the CVM per-VP state for `vp_index`.
    ///
    /// Panics if `vp_index` is out of range.
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        &self.vps[vp_index as usize]
    }

    /// Returns true if VTL 1 is enabled and has configured denial of
    /// lower-VTL startup.
    fn is_lower_vtl_startup_denied(&self) -> bool {
        matches!(
            *self.guest_vsm.read(),
            GuestVsmState::Enabled {
                vtl1: CvmVtl1State {
                    deny_lower_vtl_startup: true,
                    ..
                }
            }
        )
    }
}
501
/// CVM per-VP state shared between the partition and the VP.
#[derive(Inspect)]
struct UhCvmVpInner {
    /// TLB lock bookkeeping, per VTL.
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    /// Whether VTL 1 enablement has been requested for this VP.
    vtl1_enable_called: Mutex<bool>,
    /// Whether this VP has been started.
    started: AtomicBool,
    /// Pending StartVp/EnableVpVtl context, per VTL.
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
    /// Active proxy-redirected interrupts for this VP.
    #[inspect(with = "|x| inspect::adhoc(|req| inspect::iter_by_key(&*x.lock()).inspect(req))")]
    proxy_redirect_interrupts: Mutex<HashMap<u32, ProxyRedirectVectorInfo>>,
}
518
/// Lifecycle of guest VSM (VTL 1) support for the partition.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
enum GuestVsmState<T: Inspect> {
    /// The platform cannot support guest VSM (or support was revoked).
    NotPlatformSupported,
    /// Supported, but the guest has not enabled it yet.
    NotGuestEnabled,
    /// The guest has enabled VTL 1.
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}
531
532impl<T: Inspect> GuestVsmState<T> {
533 pub fn from_availability(guest_vsm_available: bool) -> Self {
534 if guest_vsm_available {
535 GuestVsmState::NotGuestEnabled
536 } else {
537 GuestVsmState::NotPlatformSupported
538 }
539 }
540}
541
/// VTL 1 state for a hardware-isolated partition, created once the guest
/// enables guest VSM.
#[derive(Inspect)]
struct CvmVtl1State {
    /// Whether VTL 1 has been enabled on any VP.
    enabled_on_any_vp: bool,
    /// Whether guest memory should be zeroed on reset.
    zero_memory_on_reset: bool,
    /// Whether VTL 1 denies lower-VTL VP startup.
    deny_lower_vtl_startup: bool,
    /// Whether MBEC (mode-based execution control) is enabled.
    pub mbec_enabled: bool,
    /// Whether shadow supervisor stacks are enabled.
    pub shadow_supervisor_stack_enabled: bool,
    /// I/O port read intercepts (one bit per 16-bit port).
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    /// I/O port write intercepts (one bit per 16-bit port).
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}
559
560#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
561impl CvmVtl1State {
562 fn new(mbec_enabled: bool) -> Self {
563 Self {
564 enabled_on_any_vp: false,
565 zero_memory_on_reset: false,
566 deny_lower_vtl_startup: false,
567 mbec_enabled,
568 shadow_supervisor_stack_enabled: false,
569 io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
570 io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
571 }
572 }
573}
574
/// A reference-time source backed by the processor's TSC.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    /// 64.64 fixed-point multiplier from TSC ticks to 100ns units.
    tsc_scale: u64,
}
579
580#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
581impl TscReferenceTimeSource {
582 fn new(tsc_frequency: u64) -> Self {
583 TscReferenceTimeSource {
584 tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
585 }
586 }
587}
588
impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            // ref_time = tsc * tsc_scale, taking the high 64 bits of the
            // 64.64 fixed-point product.
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                // The TSC alone gives no wall-clock association.
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}
608
impl virt::irqcon::ControlGic for UhPartitionInner {
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        // Forward the SPI level change to the hypervisor as a fixed
        // interrupt targeting VTL0. Failures are logged (rate-limited)
        // rather than propagated; the trait has no error path.
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}
628
impl virt::Aarch64Partition for UhPartition {
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        // Only VTL0 interrupt control is supported.
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}
635
/// A transferable per-VP handle, used to construct the runnable
/// [`UhProcessor`] on the thread that will run it (see `bind_processor`).
pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}
645
impl UhProcessorBox {
    /// Returns the VP's index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    /// Returns whether the sidecar kernel is enabled for this partition.
    pub fn sidecar_enabled(&self) -> bool {
        self.partition.hcl.sidecar_enabled()
    }

    /// Returns the sidecar base CPU for this VP, if any.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    /// Binds this VP to the current thread, producing the runnable
    /// [`UhProcessor`].
    ///
    /// When `control` is supplied, the calling thread must already be
    /// affinitized to the VP's CPU (asserted below), and the HCL poll file
    /// is wired to the idle control's ring fd.
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            // Idle polling only works when the thread is pinned to the
            // VP's CPU.
            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    /// Records that the sidecar VP should exit because `task` needs to run
    /// on its CPU.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}
706
/// Partition-resident per-VP state shared with the VP's run loop.
#[derive(Debug, Inspect)]
struct UhVpInner {
    /// Pending wake reasons for the VP.
    wake_reasons: AtomicU64,
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    /// Queued synic messages, per VTL.
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    /// The CPU index associated with this VP (used for HCL poll setup).
    cpu_index: u32,
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}
721
impl UhVpInner {
    /// Returns the VP's index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}
727
/// The operation that supplied an initial VP context.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
    /// The VP is being started.
    StartVp,
    /// A higher VTL is being enabled on the VP.
    EnableVpVtl,
}
737
/// A deferred StartVp/EnableVpVtl request plus its initial VP context.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
    /// Which operation supplied the context.
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}
748
/// TLB-lock tracking between this VP and its peers.
#[derive(Debug, Inspect)]
struct TlbLockInfo {
    /// VPs blocked on this VP (bit per VP index).
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    /// VPs this VP is blocked on (bit per VP index).
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    /// Count of set bits in `blocking_vps`, kept as a fast-path counter.
    blocking_vp_count: AtomicU32,
    /// Whether this VP is sleeping while blocked.
    sleeping: AtomicBool,
}
765
766#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
767impl TlbLockInfo {
768 fn new(vp_count: usize) -> Self {
769 Self {
770 blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
771 blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
772 blocking_vp_count: AtomicU32::new(0),
773 sleeping: false.into(),
774 }
775 }
776}
777
/// Bitfield of reasons a VP should wake and service work.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}
789
impl WakeReason {
    // Single-bit masks used with UhVpInner::wake.
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true);
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}
800
/// Exit activity flags (currently just a pending-event bit).
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}
808
impl UhPartition {
    /// Revokes guest VSM support, moving the partition to
    /// `NotPlatformSupported`.
    ///
    /// Fails with [`RevokeGuestVsmError::Vtl1AlreadyEnabled`] if the guest
    /// has already enabled VTL 1.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                // Also clear the hypervisor's guest VSM partition config.
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current reference time.
    pub fn reference_time(&self) -> u64 {
        // Prefer the emulated hypervisor's time source when present (CVM);
        // otherwise read from the real hypervisor via the HCL.
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}
851
impl virt::Partition for UhPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        // Partition reset is not supported by this backend.
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    fn request_yield(&self, _vp_index: VpIndex) {
        // Not used by this backend.
        unimplemented!()
    }
}
870
871impl X86Partition for UhPartition {
872 fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
873 self.inner.clone()
874 }
875
876 fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
877 let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
878 if let Some(apic) = &self.inner.lapic(vtl) {
879 apic.lint(vp_index, lint.into(), |vp_index| {
880 self.inner
881 .vp(vp_index)
882 .unwrap()
883 .wake(vtl, WakeReason::INTCON);
884 });
885 } else if lint == 0 {
886 self.inner
887 .vp(vp_index)
888 .unwrap()
889 .wake(vtl, WakeReason::EXTINT);
890 } else {
891 unimplemented!()
892 }
893 }
894}
895
impl UhPartitionInner {
    /// Returns the per-VP state for `index`, if in range.
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    /// Returns the emulated local APIC set for `vtl` (CVM only).
    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    /// Returns the emulated hypervisor state (CVM only).
    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    /// Records `device_vector` in the partition-wide device vector table
    /// and wakes every other VP to refresh its proxy IRR filter.
    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        // Add the vector to the global table; scope the write lock tightly.
        {
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        // The requesting VP handles itself; wake everyone else.
        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    /// Sets every registered device vector bit in `irr_vectors`.
    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    /// Extra inspect state: exposes `enter_modes` mutably, propagating
    /// updates to `enter_modes_atomic` and waking all VPs to observe them.
    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        // Wake VPs after the lock above has been released.
        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    /// Queries the hypervisor's VSM partition status register.
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(
        &self,
    ) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::register::GetRegError> {
        self.hcl.get_vsm_partition_status()
    }
}
984
impl virt::Synic for UhPartition {
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        // Drop messages to invalid VP targets with a rate-limited warning
        // instead of panicking.
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        // Parameters are behind a mutex so set_target_vp can retarget the
        // port after creation.
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}
1031
1032impl virt::SynicMonitor for UhPartition {
1033 fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
1034 let mut allocated_block = self.inner.allocated_monitor_page.lock();
1036 let old_gpa = self.inner.monitor_page.set_gpa(gpa);
1037
1038 let allocated_page = allocated_block.take();
1040 if let Some(old_gpa) = old_gpa {
1041 let allocated_gpa = allocated_page
1042 .as_ref()
1043 .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);
1044
1045 let result = if allocated_gpa == Some(old_gpa) {
1048 let vtl = GuestVtl::try_from(vtl).unwrap();
1049 self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
1050 } else {
1051 self.inner
1052 .hcl
1053 .modify_vtl_protection_mask(
1054 MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1055 hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1056 HvInputVtl::CURRENT_VTL,
1057 )
1058 .map_err(|err| anyhow::anyhow!(err))
1059 };
1060
1061 result
1062 .context("failed to unregister old monitor page")
1063 .inspect_err(|_| {
1064 self.inner.monitor_page.set_gpa(None);
1066 })?;
1067
1068 tracing::debug!(old_gpa, "unregistered monitor page");
1069 }
1070
1071 if let Some(gpa) = gpa {
1072 self.inner
1075 .hcl
1076 .modify_vtl_protection_mask(
1077 MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
1078 HvMapGpaFlags::new().with_readable(true),
1079 HvInputVtl::CURRENT_VTL,
1080 )
1081 .context("failed to register monitor page")
1082 .inspect_err(|_| {
1083 self.inner.monitor_page.set_gpa(None);
1085 })?;
1086
1087 tracing::debug!(gpa, "registered monitor page");
1088 }
1089
1090 Ok(())
1091 }
1092
1093 fn register_monitor(
1094 &self,
1095 monitor_id: vmcore::monitor::MonitorId,
1096 connection_id: u32,
1097 ) -> Box<dyn Sync + Send> {
1098 self.inner
1099 .monitor_page
1100 .register_monitor(monitor_id, connection_id)
1101 }
1102
1103 fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
1104 let vtl = GuestVtl::try_from(vtl).unwrap();
1105
1106 let Some(state) = self.inner.backing_shared.cvm_state() else {
1108 return Ok(None);
1109 };
1110
1111 let mut allocated_block = self.inner.allocated_monitor_page.lock();
1112 if let Some(block) = allocated_block.as_ref() {
1113 let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
1115 assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
1116 return Ok(Some(gpa));
1117 }
1118
1119 let block = state
1120 .private_dma_client
1121 .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
1122 .context("failed to allocate monitor page")?;
1123
1124 let gpn = block.pfns()[0];
1125 *allocated_block = Some(block);
1126 let gpa = gpn << HV_PAGE_SHIFT;
1127 let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
1128 if let Some(old_gpa) = old_gpa {
1129 self.inner
1132 .hcl
1133 .modify_vtl_protection_mask(
1134 MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1135 hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1136 HvInputVtl::CURRENT_VTL,
1137 )
1138 .context("failed to unregister old monitor page")
1139 .inspect_err(|_| {
1140 self.inner.monitor_page.set_gpa(None);
1142 })?;
1143
1144 tracing::debug!(old_gpa, "unregistered monitor page");
1145 }
1146
1147 self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
1150 .context("failed to unregister monitor page")
1151 .inspect_err(|_| {
1152 self.inner.monitor_page.set_gpa(None);
1154 })?;
1155
1156 tracing::debug!(gpa, "registered allocated monitor page");
1157
1158 Ok(Some(gpa))
1159 }
1160}
1161
impl UhPartitionInner {
    /// Returns a callback that injects a synic interrupt on `vp_index`
    /// through the emulated APIC, waking the VP if necessary.
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    /// No-op on aarch64, which has no emulated local APIC here.
    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}
1190
/// An event port created via [`virt::Synic::new_guest_event_port`].
#[derive(Debug)]
struct UhEventPort {
    // Weak so an outstanding port does not keep the partition alive.
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}
1196
/// The current target of a [`UhEventPort`]; mutable via `set_target_vp`.
#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}
1204
impl vmcore::synic::GuestEventPort for UhEventPort {
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        // Capture the weak partition and shared params so the returned
        // interrupt stays valid across retargeting and partition teardown.
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            // Snapshot the current target under the lock.
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            // Partition gone: silently drop the signal.
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        // The SINT is proxied: deliver through the
                        // untrusted synic if one exists, else directly via
                        // the HCL.
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag)
                        }
                    }
                }
            } else {
                // No emulated hypervisor: signal through the real one.
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}
1260
1261impl virt::Hv1 for UhPartition {
1262 type Error = Error;
1263 type Device = virt::x86::apic_software_device::ApicSoftwareDevice;
1264
1265 fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
1266 Some(if let Some(hv) = self.inner.hv() {
1267 hv.ref_time_source().clone()
1268 } else {
1269 ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
1270 })
1271 }
1272
1273 fn new_virtual_device(
1274 &self,
1275 ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
1276 self.inner.software_devices.is_some().then_some(self)
1277 }
1278}
1279
1280impl GetReferenceTime for UhPartitionInner {
1281 fn now(&self) -> ReferenceTimeResult {
1282 ReferenceTimeResult {
1283 ref_time: self.hcl.reference_time().unwrap(),
1284 system_time: None,
1285 }
1286 }
1287}
1288
1289impl virt::DeviceBuilder for UhPartition {
1290 fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
1291 let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
1292 let device = self
1293 .inner
1294 .software_devices
1295 .as_ref()
1296 .expect("checked in new_virtual_device")
1297 .new_device(self.interrupt_targets[vtl].clone(), device_id)
1298 .map_err(Error::NewDevice)?;
1299
1300 Ok(device)
1301 }
1302}
1303
/// MSI signaling target for a specific VTL of a partition.
struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}
1308
1309impl pci_core::msi::SignalMsi for UhInterruptTarget {
1310 fn signal_msi(&self, _rid: u32, address: u64, data: u32) {
1311 self.partition
1312 .request_msi(self.vtl, MsiRequest { address, data });
1313 }
1314}
1315
impl UhPartitionInner {
    /// Delivers an MSI request either through the emulated local APIC (when
    /// this partition has one for `vtl`) or by asking the hypervisor to
    /// assert the interrupt.
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                // Delivery failure is non-fatal: log (rate limited) and drop
                // the interrupt.
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}
1342
1343impl IoApicRouting for UhPartitionInner {
1344 fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
1345 self.irq_routes.set_irq_route(irq, request)
1346 }
1347
1348 fn assert_irq(&self, irq: u8) {
1350 self.irq_routes
1351 .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
1352 }
1353}
1354
1355#[cfg(target_arch = "x86_64")]
1357fn is_restore_partition_time_available() -> bool {
1358 let result =
1359 safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION, 0);
1360 let enlightenment_info = hvdef::HvEnlightenmentInformation::from(
1361 result.eax as u128
1362 | (result.ebx as u128) << 32
1363 | (result.ecx as u128) << 64
1364 | (result.edx as u128) << 96,
1365 );
1366 enlightenment_info.restore_time_on_resume()
1367}
#[cfg(not(target_arch = "x86_64"))]
fn is_restore_partition_time_available() -> bool {
    // The enlightenment CPUID leaf queried above is x86-only, so restore
    // partition time is never reported as available elsewhere.
    false
}
1374
1375fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
1378 let caps = hcl.get_vsm_capabilities().map_err(Error::GetReg)?;
1380 let hardware_isolated = hcl.isolation().is_hardware_isolated();
1381 let isolated = hcl.isolation().is_isolated();
1382 let config = HvRegisterVsmPartitionConfig::new()
1383 .with_default_vtl_protection_mask(0xF)
1384 .with_enable_vtl_protection(!hardware_isolated)
1385 .with_zero_memory_on_reset(!hardware_isolated)
1386 .with_intercept_cpuid_unimplemented(!hardware_isolated)
1387 .with_intercept_page(caps.intercept_page_available())
1388 .with_intercept_unrecoverable_exception(true)
1389 .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
1390 .with_intercept_acceptance(isolated)
1391 .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
1392 .with_intercept_system_reset(caps.intercept_system_reset_available())
1393 .with_intercept_restore_partition_time(is_restore_partition_time_available());
1394
1395 hcl.set_vtl2_vsm_partition_config(config)
1396 .map_err(Error::SetReg)
1397}
1398
/// Early parameters for constructing a [`UhProtoPartition`].
pub struct UhPartitionNewParams<'a> {
    /// The isolation type of the partition.
    pub isolation: IsolationType,
    /// Whether to hide the isolation type from the guest (affects cpuid and
    /// capability construction).
    pub hide_isolation: bool,
    /// The memory layout of the lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The processor topology of the partition.
    pub topology: &'a ProcessorTopology,
    /// The SNP cpuid pages; required (unwrapped) when isolation is SNP.
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The SNP secrets page, used to program the register tweak bitmap.
    pub snp_secrets: Option<&'a [u8]>,
    /// The virtual top of memory; required (unwrapped) for SNP/TDX cpuid
    /// construction.
    pub vtom: Option<u64>,
    /// Whether to register the unknown-synic-connection intercept so VTL2
    /// handles synic messages (not used for TDX).
    pub handle_synic: bool,
    /// Whether to disable sidecar hotplug.
    pub no_sidecar_hotplug: bool,
    /// Whether MMIO read/write hypercalls are allowed from the guest.
    pub use_mmio_hypercalls: bool,
    /// Whether to intercept debug exceptions; requires the `gdb` feature.
    pub intercept_debug_exceptions: bool,
    /// Disables proxy interrupt redirect even if the hypervisor supports it.
    pub disable_proxy_redirect: bool,
    /// Disables lower VTL timer virtualization even if supported.
    pub disable_lower_vtl_timer_virt: bool,
}
1437
/// Late parameters for finishing partition construction in
/// [`UhProtoPartition::build`].
pub struct UhLateParams<'a> {
    /// Guest memory accessors, one per VTL.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL0 kernel-mode instruction fetches.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL0 user-mode instruction fetches.
    pub vtl0_user_exec_gm: GuestMemory,
    /// Extra cpuid leaves merged on top of the computed results.
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// Channel used to notify of guest crashes.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// CVM-specific parameters; required for hardware-isolated partitions.
    pub cvm_params: Option<CvmLateParams>,
    /// Whether a vmbus relay is configured.
    pub vmbus_relay: bool,
}
1459
/// Additional late parameters for hardware-isolated (CVM) partitions.
pub struct CvmLateParams {
    /// Guest memory accessor for host-shared memory.
    pub shared_gm: GuestMemory,
    /// The protector managing isolated memory visibility and protections.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// DMA client for host-shared allocations.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// DMA client for private (encrypted) allocations.
    pub private_dma_client: Arc<dyn DmaClient>,
}
1471
/// Identifies the backing of a guest page number used as an overlay page.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The page is backed by regular guest memory.
    GuestMemory,
    /// The page is backed by a DMA allocation.
    Dma,
}
1480
/// Interface implemented by hardware-isolated backings to manage visibility
/// and protections of isolated guest memory.
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility (shared vs. private) for the given pages.
    /// On failure, returns the error and the number of pages processed.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries the current host visibility of the given pages, writing the
    /// result into `host_visibility`.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Returns the default protections applied to VTL0 memory.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections applied to `target_vtl`'s memory.
    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Changes the protections on the given pages for `target_vtl`.
    /// On failure, returns the error and the number of pages processed.
    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Registers `gpn` as an overlay page for `vtl`, checking `check_perms`
    /// and optionally applying `new_perms` to the page.
    // NOTE(review): exact check/apply semantics are implementation-defined;
    // confirm with implementors.
    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Unregisters a previously registered overlay page.
    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Returns whether `gpn` is currently registered as an overlay page for
    /// `vtl`.
    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    /// Locks the given pages for `vtl`.
    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    /// Releases a previous [`Self::lock_gpns`] on the given pages.
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Records that VTL1 protections have been enabled.
    fn set_vtl1_protections_enabled(&self);

    /// Returns whether VTL1 protections have been enabled.
    fn vtl1_protections_enabled(&self) -> bool;
}
1563
/// Provides access to TLB flush and TLB lock machinery while memory
/// protections are being changed.
pub trait TlbFlushLockAccess {
    /// Flushes the TLB for the given VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flushes the entire TLB.
    fn flush_entire(&mut self);

    /// Waits on the TLB locks held for the given VTL.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}
1575
/// A partially constructed partition, created before the late parameters are
/// known and consumed by [`UhProtoPartition::build`].
pub struct UhProtoPartition<'a> {
    /// The early construction parameters.
    params: UhPartitionNewParams<'a>,
    /// The open HCL driver handle.
    hcl: Hcl,
    /// Whether Guest VSM (VTL1) is available to the guest.
    guest_vsm_available: bool,
    /// Whether the partition has the create-partitions privilege.
    create_partition_available: bool,
    /// Precomputed CVM cpuid results (empty for non-hardware-isolated).
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}
1586
impl<'a> UhProtoPartition<'a> {
    /// Creates a proto partition: opens the HCL driver, restricts the allowed
    /// hypercalls, applies the VTL2 VSM partition configuration, and (on
    /// x86-64) builds the CVM cpuid results.
    ///
    /// `driver(cpu)` returns the spawner used for driving work affinitized to
    /// `cpu`.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Restrict the set of hypercalls that can be issued through this
        // partition to the ones VTL2 actually needs.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let privs = hcl
            .get_privileges_and_features_info()
            .map_err(Error::GetReg)?;
        let guest_vsm_available = Self::check_guest_vsm_support(privs, &hcl)?;

        // Build the cpuid results for hardware-isolated partitions; VBS and
        // non-isolated partitions start from an empty leaf set.
        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            create_partition_available: privs.create_partitions(),
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    /// Returns whether Guest VSM (VTL1) is available to the guest.
    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    /// Returns whether the partition has the create-partitions privilege.
    pub fn create_partition_available(&self) -> bool {
        self.create_partition_available
    }

    /// Consumes the proto partition and the late parameters, producing the
    /// final partition and its per-processor boxes.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            create_partition_available: _,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        // Debug-exception interception is a debugging aid and is only allowed
        // with the gdb feature compiled in.
        if params.intercept_debug_exceptions {
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    // TDX partitions skip the hypervisor intercept here
                    // (presumably handled by the TDX backing — see TdxBacked).
                    if isolation != IsolationType::Tdx {
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                // Intercept MSR accesses and APIC EOI writes.
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // Intercept the SEV #VC exception.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(
                    x86defs::Exception::SEV_VMM_COMMUNICATION.0 as u16,
                ),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Program the register tweak bitmap from the SNP secrets page,
            // when one was provided.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // The CPU index tracks the VP index.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Software APIC devices are used when the hypervisor cannot retarget
        // device interrupts with unknown device IDs itself.
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                // The hypervisor rejected the intercept; run without software
                // device support.
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {
            supports_aarch32_el0: false,
        };

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        // Handle unknown synic connections in VTL2; not used for TDX.
        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        // Hardware-isolated partitions carry additional CVM state.
        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            let vsm_caps = hcl.get_vsm_capabilities().map_err(Error::GetReg)?;
            let proxy_interrupt_redirect_available =
                vsm_caps.proxy_interrupt_redirect_available() && !params.disable_proxy_redirect;

            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
                proxy_interrupt_redirect_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let lower_vtl_timer_virt_available =
            hcl.supports_lower_vtl_timer_virt() && !params.disable_lower_vtl_timer_virt;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
                lower_vtl_timer_virt_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            // Start with all IO ports intercepted.
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}
1966
impl UhPartition {
    /// Gets the guest OS ID for VTL0, from the hv1 emulator when present or
    /// from the hypervisor otherwise.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, hcl::ioctl::register::GetRegError> {
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            self.inner.hcl.get_guest_os_id(GuestVtl::Vtl0)?
        };
        Ok(id)
    }

    /// Removes the IO port intercept for `range` so guest accesses go
    /// directly to the host. Dropping the returned handle restores the
    /// intercept.
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        // Not supported on hardware-isolated partitions.
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Pulses LINT1 on the BSP of the given VTL to inject a debug interrupt.
    /// Logged and dropped for SNP; a no-op on aarch64.
    pub fn assert_debug_interrupt(&self, _vtl: u8) {
        #[cfg(guest_arch = "x86_64")]
        const LINT_INDEX_1: u8 = 1;
        #[cfg(guest_arch = "x86_64")]
        match self.inner.isolation {
            IsolationType::Snp => {
                tracing::error!(?_vtl, "Debug interrupts cannot be injected into SNP VMs",);
            }
            _ => {
                let bsp_index = VpIndex::new(0);
                self.pulse_lint(bsp_index, Vtl::try_from(_vtl).unwrap(), LINT_INDEX_1)
            }
        }
    }

    /// Enables (or disables, when `port` is `None`) the PM timer assist.
    pub fn set_pm_timer_assist(
        &self,
        port: Option<u16>,
    ) -> Result<(), hcl::ioctl::register::SetRegError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    /// Registers a DMA-backed overlay page with the isolated memory
    /// protector, applying `new_perms` to it.
    ///
    /// Only reachable for hardware-isolated (SNP/TDX) partitions.
    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    // No permissions are checked on the DMA page.
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            // Callers must not reach here for hypervisor-backed partitions.
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    /// Unregisters an overlay page previously registered with
    /// `register_cvm_dma_overlay_page`.
    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            // Callers must not reach here for hypervisor-backed partitions.
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}
2118
impl UhProtoPartition<'_> {
    /// Returns whether Guest VSM (VTL1) is available: the partition must hold
    /// the AccessVsm privilege and the hypervisor must report a maximum VTL
    /// of at least 1.
    fn check_guest_vsm_support(privs: HvPartitionPrivilege, hcl: &Hcl) -> Result<bool, Error> {
        if !privs.access_vsm() {
            return Ok(false);
        }

        let guest_vsm_config = hcl
            .get_guest_vsm_partition_config()
            .map_err(Error::GetReg)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    /// Constructs the shared CVM partition state: per-VP inner state, TLB
    /// lock tracking, the emulated local APICs, and the hv1 emulator backed
    /// by the TSC.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
        proxy_interrupt_redirect_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                // Only VP 0 (the BSP) starts in the started state.
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
                proxy_redirect_interrupts: Mutex::new(HashMap::new()),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        // Reference time is derived from the TSC at the validated frequency.
        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
            proxy_interrupt_redirect: proxy_interrupt_redirect_available,
        })
    }
}
2197
impl UhPartition {
    /// Merges the precomputed CVM cpuid results with `initial_cpuid` into the
    /// final leaf set used to answer guest cpuid queries.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            // Mask the x2apic bit in the version-and-features leaf to match
            // the configured APIC mode.
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            // Use the hypervisor's reported version for the synthetic
            // hypervisor leaves.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        // Caller-provided leaves are applied last.
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    /// Computes partition capabilities from the cpuid leaf set (for
    /// hardware-isolated partitions) or from the host's cpuid with the leaf
    /// set applied on top (otherwise).
    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        // Choose the cpuid source used to evaluate capabilities.
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                // vtom is reported exactly when isolation is exposed.
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX forces the NXE bit on.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}
2292
/// Gets the TSC frequency from the hypervisor, cross-checking it against a
/// hardware-reported value where one is available.
#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Read the hypervisor-reported frequency via the synthetic MSR.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Determine a hardware-derived frequency and the tolerated deviation,
    // where the platform provides one.
    let hw_info = match isolation {
        IsolationType::Tdx => {
            // TDX exposes the TSC frequency via the core crystal clock
            // cpuid leaf; verify the leaf exists first.
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
            // Tolerate up to 12.5 MHz of difference from the hypervisor's
            // value.
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => {
            // No independent hardware frequency source is used for SNP here.
            None
        }
        IsolationType::Vbs | IsolationType::None => None,
    };

    // Reject a hypervisor value that deviates too far from hardware.
    if let Some((hw_frequency, allowed_error)) = hw_info {
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}
2355
impl UhPartitionInner {
    /// Enables (`active`) or disables the IO port intercept for the inclusive
    /// range `begin..=end`. No-op for hardware-isolated partitions.
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        // Set once the hypervisor rejects a ranged registration; all later
        // calls then use the per-port slow path directly.
        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        // Fast path: register the whole range in one hypercall.
        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    // Range intercepts unsupported; remember and fall through
                    // to per-port registration.
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        // Slow path: register each port individually.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    /// Returns whether `gpa` falls within a lower-VTL RAM range.
    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    /// Returns whether `gpa` is mapped for the requested access. Writes to
    /// the monitor page are reported as unmapped.
    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        if self.is_gpa_lower_vtl_ram(gpa) {
            // The monitor page is write-protected so writes trap.
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
}
2427
/// Handle returned by [`UhPartition::register_host_io_port_fast_path`].
///
/// While held, guest accesses to the registered IO port range bypass the
/// intercept; dropping it re-registers the intercept region.
#[must_use]
pub struct HostIoPortFastPathHandle {
    /// Weak reference so a leaked handle does not keep the partition alive.
    inner: Weak<UhPartitionInner>,
    /// First port of the registered range (inclusive).
    begin: u16,
    /// Last port of the registered range (inclusive).
    end: u16,
}
2438
2439impl Drop for HostIoPortFastPathHandle {
2440 fn drop(&mut self) {
2441 if let Some(inner) = self.inner.upgrade() {
2442 inner.manage_io_port_intercept_region(self.begin, self.end, true);
2443 }
2444 }
2445}
2446
/// Notification that a guest VTL has crashed, sent via
/// `crash_notification_send`.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that reported the crash.
    pub vp_index: VpIndex,
    /// The VTL that was running when the crash was reported.
    pub last_vtl: GuestVtl,
    /// The guest crash control value.
    pub control: GuestCrashCtl,
    /// The crash parameters.
    pub parameters: [u64; 5],
}
2462
2463#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
2466fn validate_vtl_gpa_flags(
2467 flags: HvMapGpaFlags,
2468 mbec_enabled: bool,
2469 shadow_supervisor_stack_enabled: bool,
2470) -> bool {
2471 if flags.adjustable() {
2473 return false;
2474 }
2475
2476 if flags.kernel_executable() != flags.user_executable() {
2478 if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
2479 return false;
2480 }
2481 }
2482
2483 if flags.writable()
2485 || flags.kernel_executable()
2486 || flags.user_executable()
2487 || flags.supervisor_shadow_stack()
2488 || flags.paging_writability()
2489 || flags.verify_paging_writability()
2490 {
2491 if !flags.readable() {
2492 return false;
2493 }
2494 }
2495
2496 if flags.supervisor_shadow_stack()
2499 && ((!flags.kernel_executable() && !flags.user_executable())
2500 || shadow_supervisor_stack_enabled)
2501 {
2502 return false;
2503 }
2504
2505 true
2506}