1#![cfg(all(guest_is_native, target_os = "linux"))]
9
10mod devmsr;
11
12cfg_if::cfg_if!(
13 if #[cfg(guest_arch = "x86_64")] {
14 mod cvm_cpuid;
15 pub use processor::snp::SnpBacked;
16 pub use processor::tdx::TdxBacked;
17 use crate::processor::HardwareIsolatedBacking;
18 pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
19 use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
20 use bitvec::prelude::BitArray;
21 use bitvec::prelude::Lsb0;
22 use devmsr::MsrDevice;
23 use hv1_emulator::hv::ProcessorVtlHv;
24 use processor::LapicState;
25 use processor::snp::SnpBackedShared;
26 use processor::tdx::TdxBackedShared;
27 use std::arch::x86_64::CpuidResult;
28 use virt::CpuidLeaf;
29 use virt::state::StateElement;
30 use virt::vp::MpState;
31 type IrrBitmap = BitArray<[u32; 8], Lsb0>;
34 } else if #[cfg(guest_arch = "aarch64")] {
35 pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
36 use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
37 }
38);
39
40mod processor;
41pub use processor::Backing;
42pub use processor::UhProcessor;
43
44use anyhow::Context as AnyhowContext;
45use bitfield_struct::bitfield;
46use bitvec::boxed::BitBox;
47use bitvec::vec::BitVec;
48use cvm_tracing::CVM_ALLOWED;
49use guestmem::GuestMemory;
50use guestmem::GuestMemoryBackingError;
51use hcl::GuestVtl;
52use hcl::ioctl::Hcl;
53use hcl::ioctl::SetVsmPartitionConfigError;
54use hv1_emulator::hv::GlobalHv;
55use hv1_emulator::message_queues::MessageQueues;
56use hv1_emulator::synic::GlobalSynic;
57use hv1_emulator::synic::SintProxied;
58use hv1_structs::VtlArray;
59use hvdef::GuestCrashCtl;
60use hvdef::HV_PAGE_SHIFT;
61use hvdef::HV_PAGE_SIZE;
62use hvdef::HV_PAGE_SIZE_USIZE;
63use hvdef::HvError;
64use hvdef::HvMapGpaFlags;
65use hvdef::HvRegisterName;
66use hvdef::HvRegisterVsmPartitionConfig;
67use hvdef::HvRegisterVsmPartitionStatus;
68use hvdef::Vtl;
69use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
70use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
71use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
72use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
73use hvdef::hypercall::HostVisibilityType;
74use hvdef::hypercall::HvGuestOsId;
75use hvdef::hypercall::HvInputVtl;
76use hvdef::hypercall::HvInterceptParameters;
77use hvdef::hypercall::HvInterceptType;
78use inspect::Inspect;
79use inspect::InspectMut;
80use memory_range::MemoryRange;
81use pal::unix::affinity;
82use pal::unix::affinity::CpuSet;
83use pal_async::driver::Driver;
84use pal_async::driver::SpawnDriver;
85use pal_uring::IdleControl;
86use parking_lot::Mutex;
87use parking_lot::RwLock;
88use processor::BackingSharedParams;
89use processor::SidecarExitReason;
90use sidecar_client::NewSidecarClientError;
91use std::ops::RangeInclusive;
92use std::os::fd::AsRawFd;
93use std::sync::Arc;
94use std::sync::Weak;
95use std::sync::atomic::AtomicBool;
96use std::sync::atomic::AtomicU8;
97use std::sync::atomic::AtomicU32;
98use std::sync::atomic::AtomicU64;
99use std::sync::atomic::Ordering;
100use std::task::Waker;
101use thiserror::Error;
102use user_driver::DmaClient;
103use virt::IsolationType;
104use virt::PartitionCapabilities;
105use virt::VpIndex;
106use virt::irqcon::IoApicRouting;
107use virt::irqcon::MsiRequest;
108use virt::x86::apic_software_device::ApicSoftwareDevices;
109use virt_support_apic::LocalApicSet;
110use vm_topology::memory::MemoryLayout;
111use vm_topology::processor::ProcessorTopology;
112use vm_topology::processor::TargetVpInfo;
113use vmcore::monitor::MonitorPage;
114use vmcore::reference_time::GetReferenceTime;
115use vmcore::reference_time::ReferenceTimeResult;
116use vmcore::reference_time::ReferenceTimeSource;
117use vmcore::vmtime::VmTimeSource;
118use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
119use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
120use x86defs::tdx::TdCallResult;
121use zerocopy::FromBytes;
122use zerocopy::FromZeros;
123use zerocopy::Immutable;
124use zerocopy::IntoBytes;
125use zerocopy::KnownLayout;
126
/// Errors that can occur while constructing or configuring an underhill
/// partition.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("hcl error")]
    Hcl(#[source] hcl::ioctl::Error),
    #[error("failed to open sidecar client")]
    Sidecar(#[source] NewSidecarClientError),
    #[error("failed to install {0:?} intercept: {1:?}")]
    InstallIntercept(HvInterceptType, HvError),
    #[error("failed to query hypervisor register {0:#x?}")]
    Register(HvRegisterName, #[source] HvError),
    #[error("failed to set vsm partition config register")]
    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
    // Only reachable on x86_64, where CVM cpuid tables are built.
    #[error("failed to create cpuid tables for cvm")]
    #[cfg(guest_arch = "x86_64")]
    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
    #[error("failed to update hypercall msr")]
    UpdateHypercallMsr,
    #[error("failed to update reference tsc msr")]
    UpdateReferenceTsc,
    #[error("failed to map overlay page")]
    MapOverlay(#[source] std::io::Error),
    #[error("failed to allocate shared visibility pages for overlay")]
    AllocateSharedVisOverlay(#[source] anyhow::Error),
    #[error("failed to open msr device")]
    OpenMsr(#[source] std::io::Error),
    #[error("cpuid did not contain valid TSC frequency information")]
    BadCpuidTsc,
    #[error("failed to read tsc frequency")]
    ReadTscFrequency(#[source] std::io::Error),
    // Raised when the hypervisor-reported and hardware-measured TSC
    // frequencies disagree by more than the tolerated error.
    #[error(
        "tsc frequency mismatch between hypervisor ({hv}) and hardware {hw}, exceeds allowed error {allowed_error}"
    )]
    TscFrequencyMismatch {
        hv: u64,
        hw: u64,
        allowed_error: u64,
    },
    #[error("failed to set vsm partition config: {0:?}")]
    FailedToSetL2Ctls(TdCallResult),
    #[error("debugging is configured but the binary does not have the gdb feature")]
    InvalidDebugConfiguration,
    #[error("failed to allocate TLB flush page")]
    AllocateTlbFlushPage(#[source] anyhow::Error),
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
}
177
/// Errors that can occur while revoking guest VSM support.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
    // Revocation is only valid before the guest has enabled VTL 1.
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}
187
/// The public handle to an underhill partition.
///
/// Wraps the shared [`UhPartitionInner`] state plus the per-VTL interrupt
/// targets used to deliver device MSIs.
#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    // One interrupt target per guest VTL (VTL0 and VTL1).
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}
197
/// Shared, reference-counted state for an underhill partition.
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    // Handle to the hypercall ioctl layer.
    #[inspect(skip)]
    hcl: Hcl,
    // Per-VP shared state, indexed by VP index.
    #[inspect(skip)]
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    // VP entry modes; the atomic mirror below is read on the hot path.
    #[inspect(skip)]
    enter_modes: Mutex<EnterModes>,
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    // Guest memory accessors, one per guest VTL.
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    // Backing block when the monitor page is a DMA-allocated overlay page
    // (see `allocate_monitor_page`); `None` when guest-supplied.
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    // Bitmap of interrupt vectors assigned to proxied devices; used to build
    // per-VP proxy IRR filters.
    #[cfg(guest_arch = "x86_64")]
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}
239
/// Backing-specific partition state, selected by isolation type.
#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    // Non-isolated and VBS partitions.
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}
249
impl BackingShared {
    /// Builds the backing-specific shared state for the given isolation type.
    ///
    /// Non-isolated and VBS partitions use the hypervisor backing (and must
    /// not carry CVM state); SNP and TDX (x86_64 only) use their
    /// hardware-isolated backings.
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            // On non-x86_64 the hardware-isolated types are not constructible.
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    /// Returns the CVM partition state, if this is a hardware-isolated
    /// (SNP/TDX) backing.
    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    /// Returns the untrusted synic, present only on TDX when configured.
    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}
298
/// The pair of VP entry modes: `first` is tried first, falling back to
/// `second` (mirrors `hcl::protocol::EnterModes`).
#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}
306
307impl Default for EnterModes {
308 fn default() -> Self {
309 Self {
310 first: EnterMode::Fast,
311 second: EnterMode::IdleToVtl0,
312 }
313 }
314}
315
// Converts the inspectable pair into the packed protocol representation.
impl From<EnterModes> for hcl::protocol::EnterModes {
    fn from(value: EnterModes) -> Self {
        Self::new()
            .with_first(value.first.into())
            .with_second(value.second.into())
    }
}
323
/// A single VP entry mode (mirrors `hcl::protocol::EnterMode`).
#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}
330
// Maps each local entry mode onto its protocol constant.
impl From<EnterMode> for hcl::protocol::EnterMode {
    fn from(value: EnterMode) -> Self {
        match value {
            EnterMode::Fast => Self::FAST,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
        }
    }
}
340
/// Per-VP state that only exists once the guest has enabled VSM (VTL 1).
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    // Exception event queued for injection into VTL0 on the next exit, if any.
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    reg_intercept: SecureRegisterInterceptState,
}
350
#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    /// Creates the initial per-VP VSM state: no pending VTL0 exception event
    /// and no secure register intercepts configured.
    fn new() -> Self {
        Self {
            vtl0_exit_pending_event: None,
            reg_intercept: SecureRegisterInterceptState::default(),
        }
    }
}
360
/// Per-VP state for hardware-isolated (CVM) partitions.
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
    // DMA block backing this VP's direct overlay pages.
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    // The VTL that was running when the VP last exited to VTL2.
    exit_vtl: GuestVtl,
    // Emulated hypervisor state, one per guest VTL.
    hv: VtlArray<ProcessorVtlHv, 2>,
    // Emulated local APIC state, one per guest VTL.
    lapics: VtlArray<LapicState, 2>,
    // Present only after the guest enables VSM on this VP.
    vtl1: Option<GuestVsmVpState>,
}
377
#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    /// Creates the CVM-specific state for one VP.
    ///
    /// Allocates the shared-visibility overlay pages, builds the per-VTL
    /// APIC and hypervisor emulation state, and starts with VTL0 as the
    /// exit VTL.
    ///
    /// # Errors
    /// Fails with [`Error::AllocateSharedVisOverlay`] if the overlay DMA
    /// buffer cannot be allocated.
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        // Both VTLs share the same reset-time APIC base.
        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            lapic.set_apic_base(apic_base).unwrap();
            // VTL0 APs wait for SIPI; the BSP and all VTL1 VPs start running.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}
421
/// VTL1's configuration of secure register intercepts for a VP: which
/// control registers/MSRs trap to VTL1 and the bit masks that qualify
/// the intercepts.
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    ia32_misc_enable_mask: u64,
}
435
/// Partition-wide state for hardware-isolated (CVM) partitions.
#[derive(Inspect)]
struct UhCvmPartitionState {
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    // Per-VTL bitmaps of VPs whose TLBs are currently locked.
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    // Per-VP CVM state, indexed by VP index.
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    // Accessor for host-visible (shared) memory.
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    lapic: VtlArray<LocalApicSet, 2>,
    hv: GlobalHv<2>,
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    // DMA clients for host-shared vs. guest-private allocations.
    shared_dma_client: Arc<dyn DmaClient>,
    private_dma_client: Arc<dyn DmaClient>,
    hide_isolation: bool,
}
464
465#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
466impl UhCvmPartitionState {
467 fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
468 &self.vps[vp_index as usize]
469 }
470
471 fn is_lower_vtl_startup_denied(&self) -> bool {
472 matches!(
473 *self.guest_vsm.read(),
474 GuestVsmState::Enabled {
475 vtl1: CvmVtl1State {
476 deny_lower_vtl_startup: true,
477 ..
478 }
479 }
480 )
481 }
482}
483
/// Per-VP CVM state shared across threads.
#[derive(Inspect)]
struct UhCvmVpInner {
    // TLB flush/lock coordination state, one per guest VTL.
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    // Whether the guest has issued an enable-VTL1 call for this VP.
    vtl1_enable_called: Mutex<bool>,
    // Whether this VP has been started via StartVp.
    started: AtomicBool,
    // Pending StartVp/EnableVpVtl context, one slot per guest VTL.
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
}
497
/// Tracks whether guest VSM is available and, once enabled, the VTL1 state.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
enum GuestVsmState<T: Inspect> {
    // The platform does not (or no longer) offers guest VSM.
    NotPlatformSupported,
    // Available but the guest has not enabled it yet.
    NotGuestEnabled,
    // Enabled by the guest; carries the backing-specific VTL1 state.
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}
510
511impl<T: Inspect> GuestVsmState<T> {
512 pub fn from_availability(guest_vsm_available: bool) -> Self {
513 if guest_vsm_available {
514 GuestVsmState::NotGuestEnabled
515 } else {
516 GuestVsmState::NotPlatformSupported
517 }
518 }
519}
520
/// VTL1 configuration for a hardware-isolated partition, populated as the
/// guest makes VSM configuration calls.
#[derive(Inspect)]
struct CvmVtl1State {
    // Set once any VP has enabled VTL1.
    enabled_on_any_vp: bool,
    zero_memory_on_reset: bool,
    deny_lower_vtl_startup: bool,
    pub mbec_enabled: bool,
    pub shadow_supervisor_stack_enabled: bool,
    // One bit per 16-bit I/O port: which reads/writes VTL1 intercepts.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}
538
539#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
540impl CvmVtl1State {
541 fn new(mbec_enabled: bool) -> Self {
542 Self {
543 enabled_on_any_vp: false,
544 zero_memory_on_reset: false,
545 deny_lower_vtl_startup: false,
546 mbec_enabled,
547 shadow_supervisor_stack_enabled: false,
548 io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
549 io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
550 }
551 }
552}
553
/// A reference time source backed by the TSC.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    // 64.64 fixed-point multiplier converting TSC ticks to 100ns units.
    tsc_scale: u64,
}
558
559#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
560impl TscReferenceTimeSource {
561 fn new(tsc_frequency: u64) -> Self {
562 TscReferenceTimeSource {
563 tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
564 }
565 }
566}
567
impl GetReferenceTime for TscReferenceTimeSource {
    // Reads the TSC and scales it into 100ns reference-time units using the
    // precomputed 64.64 fixed-point multiplier. No wall-clock time is
    // available from this source.
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}
587
#[cfg(guest_arch = "aarch64")]
impl virt::irqcon::ControlGic for UhPartitionInner {
    // Asserts or deasserts a shared peripheral interrupt by forwarding the
    // request to the hypervisor. Failures are logged (rate-limited) rather
    // than propagated, since interrupt delivery is best-effort here.
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}
608
#[cfg(guest_arch = "aarch64")]
impl virt::Aarch64Partition for UhPartition {
    // Only VTL0 GIC control is supported; the partition inner state itself
    // implements `ControlGic`.
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}
616
/// A prepared-but-unbound virtual processor: holds everything needed to
/// construct a [`UhProcessor`] on the thread that will run it.
pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}
626
impl UhProcessorBox {
    /// Returns the index of this VP.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    /// Returns the base CPU of the sidecar node hosting this VP, if it is
    /// managed by a sidecar.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    /// Binds this VP to the current thread, producing a runnable
    /// [`UhProcessor`].
    ///
    /// When idle `control` is supplied, the calling thread must already be
    /// affinitized to the VP's CPU (asserted below), and the kernel poll
    /// file is wired to the io_uring ring fd.
    ///
    /// # Errors
    /// Returns [`Error::Hcl`] if the poll file cannot be registered.
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            // Idle control requires the thread to be pinned to exactly this
            // VP's CPU.
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    /// Records that the sidecar VP should be removed from the sidecar and
    /// run in this process, because `task` needs to run on it.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}
682
/// Per-VP state shared between the VP's run loop and other threads.
#[derive(Debug, Inspect)]
struct UhVpInner {
    // Packed per-VTL wake reason bits (see `WakeReason`).
    wake_reasons: AtomicU64,
    // Waker used to rouse the VP's async task.
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    // Physical CPU index hosting this VP.
    cpu_index: u32,
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}
697
impl UhVpInner {
    /// Returns the index of this VP.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}
703
/// Which hypercall queued an initial VP context: StartVp or EnableVpVtl.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
    StartVp,
    EnableVpVtl,
}
713
/// A pending StartVp/EnableVpVtl request, holding the initial register
/// context to apply when the target VP processes it.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}
724
/// Per-VP, per-VTL bookkeeping for cross-VP TLB lock coordination.
#[derive(Debug, Inspect)]
struct TlbLockInfo {
    // VPs that this VP is currently blocking (holding their TLB locked).
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    // VPs currently blocking this VP from proceeding.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    // Count of set bits in `blocking_vps`, kept for a cheap "any blockers?"
    // check.
    blocking_vp_count: AtomicU32,
    // Whether this VP is sleeping waiting for its blockers to clear.
    sleeping: AtomicBool,
}
741
742#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
743impl TlbLockInfo {
744 fn new(vp_count: usize) -> Self {
745 Self {
746 blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
747 blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
748 blocking_vp_count: AtomicU32::new(0),
749 sleeping: false.into(),
750 }
751 }
752}
753
/// Bit flags describing why a VP was woken; stored packed (per VTL) in
/// `UhVpInner::wake_reasons`.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}
765
766impl WakeReason {
767 const EXTINT: Self = Self::new().with_extint(true);
769 const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
770 #[cfg(guest_arch = "x86_64")]
771 const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true); const INTCON: Self = Self::new().with_intcon(true);
773 #[cfg(guest_arch = "x86_64")]
774 const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
775}
776
/// Bit flags describing pending activity noticed at VP exit.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}
784
impl UhPartition {
    /// Revokes guest VSM support for the partition.
    ///
    /// # Errors
    /// Fails with [`RevokeGuestVsmError::Vtl1AlreadyEnabled`] if the guest
    /// has already enabled VTL1, or with a config error if the hypervisor
    /// rejects the change.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        // Common state transition: only legal before VTL1 is enabled.
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                // The hypervisor also needs to be told for this backing.
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current hypervisor reference time, in 100ns units.
    pub fn reference_time(&self) -> u64 {
        // Prefer the emulated hypervisor's time source when present (CVM);
        // otherwise query the real hypervisor.
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}
827
impl virt::Partition for UhPartition {
    // Underhill partitions do not support reset.
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    // VP yield requests are not used in this backend.
    fn request_yield(&self, _vp_index: VpIndex) {
        unimplemented!()
    }
}
846
impl virt::X86Partition for UhPartition {
    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
        self.inner.clone()
    }

    // Pulses a local interrupt line on the target VP. With an emulated APIC
    // the request goes through it; otherwise only LINT0 (treated as EXTINT)
    // is supported, delivered via a VP wake.
    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        if let Some(apic) = &self.inner.lapic(vtl) {
            apic.lint(vp_index, lint.into(), |vp_index| {
                self.inner
                    .vp(vp_index)
                    .unwrap()
                    .wake(vtl, WakeReason::INTCON);
            });
        } else if lint == 0 {
            self.inner
                .vp(vp_index)
                .unwrap()
                .wake(vtl, WakeReason::EXTINT);
        } else {
            unimplemented!()
        }
    }
}
871
impl UhPartitionInner {
    /// Returns the per-VP state for `index`, or `None` if out of range.
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    /// Returns the emulated APIC set for `vtl`, present only on CVM
    /// backings.
    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    /// Returns the emulated hypervisor, present only on CVM backings.
    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    /// Records a new proxied-device interrupt vector and wakes every other
    /// VP so each rebuilds its proxy IRR filter.
    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        // Add the vector to the partition-wide device vector table.
        {
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        // The requesting VP updates itself; wake everyone else.
        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    /// ORs all registered device vectors into `irr_vectors`.
    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    // Inspect hook: exposes `enter_modes` mutably; on an update, publishes
    // the packed value to the atomic mirror and wakes all VPs so they pick
    // up the new modes.
    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        // Wake after releasing the lock above.
        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    /// Queries the hypervisor's VSM partition status register.
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
        self.hcl.get_vsm_partition_status()
    }
}
958
impl virt::Synic for UhPartition {
    // Posts a synic message to the target VP's queue; invalid VP targets are
    // logged (rate-limited) and dropped rather than propagated.
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    // Creates an event port whose target (vp/sint/flag) can be retargeted
    // later; it holds only a weak partition reference.
    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}
1005
1006impl virt::SynicMonitor for UhPartition {
1007 fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
1008 let mut allocated_block = self.inner.allocated_monitor_page.lock();
1010 let old_gpa = self.inner.monitor_page.set_gpa(gpa);
1011
1012 let allocated_page = allocated_block.take();
1014 if let Some(old_gpa) = old_gpa {
1015 let allocated_gpa = allocated_page
1016 .as_ref()
1017 .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);
1018
1019 let result = if allocated_gpa == Some(old_gpa) {
1022 let vtl = GuestVtl::try_from(vtl).unwrap();
1023 self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
1024 } else {
1025 self.inner
1026 .hcl
1027 .modify_vtl_protection_mask(
1028 MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1029 hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1030 HvInputVtl::CURRENT_VTL,
1031 )
1032 .map_err(|err| anyhow::anyhow!(err))
1033 };
1034
1035 result
1036 .context("failed to unregister old monitor page")
1037 .inspect_err(|_| {
1038 self.inner.monitor_page.set_gpa(None);
1040 })?;
1041
1042 tracing::debug!(old_gpa, "unregistered monitor page");
1043 }
1044
1045 if let Some(gpa) = gpa {
1046 self.inner
1049 .hcl
1050 .modify_vtl_protection_mask(
1051 MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
1052 HvMapGpaFlags::new().with_readable(true),
1053 HvInputVtl::CURRENT_VTL,
1054 )
1055 .context("failed to register monitor page")
1056 .inspect_err(|_| {
1057 self.inner.monitor_page.set_gpa(None);
1059 })?;
1060
1061 tracing::debug!(gpa, "registered monitor page");
1062 }
1063
1064 Ok(())
1065 }
1066
1067 fn register_monitor(
1068 &self,
1069 monitor_id: vmcore::monitor::MonitorId,
1070 connection_id: u32,
1071 ) -> Box<dyn Sync + Send> {
1072 self.inner
1073 .monitor_page
1074 .register_monitor(monitor_id, connection_id)
1075 }
1076
1077 fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
1078 let vtl = GuestVtl::try_from(vtl).unwrap();
1079
1080 let Some(state) = self.inner.backing_shared.cvm_state() else {
1082 return Ok(None);
1083 };
1084
1085 let mut allocated_block = self.inner.allocated_monitor_page.lock();
1086 if let Some(block) = allocated_block.as_ref() {
1087 let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
1089 assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
1090 return Ok(Some(gpa));
1091 }
1092
1093 let block = state
1094 .private_dma_client
1095 .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
1096 .context("failed to allocate monitor page")?;
1097
1098 let gpn = block.pfns()[0];
1099 *allocated_block = Some(block);
1100 let gpa = gpn << HV_PAGE_SHIFT;
1101 let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
1102 if let Some(old_gpa) = old_gpa {
1103 self.inner
1106 .hcl
1107 .modify_vtl_protection_mask(
1108 MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1109 hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1110 HvInputVtl::CURRENT_VTL,
1111 )
1112 .context("failed to unregister old monitor page")
1113 .inspect_err(|_| {
1114 self.inner.monitor_page.set_gpa(None);
1116 })?;
1117
1118 tracing::debug!(old_gpa, "unregistered monitor page");
1119 }
1120
1121 self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
1124 .context("failed to unregister monitor page")
1125 .inspect_err(|_| {
1126 self.inner.monitor_page.set_gpa(None);
1128 })?;
1129
1130 tracing::debug!(gpa, "registered allocated monitor page");
1131
1132 Ok(Some(gpa))
1133 }
1134}
1135
impl UhPartitionInner {
    /// Returns a closure that injects a synic interrupt through the emulated
    /// APIC, waking the target VP via the interrupt-connection wake reason.
    ///
    /// Callers must only use this on backings with an emulated APIC (the
    /// `unwrap` below relies on that).
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    // No emulated APIC path on aarch64; interrupt requests are no-ops here.
    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}
1164
/// A guest event port: signals a synic event flag on a target VP. Holds a
/// weak partition reference so it does not keep the partition alive.
#[derive(Debug)]
struct UhEventPort {
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}
1170
/// The (retargetable) destination of a [`UhEventPort`].
#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}
1178
impl vmcore::synic::GuestEventPort for UhEventPort {
    // Builds an interrupt object that, when signaled, delivers the event
    // flag. Delivery order: emulated hv synic first; if the SINT is proxied,
    // fall back to the untrusted synic (TDX) or signal directly through the
    // hypervisor; without an emulated hv, always signal directly.
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            // Snapshot the target; it may be retargeted concurrently.
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            // Partition may already be torn down; drop the signal then.
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag)
                        }
                    }
                }
            } else {
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}
1234
1235impl virt::Hv1 for UhPartition {
1236 type Error = Error;
1237 type Device = virt::x86::apic_software_device::ApicSoftwareDevice;
1238
1239 fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
1240 Some(if let Some(hv) = self.inner.hv() {
1241 hv.ref_time_source().clone()
1242 } else {
1243 ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
1244 })
1245 }
1246
1247 fn new_virtual_device(
1248 &self,
1249 ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
1250 self.inner.software_devices.is_some().then_some(self)
1251 }
1252}
1253
1254impl GetReferenceTime for UhPartitionInner {
1255 fn now(&self) -> ReferenceTimeResult {
1256 ReferenceTimeResult {
1257 ref_time: self.hcl.reference_time().unwrap(),
1258 system_time: None,
1259 }
1260 }
1261}
1262
1263impl virt::DeviceBuilder for UhPartition {
1264 fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
1265 let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
1266 let device = self
1267 .inner
1268 .software_devices
1269 .as_ref()
1270 .expect("checked in new_virtual_device")
1271 .new_device(self.interrupt_targets[vtl].clone(), device_id)
1272 .map_err(Error::NewDevice)?;
1273
1274 Ok(device)
1275 }
1276}
1277
/// MSI interrupt target that routes interrupts into a partition at a fixed
/// VTL.
struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}
1282
impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
    /// Creates an MSI control that forwards each (address, data) write as an
    /// MSI request to the partition at this target's VTL.
    fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
        let partition = self.partition.clone();
        let vtl = self.vtl;
        Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
    }
}
1290
impl UhPartitionInner {
    /// Requests delivery of an MSI to the given VTL.
    ///
    /// When a local APIC emulator exists for the VTL, the interrupt is
    /// delivered through it (waking target VPs as needed); otherwise the
    /// request is forwarded to the hypervisor via the HCL. Delivery failures
    /// on the hypervisor path are logged (rate-limited), not propagated.
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}
1317
impl IoApicRouting for UhPartitionInner {
    /// Sets (or clears, with `None`) the MSI route for an IO-APIC line.
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    /// Asserts an IO-APIC line by issuing its routed MSI.
    // VTL0 only: the IO-APIC is not exposed to higher VTLs here.
    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
    }
}
1329
/// Configures the VTL2 VSM partition configuration register, deriving the
/// settings from the hypervisor's reported VSM capabilities and from the
/// partition's isolation type.
fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    // Query what the hypervisor supports before enabling optional intercepts.
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let hardware_isolated = hcl.isolation().is_hardware_isolated();
    let isolated = hcl.isolation().is_isolated();

    // Hardware-isolated partitions manage protections themselves, so several
    // hypervisor-side features are disabled for them.
    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());

    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}
1353
/// Early partition creation parameters, available before late (memory/VMM)
/// configuration.
pub struct UhPartitionNewParams<'a> {
    /// The partition's isolation type.
    pub isolation: IsolationType,
    /// Whether to hide the isolation type from the guest.
    pub hide_isolation: bool,
    /// The memory layout of the lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The processor topology for the partition.
    pub topology: &'a ProcessorTopology,
    /// CPUID information pages for a CVM (SNP); required when
    /// `isolation == IsolationType::Snp` (unwrapped in `UhProtoPartition::new`).
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The SNP secrets page, used to read the register tweak bitmap.
    pub snp_secrets: Option<&'a [u8]>,
    /// Virtual top of memory; required for hardware-isolated partitions
    /// (unwrapped when building CVM CPUID results).
    pub vtom: Option<u64>,
    /// Whether to register for unknown synic connection intercepts.
    pub handle_synic: bool,
    /// Whether to disable sidecar hotplug.
    pub no_sidecar_hotplug: bool,
    /// Whether to route MMIO accesses through hypercalls.
    pub use_mmio_hypercalls: bool,
    /// Whether to intercept guest debug exceptions (requires the `gdb`
    /// feature; validated in `UhProtoPartition::build`).
    pub intercept_debug_exceptions: bool,
}
1388
/// Partition parameters provided at build time, after memory setup.
pub struct UhLateParams<'a> {
    /// Guest memory accessors, one per guest VTL.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL0 kernel-mode execute access.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL0 user-mode execute access.
    pub vtl0_user_exec_gm: GuestMemory,
    /// Additional CPUID leaves to merge into the partition's CPUID results.
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// Channel used to notify of guest crashes.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// CVM-specific parameters; required for hardware-isolated partitions
    /// (unwrapped in `UhProtoPartition::build`).
    pub cvm_params: Option<CvmLateParams>,
    /// Whether a vmbus relay is in use.
    pub vmbus_relay: bool,
}
1410
/// CVM-specific late parameters.
pub struct CvmLateParams {
    /// Guest memory accessor for host-visible (shared) memory.
    pub shared_gm: GuestMemory,
    /// The object managing isolated memory protections.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// DMA client for shared (host-visible) allocations.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// DMA client for private (guest-only) allocations; also used when
    /// adding VPs in `UhProtoPartition::build`.
    pub private_dma_client: Arc<dyn DmaClient>,
}
1422
/// Where a guest page number being registered as an overlay comes from.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The page is regular guest memory.
    GuestMemory,
    /// The page is DMA-allocated memory.
    Dma,
}
1431
/// Trait for managing protections and host visibility of isolated guest
/// memory.
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility (shared vs. private) for the given pages.
    ///
    /// On failure returns the error and a `usize` — presumably the index of
    /// the first page that failed; TODO confirm against implementations.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries the current host visibility of the given pages, writing one
    /// entry per GPN into `host_visibility`.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Returns the default protections applied to VTL0 memory.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections applied to the target VTL's memory.
    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Changes protections on the given pages for the target VTL.
    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Registers a page as an overlay page, checking it currently has
    /// `check_perms` and optionally applying `new_perms`.
    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Unregisters a previously registered overlay page.
    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Returns whether the given page is currently an overlay page for `vtl`.
    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    /// Locks the given pages, preventing their protections/visibility from
    /// changing — TODO confirm exact locking semantics with implementations.
    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    /// Releases a previous `lock_gpns` on the given pages.
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Records that VTL1 protections have been enabled.
    fn set_vtl1_protections_enabled(&self);

    /// Returns whether VTL1 protections have been enabled.
    fn vtl1_protections_enabled(&self) -> bool;
}
1514
/// Access to TLB flush and lock operations, passed to memory-protection
/// methods so they can invalidate translations after changing protections.
pub trait TlbFlushLockAccess {
    /// Flushes the TLB for the given VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flushes the entire TLB across all VTLs.
    fn flush_entire(&mut self);

    /// Causes VPs to wait for any TLB locks for the given VTL to be released.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}
1526
/// A partially-constructed partition: the HCL and early configuration exist,
/// but late (memory) parameters have not yet been supplied. Finished by
/// [`UhProtoPartition::build`].
pub struct UhProtoPartition<'a> {
    params: UhPartitionNewParams<'a>,
    hcl: Hcl,
    // Computed once at creation; exposed via `guest_vsm_available()`.
    guest_vsm_available: bool,
    // CVM CPUID results computed from the isolation-specific sources.
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}
1536
impl<'a> UhProtoPartition<'a> {
    /// Creates the prototype partition: opens the HCL (with a sidecar client
    /// spawned via `driver`), restricts the allowed hypercalls, configures the
    /// VTL2 VSM partition config, and precomputes guest VSM availability and
    /// (on x86_64) the CVM CPUID results.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Baseline set of hypercalls the guest is permitted to issue.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        // Hardware-isolated partitions additionally manage VTL enablement and
        // device interrupt retargeting themselves.
        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let guest_vsm_available = Self::check_guest_vsm_support(&hcl)?;

        // Build the isolation-specific CPUID results for CVMs; VBS and
        // non-isolated partitions use hypervisor-provided CPUID instead.
        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    /// Returns whether guest VSM (VTL1) is available to the guest, as
    /// computed at creation time.
    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    /// Finishes building the partition: registers intercepts, creates the
    /// VPs, constructs CPUID results, capabilities, and (for CVMs) the CVM
    /// state, and assembles the shared partition object plus per-VP boxes.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        // Intercepting debug exceptions is only supported for gdb-enabled
        // x86_64 builds; TDX handles this elsewhere (no intercept needed).
        if params.intercept_debug_exceptions {
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        // Vector 0x1 is the debug exception (#DB).
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        // Non-hardware-isolated partitions rely on hypervisor intercepts for
        // MSR accesses and APIC EOIs.
        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // Vector 0x1D is #VC on SNP; intercept it for handling here.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(0x1D),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Read the register tweak bitmap out of the secrets page, if
            // provided; otherwise leave it zeroed.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // NOTE(review): the VP index is used directly as the CPU
                // index, i.e. a 1:1 VP-to-CPU mapping is assumed here.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Software APIC devices are only usable if the hypervisor supports
        // retarget-interrupt intercepts (or the partition is hardware
        // isolated and handles them itself).
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                // Treated as "hypervisor doesn't support it": no devices.
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {};

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        // TDX does not use this intercept path for synic connections.
        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        // Start with the full io port space intercepted; fast-path handles
        // carve ranges back out (see `register_host_io_port_fast_path`).
        if cfg!(guest_arch = "x86_64") {
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}
1891
impl UhPartition {
    /// Gets the guest OS ID for VTL0, either from the emulated hypervisor
    /// interface (when present) or from the hypervisor via the HCL.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            self.inner
                .hcl
                .get_guest_os_id(Vtl::Vtl0)
                .map_err(Error::Hcl)?
        };
        Ok(id)
    }

    /// Removes the io port intercept for `range` so host io port accesses
    /// take the fast path; the returned handle re-registers the intercept on
    /// drop.
    ///
    /// Panics if the partition is hardware isolated (no hypervisor intercept
    /// management applies there).
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Enables or disables (with `None`) PM timer assist for the given port.
    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    /// Registers a DMA-backed overlay page with the CVM's memory protector,
    /// applying `new_perms` to it.
    ///
    /// Panics (`unreachable!`) on non-CVM (hypervisor-backed) partitions.
    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    // No permission check is requested for DMA pages.
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    /// Unregisters an overlay page previously registered via
    /// [`Self::register_cvm_dma_overlay_page`].
    ///
    /// Panics (`unreachable!`) on non-CVM (hypervisor-backed) partitions.
    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}
2027
impl UhProtoPartition<'_> {
    /// Returns whether guest VSM (VTL1) is available to the guest: the
    /// AccessVsm privilege must be present and the hypervisor's maximum VTL
    /// must be at least VTL1.
    fn check_guest_vsm_support(hcl: &Hcl) -> Result<bool, Error> {
        // On x86_64 the privileges come from the synthetic CPUID features
        // leaf; on aarch64 they are queried through the HCL.
        #[cfg(guest_arch = "x86_64")]
        let privs = {
            let result = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
            let num = result.eax as u64 | ((result.ebx as u64) << 32);
            hvdef::HvPartitionPrivilege::from(num)
        };

        #[cfg(guest_arch = "aarch64")]
        let privs = hcl.get_privileges_and_features_info().map_err(Error::Hcl)?;

        if !privs.access_vsm() {
            return Ok(false);
        }
        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    /// Constructs the shared CVM state: per-VP bookkeeping, TLB lock state,
    /// the per-VTL APIC sets, and the emulated hypervisor (backed by the TSC
    /// reference time source).
    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                // Only the BSP (VP 0) starts in the started state.
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
        })
    }
}
2110
impl UhPartition {
    /// Builds the partition's final CPUID leaf set from the CVM-derived
    /// leaves, the host-provided initial leaves, and (for hardware-isolated
    /// partitions) locally computed x2apic and Hyper-V leaves.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            // Advertise x2apic support per the configured APIC mode; only the
            // x2apic bit of ECX is masked in.
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            // Reuse the hypervisor's reported version for the Hyper-V leaves.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        // Host-provided leaves are appended last; later-pushed leaves take
        // whatever precedence `CpuidLeafSet` defines — TODO confirm ordering
        // semantics against `virt::CpuidLeafSet`.
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    /// Computes partition capabilities from CPUID, using the emulated leaf
    /// set for hardware-isolated partitions and the native CPUID instruction
    /// (filtered through the leaf set) otherwise.
    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            // CVMs must not trust native CPUID; use the computed leaves only.
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                // vtom must be reported exactly when isolation is visible.
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // NOTE(review): on TDX, NXE is treated as forced on here.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}
2205
/// Reads the TSC frequency from the hypervisor's MSR and, where hardware
/// provides its own value (TDX), cross-checks the two within a tolerance.
///
/// Returns the hypervisor-reported frequency on success.
#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Always get the hypervisor frequency first, via /dev/msr.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // For TDX, compute the hardware frequency from CPUID leaf 0x15
    // (core crystal clock): TSC = crystal_hz * ratio_num / ratio_denom.
    let hw_info = match isolation {
        IsolationType::Tdx => {
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
            // Allowed divergence between hypervisor and hardware values
            // (12.5 MHz).
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => {
            // No hardware cross-check is performed for SNP.
            None
        }
        IsolationType::Vbs | IsolationType::None => None,
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        // Don't allow the hypervisor to claim a frequency that diverges too
        // far from what the hardware reports.
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}
2268
impl UhPartitionInner {
    /// Adds or removes a hypervisor io port intercept for `begin..=end`.
    ///
    /// No-op on hardware-isolated partitions. Uses the range-based intercept
    /// registration where available, falling back permanently to per-port
    /// registration on older hypervisors.
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        // Once range registration fails with InvalidParameter, skip it for
        // all subsequent calls (process-wide).
        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        // Slow path: register each port individually.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    /// Returns whether `gpa` falls within lower-VTL RAM.
    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    /// Returns whether `gpa` is mapped for the requested access. Writes to
    /// the monitor page are reported as unmapped so they take the intercept
    /// path.
    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        if self.is_gpa_lower_vtl_ram(gpa) {
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
}
2340
/// Handle returned by [`UhPartition::register_host_io_port_fast_path`];
/// dropping it re-registers the io port intercept for the range.
#[must_use]
pub struct HostIoPortFastPathHandle {
    // Weak so a leaked handle does not keep the partition alive.
    inner: Weak<UhPartitionInner>,
    begin: u16,
    end: u16,
}
2351
2352impl Drop for HostIoPortFastPathHandle {
2353 fn drop(&mut self) {
2354 if let Some(inner) = self.inner.upgrade() {
2355 inner.manage_io_port_intercept_region(self.begin, self.end, true);
2356 }
2357 }
2358}
2359
/// Information about a guest crash reported from a VTL.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that reported the crash.
    pub vp_index: VpIndex,
    /// The VTL that was running when the crash was reported.
    pub last_vtl: GuestVtl,
    /// The guest crash control register value.
    pub control: GuestCrashCtl,
    /// The crash parameter registers.
    pub parameters: [u64; 5],
}
2375
2376#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
2379fn validate_vtl_gpa_flags(
2380 flags: HvMapGpaFlags,
2381 mbec_enabled: bool,
2382 shadow_supervisor_stack_enabled: bool,
2383) -> bool {
2384 if flags.adjustable() {
2386 return false;
2387 }
2388
2389 if flags.kernel_executable() != flags.user_executable() {
2391 if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
2392 return false;
2393 }
2394 }
2395
2396 if flags.writable()
2398 || flags.kernel_executable()
2399 || flags.user_executable()
2400 || flags.supervisor_shadow_stack()
2401 || flags.paging_writability()
2402 || flags.verify_paging_writability()
2403 {
2404 if !flags.readable() {
2405 return false;
2406 }
2407 }
2408
2409 if flags.supervisor_shadow_stack()
2412 && ((!flags.kernel_executable() && !flags.user_executable())
2413 || shadow_supervisor_stack_enabled)
2414 {
2415 return false;
2416 }
2417
2418 true
2419}