#![cfg(all(guest_is_native, target_os = "linux"))]

mod devmsr;

cfg_if::cfg_if!(
    if #[cfg(guest_arch = "x86_64")] {
        mod cvm_cpuid;
        pub use processor::snp::SnpBacked;
        pub use processor::tdx::TdxBacked;
        use crate::processor::HardwareIsolatedBacking;
        pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
        use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
        use bitvec::prelude::BitArray;
        use bitvec::prelude::Lsb0;
        use devmsr::MsrDevice;
        use hv1_emulator::hv::ProcessorVtlHv;
        use processor::LapicState;
        use processor::snp::SnpBackedShared;
        use processor::tdx::TdxBackedShared;
        use std::arch::x86_64::CpuidResult;
        use virt::CpuidLeaf;
        use virt::state::StateElement;
        use virt::vp::MpState;
        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
    } else if #[cfg(guest_arch = "aarch64")] {
        pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
        use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
    }
);

mod processor;
pub use processor::Backing;
pub use processor::UhProcessor;

use anyhow::Context as AnyhowContext;
use bitfield_struct::bitfield;
use bitvec::boxed::BitBox;
use bitvec::vec::BitVec;
use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemory;
use guestmem::GuestMemoryBackingError;
use hcl::GuestVtl;
use hcl::ioctl::Hcl;
use hcl::ioctl::SetVsmPartitionConfigError;
use hv1_emulator::hv::GlobalHv;
use hv1_emulator::message_queues::MessageQueues;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::SintProxied;
use hv1_structs::VtlArray;
use hvdef::GuestCrashCtl;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PAGE_SIZE_USIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvRegisterName;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvRegisterVsmPartitionStatus;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGuestOsId;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use inspect::Inspect;
use inspect::InspectMut;
use memory_range::MemoryRange;
use pal::unix::affinity;
use pal::unix::affinity::CpuSet;
use pal_async::driver::Driver;
use pal_async::driver::SpawnDriver;
use pal_uring::IdleControl;
use parking_lot::Mutex;
use parking_lot::RwLock;
use processor::BackingSharedParams;
use processor::SidecarExitReason;
use sidecar_client::NewSidecarClientError;
use std::ops::RangeInclusive;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use std::sync::Weak;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use thiserror::Error;
use user_driver::DmaClient;
use virt::IsolationType;
use virt::PartitionCapabilities;
use virt::VpIndex;
use virt::X86Partition;
use virt::irqcon::IoApicRouting;
use virt::irqcon::MsiRequest;
use virt::x86::apic_software_device::ApicSoftwareDevices;
use virt_support_apic::LocalApicSet;
use vm_topology::memory::MemoryLayout;
use vm_topology::processor::ProcessorTopology;
use vm_topology::processor::TargetVpInfo;
use vmcore::monitor::MonitorPage;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::vmtime::VmTimeSource;
use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
use x86defs::tdx::TdCallResult;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("hcl error")]
    Hcl(#[source] hcl::ioctl::Error),
    #[error("failed to open sidecar client")]
    Sidecar(#[source] NewSidecarClientError),
    #[error("failed to install {0:?} intercept: {1:?}")]
    InstallIntercept(HvInterceptType, HvError),
    #[error("failed to query hypervisor register {0:#x?}")]
    Register(HvRegisterName, #[source] HvError),
    #[error("failed to set vsm partition config register")]
    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
    #[error("failed to create cpuid tables for cvm")]
    #[cfg(guest_arch = "x86_64")]
    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
    #[error("failed to update hypercall msr")]
    UpdateHypercallMsr,
    #[error("failed to update reference tsc msr")]
    UpdateReferenceTsc,
    #[error("failed to map overlay page")]
    MapOverlay(#[source] std::io::Error),
    #[error("failed to allocate shared visibility pages for overlay")]
    AllocateSharedVisOverlay(#[source] anyhow::Error),
    #[error("failed to open msr device")]
    OpenMsr(#[source] std::io::Error),
    #[error("cpuid did not contain valid TSC frequency information")]
    BadCpuidTsc,
    #[error("failed to read tsc frequency")]
    ReadTscFrequency(#[source] std::io::Error),
    #[error(
        "tsc frequency mismatch between hypervisor ({hv}) and hardware ({hw}), exceeds allowed error ({allowed_error})"
    )]
    TscFrequencyMismatch {
        hv: u64,
        hw: u64,
        allowed_error: u64,
    },
    #[error("failed to set vsm partition config: {0:?}")]
    FailedToSetL2Ctls(TdCallResult),
    #[error("debugging is configured but the binary does not have the gdb feature")]
    InvalidDebugConfiguration,
    #[error("failed to allocate TLB flush page")]
    AllocateTlbFlushPage(#[source] anyhow::Error),
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
}

#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}

#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}

#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    #[inspect(skip)]
    hcl: Hcl,
    #[inspect(skip)]
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    #[inspect(skip)]
    enter_modes: Mutex<EnterModes>,
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}

#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}

impl BackingShared {
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}

#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}

impl Default for EnterModes {
    fn default() -> Self {
        Self {
            first: EnterMode::Fast,
            second: EnterMode::IdleToVtl0,
        }
    }
}

impl From<EnterModes> for hcl::protocol::EnterModes {
    fn from(value: EnterModes) -> Self {
        Self::new()
            .with_first(value.first.into())
            .with_second(value.second.into())
    }
}

#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}

impl From<EnterMode> for hcl::protocol::EnterMode {
    fn from(value: EnterMode) -> Self {
        match value {
            EnterMode::Fast => Self::FAST,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    reg_intercept: SecureRegisterInterceptState,
}

#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    fn new() -> Self {
        GuestVsmVpState {
            vtl0_exit_pending_event: None,
            reg_intercept: Default::default(),
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    exit_vtl: GuestVtl,
    hv: VtlArray<ProcessorVtlHv, 2>,
    lapics: VtlArray<LapicState, 2>,
    vtl1: Option<GuestVsmVpState>,
}

#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            lapic.set_apic_base(apic_base).unwrap();
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    ia32_misc_enable_mask: u64,
}

#[derive(Inspect)]
struct UhCvmPartitionState {
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    lapic: VtlArray<LocalApicSet, 2>,
    hv: GlobalHv<2>,
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    shared_dma_client: Arc<dyn DmaClient>,
    private_dma_client: Arc<dyn DmaClient>,
    hide_isolation: bool,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        &self.vps[vp_index as usize]
    }

    fn is_lower_vtl_startup_denied(&self) -> bool {
        matches!(
            *self.guest_vsm.read(),
            GuestVsmState::Enabled {
                vtl1: CvmVtl1State {
                    deny_lower_vtl_startup: true,
                    ..
                }
            }
        )
    }
}

#[derive(Inspect)]
struct UhCvmVpInner {
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    vtl1_enable_called: Mutex<bool>,
    started: AtomicBool,
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
enum GuestVsmState<T: Inspect> {
    NotPlatformSupported,
    NotGuestEnabled,
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}

impl<T: Inspect> GuestVsmState<T> {
    pub fn from_availability(guest_vsm_available: bool) -> Self {
        if guest_vsm_available {
            GuestVsmState::NotGuestEnabled
        } else {
            GuestVsmState::NotPlatformSupported
        }
    }
}

#[derive(Inspect)]
struct CvmVtl1State {
    enabled_on_any_vp: bool,
    zero_memory_on_reset: bool,
    deny_lower_vtl_startup: bool,
    pub mbec_enabled: bool,
    pub shadow_supervisor_stack_enabled: bool,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl CvmVtl1State {
    fn new(mbec_enabled: bool) -> Self {
        Self {
            enabled_on_any_vp: false,
            zero_memory_on_reset: false,
            deny_lower_vtl_startup: false,
            mbec_enabled,
            shadow_supervisor_stack_enabled: false,
            io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
            io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
        }
    }
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    tsc_scale: u64,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl TscReferenceTimeSource {
    fn new(tsc_frequency: u64) -> Self {
        TscReferenceTimeSource {
            tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
        }
    }
}

impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}

impl virt::irqcon::ControlGic for UhPartitionInner {
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}

impl virt::Aarch64Partition for UhPartition {
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}

pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}

impl UhProcessorBox {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}

#[derive(Debug, Inspect)]
struct UhVpInner {
    wake_reasons: AtomicU64,
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    cpu_index: u32,
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}

impl UhVpInner {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
    StartVp,
    EnableVpVtl,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}

#[derive(Debug, Inspect)]
struct TlbLockInfo {
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    blocking_vp_count: AtomicU32,
    sleeping: AtomicBool,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
impl TlbLockInfo {
    fn new(vp_count: usize) -> Self {
        Self {
            blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vp_count: AtomicU32::new(0),
            sleeping: false.into(),
        }
    }
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}

impl WakeReason {
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true);
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}

impl UhPartition {
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    pub fn reference_time(&self) -> u64 {
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}

impl virt::Partition for UhPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    fn request_yield(&self, _vp_index: VpIndex) {
        unimplemented!()
    }
}

impl X86Partition for UhPartition {
    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        if let Some(apic) = &self.inner.lapic(vtl) {
            apic.lint(vp_index, lint.into(), |vp_index| {
                self.inner
                    .vp(vp_index)
                    .unwrap()
                    .wake(vtl, WakeReason::INTCON);
            });
        } else if lint == 0 {
            self.inner
                .vp(vp_index)
                .unwrap()
                .wake(vtl, WakeReason::EXTINT);
        } else {
            unimplemented!()
        }
    }
}

impl UhPartitionInner {
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        {
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
        self.hcl.get_vsm_partition_status()
    }
}

impl virt::Synic for UhPartition {
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}

impl virt::SynicMonitor for UhPartition {
    fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        let old_gpa = self.inner.monitor_page.set_gpa(gpa);

        let allocated_page = allocated_block.take();
        if let Some(old_gpa) = old_gpa {
            let allocated_gpa = allocated_page
                .as_ref()
                .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);

            let result = if allocated_gpa == Some(old_gpa) {
                let vtl = GuestVtl::try_from(vtl).unwrap();
                self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
            } else {
                self.inner
                    .hcl
                    .modify_vtl_protection_mask(
                        MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                        hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                        HvInputVtl::CURRENT_VTL,
                    )
                    .map_err(|err| anyhow::anyhow!(err))
            };

            result
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        if let Some(gpa) = gpa {
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
                    HvMapGpaFlags::new().with_readable(true),
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to register monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(gpa, "registered monitor page");
        }

        Ok(())
    }

    fn register_monitor(
        &self,
        monitor_id: vmcore::monitor::MonitorId,
        connection_id: u32,
    ) -> Box<dyn Sync + Send> {
        self.inner
            .monitor_page
            .register_monitor(monitor_id, connection_id)
    }

    fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
        let vtl = GuestVtl::try_from(vtl).unwrap();

        let Some(state) = self.inner.backing_shared.cvm_state() else {
            return Ok(None);
        };

        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        if let Some(block) = allocated_block.as_ref() {
            let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
            assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
            return Ok(Some(gpa));
        }

        let block = state
            .private_dma_client
            .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
            .context("failed to allocate monitor page")?;

        let gpn = block.pfns()[0];
        *allocated_block = Some(block);
        let gpa = gpn << HV_PAGE_SHIFT;
        let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
        if let Some(old_gpa) = old_gpa {
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                    hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
            .context("failed to register monitor page")
            .inspect_err(|_| {
                self.inner.monitor_page.set_gpa(None);
            })?;

        tracing::debug!(gpa, "registered allocated monitor page");

        Ok(Some(gpa))
    }
}

impl UhPartitionInner {
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}

#[derive(Debug)]
struct UhEventPort {
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}

impl vmcore::synic::GuestEventPort for UhEventPort {
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag)
                        }
                    }
                }
            } else {
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}

impl virt::Hv1 for UhPartition {
    type Error = Error;
    type Device = virt::x86::apic_software_device::ApicSoftwareDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().clone()
        } else {
            ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
        })
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        self.inner.software_devices.is_some().then_some(self)
    }
}

impl GetReferenceTime for UhPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        ReferenceTimeResult {
            ref_time: self.hcl.reference_time().unwrap(),
            system_time: None,
        }
    }
}

impl virt::DeviceBuilder for UhPartition {
    fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let device = self
            .inner
            .software_devices
            .as_ref()
            .expect("checked in new_virtual_device")
            .new_device(self.interrupt_targets[vtl].clone(), device_id)
            .map_err(Error::NewDevice)?;

        Ok(device)
    }
}

struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}

impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
    fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
        let partition = self.partition.clone();
        let vtl = self.vtl;
        Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
    }
}

impl UhPartitionInner {
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}

impl IoApicRouting for UhPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
    }
}

fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let hardware_isolated = hcl.isolation().is_hardware_isolated();
    let isolated = hcl.isolation().is_isolated();

    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());

    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}

pub struct UhPartitionNewParams<'a> {
    pub isolation: IsolationType,
    pub hide_isolation: bool,
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    pub topology: &'a ProcessorTopology,
    pub cvm_cpuid_info: Option<&'a [u8]>,
    pub snp_secrets: Option<&'a [u8]>,
    pub vtom: Option<u64>,
    pub handle_synic: bool,
    pub no_sidecar_hotplug: bool,
    pub use_mmio_hypercalls: bool,
    pub intercept_debug_exceptions: bool,
}

pub struct UhLateParams<'a> {
    pub gm: VtlArray<GuestMemory, 2>,
    pub vtl0_kernel_exec_gm: GuestMemory,
    pub vtl0_user_exec_gm: GuestMemory,
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    pub vmtime: &'a VmTimeSource,
    pub cvm_params: Option<CvmLateParams>,
    pub vmbus_relay: bool,
}

pub struct CvmLateParams {
    pub shared_gm: GuestMemory,
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    pub shared_dma_client: Arc<dyn DmaClient>,
    pub private_dma_client: Arc<dyn DmaClient>,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    GuestMemory,
    Dma,
}

pub trait ProtectIsolatedMemory: Send + Sync {
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    fn set_vtl1_protections_enabled(&self);

    fn vtl1_protections_enabled(&self) -> bool;
}

pub trait TlbFlushLockAccess {
    fn flush(&mut self, vtl: GuestVtl);

    fn flush_entire(&mut self);

    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}

pub struct UhProtoPartition<'a> {
    params: UhPartitionNewParams<'a>,
    hcl: Hcl,
    guest_vsm_available: bool,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

impl<'a> UhProtoPartition<'a> {
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let guest_vsm_available = Self::check_guest_vsm_support(&hcl)?;

        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        if params.intercept_debug_exceptions {
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(0x1D),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {};

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}

impl UhPartition {
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            self.inner
                .hcl
                .get_guest_os_id(Vtl::Vtl0)
                .map_err(Error::Hcl)?
        };
        Ok(id)
    }

    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    pub fn assert_debug_interrupt(&self, _vtl: u8) {
        #[cfg(guest_arch = "x86_64")]
        const LINT_INDEX_1: u8 = 1;
        #[cfg(guest_arch = "x86_64")]
        match self.inner.isolation {
            IsolationType::Snp => {
                tracing::error!(?_vtl, "Debug interrupts cannot be injected into SNP VMs");
            }
            _ => {
                let bsp_index = VpIndex::new(0);
                self.pulse_lint(bsp_index, Vtl::try_from(_vtl).unwrap(), LINT_INDEX_1)
            }
        }
    }

    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}

impl UhProtoPartition<'_> {
    fn check_guest_vsm_support(hcl: &Hcl) -> Result<bool, Error> {
        #[cfg(guest_arch = "x86_64")]
        let privs = {
            let result = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
            let num = result.eax as u64 | ((result.ebx as u64) << 32);
            hvdef::HvPartitionPrivilege::from(num)
        };

        #[cfg(guest_arch = "aarch64")]
        let privs = hcl.get_privileges_and_features_info().map_err(Error::Hcl)?;

        if !privs.access_vsm() {
            return Ok(false);
        }
        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
        })
    }
}

impl UhPartition {
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}

#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    let hw_info = match isolation {
        IsolationType::Tdx => {
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => None,
        IsolationType::Vbs | IsolationType::None => None,
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}
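
// Worked example of the CPUID leaf 0x15 arithmetic above, with made-up but
// representative values: a 25 MHz core crystal and a 188/2 TSC ratio give a
// 2.35 GHz TSC. The hypervisor-reported frequency is then required to agree
// with this to within `allowed_error`.
#[cfg(test)]
mod tsc_frequency_tests {
    #[test]
    fn crystal_clock_ratio() {
        let (ratio_denom, ratio_num, clock) = (2u64, 188u64, 25_000_000u64);
        let hw_frequency = clock * ratio_num / ratio_denom;
        assert_eq!(hw_frequency, 2_350_000_000);

        // Same tolerance check as `get_tsc_frequency`, with an example
        // hypervisor-reported value 1 kHz off the hardware value.
        let hv_frequency = 2_350_001_000u64;
        assert!(hw_frequency.abs_diff(hv_frequency) <= 12_500_000);
    }
}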

impl UhPartitionInner {
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        if self.is_gpa_lower_vtl_ram(gpa) {
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
}

#[must_use]
pub struct HostIoPortFastPathHandle {
    inner: Weak<UhPartitionInner>,
    begin: u16,
    end: u16,
}

impl Drop for HostIoPortFastPathHandle {
    fn drop(&mut self) {
        if let Some(inner) = self.inner.upgrade() {
            inner.manage_io_port_intercept_region(self.begin, self.end, true);
        }
    }
}

#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    pub vp_index: VpIndex,
    pub last_vtl: GuestVtl,
    pub control: GuestCrashCtl,
    pub parameters: [u64; 5],
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn validate_vtl_gpa_flags(
    flags: HvMapGpaFlags,
    mbec_enabled: bool,
    shadow_supervisor_stack_enabled: bool,
) -> bool {
    if flags.adjustable() {
        return false;
    }

    if flags.kernel_executable() != flags.user_executable() {
        if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
            return false;
        }
    }

    if flags.writable()
        || flags.kernel_executable()
        || flags.user_executable()
        || flags.supervisor_shadow_stack()
        || flags.paging_writability()
        || flags.verify_paging_writability()
    {
        if !flags.readable() {
            return false;
        }
    }

    if flags.supervisor_shadow_stack()
        && ((!flags.kernel_executable() && !flags.user_executable())
            || shadow_supervisor_stack_enabled)
    {
        return false;
    }

    true
}
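
// A few illustrative cases for `validate_vtl_gpa_flags`. These sketches
// assume `HvMapGpaFlags` exposes `with_*` builder methods for each of the
// fields read above, matching the `with_readable` usage elsewhere in this
// file.
#[cfg(test)]
mod vtl_gpa_flags_tests {
    use super::validate_vtl_gpa_flags;
    use hvdef::HvMapGpaFlags;

    #[test]
    fn write_requires_read() {
        // Write-only mappings are rejected; adding read makes them valid.
        let flags = HvMapGpaFlags::new().with_writable(true);
        assert!(!validate_vtl_gpa_flags(flags, false, false));
        let flags = flags.with_readable(true);
        assert!(validate_vtl_gpa_flags(flags, false, false));
    }

    #[test]
    fn user_only_execute_requires_mbec() {
        // Asymmetric kernel/user execute permissions are only valid with
        // MBEC enabled, and only in the user-executable direction.
        let flags = HvMapGpaFlags::new()
            .with_readable(true)
            .with_user_executable(true);
        assert!(!validate_vtl_gpa_flags(flags, false, false));
        assert!(validate_vtl_gpa_flags(flags, true, false));
    }
}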