#![cfg(target_os = "linux")]

mod devmsr;

cfg_if::cfg_if!(
    if #[cfg(target_arch = "x86_64")] {
        mod cvm_cpuid;
        pub use processor::snp::SnpBacked;
        pub use processor::tdx::TdxBacked;
        use crate::processor::HardwareIsolatedBacking;
        pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
        use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
        use bitvec::prelude::BitArray;
        use bitvec::prelude::Lsb0;
        use devmsr::MsrDevice;
        use hv1_emulator::hv::ProcessorVtlHv;
        use processor::LapicState;
        use processor::snp::SnpBackedShared;
        use processor::tdx::TdxBackedShared;
        use std::arch::x86_64::CpuidResult;
        use virt::CpuidLeaf;
        use virt::state::StateElement;
        use virt::vp::MpState;
        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
    } else if #[cfg(target_arch = "aarch64")] {
        pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
        use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
    }
);
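
// `IrrBitmap` packs 256 bits (8 x u32, least-significant-bit first), one bit
// per x86 interrupt vector. A minimal usage sketch, using only the bitvec
// APIs already used in this file (`set` and `iter_ones`):
//
//     let mut irr = IrrBitmap::new(Default::default());
//     irr.set(0x30, true); // mark vector 0x30 pending
//     assert!(irr.iter_ones().any(|v| v == 0x30));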

mod processor;
pub use processor::Backing;
pub use processor::UhProcessor;

use anyhow::Context as AnyhowContext;
use bitfield_struct::bitfield;
use bitvec::boxed::BitBox;
use bitvec::vec::BitVec;
use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemory;
use guestmem::GuestMemoryBackingError;
use hcl::GuestVtl;
use hcl::ioctl::Hcl;
use hcl::ioctl::SetVsmPartitionConfigError;
use hv1_emulator::hv::GlobalHv;
use hv1_emulator::message_queues::MessageQueues;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::SintProxied;
use hv1_structs::VtlArray;
use hvdef::GuestCrashCtl;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PAGE_SIZE_USIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvRegisterName;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvRegisterVsmPartitionStatus;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGuestOsId;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use inspect::Inspect;
use inspect::InspectMut;
use memory_range::MemoryRange;
use pal::unix::affinity;
use pal::unix::affinity::CpuSet;
use pal_async::driver::Driver;
use pal_async::driver::SpawnDriver;
use pal_uring::IdleControl;
use parking_lot::Mutex;
use parking_lot::RwLock;
use processor::BackingSharedParams;
use processor::SidecarExitReason;
use sidecar_client::NewSidecarClientError;
use std::ops::RangeInclusive;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use std::sync::Weak;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use thiserror::Error;
use user_driver::DmaClient;
use virt::IsolationType;
use virt::PartitionCapabilities;
use virt::VpIndex;
use virt::irqcon::IoApicRouting;
use virt::irqcon::MsiRequest;
use virt::x86::apic_software_device::ApicSoftwareDevices;
use virt_support_apic::LocalApicSet;
use vm_topology::memory::MemoryLayout;
use vm_topology::processor::ProcessorTopology;
use vm_topology::processor::TargetVpInfo;
use vmcore::monitor::MonitorPage;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::vmtime::VmTimeSource;
use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
use x86defs::tdx::TdCallResult;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("hcl error")]
    Hcl(#[source] hcl::ioctl::Error),
    #[error("failed to open sidecar client")]
    Sidecar(#[source] NewSidecarClientError),
    #[error("failed to install {0:?} intercept: {1:?}")]
    InstallIntercept(HvInterceptType, HvError),
    #[error("failed to query hypervisor register {0:#x?}")]
    Register(HvRegisterName, #[source] HvError),
    #[error("failed to set vsm partition config register")]
    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
    #[error("failed to create cpuid tables for cvm")]
    #[cfg(guest_arch = "x86_64")]
    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
    #[error("failed to update hypercall msr")]
    UpdateHypercallMsr,
    #[error("failed to update reference tsc msr")]
    UpdateReferenceTsc,
    #[error("failed to map overlay page")]
    MapOverlay(#[source] std::io::Error),
    #[error("failed to allocate shared visibility pages for overlay")]
    AllocateSharedVisOverlay(#[source] anyhow::Error),
    #[error("failed to open msr device")]
    OpenMsr(#[source] std::io::Error),
    #[error("cpuid did not contain valid TSC frequency information")]
    BadCpuidTsc,
    #[error("failed to read tsc frequency")]
    ReadTscFrequency(#[source] std::io::Error),
    #[error(
        "tsc frequency mismatch between hypervisor ({hv}) and hardware ({hw}), exceeds allowed error {allowed_error}"
    )]
    TscFrequencyMismatch {
        hv: u64,
        hw: u64,
        allowed_error: u64,
    },
    #[error("failed to set tdx l2 ctls: {0:?}")]
    FailedToSetL2Ctls(TdCallResult),
    #[error("debugging is configured but the binary does not have the gdb feature")]
    InvalidDebugConfiguration,
    #[error("failed to allocate TLB flush page")]
    AllocateTlbFlushPage(#[source] anyhow::Error),
}

#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}

#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}

#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    #[inspect(skip)]
    hcl: Hcl,
    #[inspect(skip)]
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    #[inspect(skip)]
    enter_modes: Mutex<EnterModes>,
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}

#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}

impl BackingShared {
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn guest_vsm_disabled(&self) -> bool {
        match self {
            BackingShared::Hypervisor(h) => {
                matches!(*h.guest_vsm.read(), GuestVsmState::NotPlatformSupported)
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                matches!(*cvm.guest_vsm.read(), GuestVsmState::NotPlatformSupported)
            }
        }
    }

    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}

#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}

impl Default for EnterModes {
    fn default() -> Self {
        Self {
            first: EnterMode::Fast,
            second: EnterMode::IdleToVtl0,
        }
    }
}

impl From<EnterModes> for hcl::protocol::EnterModes {
    fn from(value: EnterModes) -> Self {
        Self::new()
            .with_first(value.first.into())
            .with_second(value.second.into())
    }
}

#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}

impl From<EnterMode> for hcl::protocol::EnterMode {
    fn from(value: EnterMode) -> Self {
        match value {
            EnterMode::Fast => Self::FAST,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    reg_intercept: SecureRegisterInterceptState,
}

#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    fn new() -> Self {
        GuestVsmVpState {
            vtl0_exit_pending_event: None,
            reg_intercept: Default::default(),
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    exit_vtl: GuestVtl,
    hv: VtlArray<ProcessorVtlHv, 2>,
    lapics: VtlArray<LapicState, 2>,
    vtl1: Option<GuestVsmVpState>,
}

#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            lapic.set_apic_base(apic_base).unwrap();
            // VTL 0 APs start in the wait-for-SIPI state.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    ia32_misc_enable_mask: u64,
}

#[derive(Inspect)]
struct UhCvmPartitionState {
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    lapic: VtlArray<LocalApicSet, 2>,
    hv: GlobalHv<2>,
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    shared_dma_client: Arc<dyn DmaClient>,
    private_dma_client: Arc<dyn DmaClient>,
    hide_isolation: bool,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        &self.vps[vp_index as usize]
    }

    fn is_lower_vtl_startup_denied(&self) -> bool {
        matches!(
            *self.guest_vsm.read(),
            GuestVsmState::Enabled {
                vtl1: CvmVtl1State {
                    deny_lower_vtl_startup: true,
                    ..
                }
            }
        )
    }
}

#[derive(Inspect)]
struct UhCvmVpInner {
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    vtl1_enable_called: Mutex<bool>,
    started: AtomicBool,
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
enum GuestVsmState<T: Inspect> {
    NotPlatformSupported,
    NotGuestEnabled,
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}

impl<T: Inspect> GuestVsmState<T> {
    pub fn from_availability(guest_vsm_available: bool) -> Self {
        if guest_vsm_available {
            GuestVsmState::NotGuestEnabled
        } else {
            GuestVsmState::NotPlatformSupported
        }
    }
}
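
// A quick illustration of the initial-state rule above: when the platform
// supports guest VSM the state starts as `NotGuestEnabled` and only moves to
// `Enabled { vtl1 }` once the guest enables VTL 1; otherwise it is pinned at
// `NotPlatformSupported`.
//
//     let state = GuestVsmState::<CvmVtl1State>::from_availability(true);
//     assert!(matches!(state, GuestVsmState::NotGuestEnabled));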

#[derive(Inspect)]
struct CvmVtl1State {
    enabled_on_any_vp: bool,
    zero_memory_on_reset: bool,
    deny_lower_vtl_startup: bool,
    pub mbec_enabled: bool,
    pub shadow_supervisor_stack_enabled: bool,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl CvmVtl1State {
    fn new(mbec_enabled: bool) -> Self {
        Self {
            enabled_on_any_vp: false,
            zero_memory_on_reset: false,
            deny_lower_vtl_startup: false,
            mbec_enabled,
            shadow_supervisor_stack_enabled: false,
            // One bit per 16-bit I/O port.
            io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
            io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
        }
    }
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    tsc_scale: u64,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl TscReferenceTimeSource {
    fn new(tsc_frequency: u64) -> Self {
        TscReferenceTimeSource {
            // 64.64 fixed-point multiplier from TSC ticks to 100ns units.
            tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
        }
    }
}
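
// Worked example of the fixed-point conversion above (hypothetical numbers):
// with a 2.5 GHz TSC, tsc_scale = (10_000_000 << 64) / 2_500_000_000, i.e.
// 0.004 in 64.64 fixed point, so `now()` below maps 2_500_000_000 ticks
// (one second) to 10_000_000 reference-time units of 100ns each.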

impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}

#[cfg(guest_arch = "aarch64")]
impl virt::irqcon::ControlGic for UhPartitionInner {
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}

#[cfg(guest_arch = "aarch64")]
impl virt::Aarch64Partition for UhPartition {
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}

pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}

impl UhProcessorBox {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}

#[derive(Debug, Inspect)]
struct UhVpInner {
    wake_reasons: AtomicU64,
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    cpu_index: u32,
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}

impl UhVpInner {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
    StartVp,
    EnableVpVtl,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}

#[derive(Debug, Inspect)]
struct TlbLockInfo {
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    blocking_vp_count: AtomicU32,
    sleeping: AtomicBool,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
impl TlbLockInfo {
    fn new(vp_count: usize) -> Self {
        Self {
            blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vp_count: AtomicU32::new(0),
            sleeping: false.into(),
        }
    }
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}

impl WakeReason {
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true);
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}
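
// Sketch of how these wake reasons are used (names from this file; the exact
// atomic packing lives in the processor module): a remote thread ORs a
// `WakeReason` into the target VP's `wake_reasons` word and then wakes the
// VP's waker, conceptually:
//
//     vp.wake(GuestVtl::Vtl0, WakeReason::MESSAGE_QUEUES);
//
// The VP then drains all set bits the next time it processes wake reasons.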

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}

impl UhPartition {
    /// Revokes guest VSM.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current hypervisor reference time, in 100ns units.
    pub fn reference_time(&self) -> u64 {
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}

impl virt::Partition for UhPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    fn request_yield(&self, _vp_index: VpIndex) {
        unimplemented!()
    }
}

impl virt::X86Partition for UhPartition {
    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        if let Some(apic) = &self.inner.lapic(vtl) {
            apic.lint(vp_index, lint.into(), |vp_index| {
                self.inner
                    .vp(vp_index)
                    .unwrap()
                    .wake(vtl, WakeReason::INTCON);
            });
        } else if lint == 0 {
            self.inner
                .vp(vp_index)
                .unwrap()
                .wake(vtl, WakeReason::EXTINT);
        } else {
            unimplemented!()
        }
    }
}

impl UhPartitionInner {
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        {
            // Record the vector in the partition-wide device vector table.
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        for vp in self.vps.iter() {
            // The requesting VP updates its own filter; wake the others.
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        if wake_vps {
            // Wake all VPs so they pick up the new enter modes.
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
        self.hcl.get_vsm_partition_status()
    }
}

impl virt::Synic for UhPartition {
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}

impl virt::SynicMonitor for UhPartition {
    fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        let old_gpa = self.inner.monitor_page.set_gpa(gpa);

        let allocated_page = allocated_block.take();
        if let Some(old_gpa) = old_gpa {
            let allocated_gpa = allocated_page
                .as_ref()
                .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);

            // If the old page was an allocated overlay page, unregister it;
            // otherwise restore full VTL protections on it.
            let result = if allocated_gpa == Some(old_gpa) {
                let vtl = GuestVtl::try_from(vtl).unwrap();
                self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
            } else {
                self.inner
                    .hcl
                    .modify_vtl_protection_mask(
                        MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                        hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                        HvInputVtl::CURRENT_VTL,
                    )
                    .map_err(|err| anyhow::anyhow!(err))
            };

            result
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        if let Some(gpa) = gpa {
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
                    HvMapGpaFlags::new().with_readable(true),
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to register monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(gpa, "registered monitor page");
        }

        Ok(())
    }

    fn register_monitor(
        &self,
        monitor_id: vmcore::monitor::MonitorId,
        connection_id: u32,
    ) -> Box<dyn Sync + Send> {
        self.inner
            .monitor_page
            .register_monitor(monitor_id, connection_id)
    }

    fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
        let vtl = GuestVtl::try_from(vtl).unwrap();

        // Only CVMs need a specially allocated monitor page.
        let Some(state) = self.inner.backing_shared.cvm_state() else {
            return Ok(None);
        };

        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        if let Some(block) = allocated_block.as_ref() {
            // A monitor page was already allocated; reuse it.
            let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
            assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
            return Ok(Some(gpa));
        }

        let block = state
            .private_dma_client
            .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
            .context("failed to allocate monitor page")?;

        let gpn = block.pfns()[0];
        *allocated_block = Some(block);
        let gpa = gpn << HV_PAGE_SHIFT;
        let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
        if let Some(old_gpa) = old_gpa {
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                    hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
            .context("failed to register monitor page")
            .inspect_err(|_| {
                self.inner.monitor_page.set_gpa(None);
            })?;

        tracing::debug!(gpa, "registered allocated monitor page");

        Ok(Some(gpa))
    }
}

impl UhPartitionInner {
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}

#[derive(Debug)]
struct UhEventPort {
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}

impl vmcore::synic::GuestEventPort for UhEventPort {
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag);
                        }
                    }
                }
            } else {
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}

impl virt::Hv1 for UhPartition {
    type Error = Error;
    type Device = virt::x86::apic_software_device::ApicSoftwareDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().clone()
        } else {
            ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
        })
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        self.inner.software_devices.is_some().then_some(self)
    }
}

impl GetReferenceTime for UhPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        ReferenceTimeResult {
            ref_time: self.hcl.reference_time().unwrap(),
            system_time: None,
        }
    }
}

impl virt::DeviceBuilder for UhPartition {
    fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let device = self
            .inner
            .software_devices
            .as_ref()
            .expect("checked in new_virtual_device")
            .new_device(self.interrupt_targets[vtl].clone(), device_id)
            .map_err(Error::NewDevice)?;

        Ok(device)
    }
}

struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}

impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
    fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
        let partition = self.partition.clone();
        let vtl = self.vtl;
        Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
    }
}

impl UhPartitionInner {
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}

impl IoApicRouting for UhPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
    }
}

fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    // Read available capabilities to determine what to enable.
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let hardware_isolated = hcl.isolation().is_hardware_isolated();
    let isolated = hcl.isolation().is_isolated();

    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());

    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}

/// Parameters used when constructing a [`UhPartition`].
pub struct UhPartitionNewParams<'a> {
    /// The isolation type of the partition.
    pub isolation: IsolationType,
    /// Whether to hide the isolation type from the guest.
    pub hide_isolation: bool,
    /// The memory layout for lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The guest processor topology.
    pub topology: &'a ProcessorTopology,
    /// The unparsed CVM cpuid info, if any.
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The unparsed CVM SNP secrets page, if any.
    pub snp_secrets: Option<&'a [u8]>,
    /// The virtual top of memory for hardware-isolated VMs, if any.
    pub vtom: Option<u64>,
    /// Whether to handle synic messages and events in VTL2.
    pub handle_synic: bool,
    /// Whether to disable sidecar VP hotplug.
    pub no_sidecar_hotplug: bool,
    /// Whether to use MMIO hypercalls.
    pub use_mmio_hypercalls: bool,
    /// Whether to intercept guest debug exceptions.
    pub intercept_debug_exceptions: bool,
}

/// Parameters provided late in partition construction.
pub struct UhLateParams<'a> {
    /// Guest memory for each VTL.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL 0 kernel execute access.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL 0 user execute access.
    pub vtl0_user_exec_gm: GuestMemory,
    /// The CPUID leaves to expose to the guest.
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// The sender for guest crash notifications.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// Parameters for CVMs, if any.
    pub cvm_params: Option<CvmLateParams>,
    /// Whether a vmbus relay is configured for this partition.
    pub vmbus_relay: bool,
}

/// Parameters specific to CVMs.
pub struct CvmLateParams {
    /// Shared-visibility guest memory.
    pub shared_gm: GuestMemory,
    /// The isolated memory protector for the partition.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// A DMA client for shared (host-visible) pages.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// A DMA client for private (encrypted) pages.
    pub private_dma_client: Arc<dyn DmaClient>,
}

/// The source of a GPN used for an overlay page.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The page comes from regular guest memory.
    GuestMemory,
    /// The page comes from a DMA allocation.
    Dma,
}

/// Trait for CVM-related protections on guest memory.
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility on guest memory.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries host visibility on guest memory.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Gets the default protections for VTL 0.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections for the given VTL.
    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Changes the VTL protections on a set of guest pages.
    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Registers a page as an overlay page, optionally applying new
    /// permissions.
    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Unregisters an overlay page.
    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Returns whether the given page is registered as an overlay page.
    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    /// Locks the given pages.
    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    /// Unlocks previously locked pages.
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Records that VTL 1 protections are enabled.
    fn set_vtl1_protections_enabled(&self);

    /// Returns whether VTL 1 protections are enabled.
    fn vtl1_protections_enabled(&self) -> bool;
}

/// Provides access to TLB flush and lock operations.
pub trait TlbFlushLockAccess {
    /// Flushes the TLB for a specific VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flushes the entire TLB for all VTLs.
    fn flush_entire(&mut self);

    /// Arranges to wait for TLB locks on the given VTL.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}
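
// A minimal sketch (hypothetical, for illustration only) of a trivial
// `TlbFlushLockAccess` implementation; the real implementations live in the
// processor backings:
//
//     struct NoopTlbAccess;
//     impl TlbFlushLockAccess for NoopTlbAccess {
//         fn flush(&mut self, _vtl: GuestVtl) {}
//         fn flush_entire(&mut self) {}
//         fn set_wait_for_tlb_locks(&mut self, _vtl: GuestVtl) {}
//     }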

/// A partially built partition. Used to allow querying partition capabilities
/// before fully instantiating the partition.
pub struct UhProtoPartition<'a> {
    params: UhPartitionNewParams<'a>,
    hcl: Hcl,
    guest_vsm_available: bool,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

impl<'a> UhProtoPartition<'a> {
    /// Creates a new prototype partition.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Set the hypercalls that this process will use.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let guest_vsm_available = Self::check_guest_vsm_support(&hcl)?;

        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    /// Returns whether guest VSM will be available to the guest.
    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    /// Builds the partition.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        if params.intercept_debug_exceptions {
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // Register for #VC (vector 0x1D) exception intercepts.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(0x1D),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Get the register tweak bitmap from the secrets page.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // The CPU index currently matches the VP index.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Register for unknown-device interrupt retargeting to support
        // software (VPCI) devices, if the hypervisor allows it.
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {};

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        );

        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            // Register an intercept so that VTL2 sees HvPostMessage and
            // HvSignalEvent hypercalls whose connection IDs the hypervisor
            // doesn't recognize.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            // Intercept all IO ports unless explicitly opted out.
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}
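
// Typical two-phase construction (a sketch; `make_driver` and the parameter
// values are hypothetical): capabilities such as guest VSM availability can
// be queried between the two phases.
//
//     let proto = UhProtoPartition::new(params, |cpu| make_driver(cpu))?;
//     let vsm_available = proto.guest_vsm_available();
//     let (partition, vps) = proto.build(late_params).await?;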

impl UhPartition {
    /// Gets the guest OS ID for VTL0.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        // If we are emulating the hypervisor interfaces, get the value from
        // the emulator. Otherwise, get the value from the hypervisor.
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            self.inner
                .hcl
                .get_guest_os_id(Vtl::Vtl0)
                .map_err(Error::Hcl)?
        };
        Ok(id)
    }

    /// Configures guest accesses to IO ports in `range` to go directly to the
    /// host. When the returned handle is dropped, the intercept is
    /// reinstated.
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        // There is no way to provide a fast path for hardware-isolated VMs.
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Enables or disables the PM timer assist.
    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}

impl UhProtoPartition<'_> {
    fn check_guest_vsm_support(hcl: &Hcl) -> Result<bool, Error> {
        #[cfg(guest_arch = "x86_64")]
        let privs = {
            let result = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
            let num = result.eax as u64 | ((result.ebx as u64) << 32);
            hvdef::HvPartitionPrivilege::from(num)
        };

        #[cfg(guest_arch = "aarch64")]
        let privs = hcl.get_privileges_and_features_info().map_err(Error::Hcl)?;

        if !privs.access_vsm() {
            return Ok(false);
        }
        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
        })
    }
}

impl UhPartition {
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::x86::X86PartitionCapabilities {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        // Hardware-isolated VMs must only use the processed CPUID results;
        // other VMs can combine native CPUID with the processed results.
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn);
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX requires EFER.NXE to be set, so it is forced on.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        caps
    }
}

#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Always get the frequency from the hypervisor, which is expected to
    // provide the most accurate value.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Where possible, cross-check the hypervisor value against hardware.
    let hw_info = match isolation {
        IsolationType::Tdx => {
            // The TDX module provides the TSC frequency via CPUID.
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
            // Allow a small amount of error (12.5 MHz).
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => None,
        IsolationType::Vbs | IsolationType::None => None,
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        // Don't allow the frequencies to differ by too much.
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}
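
// Worked example (hypothetical values) of the CPUID 0x15 math above: with a
// 25 MHz crystal (ecx = 25_000_000) and a TSC/crystal ratio of ebx/eax =
// 188/2, the hardware TSC frequency is 25_000_000 * 188 / 2 =
// 2_350_000_000 Hz (2.35 GHz), which must then be within 12.5 MHz of the
// hypervisor-reported frequency or construction fails.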

impl UhPartitionInner {
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        // Try to register the whole range at once.
        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    // The hypervisor doesn't support port ranges; don't try
                    // this path again.
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        // Fall back to registering one port at a time.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        if self.is_gpa_lower_vtl_ram(gpa) {
            // Treat writes to the monitor page as unmapped.
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }

    #[cfg(guest_arch = "x86_64")]
    fn cpuid_result(&self, eax: u32, ecx: u32, default: &[u32; 4]) -> [u32; 4] {
        let r = self.cpuid.result(eax, ecx, default);
        if eax == hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES {
            // Update the VSM privilege dynamically, since guest VSM may be
            // revoked at runtime.
            let mut features = hvdef::HvFeatures::from_cpuid(r);
            if self.backing_shared.guest_vsm_disabled() {
                features.set_privileges(features.privileges().with_access_vsm(false));
            }
            features.into_cpuid()
        } else {
            r
        }
    }
}

/// Handle returned by [`UhPartition::register_host_io_port_fast_path`].
///
/// When dropped, reinstates the intercept for the registered IO port range.
#[must_use]
pub struct HostIoPortFastPathHandle {
    inner: Weak<UhPartitionInner>,
    begin: u16,
    end: u16,
}

impl Drop for HostIoPortFastPathHandle {
    fn drop(&mut self) {
        if let Some(inner) = self.inner.upgrade() {
            inner.manage_io_port_intercept_region(self.begin, self.end, true);
        }
    }
}

/// A guest crash notification from a lower VTL.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that observed the crash.
    pub vp_index: VpIndex,
    /// The VTL that crashed.
    pub last_vtl: GuestVtl,
    /// The guest crash control value.
    pub control: GuestCrashCtl,
    /// The crash parameters.
    pub parameters: [u64; 5],
}

/// Validates that `flags` is a valid VTL memory protection setting when
/// applied to VTL 1.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn validate_vtl_gpa_flags(
    flags: HvMapGpaFlags,
    mbec_enabled: bool,
    shadow_supervisor_stack_enabled: bool,
) -> bool {
    // Adjust is not allowed.
    if flags.adjustable() {
        return false;
    }

    // Kernel execute must equal user execute unless MBEC is enabled, and
    // kernel-execute-only is never valid.
    if flags.kernel_executable() != flags.user_executable() {
        if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
            return false;
        }
    }

    // Read must be specified if anything else is specified.
    if flags.writable()
        || flags.kernel_executable()
        || flags.user_executable()
        || flags.supervisor_shadow_stack()
        || flags.paging_writability()
        || flags.verify_paging_writability()
    {
        if !flags.readable() {
            return false;
        }
    }

    // Supervisor shadow stack protection requires execute permission and is
    // rejected when shadow supervisor stacks are already enabled.
    if flags.supervisor_shadow_stack()
        && ((!flags.kernel_executable() && !flags.user_executable())
            || shadow_supervisor_stack_enabled)
    {
        return false;
    }

    true
}
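
// A minimal sanity check (a sketch added for illustration) for
// `validate_vtl_gpa_flags`, using the `HvMapGpaFlags` builder methods already
// used in this file plus `with_writable`/`with_user_executable`, which are
// assumed to be generated for the bitfield.
#[cfg(test)]
mod validate_flags_tests {
    use super::*;

    #[test]
    fn read_is_required_for_write_and_execute() {
        // Readable + writable is valid without MBEC.
        assert!(validate_vtl_gpa_flags(
            HvMapGpaFlags::new().with_readable(true).with_writable(true),
            false,
            false,
        ));
        // Writable without readable is rejected.
        assert!(!validate_vtl_gpa_flags(
            HvMapGpaFlags::new().with_writable(true),
            false,
            false,
        ));
        // User-execute-only mappings require MBEC.
        assert!(!validate_vtl_gpa_flags(
            HvMapGpaFlags::new()
                .with_readable(true)
                .with_user_executable(true),
            false,
            false,
        ));
        assert!(validate_vtl_gpa_flags(
            HvMapGpaFlags::new()
                .with_readable(true)
                .with_user_executable(true),
            true,
            false,
        ));
    }
}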