#![cfg(all(guest_is_native, target_os = "linux"))]

mod devmsr;

cfg_if::cfg_if!(
    if #[cfg(guest_arch = "x86_64")] {
        mod cvm_cpuid;
        pub use processor::snp::SnpBacked;
        pub use processor::tdx::TdxBacked;
        use crate::processor::HardwareIsolatedBacking;
        pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
        use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
        use bitvec::prelude::BitArray;
        use bitvec::prelude::Lsb0;
        use devmsr::MsrDevice;
        use hv1_emulator::hv::ProcessorVtlHv;
        use processor::LapicState;
        use processor::snp::SnpBackedShared;
        use processor::tdx::TdxBackedShared;
        use std::arch::x86_64::CpuidResult;
        use virt::CpuidLeaf;
        use virt::state::StateElement;
        use virt::vp::MpState;
        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
    } else if #[cfg(guest_arch = "aarch64")] {
        pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
        use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
    }
);

mod processor;
pub use processor::Backing;
pub use processor::UhProcessor;

use anyhow::Context as AnyhowContext;
use bitfield_struct::bitfield;
use bitvec::boxed::BitBox;
use bitvec::vec::BitVec;
use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemory;
use guestmem::GuestMemoryBackingError;
use hcl::GuestVtl;
use hcl::ioctl::Hcl;
use hcl::ioctl::SetVsmPartitionConfigError;
use hv1_emulator::hv::GlobalHv;
use hv1_emulator::message_queues::MessageQueues;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::SintProxied;
use hv1_structs::VtlArray;
use hvdef::GuestCrashCtl;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PAGE_SIZE_USIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvPartitionPrivilege;
use hvdef::HvRegisterName;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvRegisterVsmPartitionStatus;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGuestOsId;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use inspect::Inspect;
use inspect::InspectMut;
use memory_range::MemoryRange;
use pal::unix::affinity;
use pal::unix::affinity::CpuSet;
use pal_async::driver::Driver;
use pal_async::driver::SpawnDriver;
use pal_uring::IdleControl;
use parking_lot::Mutex;
use parking_lot::RwLock;
use processor::BackingSharedParams;
use processor::SidecarExitReason;
use sidecar_client::NewSidecarClientError;
use std::collections::HashMap;
use std::ops::RangeInclusive;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use std::sync::Weak;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use thiserror::Error;
use user_driver::DmaClient;
use virt::IsolationType;
use virt::PartitionCapabilities;
use virt::VpIndex;
use virt::X86Partition;
use virt::irqcon::IoApicRouting;
use virt::irqcon::MsiRequest;
use virt::x86::apic_software_device::ApicSoftwareDevices;
use virt_support_apic::LocalApicSet;
use vm_topology::memory::MemoryLayout;
use vm_topology::processor::ProcessorTopology;
use vm_topology::processor::TargetVpInfo;
use vmcore::monitor::MonitorPage;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::vmtime::VmTimeSource;
use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
use x86defs::tdx::TdCallResult;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

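/// Errors returned while creating or configuring an underhill partition.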
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("hcl error")]
    Hcl(#[source] hcl::ioctl::Error),
    #[error("failed to open sidecar client")]
    Sidecar(#[source] NewSidecarClientError),
    #[error("failed to install {0:?} intercept: {1:?}")]
    InstallIntercept(HvInterceptType, HvError),
    #[error("failed to query hypervisor register {0:#x?}")]
    Register(HvRegisterName, #[source] HvError),
    #[error("failed to set vsm partition config register")]
    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
    #[error("failed to create cpuid tables for cvm")]
    #[cfg(guest_arch = "x86_64")]
    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
    #[error("failed to update hypercall msr")]
    UpdateHypercallMsr,
    #[error("failed to update reference tsc msr")]
    UpdateReferenceTsc,
    #[error("failed to map overlay page")]
    MapOverlay(#[source] std::io::Error),
    #[error("failed to allocate shared visibility pages for overlay")]
    AllocateSharedVisOverlay(#[source] anyhow::Error),
    #[error("failed to open msr device")]
    OpenMsr(#[source] std::io::Error),
    #[error("cpuid did not contain valid TSC frequency information")]
    BadCpuidTsc,
    #[error("failed to read tsc frequency")]
    ReadTscFrequency(#[source] std::io::Error),
    #[error(
        "tsc frequency mismatch between hypervisor ({hv}) and hardware ({hw}), exceeds allowed error {allowed_error}"
    )]
    TscFrequencyMismatch {
        hv: u64,
        hw: u64,
        allowed_error: u64,
    },
    #[error("failed to set L2 ctls: {0:?}")]
    FailedToSetL2Ctls(TdCallResult),
    #[error("debugging is configured but the binary does not have the gdb feature")]
    InvalidDebugConfiguration,
    #[error("failed to allocate TLB flush page")]
    AllocateTlbFlushPage(#[source] anyhow::Error),
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
}

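/// Errors returned when revoking guest VSM.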
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}

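/// An underhill partition: the [`virt::Partition`] implementation that runs
/// on the Microsoft hypervisor from within VTL2.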
#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}

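/// Partition state shared between the partition object and its processors.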
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    #[inspect(skip)]
    hcl: Hcl,
    #[inspect(skip)]
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    #[inspect(skip)]
    enter_modes: Mutex<EnterModes>,
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}

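/// Partition-wide state specific to the backing (hypervisor, SNP, or TDX).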
#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}

impl BackingShared {
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}

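/// The [`EnterMode`]s used by the VP run loop when entering lower VTLs.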
#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}

impl Default for EnterModes {
    fn default() -> Self {
        Self {
            first: EnterMode::Fast,
            second: EnterMode::IdleToVtl0,
        }
    }
}

impl From<EnterModes> for hcl::protocol::EnterModes {
    fn from(value: EnterModes) -> Self {
        Self::new()
            .with_first(value.first.into())
            .with_second(value.second.into())
    }
}

#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}

impl From<EnterMode> for hcl::protocol::EnterMode {
    fn from(value: EnterMode) -> Self {
        match value {
            EnterMode::Fast => Self::FAST,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
        }
    }
}

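/// Per-VP state tracked while guest VSM (VTL 1) is active.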
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    reg_intercept: SecureRegisterInterceptState,
}

#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    fn new() -> Self {
        GuestVsmVpState {
            vtl0_exit_pending_event: None,
            reg_intercept: Default::default(),
        }
    }
}

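/// Per-VP state for hardware-isolated (CVM) partitions.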
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
    /// The handle for the direct overlay pages allocated for this VP.
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    /// The guest VTL the VP will enter when it next runs.
    exit_vtl: GuestVtl,
    /// Hypervisor enlightenment emulator state, per VTL.
    hv: VtlArray<ProcessorVtlHv, 2>,
    /// LAPIC state, per VTL.
    lapics: VtlArray<LapicState, 2>,
    /// State tracked once VTL 1 has been enabled on this VP.
    vtl1: Option<GuestVsmVpState>,
}

#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
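    /// Creates a new CVM VP state.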
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            // Initialize the APIC base to match the reset VM state.
            lapic.set_apic_base(apic_base).unwrap();
            // Only the BSP runs at reset; VTL0 APs wait for SIPI.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}

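/// The VTL 1 configuration of secure register intercepts for VTL 0 register
/// accesses.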
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    ia32_misc_enable_mask: u64,
}

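/// Tracking information for a device interrupt vector redirected through the
/// hypervisor's proxy interrupt mechanism.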
#[derive(Clone, Inspect)]
struct ProxyRedirectVectorInfo {
    device_id: u64,
    /// The original vector the device interrupt was programmed with.
    original_vector: u32,
}

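/// Partition-wide state for CVMs.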
#[derive(Inspect)]
struct UhCvmPartitionState {
    /// The number of VPs per socket, used for topology calculations.
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    /// Bitmaps of VPs that currently hold the TLB lock, per VTL.
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    lapic: VtlArray<LocalApicSet, 2>,
    /// The emulated hypervisor state.
    hv: GlobalHv<2>,
    /// Guest VSM (VTL 1) state.
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    /// Allocator for shared-visibility pages.
    shared_dma_client: Arc<dyn DmaClient>,
    /// Allocator for private (encrypted) pages.
    private_dma_client: Arc<dyn DmaClient>,
    /// Whether isolation is hidden from the guest.
    hide_isolation: bool,
    proxy_interrupt_redirect: bool,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        &self.vps[vp_index as usize]
    }

    fn is_lower_vtl_startup_denied(&self) -> bool {
        matches!(
            *self.guest_vsm.read(),
            GuestVsmState::Enabled {
                vtl1: CvmVtl1State {
                    deny_lower_vtl_startup: true,
                    ..
                }
            }
        )
    }
}

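/// Partition-wide, per-VP state for CVMs.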
#[derive(Inspect)]
struct UhCvmVpInner {
    /// The TLB lock state for this VP, per VTL.
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    /// Whether VTL 1 has been enabled on this VP via EnableVpVtl.
    vtl1_enable_called: Mutex<bool>,
    /// Whether the VP has been started via the StartVp hypercall.
    started: AtomicBool,
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
    #[inspect(with = "|x| inspect::adhoc(|req| inspect::iter_by_key(&*x.lock()).inspect(req))")]
    /// Proxy-redirected interrupt vectors for this VP, keyed by vector.
    proxy_redirect_interrupts: Mutex<HashMap<u32, ProxyRedirectVectorInfo>>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
enum GuestVsmState<T: Inspect> {
    NotPlatformSupported,
    NotGuestEnabled,
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}

impl<T: Inspect> GuestVsmState<T> {
    pub fn from_availability(guest_vsm_available: bool) -> Self {
        if guest_vsm_available {
            GuestVsmState::NotGuestEnabled
        } else {
            GuestVsmState::NotPlatformSupported
        }
    }
}

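/// VTL 1 state for a CVM, tracked once the guest enables VTL 1.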
#[derive(Inspect)]
struct CvmVtl1State {
    /// Whether VTL 1 has been enabled on any VP.
    enabled_on_any_vp: bool,
    /// Whether guest memory should be zeroed before it resets.
    zero_memory_on_reset: bool,
    /// Whether VTL 1 has denied startup of lower VTLs.
    deny_lower_vtl_startup: bool,
    /// Whether Mode Based Execution Control (MBEC) is enabled.
    pub mbec_enabled: bool,
    /// Whether the supervisor shadow stack is enabled.
    pub shadow_supervisor_stack_enabled: bool,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl CvmVtl1State {
    fn new(mbec_enabled: bool) -> Self {
        Self {
            enabled_on_any_vp: false,
            zero_memory_on_reset: false,
            deny_lower_vtl_startup: false,
            mbec_enabled,
            shadow_supervisor_stack_enabled: false,
            io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
            io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
        }
    }
}

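/// A reference time source backed by the guest's TSC.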
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    tsc_scale: u64,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl TscReferenceTimeSource {
    fn new(tsc_frequency: u64) -> Self {
        TscReferenceTimeSource {
            // 64.64 fixed-point scale converting TSC ticks to 100ns units.
            tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
        }
    }
}

impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}

impl virt::irqcon::ControlGic for UhPartitionInner {
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}

impl virt::Aarch64Partition for UhPartition {
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}

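/// A virtual processor for a [`UhPartition`] that has not yet been bound to a
/// backing-specific [`UhProcessor`] or a thread.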
pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}

impl UhProcessorBox {
    /// Returns the VP index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    /// Returns the base CPU that manages this processor, when it is a
    /// sidecar VP.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    /// Binds the processor, returning a [`UhProcessor`] that can be run.
    ///
    /// If `control` is provided, the caller must already be affinitized to
    /// this VP's CPU; the processor's run loop will poll the associated
    /// io-uring ring while idle.
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    /// Marks the VP's sidecar exit reason as being due to the given task.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}

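/// Per-VP state shared between the partition and the VP's run thread.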
#[derive(Debug, Inspect)]
struct UhVpInner {
    /// 32 bits per VTL: top bits are VTL 1, bottom bits are VTL 0.
    wake_reasons: AtomicU64,
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    cpu_index: u32,
    /// The reason the VP exited sidecar mode, if it has; kept for
    /// diagnostics.
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}

impl UhVpInner {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
    /// The VP is being started via the StartVp hypercall.
    StartVp,
    /// The VP is being started via the EnableVpVtl hypercall.
    EnableVpVtl,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
    /// The operation that started the VP.
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}

#[derive(Debug, Inspect)]
struct TlbLockInfo {
    /// The set of VPs that are waiting for this VP to release the TLB lock.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    /// The set of VPs that are holding the TLB lock and preventing this VP
    /// from proceeding.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    /// The count of blocking VPs. This should always be equivalent to
    /// `blocking_vps.count_ones()`, but it is accessible in a single atomic
    /// operation.
    blocking_vp_count: AtomicU32,
    /// Whether the VP is sleeping due to a TLB lock.
    sleeping: AtomicBool,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
impl TlbLockInfo {
    fn new(vp_count: usize) -> Self {
        Self {
            blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vp_count: AtomicU32::new(0),
            sleeping: false.into(),
        }
    }
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}

impl WakeReason {
    // Convenient constants.
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true);
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}

impl UhPartition {
    /// Revokes guest VSM support, so that the guest can no longer enable
    /// VTL 1.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current hypervisor reference time, in 100ns units.
    pub fn reference_time(&self) -> u64 {
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}

impl virt::Partition for UhPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    fn request_yield(&self, _vp_index: VpIndex) {
        unimplemented!()
    }
}

impl X86Partition for UhPartition {
    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        if let Some(apic) = &self.inner.lapic(vtl) {
            apic.lint(vp_index, lint.into(), |vp_index| {
                self.inner
                    .vp(vp_index)
                    .unwrap()
                    .wake(vtl, WakeReason::INTCON);
            });
        } else if lint == 0 {
            self.inner
                .vp(vp_index)
                .unwrap()
                .wake(vtl, WakeReason::EXTINT);
        } else {
            unimplemented!()
        }
    }
}

impl UhPartitionInner {
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    /// Requests that all other VPs update their proxy IRR filters to include
    /// `device_vector`.
    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        // Add the vector to the partition-wide device vector table.
        {
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        // Wake all other VPs so they pick up the updated filter.
        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    /// Fills `irr_vectors` with the partition's registered device vectors.
    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        // Wake all VPs to propagate the updated enter modes.
        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    /// Queries the hypervisor's VSM partition status register.
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
        self.hcl.get_vsm_partition_status()
    }
}

impl virt::Synic for UhPartition {
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}

impl virt::SynicMonitor for UhPartition {
    fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
        // Hold the lock across the whole operation to keep the allocated
        // block consistent with the registered GPA.
        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        let old_gpa = self.inner.monitor_page.set_gpa(gpa);

        let allocated_page = allocated_block.take();
        if let Some(old_gpa) = old_gpa {
            let allocated_gpa = allocated_page
                .as_ref()
                .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);

            // If the old page was one this partition allocated, unregister it
            // as an overlay page; otherwise restore full VTL protections.
            let result = if allocated_gpa == Some(old_gpa) {
                let vtl = GuestVtl::try_from(vtl).unwrap();
                self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
            } else {
                self.inner
                    .hcl
                    .modify_vtl_protection_mask(
                        MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                        hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                        HvInputVtl::CURRENT_VTL,
                    )
                    .map_err(|err| anyhow::anyhow!(err))
            };

            result
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        if let Some(gpa) = gpa {
            // Mark the page read-only so that guest writes trap to VTL2.
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
                    HvMapGpaFlags::new().with_readable(true),
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to register monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(gpa, "registered monitor page");
        }

        Ok(())
    }

    fn register_monitor(
        &self,
        monitor_id: vmcore::monitor::MonitorId,
        connection_id: u32,
    ) -> Box<dyn Sync + Send> {
        self.inner
            .monitor_page
            .register_monitor(monitor_id, connection_id)
    }

    fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
        let vtl = GuestVtl::try_from(vtl).unwrap();

        // Only CVMs need a VTL2-allocated monitor page.
        let Some(state) = self.inner.backing_shared.cvm_state() else {
            return Ok(None);
        };

        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        if let Some(block) = allocated_block.as_ref() {
            // Reuse the existing allocation.
            let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
            assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
            return Ok(Some(gpa));
        }

        let block = state
            .private_dma_client
            .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
            .context("failed to allocate monitor page")?;

        let gpn = block.pfns()[0];
        *allocated_block = Some(block);
        let gpa = gpn << HV_PAGE_SHIFT;
        let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
        if let Some(old_gpa) = old_gpa {
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                    hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
            .context("failed to register monitor page")
            .inspect_err(|_| {
                self.inner.monitor_page.set_gpa(None);
            })?;

        tracing::debug!(gpa, "registered allocated monitor page");

        Ok(Some(gpa))
    }
}

impl UhPartitionInner {
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}

#[derive(Debug)]
struct UhEventPort {
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}

impl vmcore::synic::GuestEventPort for UhEventPort {
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag);
                        }
                    }
                }
            } else {
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}

impl virt::Hv1 for UhPartition {
    type Error = Error;
    type Device = virt::x86::apic_software_device::ApicSoftwareDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().clone()
        } else {
            ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
        })
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        self.inner.software_devices.is_some().then_some(self)
    }
}

impl GetReferenceTime for UhPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        ReferenceTimeResult {
            ref_time: self.hcl.reference_time().unwrap(),
            system_time: None,
        }
    }
}

impl virt::DeviceBuilder for UhPartition {
    fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let device = self
            .inner
            .software_devices
            .as_ref()
            .expect("checked in new_virtual_device")
            .new_device(self.interrupt_targets[vtl].clone(), device_id)
            .map_err(Error::NewDevice)?;

        Ok(device)
    }
}

struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}

impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
    fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
        let partition = self.partition.clone();
        let vtl = self.vtl;
        Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
    }
}

impl UhPartitionInner {
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}

impl IoApicRouting for UhPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    // The IO-APIC is always hooked up to VTL0.
    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
    }
}

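/// Configures the VTL2 VSM partition configuration register, enabling the
/// protections and intercepts that VTL2 requires.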
fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    // Read available capabilities to determine what to enable.
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let hardware_isolated = hcl.isolation().is_hardware_isolated();
    let isolated = hcl.isolation().is_isolated();

    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());

    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}

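/// Underhill partition creation parameters.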
pub struct UhPartitionNewParams<'a> {
    /// The isolation type for the partition.
    pub isolation: IsolationType,
    /// Whether to hide the isolation type from the guest.
    pub hide_isolation: bool,
    /// The memory layout for lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The guest processor topology.
    pub topology: &'a ProcessorTopology,
    /// The unparsed CVM cpuid info, for hardware-isolated partitions.
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The unparsed CVM secrets page, for SNP partitions.
    pub snp_secrets: Option<&'a [u8]>,
    /// The virtual top of memory for hardware-isolated partitions.
    pub vtom: Option<u64>,
    /// Whether this VM should handle synic events and messages itself, by
    /// registering an intercept for unknown synic connections.
    pub handle_synic: bool,
    /// Do not hotplug sidecar VPs on their first exit.
    pub no_sidecar_hotplug: bool,
    /// Use MMIO access hypercalls.
    pub use_mmio_hypercalls: bool,
    /// Intercept guest debug exceptions to support gdbstub.
    pub intercept_debug_exceptions: bool,
    /// Disable the hypervisor's proxy interrupt redirect feature.
    pub disable_proxy_redirect: bool,
}

/// Parameters provided when building the partition, after the proto-partition
/// stage.
pub struct UhLateParams<'a> {
    /// Guest memory for lower VTLs.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL 0 kernel-mode execute access.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL 0 user-mode execute access.
    pub vtl0_user_exec_gm: GuestMemory,
    /// The CPUID leaves to expose to the guest.
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// The sender for crash notifications from lower VTLs.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// CVM-specific parameters.
    pub cvm_params: Option<CvmLateParams>,
    /// Whether a vmbus relay is present.
    pub vmbus_relay: bool,
}

/// CVM-specific parameters provided late in partition construction.
pub struct CvmLateParams {
    /// Guest memory with shared (host-visible) mappings.
    pub shared_gm: GuestMemory,
    /// The object used to control isolated memory protections.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// Allocator for shared-visibility pages.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// Allocator for private pages.
    pub private_dma_client: Arc<dyn DmaClient>,
}

/// Where a GPN used for an overlay page comes from.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The GPN is a regular guest memory page.
    GuestMemory,
    /// The GPN comes from a VTL2 DMA (private pool) allocation.
    Dma,
}

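/// Trait for managing the isolated memory protections of a CVM.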
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility on guest memory.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries host visibility on guest memory.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Gets the default protections/permissions for VTL 0.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections/permissions for a VTL.
    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Changes the VTL protections on a range of guest memory.
    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Registers a page as an overlay page, validating `check_perms` and
    /// optionally applying `new_perms`.
    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Unregisters an overlay page, restoring its previous permissions.
    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Returns whether the given page is registered as an overlay page.
    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    /// Locks the given pages, preventing their protections from changing.
    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    /// Unlocks pages previously locked with `lock_gpns`.
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Alerts the memory protector that VTL 1 is ready to set VTL protections
    /// on lower-VTL memory.
    fn set_vtl1_protections_enabled(&self);

    /// Whether VTL 1 is prepared to modify VTL protections on lower-VTL
    /// memory, and therefore whether those protections should be enforced.
    fn vtl1_protections_enabled(&self) -> bool;
}

/// A trait for access to TLB flush and lock machinery.
pub trait TlbFlushLockAccess {
    /// Flushes the TLB for all VPs for the given VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flushes the entire TLB for all VPs for all VTLs.
    fn flush_entire(&mut self);

    /// Causes the specified VTL on the current VP to wait on all TLB locks.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}

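/// A partially built [`UhPartition`], used to query partition properties
/// (such as guest VSM availability) before the full partition is constructed.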
pub struct UhProtoPartition<'a> {
    params: UhPartitionNewParams<'a>,
    hcl: Hcl,
    guest_vsm_available: bool,
    create_partition_available: bool,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

impl<'a> UhProtoPartition<'a> {
    /// Creates a new prototype partition.
    ///
    /// `driver(cpu)` returns the driver to use for the sidecar client on
    /// CPU `cpu`.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        // Try to open the sidecar device, if it is present.
        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Set the hypercalls that this process will use.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let privs = hcl.get_privileges_and_features_info().map_err(Error::Hcl)?;
        let guest_vsm_available = Self::check_guest_vsm_support(privs, &hcl)?;

        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            create_partition_available: privs.create_partitions(),
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    /// Returns whether guest VSM is available to the guest.
    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    /// Returns whether the hypervisor has granted this partition the
    /// CreatePartitions privilege.
    pub fn create_partition_available(&self) -> bool {
        self.create_partition_available
    }

    /// Builds the partition, returning it along with one [`UhProcessorBox`]
    /// per processor.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            create_partition_available: _,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        if params.intercept_debug_exceptions {
            // Debug exception intercepts are only useful to the gdbstub;
            // reject the configuration if the gdb feature is not compiled in.
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // Register an intercept for #VC (vector 0x1D) exceptions.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(0x1D),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Get the register tweak bitmap from the secrets page.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // TODO: determine the CPU index, which in theory could be
                // different from the VP index.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Try to enable software (APIC) devices by registering the intercept
        // for retargeting interrupts with unknown device IDs. Not all
        // hypervisors support this.
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {};

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            // The hypervisor will manage the untrusted SINTs (or the whole
            // synic for non-hardware-isolated VMs), but some event ports and
            // message ports are implemented here. Register an intercept for
            // unknown synic connections so they can be routed here.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        #[cfg(guest_arch = "x86_64")]
        let vsm_caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
        #[cfg(guest_arch = "x86_64")]
        let proxy_interrupt_redirect_available =
            vsm_caps.proxy_interrupt_redirect_available() && !params.disable_proxy_redirect;

        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
                proxy_interrupt_redirect_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            // Intercept all IOs unless opted out.
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}

impl UhPartition {
    /// Gets the guest OS ID for VTL0.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        // If Underhill is emulating the hypervisor interfaces, get this value
        // from the emulator. Otherwise, ask the hypervisor.
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            self.inner
                .hcl
                .get_guest_os_id(Vtl::Vtl0)
                .map_err(Error::Hcl)?
        };
        Ok(id)
    }

    /// Configures guest accesses to the IO ports in `range` to go directly to
    /// the host.
    ///
    /// When the returned handle is dropped, the intercept region is restored.
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        // There is no way to provide a fast path for hardware-isolated VMs,
        // since the host cannot handle the IO directly.
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Asserts a debug interrupt (LINT1) on the BSP.
    pub fn assert_debug_interrupt(&self, _vtl: u8) {
        #[cfg(guest_arch = "x86_64")]
        const LINT_INDEX_1: u8 = 1;
        #[cfg(guest_arch = "x86_64")]
        match self.inner.isolation {
            IsolationType::Snp => {
                tracing::error!(?_vtl, "Debug interrupts cannot be injected into SNP VMs",);
            }
            _ => {
                let bsp_index = VpIndex::new(0);
                self.pulse_lint(bsp_index, Vtl::try_from(_vtl).unwrap(), LINT_INDEX_1)
            }
        }
    }

    /// Enables or disables the PM timer assist.
    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    /// Registers a VTL2-allocated (DMA) page as an overlay page for a CVM,
    /// applying `new_perms` to it.
    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    /// Unregisters an overlay page previously registered with
    /// [`Self::register_cvm_dma_overlay_page`].
    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}

impl UhProtoPartition<'_> {
    /// Returns whether guest VSM is available to the guest.
    fn check_guest_vsm_support(privs: HvPartitionPrivilege, hcl: &Hcl) -> Result<bool, Error> {
        if !privs.access_vsm() {
            return Ok(false);
        }

        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    /// Constructs the partition-wide CVM state.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
        proxy_interrupt_redirect_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
                proxy_redirect_interrupts: Mutex::new(HashMap::new()),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
            proxy_interrupt_redirect: proxy_interrupt_redirect_available,
        })
    }
}

impl UhPartition {
    /// Constructs the set of cpuid results to present to the guest.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            // Set the x2apic bit according to the topology's APIC mode.
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            // Get the hypervisor version from the host so that the guest sees
            // the same values.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    /// Computes the partition capabilities from the CPUID results.
    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        // Hardware-isolated VMs use the filtered CPUID results; other VMs
        // consult the host CPUID directly, filtered through the leaf set.
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX requires EFER.NXE to be set, so report it as forced on.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}

/// Gets the TSC frequency for the current platform, validating the
/// hypervisor-reported value against hardware where possible.
#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Always get the frequency from the hypervisor, which is expected to
    // provide the most precise and consistent value.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Where available, compute a hardware-reported frequency to validate the
    // hypervisor's value against.
    let hw_info = match isolation {
        IsolationType::Tdx => {
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
            // Allow up to 12.5 MHz of difference between the hypervisor and
            // hardware values.
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => {
            // No independent hardware source is used for SNP today.
            None
        }
        IsolationType::Vbs | IsolationType::None => None,
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        // Don't allow the frequencies to differ beyond the allowed error.
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}

impl UhPartitionInner {
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        // Try to register the whole range at once.
        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    // The hypervisor build doesn't support range intercepts;
                    // don't try the fast path again.
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        // Fall back to registering one port at a time.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        // RAM is mapped, except for writes to the monitor page, which are
        // always intercepted.
        if self.is_gpa_lower_vtl_ram(gpa) {
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
}

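/// Handle returned by [`UhPartition::register_host_io_port_fast_path`]. When
/// dropped, restores VTL2 interception of the registered IO port range.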
#[must_use]
pub struct HostIoPortFastPathHandle {
    inner: Weak<UhPartitionInner>,
    begin: u16,
    end: u16,
}

impl Drop for HostIoPortFastPathHandle {
    fn drop(&mut self) {
        if let Some(inner) = self.inner.upgrade() {
            inner.manage_io_port_intercept_region(self.begin, self.end, true);
        }
    }
}

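/// A crash notification from a lower VTL, carrying the Hyper-V guest crash
/// MSR state written by the guest.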
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that crashed.
    pub vp_index: VpIndex,
    /// The VTL that crashed.
    pub last_vtl: GuestVtl,
    /// The crash control value.
    pub control: GuestCrashCtl,
    /// The crash parameters (P0-P4).
    pub parameters: [u64; 5],
}

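/// Validates that `flags` is a legal VTL protection combination for the given
/// MBEC and supervisor shadow stack configuration.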
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn validate_vtl_gpa_flags(
    flags: HvMapGpaFlags,
    mbec_enabled: bool,
    shadow_supervisor_stack_enabled: bool,
) -> bool {
    // The adjustable flag is never a valid protection setting.
    if flags.adjustable() {
        return false;
    }

    // KX must equal UX unless MBEC is enabled. KX && !UX is invalid.
    if flags.kernel_executable() != flags.user_executable() {
        if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
            return false;
        }
    }

    // Read must be specified if anything else is specified.
    if flags.writable()
        || flags.kernel_executable()
        || flags.user_executable()
        || flags.supervisor_shadow_stack()
        || flags.paging_writability()
        || flags.verify_paging_writability()
    {
        if !flags.readable() {
            return false;
        }
    }

    // Supervisor shadow stack protection is invalid if shadow stacks are
    // disabled or if execute is not specified.
    if flags.supervisor_shadow_stack()
        && ((!flags.kernel_executable() && !flags.user_executable())
            || !shadow_supervisor_stack_enabled)
    {
        return false;
    }

    true
}