// virt_mshv_vtl/lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implementation of the Underhill hypervisor backend, which uses
5//! `/dev/mshv_vtl` to interact with the Microsoft hypervisor while running in
6//! VTL2.
7
8#![cfg(all(guest_is_native, target_os = "linux"))]
9
10mod devmsr;
11
12cfg_if::cfg_if!(
13    if #[cfg(guest_arch = "x86_64")] {
14        mod cvm_cpuid;
15        pub use processor::snp::SnpBacked;
16        pub use processor::tdx::TdxBacked;
17        use crate::processor::HardwareIsolatedBacking;
18        pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
19        use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
20        use bitvec::prelude::BitArray;
21        use bitvec::prelude::Lsb0;
22        use devmsr::MsrDevice;
23        use hv1_emulator::hv::ProcessorVtlHv;
24        use processor::LapicState;
25        use processor::snp::SnpBackedShared;
26        use processor::tdx::TdxBackedShared;
27        use std::arch::x86_64::CpuidResult;
28        use virt::CpuidLeaf;
29        use virt::state::StateElement;
30        use virt::vp::MpState;
        /// Bitarray type for representing IRR bits in an x86-64 APIC.
        /// One bit for each of the 256 possible vectors.
33        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
34    } else if #[cfg(guest_arch = "aarch64")] {
35        pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
36        use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
37    }
38);
39
40mod processor;
41pub use processor::Backing;
42pub use processor::UhProcessor;
43
44use anyhow::Context as AnyhowContext;
45use bitfield_struct::bitfield;
46use bitvec::boxed::BitBox;
47use bitvec::vec::BitVec;
48use cvm_tracing::CVM_ALLOWED;
49use guestmem::GuestMemory;
50use guestmem::GuestMemoryBackingError;
51use hcl::GuestVtl;
52use hcl::ioctl::Hcl;
53use hcl::ioctl::SetVsmPartitionConfigError;
54use hv1_emulator::hv::GlobalHv;
55use hv1_emulator::message_queues::MessageQueues;
56use hv1_emulator::synic::GlobalSynic;
57use hv1_emulator::synic::SintProxied;
58use hv1_structs::VtlArray;
59use hvdef::GuestCrashCtl;
60use hvdef::HV_PAGE_SHIFT;
61use hvdef::HV_PAGE_SIZE;
62use hvdef::HV_PAGE_SIZE_USIZE;
63use hvdef::HvError;
64use hvdef::HvMapGpaFlags;
65use hvdef::HvPartitionPrivilege;
66use hvdef::HvRegisterName;
67use hvdef::HvRegisterVsmPartitionConfig;
68use hvdef::HvRegisterVsmPartitionStatus;
69use hvdef::Vtl;
70use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
71use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
72use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
73use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
74use hvdef::hypercall::HostVisibilityType;
75use hvdef::hypercall::HvGuestOsId;
76use hvdef::hypercall::HvInputVtl;
77use hvdef::hypercall::HvInterceptParameters;
78use hvdef::hypercall::HvInterceptType;
79use inspect::Inspect;
80use inspect::InspectMut;
81use memory_range::MemoryRange;
82use pal::unix::affinity;
83use pal::unix::affinity::CpuSet;
84use pal_async::driver::Driver;
85use pal_async::driver::SpawnDriver;
86use pal_uring::IdleControl;
87use parking_lot::Mutex;
88use parking_lot::RwLock;
89use processor::BackingSharedParams;
90use processor::SidecarExitReason;
91use sidecar_client::NewSidecarClientError;
92use std::collections::HashMap;
93use std::ops::RangeInclusive;
94use std::os::fd::AsRawFd;
95use std::sync::Arc;
96use std::sync::Weak;
97use std::sync::atomic::AtomicBool;
98use std::sync::atomic::AtomicU8;
99use std::sync::atomic::AtomicU32;
100use std::sync::atomic::AtomicU64;
101use std::sync::atomic::Ordering;
102use std::task::Waker;
103use thiserror::Error;
104use user_driver::DmaClient;
105use virt::IsolationType;
106use virt::PartitionCapabilities;
107use virt::VpIndex;
108use virt::X86Partition;
109use virt::irqcon::IoApicRouting;
110use virt::irqcon::MsiRequest;
111use virt::x86::apic_software_device::ApicSoftwareDevices;
112use virt_support_apic::LocalApicSet;
113use vm_topology::memory::MemoryLayout;
114use vm_topology::processor::ProcessorTopology;
115use vm_topology::processor::TargetVpInfo;
116use vmcore::monitor::MonitorPage;
117use vmcore::reference_time::GetReferenceTime;
118use vmcore::reference_time::ReferenceTimeResult;
119use vmcore::reference_time::ReferenceTimeSource;
120use vmcore::vmtime::VmTimeSource;
121use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
122use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
123use x86defs::tdx::TdCallResult;
124use zerocopy::FromBytes;
125use zerocopy::FromZeros;
126use zerocopy::Immutable;
127use zerocopy::IntoBytes;
128use zerocopy::KnownLayout;
129
130/// General error returned by operations.
131#[derive(Error, Debug)]
132#[expect(missing_docs)]
133pub enum Error {
134    #[error("hcl error")]
135    Hcl(#[source] hcl::ioctl::Error),
136    #[error("failed to open sidecar client")]
137    Sidecar(#[source] NewSidecarClientError),
138    #[error("failed to install {0:?} intercept: {1:?}")]
139    InstallIntercept(HvInterceptType, HvError),
140    #[error("failed to query hypervisor register {0:#x?}")]
141    Register(HvRegisterName, #[source] HvError),
142    #[error("failed to set vsm partition config register")]
143    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
144    #[error("failed to create virtual device")]
145    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
146    #[error("failed to create cpuid tables for cvm")]
147    #[cfg(guest_arch = "x86_64")]
148    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
149    #[error("failed to update hypercall msr")]
150    UpdateHypercallMsr,
151    #[error("failed to update reference tsc msr")]
152    UpdateReferenceTsc,
153    #[error("failed to map overlay page")]
154    MapOverlay(#[source] std::io::Error),
155    #[error("failed to allocate shared visibility pages for overlay")]
156    AllocateSharedVisOverlay(#[source] anyhow::Error),
157    #[error("failed to open msr device")]
158    OpenMsr(#[source] std::io::Error),
159    #[error("cpuid did not contain valid TSC frequency information")]
160    BadCpuidTsc,
161    #[error("failed to read tsc frequency")]
162    ReadTscFrequency(#[source] std::io::Error),
163    #[error(
164        "tsc frequency mismatch between hypervisor ({hv}) and hardware {hw}, exceeds allowed error {allowed_error}"
165    )]
166    TscFrequencyMismatch {
167        hv: u64,
168        hw: u64,
169        allowed_error: u64,
170    },
171    #[error("failed to set vsm partition config: {0:?}")]
172    FailedToSetL2Ctls(TdCallResult),
173    #[error("debugging is configured but the binary does not have the gdb feature")]
174    InvalidDebugConfiguration,
175    #[error("failed to allocate TLB flush page")]
176    AllocateTlbFlushPage(#[source] anyhow::Error),
177    #[error("host does not support required cpu capabilities")]
178    Capabilities(virt::PartitionCapabilitiesError),
179    #[error("failed to get register")]
180    GetReg(#[source] hcl::ioctl::register::GetRegError),
181    #[error("failed to set register")]
182    SetReg(#[source] hcl::ioctl::register::SetRegError),
183}
184
/// Error revoking guest VSM.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::register::SetRegError),
    // Revocation is only possible before the guest has enabled VTL 1.
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}
194
/// Underhill partition.
#[derive(Inspect)]
pub struct UhPartition {
    /// Shared partition state; inspected inline via `flatten`.
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    // Per-VTL interrupt targets.
    // NOTE(review): `UhInterruptTarget` is defined elsewhere in this file;
    // confirm semantics there.
    // TODO: remove this extra indirection by refactoring some traits.
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}
204
/// Underhill partition.
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    /// Handle to `/dev/mshv_vtl`, used for all hypervisor interactions.
    #[inspect(skip)]
    hcl: Hcl,
    /// Per-VP shared state, indexed by VP index.
    #[inspect(skip)] // inspected separately
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    /// Partition capabilities, returned via `virt::Partition::caps`.
    caps: PartitionCapabilities,
    /// Requested lower-VTL entry modes (see [`EnterModes`]).
    #[inspect(skip)] // handled in `inspect_extra`
    enter_modes: Mutex<EnterModes>,
    // NOTE(review): presumably the encoded copy of `enter_modes` read by the
    // run loop — confirm against the dispatch code.
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    /// Per-VTL guest memory accessors.
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    /// Backing-specific shared state (hypervisor, SNP, or TDX).
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    #[cfg(guest_arch = "x86_64")]
    // N.B. For now, only one device vector table, i.e. for VTL0 only.
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}
246
/// Partition-wide state specific to the backing type; see
/// [`BackingShared::new`] for how the variant is chosen from the isolation
/// type.
#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    /// Non-isolated or VBS partitions, backed by the hypervisor directly.
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    /// SNP hardware-isolated partitions (x86-64 only).
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    /// TDX hardware-isolated partitions (x86-64 only).
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}
256
impl BackingShared {
    /// Constructs the backing-specific shared state for the partition's
    /// isolation type.
    ///
    /// # Panics
    /// Asserts that no CVM state was provided for a non-hardware-isolated
    /// partition.
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                // CVM state only applies to hardware isolation.
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            // SNP/TDX cannot occur on non-x86 builds.
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    /// Returns the CVM partition state, present only for hardware-isolated
    /// (SNP/TDX) backings.
    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    /// Returns the untrusted synic emulator, if present (TDX only).
    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}
305
/// Modes for entering a lower VTL; mirrors [`hcl::protocol::EnterModes`]
/// (see the `From` impl below).
#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}
313
314impl Default for EnterModes {
315    fn default() -> Self {
316        Self {
317            first: EnterMode::Fast,
318            second: EnterMode::IdleToVtl0,
319        }
320    }
321}
322
323impl From<EnterModes> for hcl::protocol::EnterModes {
324    fn from(value: EnterModes) -> Self {
325        Self::new()
326            .with_first(value.first.into())
327            .with_second(value.second.into())
328    }
329}
330
/// A single lower-VTL entry mode; mirrors [`hcl::protocol::EnterMode`]
/// (see the `From` impl below).
#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    /// Maps to [`hcl::protocol::EnterMode::FAST`].
    Fast,
    /// Maps to [`hcl::protocol::EnterMode::PLAY_IDLE`].
    PlayIdle,
    /// Maps to [`hcl::protocol::EnterMode::IDLE_TO_VTL0`].
    IdleToVtl0,
}
337
338impl From<EnterMode> for hcl::protocol::EnterMode {
339    fn from(value: EnterMode) -> Self {
340        match value {
341            EnterMode::Fast => Self::FAST,
342            EnterMode::PlayIdle => Self::PLAY_IDLE,
343            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
344        }
345    }
346}
347
/// Per-VP state present while VTL 1 is enabled (see [`UhCvmVpState::vtl1`]).
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    /// The pending event that VTL 1 wants to inject into VTL 0. Injected on
    /// next exit to VTL 0.
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    /// Register intercept configuration registered by VTL 1
    /// (see [`SecureRegisterInterceptState`]).
    reg_intercept: SecureRegisterInterceptState,
}
357
#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    /// Creates the initial per-VP guest VSM state: no pending VTL 0 event and
    /// an empty register-intercept configuration.
    fn new() -> Self {
        Self {
            vtl0_exit_pending_event: None,
            reg_intercept: SecureRegisterInterceptState::default(),
        }
    }
}
367
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
/// VP state for CVMs.
struct UhCvmVpState {
    /// Allocation handle for direct overlays; holds the shared-visibility
    /// DMA buffer allocated in [`UhCvmVpState::new`].
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    /// Used in VTL 2 exit code to determine which VTL to exit to.
    exit_vtl: GuestVtl,
    /// Hypervisor enlightenment emulator state.
    hv: VtlArray<ProcessorVtlHv, 2>,
    /// LAPIC state.
    lapics: VtlArray<LapicState, 2>,
    /// Guest VSM state for this vp. Some when VTL 1 is enabled.
    vtl1: Option<GuestVsmVpState>,
}
384
#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    /// Creates a new CVM VP state.
    ///
    /// Allocates the shared-visibility overlay pages and the per-VTL LAPIC
    /// and hypervisor-emulator state for the VP. Returns
    /// [`Error::AllocateSharedVisOverlay`] if the overlay allocation fails.
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        // Use the already-imported usize constant instead of casting
        // `HV_PAGE_SIZE` at each use site.
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE_USIZE)
            .map_err(Error::AllocateSharedVisOverlay)?;

        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            // The APIC is software-enabled after reset for secure VTLs, to
            // maintain compatibility with released versions of secure kernel.
            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            // Initialize APIC base to match the reset VM state.
            lapic.set_apic_base(apic_base).unwrap();
            // Only the VTL 0 non-BSP LAPICs should be in the WaitForSipi state.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}
428
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
/// Configuration of VTL 1 registration for intercepts on certain registers
pub struct SecureRegisterInterceptState {
    /// The intercept control value configured by VTL 1.
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    /// Intercept mask for CR0.
    cr0_mask: u64,
    /// Intercept mask for CR4.
    cr4_mask: u64,
    // Writes to X86X_IA32_MSR_MISC_ENABLE are dropped, so this is only used so
    // that get_vp_register returns the correct value from a set_vp_register.
    ia32_misc_enable_mask: u64,
}
442
/// Information about a redirected interrupt for a specific vector.
/// Stored per-processor, indexed by the redirected vector number in VTL2.
#[derive(Clone, Inspect)]
struct ProxyRedirectVectorInfo {
    /// Device ID that owns this interrupt.
    device_id: u64,
    /// Original interrupt vector from the device (before redirection into
    /// VTL2).
    original_vector: u32,
}
452
#[derive(Inspect)]
/// Partition-wide state for CVMs.
struct UhCvmPartitionState {
    /// Number of VPs per socket.
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    /// VPs that have locked their TLB.
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    /// Per-VP CVM state, indexed by VP index.
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    /// Guest memory accessor for shared-visibility memory.
    shared_memory: GuestMemory,
    /// Protector for isolated memory.
    // NOTE(review): `ProtectIsolatedMemory` is defined elsewhere; confirm
    // semantics there.
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// The emulated local APIC set.
    lapic: VtlArray<LocalApicSet, 2>,
    /// The emulated hypervisor state.
    hv: GlobalHv<2>,
    /// Guest VSM state.
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    /// Dma client for shared visibility pages.
    shared_dma_client: Arc<dyn DmaClient>,
    /// Dma client for private visibility pages.
    private_dma_client: Arc<dyn DmaClient>,
    /// Whether isolation is hidden from the guest.
    hide_isolation: bool,
    /// Whether proxied device interrupts are redirected
    /// (see [`ProxyRedirectVectorInfo`]).
    proxy_interrupt_redirect: bool,
}
482
483#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
484impl UhCvmPartitionState {
485    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
486        &self.vps[vp_index as usize]
487    }
488
489    fn is_lower_vtl_startup_denied(&self) -> bool {
490        matches!(
491            *self.guest_vsm.read(),
492            GuestVsmState::Enabled {
493                vtl1: CvmVtl1State {
494                    deny_lower_vtl_startup: true,
495                    ..
496                }
497            }
498        )
499    }
500}
501
#[derive(Inspect)]
/// Per-vp state for CVMs.
struct UhCvmVpInner {
    /// The current status of TLB locks, per VTL.
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    /// Whether EnableVpVtl for VTL 1 has been called on this VP.
    vtl1_enable_called: Mutex<bool>,
    /// Whether the VP has been started via the StartVp hypercall.
    started: AtomicBool,
    /// Start context for StartVp and EnableVpVtl calls, per target VTL.
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
    /// Tracking of proxy redirect interrupts mapped on this VP, keyed by the
    /// redirected vector number in VTL2.
    #[inspect(with = "|x| inspect::adhoc(|req| inspect::iter_by_key(&*x.lock()).inspect(req))")]
    proxy_redirect_interrupts: Mutex<HashMap<u32, ProxyRedirectVectorInfo>>,
}
518
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
/// Partition-wide state for guest vsm.
enum GuestVsmState<T: Inspect> {
    /// Guest VSM is not offered (or has been revoked; see
    /// [`UhPartition::revoke_guest_vsm`]).
    NotPlatformSupported,
    /// Guest VSM is available but the guest has not enabled it.
    NotGuestEnabled,
    /// The guest has enabled VTL 1.
    Enabled {
        /// Backing-specific VTL 1 state.
        #[inspect(flatten)]
        vtl1: T,
    },
}
531
532impl<T: Inspect> GuestVsmState<T> {
533    pub fn from_availability(guest_vsm_available: bool) -> Self {
534        if guest_vsm_available {
535            GuestVsmState::NotGuestEnabled
536        } else {
537            GuestVsmState::NotPlatformSupported
538        }
539    }
540}
541
/// Partition-wide VTL 1 state for CVMs, tracked once guest VSM is enabled
/// (see [`GuestVsmState::Enabled`]).
#[derive(Inspect)]
struct CvmVtl1State {
    /// Whether VTL 1 has been enabled on any vp.
    enabled_on_any_vp: bool,
    /// Whether guest memory should be zeroed before it resets.
    zero_memory_on_reset: bool,
    /// Whether a vp can be started or reset by a lower vtl.
    deny_lower_vtl_startup: bool,
    /// Whether Mode-Based Execution Control should be enforced on lower VTLs.
    pub mbec_enabled: bool,
    /// Whether shadow supervisor stack is enabled.
    pub shadow_supervisor_stack_enabled: bool,
    /// Bitmap with one bit per 16-bit I/O port: read intercepts.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    /// Bitmap with one bit per 16-bit I/O port: write intercepts.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}
559
560#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
561impl CvmVtl1State {
562    fn new(mbec_enabled: bool) -> Self {
563        Self {
564            enabled_on_any_vp: false,
565            zero_memory_on_reset: false,
566            deny_lower_vtl_startup: false,
567            mbec_enabled,
568            shadow_supervisor_stack_enabled: false,
569            io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
570            io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
571        }
572    }
573}
574
/// Reference time source backed by the processor's TSC.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    /// 64.64 fixed-point multiplier from TSC ticks to 100ns reference time
    /// units (see [`GetReferenceTime::now`]).
    tsc_scale: u64,
}
579
580#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
581impl TscReferenceTimeSource {
582    fn new(tsc_frequency: u64) -> Self {
583        TscReferenceTimeSource {
584            tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
585        }
586    }
587}
588
/// A time implementation based on TSC.
impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            // ref_time = tsc * tsc_scale in 64.64 fixed point: the high 64
            // bits of the 128-bit product are the reference time in 100ns
            // units.
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                // No wall-clock correlation is available from the TSC alone.
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}
608
609impl virt::irqcon::ControlGic for UhPartitionInner {
610    fn set_spi_irq(&self, irq_id: u32, high: bool) {
611        if let Err(err) = self.hcl.request_interrupt(
612            hvdef::HvInterruptControl::new()
613                .with_arm64_asserted(high)
614                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
615            0,
616            irq_id,
617            GuestVtl::Vtl0,
618        ) {
619            tracelimit::warn_ratelimited!(
620                error = &err as &dyn std::error::Error,
621                irq = irq_id,
622                asserted = high,
623                "failed to request spi"
624            );
625        }
626    }
627}
628
629impl virt::Aarch64Partition for UhPartition {
630    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
631        debug_assert!(vtl == Vtl::Vtl0);
632        self.inner.clone()
633    }
634}
635
/// A wrapper around [`UhProcessor`] that is [`Send`].
///
/// This is used to instantiate the processor object on the correct thread,
/// since all lower VTL processor state accesses must occur from the same
/// processor at VTL2.
pub struct UhProcessorBox {
    /// The partition this VP belongs to.
    partition: Arc<UhPartitionInner>,
    /// Topology information for this VP.
    vp_info: TargetVpInfo,
}
645
impl UhProcessorBox {
    /// Returns the VP index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    /// Returns whether sidecar support is enabled.
    pub fn sidecar_enabled(&self) -> bool {
        self.partition.hcl.sidecar_enabled()
    }

    /// Returns the base CPU that manages this processor, when it is a sidecar
    /// VP.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    /// Returns the processor object, bound to this thread.
    ///
    /// If `control` is provided, then this must be called on the VP's
    /// associated thread pool thread, and it will dispatch the VP directly.
    /// Otherwise, the processor will be controlled via the sidecar kernel.
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            // Direct dispatch requires running pinned to the VP's own CPU;
            // assert the current thread affinity matches before proceeding.
            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            // Register the thread pool's ring fd with the kernel driver for
            // this VP's CPU.
            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    /// Sets the sidecar remove reason for the processor to be due to a task
    /// running with the given name.
    ///
    /// This is useful for diagnostics.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}
706
/// Per-VP state shared between the partition and the VP's run task.
#[derive(Debug, Inspect)]
struct UhVpInner {
    /// Pending wake reasons.
    /// 32 bits per VTL: top bits are VTL 1, bottom bits are VTL 0.
    wake_reasons: AtomicU64,
    /// Waker for the task driving this VP, if registered.
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    /// Queued messages, per VTL.
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    /// The Linux kernel's CPU index for this VP. This should be used instead of VpIndex
    /// when interacting with non-MSHV kernel interfaces.
    cpu_index: u32,
    /// Why this VP was removed from the sidecar kernel, if it was.
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}
721
722impl UhVpInner {
723    pub fn vp_index(&self) -> VpIndex {
724        self.vp_info.base.vp_index
725    }
726}
727
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
/// Which operation is setting the initial vp context
enum InitialVpContextOperation {
    /// The VP is being started via the StartVp hypercall.
    StartVp,
    /// A VTL is being enabled on the VP via the EnableVpVtl hypercall.
    EnableVpVtl,
}
737
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
/// State for handling StartVp/EnableVpVtl hypercalls.
struct VpStartEnableVtl {
    /// Which operation, startvp or enablevpvtl, is setting the initial vp
    /// context
    operation: InitialVpContextOperation,
    /// The initial VP context supplied with the hypercall.
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}
748
/// TLB lock tracking for a single VP (see [`TlbLockInfo::new`]).
#[derive(Debug, Inspect)]
struct TlbLockInfo {
    /// The set of VPs that are waiting for this VP to release the TLB lock.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    /// The set of VPs that are holding the TLB lock and preventing this VP
    /// from proceeding.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    /// The count of blocking VPs. This should always be equivalent to
    /// `blocking_vps.count_ones()`, however it is accessible in a single
    /// atomic operation while counting is not.
    blocking_vp_count: AtomicU32,
    /// Whether the VP is sleeping due to a TLB lock.
    sleeping: AtomicBool,
}
765
766#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
767impl TlbLockInfo {
768    fn new(vp_count: usize) -> Self {
769        Self {
770            blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
771            blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
772            blocking_vp_count: AtomicU32::new(0),
773            sleeping: false.into(),
774        }
775    }
776}
777
/// Reasons a VP has been asked to wake, packed into 32 bits so that one copy
/// per VTL fits in [`UhVpInner::wake_reasons`]'s single `AtomicU64`.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}
789
impl WakeReason {
    // Convenient constants.
    /// An external interrupt is pending.
    const EXTINT: Self = Self::new().with_extint(true);
    /// A message queue has pending messages.
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true); // StartVp/EnableVpVtl handling
    /// The interrupt controller has work for this VP.
    const INTCON: Self = Self::new().with_intcon(true);
    /// The VP must refresh its `proxy_irr_blocked` filter (see
    /// `request_proxy_irr_filter_update`).
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}
800
/// Activity flags related to exiting to a lower VTL.
// NOTE(review): not referenced in this portion of the file; confirm
// `pending_event` semantics at the use sites.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}
808
/// Immutable access to useful bits of Partition state.
impl UhPartition {
    /// Revokes guest VSM.
    ///
    /// Fails with [`RevokeGuestVsmError::Vtl1AlreadyEnabled`] if the guest
    /// has already enabled VTL 1; otherwise the partition is left reporting
    /// guest VSM as not platform supported.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        // State transition shared by all backing types.
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                // Also update the hypervisor's partition configuration.
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current hypervisor reference time, in 100ns units.
    pub fn reference_time(&self) -> u64 {
        // CVMs use the emulated hypervisor's time source; otherwise query the
        // hypervisor through the HCL.
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}
851
852impl virt::Partition for UhPartition {
853    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
854        None
855    }
856
857    fn caps(&self) -> &PartitionCapabilities {
858        &self.inner.caps
859    }
860
861    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
862        self.inner
863            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
864    }
865
866    fn request_yield(&self, _vp_index: VpIndex) {
867        unimplemented!()
868    }
869}
870
871impl X86Partition for UhPartition {
872    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
873        self.inner.clone()
874    }
875
876    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
877        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
878        if let Some(apic) = &self.inner.lapic(vtl) {
879            apic.lint(vp_index, lint.into(), |vp_index| {
880                self.inner
881                    .vp(vp_index)
882                    .unwrap()
883                    .wake(vtl, WakeReason::INTCON);
884            });
885        } else if lint == 0 {
886            self.inner
887                .vp(vp_index)
888                .unwrap()
889                .wake(vtl, WakeReason::EXTINT);
890        } else {
891            unimplemented!()
892        }
893    }
894}
895
impl UhPartitionInner {
    /// Returns the per-VP state for `index`, or `None` if the index is out of
    /// range for this partition.
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    /// Returns the emulated local APIC set for `vtl`, if this partition has
    /// CVM state (SNP/TDX backings).
    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    /// Returns the emulated hypervisor state, if this partition has CVM state.
    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    /// For requester VP to issue `proxy_irr_blocked` update to other VPs
    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        // Add given vector to partition global device vector table (VTL0 only for now)
        {
            // Scoped so the write lock is dropped before waking the VPs.
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        // Wake all other VPs for their `proxy_irr_blocked` filter update
        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    /// Get current partition global device irr vectors (VTL0 for now)
    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        // OR the partition-global device vectors into the caller's bitmap;
        // bits already set by the caller are preserved.
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    /// Inspect hook for partition-wide state that needs custom handling.
    ///
    /// Allows `enter_modes` to be both viewed and mutated; on mutation, the
    /// new modes are mirrored into the atomic copy and all VPs are woken so
    /// they observe the change.
    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        // Wake VPs to propagate updates.
        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    // TODO VBS GUEST VSM: enable for aarch64
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(
        &self,
    ) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::register::GetRegError> {
        // TODO: It might be possible to cache VsmPartitionStatus.
        self.hcl.get_vsm_partition_status()
    }
}
984
985impl virt::Synic for UhPartition {
986    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
987        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
988        let Some(vp) = self.inner.vp(vp_index) else {
989            tracelimit::warn_ratelimited!(
990                CVM_ALLOWED,
991                vp = vp_index.index(),
992                "invalid vp target for post_message"
993            );
994            return;
995        };
996
997        vp.post_message(
998            vtl,
999            sint,
1000            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
1001        );
1002    }
1003
1004    fn new_guest_event_port(
1005        &self,
1006        vtl: Vtl,
1007        vp: u32,
1008        sint: u8,
1009        flag: u16,
1010    ) -> Box<dyn vmcore::synic::GuestEventPort> {
1011        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
1012        Box::new(UhEventPort {
1013            partition: Arc::downgrade(&self.inner),
1014            params: Arc::new(Mutex::new(UhEventPortParams {
1015                vp: VpIndex::new(vp),
1016                sint,
1017                flag,
1018                vtl,
1019            })),
1020        })
1021    }
1022
1023    fn prefer_os_events(&self) -> bool {
1024        false
1025    }
1026
1027    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
1028        Some(self)
1029    }
1030}
1031
1032impl virt::SynicMonitor for UhPartition {
1033    fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
1034        // Keep this locked the whole function to avoid racing with allocate_monitor_page.
1035        let mut allocated_block = self.inner.allocated_monitor_page.lock();
1036        let old_gpa = self.inner.monitor_page.set_gpa(gpa);
1037
1038        // Take ownership of any allocated monitor page so it will be freed on function exit.
1039        let allocated_page = allocated_block.take();
1040        if let Some(old_gpa) = old_gpa {
1041            let allocated_gpa = allocated_page
1042                .as_ref()
1043                .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);
1044
1045            // Revert the old page's permissions, using the appropriate method depending on
1046            // whether it was allocated or guest-supplied.
1047            let result = if allocated_gpa == Some(old_gpa) {
1048                let vtl = GuestVtl::try_from(vtl).unwrap();
1049                self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
1050            } else {
1051                self.inner
1052                    .hcl
1053                    .modify_vtl_protection_mask(
1054                        MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1055                        hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1056                        HvInputVtl::CURRENT_VTL,
1057                    )
1058                    .map_err(|err| anyhow::anyhow!(err))
1059            };
1060
1061            result
1062                .context("failed to unregister old monitor page")
1063                .inspect_err(|_| {
1064                    // Leave the page unset if returning a failure.
1065                    self.inner.monitor_page.set_gpa(None);
1066                })?;
1067
1068            tracing::debug!(old_gpa, "unregistered monitor page");
1069        }
1070
1071        if let Some(gpa) = gpa {
1072            // Disallow VTL0 from writing to the page, so we'll get an intercept. Note that read
1073            // permissions must be enabled or this doesn't work correctly.
1074            self.inner
1075                .hcl
1076                .modify_vtl_protection_mask(
1077                    MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
1078                    HvMapGpaFlags::new().with_readable(true),
1079                    HvInputVtl::CURRENT_VTL,
1080                )
1081                .context("failed to register monitor page")
1082                .inspect_err(|_| {
1083                    // Leave the page unset if returning a failure.
1084                    self.inner.monitor_page.set_gpa(None);
1085                })?;
1086
1087            tracing::debug!(gpa, "registered monitor page");
1088        }
1089
1090        Ok(())
1091    }
1092
1093    fn register_monitor(
1094        &self,
1095        monitor_id: vmcore::monitor::MonitorId,
1096        connection_id: u32,
1097    ) -> Box<dyn Sync + Send> {
1098        self.inner
1099            .monitor_page
1100            .register_monitor(monitor_id, connection_id)
1101    }
1102
1103    fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
1104        let vtl = GuestVtl::try_from(vtl).unwrap();
1105
1106        // Allocating a monitor page is only supported for CVMs.
1107        let Some(state) = self.inner.backing_shared.cvm_state() else {
1108            return Ok(None);
1109        };
1110
1111        let mut allocated_block = self.inner.allocated_monitor_page.lock();
1112        if let Some(block) = allocated_block.as_ref() {
1113            // An allocated monitor page is already in use; no need to change it.
1114            let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
1115            assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
1116            return Ok(Some(gpa));
1117        }
1118
1119        let block = state
1120            .private_dma_client
1121            .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
1122            .context("failed to allocate monitor page")?;
1123
1124        let gpn = block.pfns()[0];
1125        *allocated_block = Some(block);
1126        let gpa = gpn << HV_PAGE_SHIFT;
1127        let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
1128        if let Some(old_gpa) = old_gpa {
1129            // The old GPA is guaranteed not to be allocated, since that was checked above, so
1130            // revert its permissions using the method for guest-supplied memory.
1131            self.inner
1132                .hcl
1133                .modify_vtl_protection_mask(
1134                    MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
1135                    hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
1136                    HvInputVtl::CURRENT_VTL,
1137                )
1138                .context("failed to unregister old monitor page")
1139                .inspect_err(|_| {
1140                    // Leave the page unset if returning a failure.
1141                    self.inner.monitor_page.set_gpa(None);
1142                })?;
1143
1144            tracing::debug!(old_gpa, "unregistered monitor page");
1145        }
1146
1147        // Disallow VTL0 from writing to the page, so we'll get an intercept. Note that read
1148        // permissions must be enabled or this doesn't work correctly.
1149        self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
1150            .context("failed to unregister monitor page")
1151            .inspect_err(|_| {
1152                // Leave the page unset if returning a failure.
1153                self.inner.monitor_page.set_gpa(None);
1154            })?;
1155
1156        tracing::debug!(gpa, "registered allocated monitor page");
1157
1158        Ok(Some(gpa))
1159    }
1160}
1161
impl UhPartitionInner {
    /// Returns a callback the synic emulator uses to request a fixed
    /// interrupt on `vp_index` at `vtl` through the emulated local APIC,
    /// waking the target VP so it processes the interrupt.
    ///
    /// Panics (via `unwrap`) if called on a partition without CVM state.
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        // TODO CVM: optimize for SNP with secure avic to avoid internal wake
        // and for TDX to avoid trip to user mode
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    /// aarch64 variant: there is no local APIC emulation here, so the
    /// returned callback does nothing.
    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}
1190
/// A guest event port backed by the partition's synic.
///
/// Holds only a weak reference to the partition so an outstanding port does
/// not keep the partition alive.
#[derive(Debug)]
struct UhEventPort {
    // Weak: signaling after partition teardown becomes a silent no-op.
    partition: Weak<UhPartitionInner>,
    // Shared so `set_target_vp` can retarget an already-issued interrupt.
    params: Arc<Mutex<UhEventPortParams>>,
}
1196
/// Target parameters for a [`UhEventPort`] signal.
#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    // Target virtual processor.
    vp: VpIndex,
    // Target synthetic interrupt (SINT) number.
    sint: u8,
    // Event flag number to set in the target's event flags page.
    flag: u16,
    // Guest VTL to signal.
    vtl: GuestVtl,
}
1204
impl vmcore::synic::GuestEventPort for UhEventPort {
    /// Returns an interrupt object that, when signaled, delivers this port's
    /// event flag to the currently configured target VP/SINT.
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            // Snapshot the target; `set_target_vp` may retarget concurrently.
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            // The partition may already be torn down; drop the signal then.
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                // Emulated hypervisor path: deliver through the emulated
                // synic for the target VTL.
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        // The SINT is proxied to the host; forward the event
                        // to the untrusted synic (or directly through the
                        // hypervisor if there is none).
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag)
                        }
                    }
                }
            } else {
                // No emulated hypervisor: signal directly via the hypervisor.
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    /// Retargets the port to a different VP; future signals use the new VP.
    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}
1260
1261impl virt::Hv1 for UhPartition {
1262    type Error = Error;
1263    type Device = virt::x86::apic_software_device::ApicSoftwareDevice;
1264
1265    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
1266        Some(if let Some(hv) = self.inner.hv() {
1267            hv.ref_time_source().clone()
1268        } else {
1269            ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
1270        })
1271    }
1272
1273    fn new_virtual_device(
1274        &self,
1275    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
1276        self.inner.software_devices.is_some().then_some(self)
1277    }
1278}
1279
1280impl GetReferenceTime for UhPartitionInner {
1281    fn now(&self) -> ReferenceTimeResult {
1282        ReferenceTimeResult {
1283            ref_time: self.hcl.reference_time().unwrap(),
1284            system_time: None,
1285        }
1286    }
1287}
1288
1289impl virt::DeviceBuilder for UhPartition {
1290    fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
1291        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
1292        let device = self
1293            .inner
1294            .software_devices
1295            .as_ref()
1296            .expect("checked in new_virtual_device")
1297            .new_device(self.interrupt_targets[vtl].clone(), device_id)
1298            .map_err(Error::NewDevice)?;
1299
1300        Ok(device)
1301    }
1302}
1303
/// MSI delivery target bound to a specific partition and guest VTL.
struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    // The VTL all MSIs through this target are delivered to.
    vtl: GuestVtl,
}
1308
1309impl pci_core::msi::SignalMsi for UhInterruptTarget {
1310    fn signal_msi(&self, _rid: u32, address: u64, data: u32) {
1311        self.partition
1312            .request_msi(self.vtl, MsiRequest { address, data });
1313    }
1314}
1315
impl UhPartitionInner {
    /// Delivers an MSI to `vtl`, either through the emulated local APIC (when
    /// CVM state exists) or by asking the hypervisor to assert the interrupt.
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            // No APIC emulation: decode the MSI address/data and hand the
            // interrupt request to the hypervisor.
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                // Delivery failure is logged (rate-limited), not propagated;
                // MSI delivery is best-effort at this layer.
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}
1342
1343impl IoApicRouting for UhPartitionInner {
1344    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
1345        self.irq_routes.set_irq_route(irq, request)
1346    }
1347
1348    // The IO-APIC is always hooked up to VTL0.
1349    fn assert_irq(&self, irq: u8) {
1350        self.irq_routes
1351            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
1352    }
1353}
1354
1355// xtask-fmt allow-target-arch cpu-intrinsic
1356#[cfg(target_arch = "x86_64")]
1357fn is_restore_partition_time_available() -> bool {
1358    let result =
1359        safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION, 0);
1360    let enlightenment_info = hvdef::HvEnlightenmentInformation::from(
1361        result.eax as u128
1362            | (result.ebx as u128) << 32
1363            | (result.ecx as u128) << 64
1364            | (result.edx as u128) << 96,
1365    );
1366    enlightenment_info.restore_time_on_resume()
1367}
// xtask-fmt allow-target-arch cpu-intrinsic
#[cfg(not(target_arch = "x86_64"))]
fn is_restore_partition_time_available() -> bool {
    // Only available on x86_64 Hyper-V hypervisor, so report the feature as
    // unsupported on every other architecture.
    false
}
1374
1375/// Configure the [`hvdef::HvRegisterVsmPartitionConfig`] register with the
1376/// values used by underhill.
1377fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
1378    // Read available capabilities to determine what to enable.
1379    let caps = hcl.get_vsm_capabilities().map_err(Error::GetReg)?;
1380    let hardware_isolated = hcl.isolation().is_hardware_isolated();
1381    let isolated = hcl.isolation().is_isolated();
1382    let config = HvRegisterVsmPartitionConfig::new()
1383        .with_default_vtl_protection_mask(0xF)
1384        .with_enable_vtl_protection(!hardware_isolated)
1385        .with_zero_memory_on_reset(!hardware_isolated)
1386        .with_intercept_cpuid_unimplemented(!hardware_isolated)
1387        .with_intercept_page(caps.intercept_page_available())
1388        .with_intercept_unrecoverable_exception(true)
1389        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
1390        .with_intercept_acceptance(isolated)
1391        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
1392        .with_intercept_system_reset(caps.intercept_system_reset_available())
1393        .with_intercept_restore_partition_time(is_restore_partition_time_available());
1394
1395    hcl.set_vtl2_vsm_partition_config(config)
1396        .map_err(Error::SetReg)
1397}
1398
/// Configuration parameters supplied to [`UhProtoPartition::new`].
///
/// These do not include runtime resources.
pub struct UhPartitionNewParams<'a> {
    /// The isolation type for the partition.
    pub isolation: IsolationType,
    /// Hide isolation from the guest. The guest will run as if it is not
    /// isolated.
    pub hide_isolation: bool,
    /// The memory layout for lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The guest processor topology.
    pub topology: &'a ProcessorTopology,
    /// The unparsed CVM cpuid info. Required for SNP partitions (see
    /// [`UhProtoPartition::new`], which unwraps it on that path).
    // TODO: move parsing up a layer.
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The unparsed CVM secrets page.
    pub snp_secrets: Option<&'a [u8]>,
    /// The virtual top of memory for hardware-isolated VMs.
    ///
    /// Must be a power of two.
    pub vtom: Option<u64>,
    /// Handle synic messages and events.
    ///
    /// On TDX, this prevents the hypervisor from getting vmtdcall exits.
    pub handle_synic: bool,
    /// Do not hotplug sidecar VPs on their first exit. Just continue running
    /// the VP remotely.
    pub no_sidecar_hotplug: bool,
    /// Use MMIO access hypercalls.
    pub use_mmio_hypercalls: bool,
    /// Intercept guest debug exceptions to support gdbstub.
    /// Requires the `gdb` feature to be enabled.
    pub intercept_debug_exceptions: bool,
    /// Disable proxy interrupt redirection.
    pub disable_proxy_redirect: bool,
    /// Disable lower VTL timer virtualization.
    pub disable_lower_vtl_timer_virt: bool,
}
1437
/// Parameters to [`UhProtoPartition::build`].
pub struct UhLateParams<'a> {
    /// Guest memory for lower VTLs.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL 0 kernel execute access.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL 0 user execute access.
    pub vtl0_user_exec_gm: GuestMemory,
    /// The CPUID leaves to expose to the guest (x86_64 only).
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// The mesh sender to use for crash notifications.
    // FUTURE: remove mesh dependency from this layer.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// Parameters for CVMs only. `None` for non-CVM partitions.
    pub cvm_params: Option<CvmLateParams>,
    /// vmbus_relay is enabled and active for partition
    pub vmbus_relay: bool,
}
1459
/// CVM-only parameters to [`UhProtoPartition::build`].
pub struct CvmLateParams {
    /// Guest memory for untrusted devices, like overlay pages.
    pub shared_gm: GuestMemory,
    /// An object to call to change host visibility on guest memory.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// Dma client for shared visibility pages.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// Allocator for private visibility pages.
    ///
    /// Also used to allocate the monitor page for CVMs (see
    /// `allocate_monitor_page`).
    pub private_dma_client: Arc<dyn DmaClient>,
}
1471
/// Represents a GPN that is either in guest memory or was allocated by dma_client.
///
/// Used when registering overlay pages so the protector knows how the page's
/// backing should be treated.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The GPN is in regular guest RAM.
    GuestMemory,
    /// The GPN was allocated by dma_client and is not in guest RAM.
    Dma,
}
1480
/// Trait for CVM-related protections on guest memory.
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility on guest memory.
    ///
    /// On failure, the `usize` in the error is presumably the index of the
    /// first GPN that failed — TODO confirm against implementations.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries host visibility on guest memory.
    ///
    /// `host_visibility` is filled in per-GPN; it must be at least as long as
    /// `gpns` — TODO confirm exact length contract with implementations.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Gets the default protections/permissions for VTL 0.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections/permissions for a VTL. For VBS-isolated
    /// VMs, the protections apply to all vtls lower than the specified one. For
    /// hardware-isolated VMs, they apply just to the given vtl.
    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Changes the vtl protections on a range of guest memory.
    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Registers a page as an overlay page by first validating it has the
    /// required permissions, optionally modifying them, then locking them.
    ///
    /// `check_perms` are the permissions the page must already have;
    /// `new_perms`, when `Some`, are applied before locking.
    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Unregisters an overlay page, removing its permission lock and restoring
    /// the previous permissions.
    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError>;

    /// Checks whether a page is currently registered as an overlay page.
    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;

    /// Locks the permissions and mappings for a set of guest pages.
    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;

    /// Unlocks the permissions and mappings for a set of guest pages.
    ///
    /// Panics if asked to unlock a page that was not previously locked. The
    /// caller must ensure that the given slice has the same ordering as the
    /// one passed to `lock_gpns`.
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Alerts the memory protector that vtl 1 is ready to set vtl protections
    /// on lower-vtl memory, and that these protections should be enforced.
    fn set_vtl1_protections_enabled(&self);

    /// Whether VTL 1 is prepared to modify vtl protections on lower-vtl memory,
    /// and therefore whether these protections should be enforced.
    fn vtl1_protections_enabled(&self) -> bool;
}
1563
/// Trait for access to TLB flush and lock machinery.
///
/// Passed into [`ProtectIsolatedMemory`] operations so implementations can
/// make permission changes visible to running VPs.
pub trait TlbFlushLockAccess {
    /// Flush the entire TLB for all VPs for the given VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flush the entire TLB for all VPs for all VTLs.
    fn flush_entire(&mut self);

    /// Causes the specified VTL on the current VP to wait on all TLB locks.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}
1575
/// A partially built partition. Used to allow querying partition capabilities
/// before fully instantiating the partition.
pub struct UhProtoPartition<'a> {
    // Construction parameters, consumed by `build`.
    params: UhPartitionNewParams<'a>,
    // Open handle to /dev/mshv_vtl.
    hcl: Hcl,
    // Whether VSM will be offered to the guest; computed once in `new`.
    guest_vsm_available: bool,
    // Whether the create-partitions privilege is present; computed in `new`.
    create_partition_available: bool,
    // CPUID results precomputed for hardware-isolated partitions.
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}
1586
1587impl<'a> UhProtoPartition<'a> {
    /// Creates a new prototype partition.
    ///
    /// `driver(cpu)` returns the driver to use for polling the sidecar device
    /// whose base CPU is `cpu`.
    ///
    /// This opens the HCL and sidecar devices, configures the allowed
    /// hypercall set and the VTL2 VSM partition configuration, and (on
    /// x86_64) precomputes CPUID results for hardware-isolated partitions.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        // Translate this crate's isolation type into the HCL's equivalent.
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        // Try to open the sidecar device, if it is present.
        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Set the hypercalls that this process will use.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        // Hardware-isolated partitions additionally manage VTL enablement and
        // device interrupt retargeting themselves.
        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        // Read privileges once; they feed both VSM support detection and the
        // create-partitions capability below.
        let privs = hcl
            .get_privileges_and_features_info()
            .map_err(Error::GetReg)?;
        let guest_vsm_available = Self::check_guest_vsm_support(privs, &hcl)?;

        // Precompute CPUID results for hardware-isolated partitions; VBS and
        // non-isolated partitions use the default (empty) leaf set.
        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                // SNP requires the CPUID pages and vtom to have been supplied.
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            create_partition_available: privs.create_partitions(),
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }
1678
    /// Returns whether VSM support will be available to the guest.
    pub fn guest_vsm_available(&self) -> bool {
        // Computed once in `new` via `check_guest_vsm_support`.
        self.guest_vsm_available
    }
1683
    /// Returns whether this partition has the create partitions hypercall
    /// available.
    ///
    /// This reflects the `create_partitions` privilege reported by the
    /// hypervisor during proto-partition construction.
    pub fn create_partition_available(&self) -> bool {
        self.create_partition_available
    }
1689
    /// Returns a new Underhill partition.
    ///
    /// Consumes this proto-partition: registers the hypervisor intercepts the
    /// partition requires, performs per-VP HCL initialization, and assembles
    /// the shared partition state. Returns the partition together with one
    /// `UhProcessorBox` per VP in the topology.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            create_partition_available: _,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        // Intercept Debug Exceptions
        // On TDX because all OpenHCL TDs today have the debug policy bit set,
        // OpenHCL registers for the intercepts itself.
        // However, on non-TDX platforms hypervisor installs the
        // intercept on behalf of the guest.
        if params.intercept_debug_exceptions {
            // Debug exception interception is only supported in gdb-enabled
            // builds.
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        // Vector 1 is #DB (the debug exception) on x86-64.
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                // Register MSR and APIC EOI intercepts so those guest accesses
                // are delivered to VTL2 for handling.
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // SNP VMs register for the #VC exception to support reflect-VC.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(
                    x86defs::Exception::SEV_VMM_COMMUNICATION.0 as u16,
                ),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Get the register tweak bitmap from secrets page.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        // Do per-VP HCL initialization.
        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        // Construct the per-VP inner state shared between the partition and
        // its processors.
        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // TODO: determine CPU index, which in theory could be different
                // from the VP index, though this hasn't happened yet.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Enable support for VPCI devices if the hypervisor supports it.
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                // The hypervisor doesn't support the intercept, so VPCI
                // software devices are unavailable.
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {
            // TODO: query aarch32 support from the hypervisor.
            supports_aarch32_el0: false,
        };

        // Combine the isolation-specific cpuid results with the late
        // (host-provided) leaves to form the guest-visible cpuid set.
        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            // The hypervisor will manage the untrusted SINTs (or the whole
            // synic for non-hardware-isolated VMs), but some event ports
            // and message ports are implemented here. Register an intercept
            // to handle HvSignalEvent and HvPostMessage hypercalls when the
            // hypervisor doesn't recognize the connection ID.
            //
            // TDX manages this locally instead of through the hypervisor.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        // Hardware-isolated partitions carry additional CVM emulation state
        // (emulated APIC, hypervisor enlightenments, memory protections).
        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            let vsm_caps = hcl.get_vsm_capabilities().map_err(Error::GetReg)?;
            let proxy_interrupt_redirect_available =
                vsm_caps.proxy_interrupt_redirect_available() && !params.disable_proxy_redirect;

            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
                proxy_interrupt_redirect_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let lower_vtl_timer_virt_available =
            hcl.supports_lower_vtl_timer_virt() && !params.disable_lower_vtl_timer_virt;

        // Construct the backing-specific (SNP/TDX/hypervisor) state shared by
        // all VPs.
        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
                lower_vtl_timer_virt_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            // Intercept all IOs unless opted out.
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        // Create the per-VP handles returned to the caller.
        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
1965}
1966
impl UhPartition {
    /// Gets the guest OS ID for VTL0.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, hcl::ioctl::register::GetRegError> {
        // If Underhill is emulating the hypervisor interfaces, get this value
        // from the emulator. This happens when running under hardware isolation
        // or when configured for testing.
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            // Ask the hypervisor for this value.
            self.inner.hcl.get_guest_os_id(GuestVtl::Vtl0)?
        };
        Ok(id)
    }

    /// Configures guest accesses to IO ports in `range` to go directly to the
    /// host.
    ///
    /// When the return value is dropped, the ports will be unregistered.
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        // There is no way to provide a fast path for some hardware isolated
        // VM architectures. The devices that do use this facility are not
        // enabled on hardware isolated VMs.
        assert!(!self.inner.isolation.is_hardware_isolated());

        // Passing `active = false` removes the VTL2 intercept so the host
        // sees the accesses directly.
        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Trigger the LINT1 interrupt vector on the LAPIC of the BSP.
    ///
    /// On SNP partitions the request is logged and dropped, since debug
    /// interrupts cannot be injected into SNP VMs; on non-x86-64 builds this
    /// is a no-op.
    pub fn assert_debug_interrupt(&self, _vtl: u8) {
        #[cfg(guest_arch = "x86_64")]
        const LINT_INDEX_1: u8 = 1;
        #[cfg(guest_arch = "x86_64")]
        match self.inner.isolation {
            IsolationType::Snp => {
                tracing::error!(?_vtl, "Debug interrupts cannot be injected into SNP VMs",);
            }
            _ => {
                let bsp_index = VpIndex::new(0);
                self.pulse_lint(bsp_index, Vtl::try_from(_vtl).unwrap(), LINT_INDEX_1)
            }
        }
    }

    /// Enables or disables the PM timer assist.
    ///
    /// `None` disables the assist; `Some(port)` enables it for that IO port.
    pub fn set_pm_timer_assist(
        &self,
        port: Option<u16>,
    ) -> Result<(), hcl::ioctl::register::SetRegError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    /// Sets guest memory protections for a monitor page.
    ///
    /// Routes to the isolation-specific memory protector (SNP or TDX) to
    /// register `gpn` as a DMA-backed overlay page with `new_perms`, using the
    /// backing's TLB flush lock access to invalidate stale translations.
    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        // How the monitor page is protected depends on the isolation type of the VM.
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    // On a CVM, the monitor page is always DMA-allocated.
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                // Only CVM backings use DMA-allocated overlay pages; reaching
                // here for a hypervisor-backed partition is a caller bug.
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    /// Reverts guest memory protections for a monitor page.
    ///
    /// Counterpart of [`Self::register_cvm_dma_overlay_page`]; unregisters
    /// `gpn` from the isolation-specific memory protector.
    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        // How the monitor page is protected depends on the isolation type of the VM.
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                // Only CVM backings use DMA-allocated overlay pages; reaching
                // here for a hypervisor-backed partition is a caller bug.
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}
2118
impl UhProtoPartition<'_> {
    /// Whether Guest VSM is available to the guest. If so, for hardware CVMs,
    /// it is safe to expose Guest VSM support via cpuid.
    fn check_guest_vsm_support(privs: HvPartitionPrivilege, hcl: &Hcl) -> Result<bool, Error> {
        // Without the AccessVsm privilege, guest VSM cannot be offered at all.
        if !privs.access_vsm() {
            return Ok(false);
        }

        let guest_vsm_config = hcl
            .get_guest_vsm_partition_config()
            .map_err(Error::GetReg)?;
        // Guest VSM is only usable if the hypervisor permits at least VTL1.
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    #[cfg(guest_arch = "x86_64")]
    /// Constructs partition-wide CVM state.
    ///
    /// Builds the per-VP CVM bookkeeping (TLB lock state, VTL1 enablement and
    /// VP start tracking, proxy interrupt redirection maps), the emulated
    /// local APIC set for each VTL, and the hypervisor enlightenment emulator
    /// backed by a TSC reference time source.
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
        proxy_interrupt_redirect_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                // Only VP 0 (the BSP) starts out running.
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
                proxy_redirect_interrupts: Mutex::new(HashMap::new()),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        // If we're emulating the APIC, then we also must emulate the hypervisor
        // enlightenments, since the hypervisor can't support enlightenments
        // without also providing an APIC.
        //
        // Additionally, TDX provides hardware APIC emulation but we still need
        // to emulate the hypervisor enlightenments.
        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
            proxy_interrupt_redirect: proxy_interrupt_redirect_available,
        })
    }
}
2197
impl UhPartition {
    #[cfg(guest_arch = "x86_64")]
    /// Constructs the set of cpuid results to show to the guest
    ///
    /// Starts from the isolation-specific leaves in `cpuid`; for
    /// hardware-isolated VMs, fixes up the x2apic feature bit from the
    /// topology and finalizes the synthetic hypervisor leaves, then applies
    /// the `initial_cpuid` leaves on top.
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            // Update the x2apic leaf based on the topology.
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            // Only the x2apic bit is masked in; other feature bits pass
            // through untouched.
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            // Get the hypervisor version from the host. This is just for
            // reporting purposes, so it is safe even if the hypervisor is not
            // trusted.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            // Perform final processing steps for synthetic leaves.
            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    #[cfg(guest_arch = "x86_64")]
    /// Computes the partition capabilities
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        // Declared ahead of the branch so that a `&mut` to whichever closure
        // is selected can outlive the `if`/`else` below.
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        // Determine the method to get cpuid results for the guest when
        // computing partition capabilities.
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            // Use the filtered CPUID to determine capabilities.
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            // Just use the native cpuid.
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        // Compute and validate capabilities.
        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX 1.5 requires EFER.NXE to be set to 1, so set it at RESET/INIT.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}
2292
#[cfg(guest_arch = "x86_64")]
/// Gets the TSC frequency for the current platform.
///
/// The hypervisor-reported frequency is treated as authoritative. On TDX it
/// is cross-checked against the frequency derived from CPUID leaf 0x15 (core
/// crystal clock information) and rejected if the two disagree by more than
/// the hardware's configuration granularity.
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Always get the frequency from the hypervisor. It's believed that, as long
    // as the hypervisor is behaving, it will provide the most precise and
    // accurate frequency.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_freq = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Compute the hardware-advertised frequency (and the tolerance we allow)
    // where the platform provides one; SNP, VBS, and non-isolated platforms
    // don't advertise a frequency to check against.
    let hw_info = if isolation == IsolationType::Tdx {
        // TDX provides the TSC frequency via cpuid.
        let max_function =
            safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0).eax;
        if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
            return Err(Error::BadCpuidTsc);
        }

        let leaf = safe_intrinsics::cpuid(
            x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
            0,
        );
        // EAX/EBX are the crystal-to-TSC ratio denominator/numerator; ECX is
        // the crystal clock in Hz. All three must be populated.
        let (ratio_denom, ratio_num, clock) = (leaf.eax, leaf.ebx, leaf.ecx);
        if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
            return Err(Error::BadCpuidTsc);
        }

        // TDX TSC is configurable in units of 25MHz, so allow up to 12.5MHz
        // error.
        let allowed_error = 12_500_000;
        let hw_freq = clock as u64 * ratio_num as u64 / ratio_denom as u64;
        Some((hw_freq, allowed_error))
    } else {
        None
    };

    // Don't allow the frequencies to be different by more than the hardware
    // precision.
    if let Some((hw_freq, allowed_error)) = hw_info {
        if hw_freq.abs_diff(hv_freq) > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_freq,
                hw: hw_freq,
                allowed_error,
            });
        }
    }

    Ok(hv_freq)
}
2355
2356impl UhPartitionInner {
2357    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
2358        if self.isolation.is_hardware_isolated() {
2359            return;
2360        }
2361
2362        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);
2363
2364        let access_type_mask = if active {
2365            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
2366        } else {
2367            HV_INTERCEPT_ACCESS_MASK_NONE
2368        };
2369
2370        // Try to register the whole range at once.
2371        if !SKIP_RANGE.load(Ordering::Relaxed) {
2372            match self.hcl.register_intercept(
2373                HvInterceptType::HvInterceptTypeX64IoPortRange,
2374                access_type_mask,
2375                HvInterceptParameters::new_io_port_range(begin..=end),
2376            ) {
2377                Ok(()) => return,
2378                Err(HvError::InvalidParameter) => {
2379                    // Probably a build that doesn't support range wrapping yet.
2380                    // Don't try again.
2381                    SKIP_RANGE.store(true, Ordering::Relaxed);
2382                    tracing::warn!(
2383                        CVM_ALLOWED,
2384                        "old hypervisor build; using slow path for intercept ranges"
2385                    );
2386                }
2387                Err(err) => {
2388                    panic!("io port range registration failure: {err:?}");
2389                }
2390            }
2391        }
2392
2393        // Fall back to registering one port at a time.
2394        for port in begin..=end {
2395            self.hcl
2396                .register_intercept(
2397                    HvInterceptType::HvInterceptTypeX64IoPort,
2398                    access_type_mask,
2399                    HvInterceptParameters::new_io_port(port),
2400                )
2401                .expect("registering io intercept cannot fail");
2402        }
2403    }
2404
2405    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
2406        // TODO: this probably should reflect changes to the memory map via PAM
2407        // registers. Right now this isn't an issue because the relevant region,
2408        // VGA, is handled on the host.
2409        self.lower_vtl_memory_layout
2410            .ram()
2411            .iter()
2412            .any(|m| m.range.contains_addr(gpa))
2413    }
2414
2415    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
2416        // TODO: this probably should reflect changes to the memory map via PAM
2417        // registers. Right now this isn't an issue because the relevant region,
2418        // VGA, is handled on the host.
2419        if self.is_gpa_lower_vtl_ram(gpa) {
2420            // The monitor page is protected against lower VTL writes.
2421            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
2422        } else {
2423            false
2424        }
2425    }
2426}
2427
/// Handle returned by [`UhPartition::register_host_io_port_fast_path`].
///
/// When dropped, unregisters the IO ports so that they are no longer forwarded
/// to the host.
#[must_use]
pub struct HostIoPortFastPathHandle {
    /// Weak so the handle does not keep the partition alive; if the partition
    /// is already gone at drop time, nothing needs to be undone.
    inner: Weak<UhPartitionInner>,
    /// First port of the registered range (inclusive).
    begin: u16,
    /// Last port of the registered range (inclusive).
    end: u16,
}
2438
2439impl Drop for HostIoPortFastPathHandle {
2440    fn drop(&mut self) {
2441        if let Some(inner) = self.inner.upgrade() {
2442            inner.manage_io_port_intercept_region(self.begin, self.end, true);
2443        }
2444    }
2445}
2446
/// The application level VTL crash data not suited for putting
/// on the wire.
///
/// FUTURE: move/remove this to standardize across virt backends.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that crashed.
    pub vp_index: VpIndex,
    /// The VTL that crashed.
    pub last_vtl: GuestVtl,
    /// The crash control information.
    pub control: GuestCrashCtl,
    /// The crash parameters.
    ///
    /// NOTE(review): presumably the guest crash parameter registers P0-P4 —
    /// confirm against the hvdef crash MSR definitions.
    pub parameters: [u64; 5],
}
2462
2463/// Validate that flags is a valid setting for VTL memory protection when
2464/// applied to VTL 1.
2465#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
2466fn validate_vtl_gpa_flags(
2467    flags: HvMapGpaFlags,
2468    mbec_enabled: bool,
2469    shadow_supervisor_stack_enabled: bool,
2470) -> bool {
2471    // Adjust is not allowed for VTL1.
2472    if flags.adjustable() {
2473        return false;
2474    }
2475
2476    // KX must equal UX unless MBEC is enabled. KX && !UX is invalid.
2477    if flags.kernel_executable() != flags.user_executable() {
2478        if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
2479            return false;
2480        }
2481    }
2482
2483    // Read must be specified if anything else is specified.
2484    if flags.writable()
2485        || flags.kernel_executable()
2486        || flags.user_executable()
2487        || flags.supervisor_shadow_stack()
2488        || flags.paging_writability()
2489        || flags.verify_paging_writability()
2490    {
2491        if !flags.readable() {
2492            return false;
2493        }
2494    }
2495
2496    // Supervisor shadow stack protection is invalid if shadow stacks are disabled
2497    // or if execute is not specified.
2498    if flags.supervisor_shadow_stack()
2499        && ((!flags.kernel_executable() && !flags.user_executable())
2500            || shadow_supervisor_stack_enabled)
2501    {
2502        return false;
2503    }
2504
2505    true
2506}