virt_mshv_vtl/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Implementation of the Underhill hypervisor backend, which uses
//! `/dev/mshv_vtl` to interact with the Microsoft hypervisor while running in
//! VTL2.

#![cfg(all(guest_is_native, target_os = "linux"))]

mod devmsr;

cfg_if::cfg_if!(
    if #[cfg(guest_arch = "x86_64")] {
        mod cvm_cpuid;
        pub use processor::snp::SnpBacked;
        pub use processor::tdx::TdxBacked;
        use crate::processor::HardwareIsolatedBacking;
        pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
        use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
        use bitvec::prelude::BitArray;
        use bitvec::prelude::Lsb0;
        use devmsr::MsrDevice;
        use hv1_emulator::hv::ProcessorVtlHv;
        use processor::LapicState;
        use processor::snp::SnpBackedShared;
        use processor::tdx::TdxBackedShared;
        use std::arch::x86_64::CpuidResult;
        use virt::CpuidLeaf;
        use virt::state::StateElement;
        use virt::vp::MpState;
        /// Bitarray type for representing the IRR bits in an x86-64 APIC.
        /// Each bit represents one of the 256 possible vectors.
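        ///
        /// A minimal usage sketch (illustrative only):
        ///
        /// ```ignore
        /// let mut irr = IrrBitmap::default();
        /// irr.set(0x51, true); // mark vector 0x51 as pending
        /// assert!(irr[0x51]);
        /// ```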
        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
    } else if #[cfg(guest_arch = "aarch64")] {
        pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
        use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
    }
);

mod processor;
pub use processor::Backing;
pub use processor::UhProcessor;

use anyhow::Context as AnyhowContext;
use bitfield_struct::bitfield;
use bitvec::boxed::BitBox;
use bitvec::vec::BitVec;
use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemory;
use guestmem::GuestMemoryBackingError;
use hcl::GuestVtl;
use hcl::ioctl::Hcl;
use hcl::ioctl::SetVsmPartitionConfigError;
use hv1_emulator::hv::GlobalHv;
use hv1_emulator::message_queues::MessageQueues;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::SintProxied;
use hv1_structs::VtlArray;
use hvdef::GuestCrashCtl;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PAGE_SIZE_USIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvPartitionPrivilege;
use hvdef::HvRegisterName;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvRegisterVsmPartitionStatus;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGuestOsId;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use inspect::Inspect;
use inspect::InspectMut;
use memory_range::MemoryRange;
use pal::unix::affinity;
use pal::unix::affinity::CpuSet;
use pal_async::driver::Driver;
use pal_async::driver::SpawnDriver;
use pal_uring::IdleControl;
use parking_lot::Mutex;
use parking_lot::RwLock;
use processor::BackingSharedParams;
use processor::SidecarExitReason;
use sidecar_client::NewSidecarClientError;
use std::collections::HashMap;
use std::ops::RangeInclusive;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use std::sync::Weak;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use thiserror::Error;
use user_driver::DmaClient;
use virt::IsolationType;
use virt::PartitionCapabilities;
use virt::VpIndex;
use virt::X86Partition;
use virt::irqcon::IoApicRouting;
use virt::irqcon::MsiRequest;
use virt::x86::apic_software_device::ApicSoftwareDevices;
use virt_support_apic::LocalApicSet;
use vm_topology::memory::MemoryLayout;
use vm_topology::processor::ProcessorTopology;
use vm_topology::processor::TargetVpInfo;
use vmcore::monitor::MonitorPage;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::vmtime::VmTimeSource;
use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
use x86defs::tdx::TdCallResult;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

/// General error returned by operations.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("hcl error")]
    Hcl(#[source] hcl::ioctl::Error),
    #[error("failed to open sidecar client")]
    Sidecar(#[source] NewSidecarClientError),
    #[error("failed to install {0:?} intercept: {1:?}")]
    InstallIntercept(HvInterceptType, HvError),
    #[error("failed to query hypervisor register {0:#x?}")]
    Register(HvRegisterName, #[source] HvError),
    #[error("failed to set vsm partition config register")]
    VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
    #[error("failed to create cpuid tables for cvm")]
    #[cfg(guest_arch = "x86_64")]
    CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
    #[error("failed to update hypercall msr")]
    UpdateHypercallMsr,
    #[error("failed to update reference tsc msr")]
    UpdateReferenceTsc,
    #[error("failed to map overlay page")]
    MapOverlay(#[source] std::io::Error),
    #[error("failed to allocate shared visibility pages for overlay")]
    AllocateSharedVisOverlay(#[source] anyhow::Error),
    #[error("failed to open msr device")]
    OpenMsr(#[source] std::io::Error),
    #[error("cpuid did not contain valid TSC frequency information")]
    BadCpuidTsc,
    #[error("failed to read tsc frequency")]
    ReadTscFrequency(#[source] std::io::Error),
    #[error(
        "tsc frequency mismatch between hypervisor ({hv}) and hardware ({hw}), exceeds allowed error {allowed_error}"
    )]
    TscFrequencyMismatch {
        hv: u64,
        hw: u64,
        allowed_error: u64,
    },
    #[error("failed to set L2 ctls: {0:?}")]
    FailedToSetL2Ctls(TdCallResult),
    #[error("debugging is configured but the binary does not have the gdb feature")]
    InvalidDebugConfiguration,
    #[error("failed to allocate TLB flush page")]
    AllocateTlbFlushPage(#[source] anyhow::Error),
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
}

/// Error revoking guest VSM.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
    #[error("failed to set vsm config")]
    SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
    #[error("VTL 1 is already enabled")]
    Vtl1AlreadyEnabled,
}

/// Underhill partition.
#[derive(Inspect)]
pub struct UhPartition {
    #[inspect(flatten)]
    inner: Arc<UhPartitionInner>,
    // TODO: remove this extra indirection by refactoring some traits.
    #[inspect(skip)]
    interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}

/// Inner state of an Underhill partition, shared between the partition and
/// its processors.
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
    #[inspect(skip)]
    hcl: Hcl,
    #[inspect(skip)] // inspected separately
    vps: Vec<UhVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: PartitionCapabilities,
    #[inspect(skip)] // handled in `inspect_extra`
    enter_modes: Mutex<EnterModes>,
    #[inspect(skip)]
    enter_modes_atomic: AtomicU8,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    lower_vtl_memory_layout: MemoryLayout,
    gm: VtlArray<GuestMemory, 2>,
    vtl0_kernel_exec_gm: GuestMemory,
    vtl0_user_exec_gm: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    crash_notification_send: mesh::Sender<VtlCrash>,
    monitor_page: MonitorPage,
    #[inspect(skip)]
    allocated_monitor_page: Mutex<Option<user_driver::memory::MemoryBlock>>,
    software_devices: Option<ApicSoftwareDevices>,
    #[inspect(skip)]
    vmtime: VmTimeSource,
    isolation: IsolationType,
    #[inspect(with = "inspect::AtomicMut")]
    no_sidecar_hotplug: AtomicBool,
    use_mmio_hypercalls: bool,
    backing_shared: BackingShared,
    intercept_debug_exceptions: bool,
    #[cfg(guest_arch = "x86_64")]
    // N.B. For now there is only one device vector table, i.e. for VTL0 only.
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.read().into_inner())")]
    device_vector_table: RwLock<IrrBitmap>,
    vmbus_relay: bool,
}

#[derive(Inspect)]
#[inspect(untagged)]
enum BackingShared {
    Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Snp(#[inspect(flatten)] SnpBackedShared),
    #[cfg(guest_arch = "x86_64")]
    Tdx(#[inspect(flatten)] TdxBackedShared),
}

impl BackingShared {
    fn new(
        isolation: IsolationType,
        partition_params: &UhPartitionNewParams<'_>,
        backing_shared_params: BackingSharedParams<'_>,
    ) -> Result<BackingShared, Error> {
        Ok(match isolation {
            IsolationType::None | IsolationType::Vbs => {
                assert!(backing_shared_params.cvm_state.is_none());
                BackingShared::Hypervisor(HypervisorBackedShared::new(
                    partition_params,
                    backing_shared_params,
                )?)
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
                partition_params,
                backing_shared_params,
            )?),
            #[cfg(not(guest_arch = "x86_64"))]
            _ => unreachable!(),
        })
    }

    fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
        }
    }

    fn untrusted_synic(&self) -> Option<&GlobalSynic> {
        match self {
            BackingShared::Hypervisor(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(_) => None,
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
        }
    }
}

#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
    #[inspect(mut)]
    first: EnterMode,
    #[inspect(mut)]
    second: EnterMode,
}

impl Default for EnterModes {
    fn default() -> Self {
        Self {
            first: EnterMode::Fast,
            second: EnterMode::IdleToVtl0,
        }
    }
}

impl From<EnterModes> for hcl::protocol::EnterModes {
    fn from(value: EnterModes) -> Self {
        Self::new()
            .with_first(value.first.into())
            .with_second(value.second.into())
    }
}

#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
    Fast,
    PlayIdle,
    IdleToVtl0,
}

impl From<EnterMode> for hcl::protocol::EnterMode {
    fn from(value: EnterMode) -> Self {
        match value {
            EnterMode::Fast => Self::FAST,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct GuestVsmVpState {
    /// The pending event that VTL 1 wants to inject into VTL 0. Injected on
    /// next exit to VTL 0.
    #[inspect(with = "|x| x.as_ref().map(inspect::AsDebug)")]
    vtl0_exit_pending_event: Option<hvdef::HvX64PendingExceptionEvent>,
    reg_intercept: SecureRegisterInterceptState,
}

#[cfg(guest_arch = "x86_64")]
impl GuestVsmVpState {
    fn new() -> Self {
        GuestVsmVpState {
            vtl0_exit_pending_event: None,
            reg_intercept: Default::default(),
        }
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
/// VP state for CVMs.
struct UhCvmVpState {
    // Allocation handle for direct overlays
    #[inspect(debug)]
    direct_overlay_handle: user_driver::memory::MemoryBlock,
    /// Used in VTL 2 exit code to determine which VTL to exit to.
    exit_vtl: GuestVtl,
    /// Hypervisor enlightenment emulator state.
    hv: VtlArray<ProcessorVtlHv, 2>,
    /// LAPIC state.
    lapics: VtlArray<LapicState, 2>,
    /// Guest VSM state for this vp. Some when VTL 1 is enabled.
    vtl1: Option<GuestVsmVpState>,
}

#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
    /// Creates a new CVM VP state.
    pub(crate) fn new(
        cvm_partition: &UhCvmPartitionState,
        inner: &UhPartitionInner,
        vp_info: &TargetVpInfo,
        overlay_pages_required: usize,
    ) -> Result<Self, Error> {
        let direct_overlay_handle = cvm_partition
            .shared_dma_client
            .allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
            .map_err(Error::AllocateSharedVisOverlay)?;

        let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
        let lapics = VtlArray::from_fn(|vtl| {
            let apic_set = &cvm_partition.lapic[vtl];

            // The APIC is software-enabled after reset for secure VTLs, to
            // maintain compatibility with released versions of the secure kernel.
            let mut lapic = apic_set.add_apic(vp_info, vtl == Vtl::Vtl1);
            // Initialize APIC base to match the reset VM state.
            lapic.set_apic_base(apic_base).unwrap();
            // Only the VTL 0 non-BSP LAPICs should be in the WaitForSipi state.
            let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
                MpState::WaitForSipi
            } else {
                MpState::Running
            };
            LapicState::new(lapic, activity)
        });

        let hv = VtlArray::from_fn(|vtl| cvm_partition.hv.add_vp(vp_info.base.vp_index, vtl));

        Ok(Self {
            direct_overlay_handle,
            exit_vtl: GuestVtl::Vtl0,
            hv,
            lapics,
            vtl1: None,
        })
    }
}

#[cfg(guest_arch = "x86_64")]
#[derive(Inspect, Default)]
#[inspect(hex)]
/// Configuration of VTL 1 registration for intercepts on certain registers
pub struct SecureRegisterInterceptState {
    #[inspect(with = "|&x| u64::from(x)")]
    intercept_control: hvdef::HvRegisterCrInterceptControl,
    cr0_mask: u64,
    cr4_mask: u64,
    // Writes to X86X_IA32_MSR_MISC_ENABLE are dropped, so this mask is only
    // used so that get_vp_register returns the value most recently written by
    // set_vp_register.
    ia32_misc_enable_mask: u64,
}

/// Information about a redirected interrupt for a specific vector.
/// Stored per-processor, indexed by the redirected vector number in VTL2.
#[derive(Clone, Inspect)]
struct ProxyRedirectVectorInfo {
    /// Device ID that owns this interrupt
    device_id: u64,
    /// Original interrupt vector from the device
    original_vector: u32,
}

#[derive(Inspect)]
/// Partition-wide state for CVMs.
struct UhCvmPartitionState {
    #[cfg(guest_arch = "x86_64")]
    vps_per_socket: u32,
    /// VPs that have locked their TLB.
    #[inspect(
        with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
    )]
    tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
    #[inspect(with = "inspect::iter_by_index")]
    vps: Vec<UhCvmVpInner>,
    shared_memory: GuestMemory,
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    #[inspect(skip)]
    isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// The emulated local APIC set.
    lapic: VtlArray<LocalApicSet, 2>,
    /// The emulated hypervisor state.
    hv: GlobalHv<2>,
    /// Guest VSM state.
    guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
    /// Dma client for shared visibility pages.
    shared_dma_client: Arc<dyn DmaClient>,
    /// Dma client for private visibility pages.
    private_dma_client: Arc<dyn DmaClient>,
    hide_isolation: bool,
    proxy_interrupt_redirect: bool,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        &self.vps[vp_index as usize]
    }

    fn is_lower_vtl_startup_denied(&self) -> bool {
        matches!(
            *self.guest_vsm.read(),
            GuestVsmState::Enabled {
                vtl1: CvmVtl1State {
                    deny_lower_vtl_startup: true,
                    ..
                }
            }
        )
    }
}

#[derive(Inspect)]
/// Per-vp state for CVMs.
struct UhCvmVpInner {
    /// The current status of TLB locks
    tlb_lock_info: VtlArray<TlbLockInfo, 2>,
    /// Whether EnableVpVtl for VTL 1 has been called on this VP.
    vtl1_enable_called: Mutex<bool>,
    /// Whether the VP has been started via the StartVp hypercall.
    started: AtomicBool,
    /// Start context for StartVp and EnableVpVtl calls.
    #[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
    hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
    /// Tracking of proxy redirect interrupts mapped on this VP.
    #[inspect(with = "|x| inspect::adhoc(|req| inspect::iter_by_key(&*x.lock()).inspect(req))")]
    proxy_redirect_interrupts: Mutex<HashMap<u32, ProxyRedirectVectorInfo>>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest_vsm_state")]
/// Partition-wide state for guest vsm.
enum GuestVsmState<T: Inspect> {
    NotPlatformSupported,
    NotGuestEnabled,
    Enabled {
        #[inspect(flatten)]
        vtl1: T,
    },
}

impl<T: Inspect> GuestVsmState<T> {
    pub fn from_availability(guest_vsm_available: bool) -> Self {
        if guest_vsm_available {
            GuestVsmState::NotGuestEnabled
        } else {
            GuestVsmState::NotPlatformSupported
        }
    }
}
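
// Illustrative sketch (not compiled): `from_availability` picks the initial
// partition-wide state.
//
//     let state: GuestVsmState<CvmVtl1State> = GuestVsmState::from_availability(true);
//     assert!(matches!(state, GuestVsmState::NotGuestEnabled));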

#[derive(Inspect)]
struct CvmVtl1State {
    /// Whether VTL 1 has been enabled on any VP.
    enabled_on_any_vp: bool,
    /// Whether guest memory should be zeroed before it resets.
    zero_memory_on_reset: bool,
    /// Whether a vp can be started or reset by a lower vtl.
    deny_lower_vtl_startup: bool,
    /// Whether Mode-Based Execution Control should be enforced on lower VTLs.
    pub mbec_enabled: bool,
    /// Whether shadow supervisor stack is enabled.
    pub shadow_supervisor_stack_enabled: bool,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_read_intercepts: BitBox<u64>,
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    io_write_intercepts: BitBox<u64>,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl CvmVtl1State {
    fn new(mbec_enabled: bool) -> Self {
        Self {
            enabled_on_any_vp: false,
            zero_memory_on_reset: false,
            deny_lower_vtl_startup: false,
            mbec_enabled,
            shadow_supervisor_stack_enabled: false,
            io_read_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
            io_write_intercepts: BitVec::repeat(false, u16::MAX as usize + 1).into_boxed_bitslice(),
        }
    }
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    tsc_scale: u64,
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl TscReferenceTimeSource {
    fn new(tsc_frequency: u64) -> Self {
        TscReferenceTimeSource {
            tsc_scale: (((10_000_000_u128) << 64) / tsc_frequency as u128) as u64,
        }
    }
}
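
// Worked sketch of the fixed-point math above (illustrative, not compiled):
// `tsc_scale` is a 0.64 fixed-point count of 100ns units per TSC tick, so a
// power-of-two frequency converts exactly. With a hypothetical 2^31 Hz TSC:
//
//     let ts = TscReferenceTimeSource::new(1 << 31);
//     let one_second_of_ticks = 1u128 << 31;
//     let ref_time = ((ts.tsc_scale as u128 * one_second_of_ticks) >> 64) as u64;
//     assert_eq!(ref_time, 10_000_000); // 10M units of 100ns == 1 second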

/// A time implementation based on TSC.
impl GetReferenceTime for TscReferenceTimeSource {
    fn now(&self) -> ReferenceTimeResult {
        #[cfg(guest_arch = "x86_64")]
        {
            let tsc = safe_intrinsics::rdtsc();
            let ref_time = ((self.tsc_scale as u128 * tsc as u128) >> 64) as u64;
            ReferenceTimeResult {
                ref_time,
                system_time: None,
            }
        }

        #[cfg(guest_arch = "aarch64")]
        {
            todo!("AARCH64_TODO");
        }
    }
}

impl virt::irqcon::ControlGic for UhPartitionInner {
    fn set_spi_irq(&self, irq_id: u32, high: bool) {
        if let Err(err) = self.hcl.request_interrupt(
            hvdef::HvInterruptControl::new()
                .with_arm64_asserted(high)
                .with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
            0,
            irq_id,
            GuestVtl::Vtl0,
        ) {
            tracelimit::warn_ratelimited!(
                error = &err as &dyn std::error::Error,
                irq = irq_id,
                asserted = high,
                "failed to request spi"
            );
        }
    }
}

impl virt::Aarch64Partition for UhPartition {
    fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
        debug_assert!(vtl == Vtl::Vtl0);
        self.inner.clone()
    }
}

/// A wrapper around [`UhProcessor`] that is [`Send`].
///
/// This is used to instantiate the processor object on the correct thread,
/// since all lower VTL processor state accesses must occur from the same
/// processor at VTL2.
pub struct UhProcessorBox {
    partition: Arc<UhPartitionInner>,
    vp_info: TargetVpInfo,
}

impl UhProcessorBox {
    /// Returns the VP index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }

    /// Returns the base CPU that manages this processor, when it is a sidecar
    /// VP.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }

    /// Returns the processor object, bound to this thread.
    ///
    /// If `control` is provided, then this must be called on the VP's
    /// associated thread pool thread, and it will dispatch the VP directly.
    /// Otherwise, the returned object will control the processor via the
    /// sidecar kernel.
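    ///
    /// # Example
    ///
    /// A minimal sketch; `vp_box`, `driver`, and `idle_control` are assumed to
    /// come from the surrounding VM startup code:
    ///
    /// ```ignore
    /// let vp = vp_box.bind_processor::<HypervisorBacked>(&driver, Some(&mut idle_control))?;
    /// ```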
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;

            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));

            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }

        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }

    /// Sets the sidecar exit reason for the processor to be due to a task
    /// running with the given name.
    ///
    /// This is useful for diagnostics.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}

#[derive(Debug, Inspect)]
struct UhVpInner {
    /// 32 bits per VTL: top bits are VTL 1, bottom bits are VTL 0.
    wake_reasons: AtomicU64,
    #[inspect(skip)]
    waker: RwLock<Option<Waker>>,
    message_queues: VtlArray<MessageQueues, 2>,
    #[inspect(skip)]
    vp_info: TargetVpInfo,
    /// The Linux kernel's CPU index for this VP. This should be used instead of VpIndex
    /// when interacting with non-MSHV kernel interfaces.
    cpu_index: u32,
    sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}

impl UhVpInner {
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
/// Which operation is setting the initial vp context
enum InitialVpContextOperation {
    /// The VP is being started via the StartVp hypercall.
    StartVp,
    /// The VP is being started via the EnableVpVtl hypercall.
    EnableVpVtl,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
/// State for handling StartVp/EnableVpVtl hypercalls.
struct VpStartEnableVtl {
    /// Which operation, startvp or enablevpvtl, is setting the initial vp
    /// context
    operation: InitialVpContextOperation,
    #[inspect(skip)]
    context: hvdef::hypercall::InitialVpContextX64,
}

#[derive(Debug, Inspect)]
struct TlbLockInfo {
    /// The set of VPs that are waiting for this VP to release the TLB lock.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocked_vps: BitBox<AtomicU64>,
    /// The set of VPs that are holding the TLB lock and preventing this VP
    /// from proceeding.
    #[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
    blocking_vps: BitBox<AtomicU64>,
    /// The count of blocking VPs. This should always be equivalent to
    /// `blocking_vps.count_ones()`, however it is accessible in a single
    /// atomic operation while counting is not.
    blocking_vp_count: AtomicU32,
    /// Whether the VP is sleeping due to a TLB lock.
    sleeping: AtomicBool,
}

#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
impl TlbLockInfo {
    fn new(vp_count: usize) -> Self {
        Self {
            blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
            blocking_vp_count: AtomicU32::new(0),
            sleeping: false.into(),
        }
    }
}

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
    extint: bool,
    message_queues: bool,
    hv_start_enable_vtl_vp: bool,
    intcon: bool,
    update_proxy_irr_filter: bool,
    #[bits(27)]
    _reserved: u32,
}

impl WakeReason {
    // Convenient constants.
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true); // StartVp/EnableVpVtl handling
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}
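
// Illustrative sketch (not compiled): how per-VTL wake reasons pack into the
// 64-bit `wake_reasons` word on `UhVpInner` above, VTL 0 in the low 32 bits
// and VTL 1 in the high 32 bits:
//
//     let vtl0 = u32::from(WakeReason::EXTINT) as u64;
//     let vtl1 = (u32::from(WakeReason::MESSAGE_QUEUES) as u64) << 32;
//     let packed = vtl0 | vtl1; // a single atomic u64 covers both VTLs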

#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct ExitActivity {
    pending_event: bool,
    #[bits(31)]
    _reserved: u32,
}

/// Immutable access to useful bits of Partition state.
impl UhPartition {
    /// Revokes guest VSM.
    pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
        fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
            if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
                return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
            }
            *vsm_state = GuestVsmState::NotPlatformSupported;
            Ok(())
        }

        match &self.inner.backing_shared {
            BackingShared::Hypervisor(s) => {
                revoke(&mut *s.guest_vsm.write())?;
                self.inner
                    .hcl
                    .set_guest_vsm_partition_config(false)
                    .map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
            }
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(SnpBackedShared { cvm, .. })
            | BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
                revoke(&mut *cvm.guest_vsm.write())?;
            }
        };

        Ok(())
    }

    /// Returns the current hypervisor reference time, in 100ns units.
    pub fn reference_time(&self) -> u64 {
        if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().now().ref_time
        } else {
            self.inner
                .hcl
                .reference_time()
                .expect("should not fail to get the reference time")
        }
    }
}

impl virt::Partition for UhPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
        None
    }

    fn caps(&self) -> &PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
        self.inner
            .request_msi(vtl.try_into().expect("higher vtl not configured"), request)
    }

    fn request_yield(&self, _vp_index: VpIndex) {
        unimplemented!()
    }
}

impl X86Partition for UhPartition {
    fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        if let Some(apic) = &self.inner.lapic(vtl) {
            apic.lint(vp_index, lint.into(), |vp_index| {
                self.inner
                    .vp(vp_index)
                    .unwrap()
                    .wake(vtl, WakeReason::INTCON);
            });
        } else if lint == 0 {
            self.inner
                .vp(vp_index)
                .unwrap()
                .wake(vtl, WakeReason::EXTINT);
        } else {
            unimplemented!()
        }
    }
}

impl UhPartitionInner {
    fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
        self.vps.get(index.index() as usize)
    }

    fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
        self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
    }

    fn hv(&self) -> Option<&GlobalHv<2>> {
        self.backing_shared.cvm_state().map(|x| &x.hv)
    }

    /// Used by a requesting VP to issue a `proxy_irr_blocked` update to other VPs.
    #[cfg(guest_arch = "x86_64")]
    fn request_proxy_irr_filter_update(
        &self,
        vtl: GuestVtl,
        device_vector: u8,
        req_vp_index: VpIndex,
    ) {
        tracing::debug!(
            ?vtl,
            device_vector,
            req_vp_index = req_vp_index.index(),
            "request_proxy_irr_filter_update"
        );

        // Add given vector to partition global device vector table (VTL0 only for now)
        {
            let mut device_vector_table = self.device_vector_table.write();
            device_vector_table.set(device_vector as usize, true);
        }

        // Wake all other VPs for their `proxy_irr_blocked` filter update
        for vp in self.vps.iter() {
            if vp.vp_index() != req_vp_index {
                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
            }
        }
    }

    /// Gets the current partition-global device IRR vectors (VTL0 only for now).
    #[cfg(guest_arch = "x86_64")]
    fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
        let device_vector_table = self.device_vector_table.read();
        for idx in device_vector_table.iter_ones() {
            irr_vectors.set(idx, true);
        }
    }

    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
        let mut wake_vps = false;
        resp.field_mut(
            "enter_modes",
            &mut inspect::adhoc_mut(|req| {
                let update = req.is_update();
                {
                    let mut modes = self.enter_modes.lock();
                    modes.inspect_mut(req);
                    if update {
                        self.enter_modes_atomic.store(
                            hcl::protocol::EnterModes::from(*modes).into(),
                            Ordering::Relaxed,
                        );
                        wake_vps = true;
                    }
                }
            }),
        );

        // Wake VPs to propagate updates.
        if wake_vps {
            for vp in self.vps.iter() {
                vp.wake_vtl2();
            }
        }
    }

    // TODO VBS GUEST VSM: enable for aarch64
    #[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
    fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
        // TODO: It might be possible to cache VsmPartitionStatus.
        self.hcl.get_vsm_partition_status()
    }
}

impl virt::Synic for UhPartition {
    fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let Some(vp) = self.inner.vp(vp_index) else {
            tracelimit::warn_ratelimited!(
                CVM_ALLOWED,
                vp = vp_index.index(),
                "invalid vp target for post_message"
            );
            return;
        };

        vp.post_message(
            vtl,
            sint,
            &hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
        );
    }

    fn new_guest_event_port(
        &self,
        vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn vmcore::synic::GuestEventPort> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        Box::new(UhEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(UhEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
                vtl,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }

    fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
        Some(self)
    }
}

impl virt::SynicMonitor for UhPartition {
    fn set_monitor_page(&self, vtl: Vtl, gpa: Option<u64>) -> anyhow::Result<()> {
        // Keep this locked for the whole function to avoid racing with allocate_monitor_page.
        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        let old_gpa = self.inner.monitor_page.set_gpa(gpa);

        // Take ownership of any allocated monitor page so it will be freed on function exit.
        let allocated_page = allocated_block.take();
        if let Some(old_gpa) = old_gpa {
            let allocated_gpa = allocated_page
                .as_ref()
                .map(|b| b.pfns()[0] << HV_PAGE_SHIFT);

            // Revert the old page's permissions, using the appropriate method depending on
            // whether it was allocated or guest-supplied.
            let result = if allocated_gpa == Some(old_gpa) {
                let vtl = GuestVtl::try_from(vtl).unwrap();
                self.unregister_cvm_dma_overlay_page(vtl, old_gpa >> HV_PAGE_SHIFT)
            } else {
                self.inner
                    .hcl
                    .modify_vtl_protection_mask(
                        MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                        hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                        HvInputVtl::CURRENT_VTL,
                    )
                    .map_err(|err| anyhow::anyhow!(err))
            };

            result
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    // Leave the page unset if returning a failure.
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        if let Some(gpa) = gpa {
            // Disallow VTL0 from writing to the page, so we'll get an intercept. Note that read
            // permissions must be enabled or this doesn't work correctly.
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
                    HvMapGpaFlags::new().with_readable(true),
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to register monitor page")
                .inspect_err(|_| {
                    // Leave the page unset if returning a failure.
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(gpa, "registered monitor page");
        }

        Ok(())
    }

    fn register_monitor(
        &self,
        monitor_id: vmcore::monitor::MonitorId,
        connection_id: u32,
    ) -> Box<dyn Sync + Send> {
        self.inner
            .monitor_page
            .register_monitor(monitor_id, connection_id)
    }

    fn allocate_monitor_page(&self, vtl: Vtl) -> anyhow::Result<Option<u64>> {
        let vtl = GuestVtl::try_from(vtl).unwrap();

        // Allocating a monitor page is only supported for CVMs.
        let Some(state) = self.inner.backing_shared.cvm_state() else {
            return Ok(None);
        };

        let mut allocated_block = self.inner.allocated_monitor_page.lock();
        if let Some(block) = allocated_block.as_ref() {
            // An allocated monitor page is already in use; no need to change it.
            let gpa = block.pfns()[0] << HV_PAGE_SHIFT;
            assert_eq!(self.inner.monitor_page.gpa(), Some(gpa));
            return Ok(Some(gpa));
        }

        let block = state
            .private_dma_client
            .allocate_dma_buffer(HV_PAGE_SIZE_USIZE)
            .context("failed to allocate monitor page")?;

        let gpn = block.pfns()[0];
        *allocated_block = Some(block);
        let gpa = gpn << HV_PAGE_SHIFT;
        let old_gpa = self.inner.monitor_page.set_gpa(Some(gpa));
        if let Some(old_gpa) = old_gpa {
            // The old GPA is guaranteed not to be allocated, since that was checked above, so
            // revert its permissions using the method for guest-supplied memory.
            self.inner
                .hcl
                .modify_vtl_protection_mask(
                    MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
                    hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
                    HvInputVtl::CURRENT_VTL,
                )
                .context("failed to unregister old monitor page")
                .inspect_err(|_| {
                    // Leave the page unset if returning a failure.
                    self.inner.monitor_page.set_gpa(None);
                })?;

            tracing::debug!(old_gpa, "unregistered monitor page");
        }

        // Disallow VTL0 from writing to the page, so we'll get an intercept. Note that read
        // permissions must be enabled or this doesn't work correctly.
        self.register_cvm_dma_overlay_page(vtl, gpn, HvMapGpaFlags::new().with_readable(true))
            .context("failed to register monitor page")
            .inspect_err(|_| {
                // Leave the page unset if returning a failure.
                self.inner.monitor_page.set_gpa(None);
            })?;

        tracing::debug!(gpa, "registered allocated monitor page");

        Ok(Some(gpa))
    }
}

impl UhPartitionInner {
    #[cfg(guest_arch = "x86_64")]
    pub(crate) fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        // TODO CVM: optimize for SNP with secure avic to avoid internal wake
        // and for TDX to avoid trip to user mode
        move |vector, auto_eoi| {
            self.lapic(vtl).unwrap().synic_interrupt(
                vp_index,
                vector as u8,
                auto_eoi,
                |vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
            );
        }
    }

    #[cfg(guest_arch = "aarch64")]
    fn synic_interrupt(
        &self,
        _vp_index: VpIndex,
        _vtl: GuestVtl,
    ) -> impl '_ + hv1_emulator::RequestInterrupt {
        move |_, _| {}
    }
}

#[derive(Debug)]
struct UhEventPort {
    partition: Weak<UhPartitionInner>,
    params: Arc<Mutex<UhEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
    vtl: GuestVtl,
}

impl vmcore::synic::GuestEventPort for UhEventPort {
    fn interrupt(&self) -> vmcore::interrupt::Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        vmcore::interrupt::Interrupt::from_fn(move || {
            let UhEventPortParams {
                vp,
                sint,
                flag,
                vtl,
            } = *params.lock();
            let Some(partition) = partition.upgrade() else {
                return;
            };
            tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
            if let Some(hv) = partition.hv() {
                match hv.synic[vtl].signal_event(
                    vp,
                    sint,
                    flag,
                    &mut partition.synic_interrupt(vp, vtl),
                ) {
                    Ok(_) => {}
                    Err(SintProxied) => {
                        tracing::trace!(
                            vp = vp.index(),
                            sint,
                            flag,
                            "forwarding event to untrusted synic"
                        );
                        if let Some(synic) = partition.backing_shared.untrusted_synic() {
                            synic
                                .signal_event(
                                    vp,
                                    sint,
                                    flag,
                                    &mut partition.synic_interrupt(vp, vtl),
                                )
                                .ok();
                        } else {
                            partition.hcl.signal_event_direct(vp.index(), sint, flag)
                        }
                    }
                }
            } else {
                partition.hcl.signal_event_direct(vp.index(), sint, flag);
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}

impl virt::Hv1 for UhPartition {
    type Error = Error;
    type Device = virt::x86::apic_software_device::ApicSoftwareDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(if let Some(hv) = self.inner.hv() {
            hv.ref_time_source().clone()
        } else {
            ReferenceTimeSource::from(self.inner.clone() as Arc<_>)
        })
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        self.inner.software_devices.is_some().then_some(self)
    }
}

impl GetReferenceTime for UhPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        ReferenceTimeResult {
            ref_time: self.hcl.reference_time().unwrap(),
            system_time: None,
        }
    }
}

impl virt::DeviceBuilder for UhPartition {
    fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
        let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
        let device = self
            .inner
            .software_devices
            .as_ref()
            .expect("checked in new_virtual_device")
            .new_device(self.interrupt_targets[vtl].clone(), device_id)
            .map_err(Error::NewDevice)?;

        Ok(device)
    }
}

struct UhInterruptTarget {
    partition: Arc<UhPartitionInner>,
    vtl: GuestVtl,
}

impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
    fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
        let partition = self.partition.clone();
        let vtl = self.vtl;
        Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
    }
}

impl UhPartitionInner {
    fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
        if let Some(lapic) = self.lapic(vtl) {
            tracing::trace!(?request, "interrupt");
            lapic.request_interrupt(request.address, request.data, |vp_index| {
                self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
            });
        } else {
            let (address, data) = request.as_x86();
            if let Err(err) = self.hcl.request_interrupt(
                request.hv_x86_interrupt_control(),
                address.virt_destination().into(),
                data.vector().into(),
                vtl,
            ) {
                tracelimit::warn_ratelimited!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    address = request.address,
                    data = request.data,
                    "failed to request msi"
                );
            }
        }
    }
}

impl IoApicRouting for UhPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    // The IO-APIC is always hooked up to VTL0.
    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(GuestVtl::Vtl0, request))
    }
}

/// Configure the [`hvdef::HvRegisterVsmPartitionConfig`] register with the
/// values used by Underhill.
fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    // Read available capabilities to determine what to enable.
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let hardware_isolated = hcl.isolation().is_hardware_isolated();
    let isolated = hcl.isolation().is_isolated();

    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());

    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}

/// Configuration parameters supplied to [`UhProtoPartition::new`].
///
/// These do not include runtime resources.
pub struct UhPartitionNewParams<'a> {
    /// The isolation type for the partition.
    pub isolation: IsolationType,
    /// Hide isolation from the guest. The guest will run as if it is not
    /// isolated.
    pub hide_isolation: bool,
    /// The memory layout for lower VTLs.
    pub lower_vtl_memory_layout: &'a MemoryLayout,
    /// The guest processor topology.
    pub topology: &'a ProcessorTopology,
    /// The unparsed CVM cpuid info.
    // TODO: move parsing up a layer.
    pub cvm_cpuid_info: Option<&'a [u8]>,
    /// The unparsed CVM secrets page.
    pub snp_secrets: Option<&'a [u8]>,
    /// The virtual top of memory for hardware-isolated VMs.
    ///
    /// Must be a power of two.
    pub vtom: Option<u64>,
    /// Handle synic messages and events.
    ///
    /// On TDX, this prevents the hypervisor from getting vmtdcall exits.
    pub handle_synic: bool,
    /// Do not hotplug sidecar VPs on their first exit. Just continue running
    /// the VP remotely.
    pub no_sidecar_hotplug: bool,
    /// Use MMIO access hypercalls.
    pub use_mmio_hypercalls: bool,
    /// Intercept guest debug exceptions to support gdbstub.
    pub intercept_debug_exceptions: bool,
    /// Disable proxy interrupt redirection.
    pub disable_proxy_redirect: bool,
}

/// Parameters to [`UhProtoPartition::build`].
pub struct UhLateParams<'a> {
    /// Guest memory for lower VTLs.
    pub gm: VtlArray<GuestMemory, 2>,
    /// Guest memory for VTL 0 kernel execute access.
    pub vtl0_kernel_exec_gm: GuestMemory,
    /// Guest memory for VTL 0 user execute access.
    pub vtl0_user_exec_gm: GuestMemory,
    /// The CPUID leaves to expose to the guest.
    #[cfg(guest_arch = "x86_64")]
    pub cpuid: Vec<CpuidLeaf>,
    /// The mesh sender to use for crash notifications.
    // FUTURE: remove mesh dependency from this layer.
    pub crash_notification_send: mesh::Sender<VtlCrash>,
    /// The VM time source.
    pub vmtime: &'a VmTimeSource,
    /// Parameters for CVMs only.
    pub cvm_params: Option<CvmLateParams>,
    /// Whether the vmbus relay is enabled and active for the partition.
    pub vmbus_relay: bool,
}

/// CVM-only parameters to [`UhProtoPartition::build`].
pub struct CvmLateParams {
    /// Guest memory for untrusted devices, like overlay pages.
    pub shared_gm: GuestMemory,
    /// An object to call to change host visibility on guest memory.
    pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
    /// Dma client for shared visibility pages.
    pub shared_dma_client: Arc<dyn DmaClient>,
    /// Allocator for private visibility pages.
    pub private_dma_client: Arc<dyn DmaClient>,
}

/// Represents a GPN that is either in guest memory or was allocated by dma_client.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum GpnSource {
    /// The GPN is in regular guest RAM.
    GuestMemory,
    /// The GPN was allocated by dma_client and is not in guest RAM.
    Dma,
}

/// Trait for CVM-related protections on guest memory.
pub trait ProtectIsolatedMemory: Send + Sync {
    /// Changes host visibility on guest memory.
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)>;

    /// Queries host visibility on guest memory.
    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)>;

    /// Gets the default protections/permissions for VTL 0.
    fn default_vtl0_protections(&self) -> HvMapGpaFlags;

    /// Changes the default protections/permissions for a VTL. For VBS-isolated
    /// VMs, the protections apply to all VTLs lower than the specified one. For
    /// hardware-isolated VMs, they apply just to the given VTL.
1473    fn change_default_vtl_protections(
1474        &self,
1475        target_vtl: GuestVtl,
1476        protections: HvMapGpaFlags,
1477        tlb_access: &mut dyn TlbFlushLockAccess,
1478    ) -> Result<(), HvError>;
1479
1480    /// Changes the vtl protections on a range of guest memory.
1481    fn change_vtl_protections(
1482        &self,
1483        target_vtl: GuestVtl,
1484        gpns: &[u64],
1485        protections: HvMapGpaFlags,
1486        tlb_access: &mut dyn TlbFlushLockAccess,
1487    ) -> Result<(), (HvError, usize)>;
1488
1489    /// Registers a page as an overlay page by first validating it has the
1490    /// required permissions, optionally modifying them, then locking them.
1491    fn register_overlay_page(
1492        &self,
1493        vtl: GuestVtl,
1494        gpn: u64,
1495        gpn_source: GpnSource,
1496        check_perms: HvMapGpaFlags,
1497        new_perms: Option<HvMapGpaFlags>,
1498        tlb_access: &mut dyn TlbFlushLockAccess,
1499    ) -> Result<(), HvError>;
1500
1501    /// Unregisters an overlay page, removing its permission lock and restoring
1502    /// the previous permissions.
1503    fn unregister_overlay_page(
1504        &self,
1505        vtl: GuestVtl,
1506        gpn: u64,
1507        tlb_access: &mut dyn TlbFlushLockAccess,
1508    ) -> Result<(), HvError>;
1509
1510    /// Checks whether a page is currently registered as an overlay page.
1511    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool;
1512
1513    /// Locks the permissions and mappings for a set of guest pages.
1514    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError>;
1515
1516    /// Unlocks the permissions and mappings for a set of guest pages.
1517    ///
1518    /// Panics if asked to unlock a page that was not previously locked. The
1519    /// caller must ensure that the given slice has the same ordering as the
1520    /// one passed to `lock_gpns`.
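    ///
    /// A paired-usage sketch (illustrative; `protector` stands in for an
    /// implementor):
    ///
    /// ```ignore
    /// protector.lock_gpns(vtl, &gpns)?;
    /// // ... the pages' permissions and mappings stay pinned here ...
    /// protector.unlock_gpns(vtl, &gpns); // same slice ordering as lock_gpns
    /// ```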
    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]);

    /// Alerts the memory protector that VTL 1 is ready to set VTL protections
    /// on lower-VTL memory, and that these protections should be enforced.
    fn set_vtl1_protections_enabled(&self);

    /// Whether VTL 1 is prepared to modify VTL protections on lower-VTL memory,
    /// and therefore whether these protections should be enforced.
    fn vtl1_protections_enabled(&self) -> bool;
}

/// Trait for access to TLB flush and lock machinery.
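///
/// A minimal no-op implementation sketch (illustrative only; real
/// implementations route these calls into backing-specific flush machinery):
///
/// ```ignore
/// struct NoopTlbAccess;
///
/// impl TlbFlushLockAccess for NoopTlbAccess {
///     fn flush(&mut self, _vtl: GuestVtl) {}
///     fn flush_entire(&mut self) {}
///     fn set_wait_for_tlb_locks(&mut self, _vtl: GuestVtl) {}
/// }
/// ```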
pub trait TlbFlushLockAccess {
    /// Flush the entire TLB for all VPs for the given VTL.
    fn flush(&mut self, vtl: GuestVtl);

    /// Flush the entire TLB for all VPs for all VTLs.
    fn flush_entire(&mut self);

    /// Causes the specified VTL on the current VP to wait on all TLB locks.
    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}

/// A partially built partition. Used to allow querying partition capabilities
/// before fully instantiating the partition.
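///
/// Typical two-phase flow (sketch; parameter construction elided, and
/// `make_driver` is a stand-in for a caller-supplied driver factory):
///
/// ```ignore
/// let proto = UhProtoPartition::new(params, |cpu| make_driver(cpu))?;
/// if proto.guest_vsm_available() {
///     // Adjust late_params based on the queried capabilities.
/// }
/// let (partition, vps) = proto.build(late_params).await?;
/// ```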
pub struct UhProtoPartition<'a> {
    params: UhPartitionNewParams<'a>,
    hcl: Hcl,
    guest_vsm_available: bool,
    create_partition_available: bool,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

impl<'a> UhProtoPartition<'a> {
    /// Creates a new prototype partition.
    ///
    /// `driver(cpu)` returns the driver to use for polling the sidecar device
    /// whose base CPU is `cpu`.
    pub fn new<T: SpawnDriver>(
        params: UhPartitionNewParams<'a>,
        driver: impl FnMut(u32) -> T,
    ) -> Result<Self, Error> {
        let hcl_isolation = match params.isolation {
            IsolationType::None => hcl::ioctl::IsolationType::None,
            IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
            IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
            IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
        };

        // Try to open the sidecar device, if it is present.
        let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;

        let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;

        // Set the hypercalls that this process will use.
        let mut allowed_hypercalls = vec![
            hvdef::HypercallCode::HvCallGetVpRegisters,
            hvdef::HypercallCode::HvCallSetVpRegisters,
            hvdef::HypercallCode::HvCallInstallIntercept,
            hvdef::HypercallCode::HvCallTranslateVirtualAddress,
            hvdef::HypercallCode::HvCallPostMessageDirect,
            hvdef::HypercallCode::HvCallSignalEventDirect,
            hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
            hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
            hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
            hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
            hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
            hvdef::HypercallCode::HvCallAcceptGpaPages,
            hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
        ];

        if params.isolation.is_hardware_isolated() {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallEnablePartitionVtl,
                hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
                hvdef::HypercallCode::HvCallEnableVpVtl,
            ]);
        }

        if params.use_mmio_hypercalls {
            allowed_hypercalls.extend(vec![
                hvdef::HypercallCode::HvCallMemoryMappedIoRead,
                hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
            ]);
        }

        hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());

        set_vtl2_vsm_partition_config(&hcl)?;

        let privs = hcl.get_privileges_and_features_info().map_err(Error::Hcl)?;
        let guest_vsm_available = Self::check_guest_vsm_support(privs, &hcl)?;

        #[cfg(guest_arch = "x86_64")]
        let cpuid = match params.isolation {
            IsolationType::Snp => cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,

            IsolationType::Tdx => cvm_cpuid::CpuidResultsIsolationType::Tdx {
                topology: params.topology,
                vtom: params.vtom.unwrap(),
                access_vsm: guest_vsm_available,
            }
            .build()
            .map_err(Error::CvmCpuid)?,
            IsolationType::Vbs | IsolationType::None => Default::default(),
        };

        Ok(UhProtoPartition {
            hcl,
            params,
            guest_vsm_available,
            create_partition_available: privs.create_partitions(),
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        })
    }

    /// Returns whether VSM support will be available to the guest.
    pub fn guest_vsm_available(&self) -> bool {
        self.guest_vsm_available
    }

    /// Returns whether this partition has the create partitions hypercall
    /// available.
    pub fn create_partition_available(&self) -> bool {
        self.create_partition_available
    }

    /// Returns a new Underhill partition.
    pub async fn build(
        self,
        late_params: UhLateParams<'_>,
    ) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
        let Self {
            mut hcl,
            params,
            guest_vsm_available,
            create_partition_available: _,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
        } = self;
        let isolation = params.isolation;
        let is_hardware_isolated = isolation.is_hardware_isolated();

        // Intercept debug exceptions.
        //
        // On TDX, OpenHCL registers for the intercept itself, because all
        // OpenHCL TDs today have the debug policy bit set. On non-TDX
        // platforms, the hypervisor installs the intercept on behalf of the
        // guest.
        if params.intercept_debug_exceptions {
            if !cfg!(feature = "gdb") {
                return Err(Error::InvalidDebugConfiguration);
            }

            cfg_if::cfg_if! {
                if #[cfg(guest_arch = "x86_64")] {
                    if isolation != IsolationType::Tdx {
                        let debug_exception_vector = 0x1;
                        hcl.register_intercept(
                            HvInterceptType::HvInterceptTypeException,
                            HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                            HvInterceptParameters::new_exception(debug_exception_vector),
                        )
                        .map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
                    }
                } else {
                    return Err(Error::InvalidDebugConfiguration);
                }
            }
        }

        if !is_hardware_isolated {
            if cfg!(guest_arch = "x86_64") {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64Msr,
                    HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
                })?;

                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeX64ApicEoi,
                    HV_INTERCEPT_ACCESS_MASK_WRITE,
                    HvInterceptParameters::new_zeroed(),
                )
                .map_err(|err| {
                    Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
                })?;
            } else {
                if false {
                    todo!("AARCH64_TODO");
                }
            }
        }

        if isolation == IsolationType::Snp {
            // SNP VMs register for the #VC exception to support reflect-VC.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeException,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_exception(0x1D),
            )
            .map_err(|err| {
                Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
            })?;

            // Get the register tweak bitmap from the secrets page.
            let mut bitmap = [0u8; 64];
            if let Some(secrets) = params.snp_secrets {
                bitmap.copy_from_slice(
                    &secrets
                        [REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
                );
            }
            hcl.set_snp_register_bitmap(bitmap);
        }

        // Do per-VP HCL initialization.
        hcl.add_vps(
            params.topology.vp_count(),
            late_params
                .cvm_params
                .as_ref()
                .map(|x| &x.private_dma_client),
        )
        .map_err(Error::Hcl)?;

        let vps: Vec<_> = params
            .topology
            .vps_arch()
            .map(|vp_info| {
                // TODO: determine CPU index, which in theory could be different
                // from the VP index, though this hasn't happened yet.
                let cpu_index = vp_info.base.vp_index.index();
                UhVpInner::new(cpu_index, vp_info)
            })
            .collect();

        // Enable support for VPCI devices if the hypervisor supports it.
        #[cfg(guest_arch = "x86_64")]
        let software_devices = {
            let res = if !is_hardware_isolated {
                hcl.register_intercept(
                    HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                    HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                    HvInterceptParameters::new_zeroed(),
                )
            } else {
                Ok(())
            };
            match res {
                Ok(()) => Some(ApicSoftwareDevices::new(
                    params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
                )),
                Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
                Err(err) => {
                    return Err(Error::InstallIntercept(
                        HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
                        err,
                    ));
                }
            }
        };

        #[cfg(guest_arch = "aarch64")]
        let software_devices = None;

        #[cfg(guest_arch = "aarch64")]
        let caps = virt::aarch64::Aarch64PartitionCapabilities {};

        #[cfg(guest_arch = "x86_64")]
        let cpuid = UhPartition::construct_cpuid_results(
            cpuid,
            &late_params.cpuid,
            params.topology,
            isolation,
            params.hide_isolation,
        );

        #[cfg(guest_arch = "x86_64")]
        let caps = UhPartition::construct_capabilities(
            params.topology,
            &cpuid,
            isolation,
            params.hide_isolation,
        )
        .map_err(Error::Capabilities)?;

        if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
            // The hypervisor will manage the untrusted SINTs (or the whole
            // synic for non-hardware-isolated VMs), but some event ports
            // and message ports are implemented here. Register an intercept
            // to handle HvSignalEvent and HvPostMessage hypercalls when the
            // hypervisor doesn't recognize the connection ID.
            //
            // TDX manages this locally instead of through the hypervisor.
            hcl.register_intercept(
                HvInterceptType::HvInterceptTypeUnknownSynicConnection,
                HV_INTERCEPT_ACCESS_MASK_EXECUTE,
                HvInterceptParameters::new_zeroed(),
            )
            .expect("registering synic intercept cannot fail");
        }

        #[cfg(guest_arch = "x86_64")]
        let vsm_caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
        #[cfg(guest_arch = "x86_64")]
        let proxy_interrupt_redirect_available =
            vsm_caps.proxy_interrupt_redirect_available() && !params.disable_proxy_redirect;

        #[cfg(guest_arch = "x86_64")]
        let cvm_state = if is_hardware_isolated {
            Some(Self::construct_cvm_state(
                &params,
                late_params.cvm_params.unwrap(),
                &caps,
                guest_vsm_available,
                proxy_interrupt_redirect_available,
            )?)
        } else {
            None
        };
        #[cfg(guest_arch = "aarch64")]
        let cvm_state = None;

        let backing_shared = BackingShared::new(
            isolation,
            &params,
            BackingSharedParams {
                cvm_state,
                #[cfg(guest_arch = "x86_64")]
                cpuid: &cpuid,
                hcl: &hcl,
                guest_vsm_available,
            },
        )?;

        let enter_modes = EnterModes::default();

        let partition = Arc::new(UhPartitionInner {
            hcl,
            vps,
            irq_routes: Default::default(),
            caps,
            enter_modes: Mutex::new(enter_modes),
            enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
            gm: late_params.gm,
            vtl0_kernel_exec_gm: late_params.vtl0_kernel_exec_gm,
            vtl0_user_exec_gm: late_params.vtl0_user_exec_gm,
            #[cfg(guest_arch = "x86_64")]
            cpuid,
            crash_notification_send: late_params.crash_notification_send,
            monitor_page: MonitorPage::new(),
            allocated_monitor_page: Mutex::new(None),
            software_devices,
            lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
            vmtime: late_params.vmtime.clone(),
            isolation,
            no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
            use_mmio_hypercalls: params.use_mmio_hypercalls,
            backing_shared,
            #[cfg(guest_arch = "x86_64")]
            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
            intercept_debug_exceptions: params.intercept_debug_exceptions,
            vmbus_relay: late_params.vmbus_relay,
        });

        if cfg!(guest_arch = "x86_64") {
            // Intercept all IOs unless opted out.
            partition.manage_io_port_intercept_region(0, !0, true);
        }

        let vps = params
            .topology
            .vps_arch()
            .map(|vp_info| UhProcessorBox {
                partition: partition.clone(),
                vp_info,
            })
            .collect();

        Ok((
            UhPartition {
                inner: partition.clone(),
                interrupt_targets: VtlArray::from_fn(|vtl| {
                    Arc::new(UhInterruptTarget {
                        partition: partition.clone(),
                        vtl: vtl.try_into().unwrap(),
                    })
                }),
            },
            vps,
        ))
    }
}

impl UhPartition {
    /// Gets the guest OS ID for VTL0.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        // If Underhill is emulating the hypervisor interfaces, get this value
        // from the emulator. This happens when running under hardware isolation
        // or when configured for testing.
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            // Ask the hypervisor for this value.
            self.inner
                .hcl
                .get_guest_os_id(Vtl::Vtl0)
                .map_err(Error::Hcl)?
        };
        Ok(id)
    }

    /// Configures guest accesses to IO ports in `range` to go directly to the
    /// host.
    ///
    /// When the return value is dropped, the ports will be unregistered.
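    ///
    /// Usage sketch (the port range is illustrative):
    ///
    /// ```ignore
    /// let handle = partition.register_host_io_port_fast_path(0x3f8..=0x3ff);
    /// // Guest accesses to these ports now bypass VTL2.
    /// drop(handle); // restores interception of the range
    /// ```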
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        // There is no way to provide a fast path for some hardware-isolated
        // VM architectures. The devices that do use this facility are not
        // enabled on hardware-isolated VMs.
        assert!(!self.inner.isolation.is_hardware_isolated());

        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }

    /// Trigger the LINT1 interrupt vector on the LAPIC of the BSP.
    pub fn assert_debug_interrupt(&self, _vtl: u8) {
        #[cfg(guest_arch = "x86_64")]
        const LINT_INDEX_1: u8 = 1;
        #[cfg(guest_arch = "x86_64")]
        match self.inner.isolation {
            IsolationType::Snp => {
                tracing::error!(?_vtl, "Debug interrupts cannot be injected into SNP VMs",);
            }
            _ => {
                let bsp_index = VpIndex::new(0);
                self.pulse_lint(bsp_index, Vtl::try_from(_vtl).unwrap(), LINT_INDEX_1)
            }
        }
    }

    /// Enables or disables the PM timer assist.
    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }

    /// Registers a DMA-allocated page, such as the monitor page, as an
    /// overlay page with the given protections.
    fn register_cvm_dma_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        new_perms: HvMapGpaFlags,
    ) -> anyhow::Result<()> {
        // How the monitor page is protected depends on the isolation type of the VM.
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    // On a CVM, the monitor page is always DMA-allocated.
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .register_overlay_page(
                    vtl,
                    gpn,
                    GpnSource::Dma,
                    HvMapGpaFlags::new(),
                    Some(new_perms),
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn, new_perms);
                unreachable!()
            }
        }
    }

    /// Unregisters a DMA-allocated overlay page, such as the monitor page,
    /// restoring its previous protections.
    fn unregister_cvm_dma_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> anyhow::Result<()> {
        // How the monitor page is protected depends on the isolation type of the VM.
        match &self.inner.backing_shared {
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Snp(snp_backed_shared) => snp_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut SnpBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        snp_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            #[cfg(guest_arch = "x86_64")]
            BackingShared::Tdx(tdx_backed_shared) => tdx_backed_shared
                .cvm
                .isolated_memory_protector
                .unregister_overlay_page(
                    vtl,
                    gpn,
                    &mut TdxBacked::tlb_flush_lock_access(
                        None,
                        self.inner.as_ref(),
                        tdx_backed_shared,
                    ),
                )
                .map_err(|e| anyhow::anyhow!(e)),
            BackingShared::Hypervisor(_) => {
                let _ = (vtl, gpn);
                unreachable!()
            }
        }
    }
}

impl UhProtoPartition<'_> {
    /// Whether Guest VSM is available to the guest. If so, for hardware CVMs,
    /// it is safe to expose Guest VSM support via cpuid.
    fn check_guest_vsm_support(privs: HvPartitionPrivilege, hcl: &Hcl) -> Result<bool, Error> {
        if !privs.access_vsm() {
            return Ok(false);
        }

        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }

    #[cfg(guest_arch = "x86_64")]
    /// Constructs partition-wide CVM state.
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        guest_vsm_available: bool,
        proxy_interrupt_redirect_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        use vmcore::reference_time::ReferenceTimeSource;

        let vp_count = params.topology.vp_count() as usize;
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
                proxy_redirect_interrupts: Mutex::new(HashMap::new()),
            })
            .collect();
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());

        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });

        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = ReferenceTimeSource::new(TscReferenceTimeSource::new(tsc_frequency));

        // If we're emulating the APIC, then we also must emulate the hypervisor
        // enlightenments, since the hypervisor can't support enlightenments
        // without also providing an APIC.
        //
        // Additionally, TDX provides hardware APIC emulation but we still need
        // to emulate the hypervisor enlightenments.
        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
            is_ref_time_backed_by_tsc: true,
        });

        Ok(UhCvmPartitionState {
            vps_per_socket: params.topology.reserved_vps_per_socket(),
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
            proxy_interrupt_redirect: proxy_interrupt_redirect_available,
        })
    }
}

impl UhPartition {
    #[cfg(guest_arch = "x86_64")]
    /// Constructs the set of cpuid results to show to the guest.
    fn construct_cpuid_results(
        cpuid: virt::CpuidLeafSet,
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::CpuidLeafSet {
        let mut cpuid = cpuid.into_leaves();
        if isolation.is_hardware_isolated() {
            // Update the x2apic leaf based on the topology.
            let x2apic = match topology.apic_mode() {
                vm_topology::processor::x86::ApicMode::XApic => false,
                vm_topology::processor::x86::ApicMode::X2ApicSupported => true,
                vm_topology::processor::x86::ApicMode::X2ApicEnabled => true,
            };
            let ecx = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(x2apic);
            let ecx_mask = x86defs::cpuid::VersionAndFeaturesEcx::new().with_x2_apic(true);
            cpuid.push(
                CpuidLeaf::new(
                    x86defs::cpuid::CpuidFunction::VersionAndFeatures.0,
                    [0, 0, ecx.into(), 0],
                )
                .masked([0, 0, ecx_mask.into(), 0]),
            );

            // Get the hypervisor version from the host. This is just for
            // reporting purposes, so it is safe even if the hypervisor is not
            // trusted.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);

            // Perform final processing steps for synthetic leaves.
            hv1_emulator::cpuid::process_hv_cpuid_leaves(
                &mut cpuid,
                hide_isolation,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
            );
        }
        cpuid.extend(initial_cpuid);
        virt::CpuidLeafSet::new(cpuid)
    }

    #[cfg(guest_arch = "x86_64")]
    /// Computes the partition capabilities.
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &virt::CpuidLeafSet,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> Result<virt::x86::X86PartitionCapabilities, virt::x86::X86PartitionCapabilitiesError> {
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;

        // Determine the method to get cpuid results for the guest when
        // computing partition capabilities.
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if isolation.is_hardware_isolated() {
            // Use the filtered CPUID to determine capabilities.
            cvm_cpuid_fn = move |leaf, sub_leaf| cpuid.result(leaf, sub_leaf, &[0, 0, 0, 0]);
            &mut cvm_cpuid_fn
        } else {
            // Just use the native cpuid.
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };

        // Compute and validate capabilities.
        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn)?;
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX 1.5 requires EFER.NXE to be set to 1, so set it at RESET/INIT.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }

        Ok(caps)
    }
}

#[cfg(guest_arch = "x86_64")]
/// Gets the TSC frequency for the current platform.
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // Always get the frequency from the hypervisor. It's believed that, as long
    // as the hypervisor is behaving, it will provide the most precise and accurate frequency.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Get the hardware-advertised frequency and validate that the
    // hypervisor frequency is not too far off.
    let hw_info = match isolation {
        IsolationType::Tdx => {
            // TDX provides the TSC frequency via cpuid.
            let max_function =
                safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0)
                    .eax;

            if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
                return Err(Error::BadCpuidTsc);
            }
            let result = safe_intrinsics::cpuid(
                x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
                0,
            );
            let ratio_denom = result.eax;
            let ratio_num = result.ebx;
            let clock = result.ecx;
            if ratio_num == 0 || ratio_denom == 0 || clock == 0 {
                return Err(Error::BadCpuidTsc);
            }
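            // TSC Hz = crystal clock Hz * ratio_num / ratio_denom; for
            // example (illustrative values only), a 25 MHz crystal with a
            // 94/1 ratio yields 25_000_000 * 94 = 2.35 GHz.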
            // TDX TSC is configurable in units of 25MHz, so allow up to 12.5MHz
            // error.
            let allowed_error = 12_500_000;
            Some((
                clock as u64 * ratio_num as u64 / ratio_denom as u64,
                allowed_error,
            ))
        }
        IsolationType::Snp => {
            // SNP currently does not provide the frequency.
            None
        }
        IsolationType::Vbs | IsolationType::None => None,
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        // Don't allow the frequencies to be different by more than the hardware
        // precision.
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }

    Ok(hv_frequency)
}

impl UhPartitionInner {
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }

        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);

        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };

        // Try to register the whole range at once.
        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    // Probably a hypervisor build that doesn't support range
                    // intercepts yet. Don't try again.
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!(
                        CVM_ALLOWED,
                        "old hypervisor build; using slow path for intercept ranges"
                    );
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }

        // Fall back to registering one port at a time.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }

    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        // TODO: this probably should reflect changes to the memory map via PAM
        // registers. Right now this isn't an issue because the relevant region,
        // VGA, is handled on the host.
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        // TODO: this probably should reflect changes to the memory map via PAM
        // registers. Right now this isn't an issue because the relevant region,
        // VGA, is handled on the host.
        if self.is_gpa_lower_vtl_ram(gpa) {
            // The monitor page is protected against lower VTL writes.
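            // `gpa & !(HV_PAGE_SIZE - 1)` rounds the address down to its
            // containing page base (e.g. 0x1234_5678 -> 0x1234_5000 with
            // 4KiB pages), so any byte in the monitor page matches.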
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
}

/// Handle returned by [`UhPartition::register_host_io_port_fast_path`].
///
/// When dropped, unregisters the IO ports so that they are no longer forwarded
/// to the host.
#[must_use]
pub struct HostIoPortFastPathHandle {
    inner: Weak<UhPartitionInner>,
    begin: u16,
    end: u16,
}

impl Drop for HostIoPortFastPathHandle {
    fn drop(&mut self) {
        if let Some(inner) = self.inner.upgrade() {
            inner.manage_io_port_intercept_region(self.begin, self.end, true);
        }
    }
}

/// The application-level VTL crash data, not suited for putting on the wire.
///
/// FUTURE: move/remove this to standardize across virt backends.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP that crashed.
    pub vp_index: VpIndex,
    /// The VTL that crashed.
    pub last_vtl: GuestVtl,
    /// The crash control information.
    pub control: GuestCrashCtl,
    /// The crash parameters.
    pub parameters: [u64; 5],
}

/// Validate that `flags` is a valid setting for VTL memory protection when
/// applied to VTL 1.
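///
/// A few illustrative cases (sketch; not compiled as a doctest since this
/// function is private):
///
/// ```ignore
/// // Read-only is a valid VTL 1 protection.
/// let r = HvMapGpaFlags::new().with_readable(true);
/// assert!(validate_vtl_gpa_flags(r, false, false));
///
/// // Write without read violates the read-required rule.
/// let w = HvMapGpaFlags::new().with_writable(true);
/// assert!(!validate_vtl_gpa_flags(w, false, false));
/// ```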
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn validate_vtl_gpa_flags(
    flags: HvMapGpaFlags,
    mbec_enabled: bool,
    shadow_supervisor_stack_enabled: bool,
) -> bool {
    // Adjust is not allowed for VTL 1.
    if flags.adjustable() {
        return false;
    }

    // KX must equal UX unless MBEC is enabled. KX && !UX is invalid.
    if flags.kernel_executable() != flags.user_executable() {
        if (flags.kernel_executable() && !flags.user_executable()) || !mbec_enabled {
            return false;
        }
    }

    // Read must be specified if anything else is specified.
    if flags.writable()
        || flags.kernel_executable()
        || flags.user_executable()
        || flags.supervisor_shadow_stack()
        || flags.paging_writability()
        || flags.verify_paging_writability()
    {
        if !flags.readable() {
            return false;
        }
    }

    // Supervisor shadow stack protection is invalid if shadow stacks are disabled
    // or if execute is not specified.
    if flags.supervisor_shadow_stack()
        && ((!flags.kernel_executable() && !flags.user_executable())
            || !shadow_supervisor_stack_enabled)
    {
        return false;
    }

    true
}