hcl/ioctl.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Interface to `mshv_vtl` driver.
5
6mod deferred;
7
8pub mod aarch64;
9pub mod snp;
10pub mod tdx;
11pub mod x64;
12
13use self::deferred::DeferredActionSlots;
14use self::ioctls::*;
15use crate::GuestVtl;
16use crate::ioctl::deferred::DeferredAction;
17use crate::mapped_page::MappedPage;
18use crate::protocol;
19use crate::protocol::EnterModes;
20use crate::protocol::HCL_REG_PAGE_OFFSET;
21use crate::protocol::HCL_VMSA_GUEST_VSM_PAGE_OFFSET;
22use crate::protocol::HCL_VMSA_PAGE_OFFSET;
23use crate::protocol::MSHV_APIC_PAGE_OFFSET;
24use crate::protocol::hcl_intr_offload_flags;
25use crate::protocol::hcl_run;
26use bitvec::vec::BitVec;
27use cvm_tracing::CVM_ALLOWED;
28use deferred::RegisteredDeferredActions;
29use deferred::push_deferred_action;
30use deferred::register_deferred_actions;
31use hv1_structs::ProcessorSet;
32use hv1_structs::VtlArray;
33use hvdef::HV_PAGE_SIZE;
34use hvdef::HV_PARTITION_ID_SELF;
35use hvdef::HV_VP_INDEX_SELF;
36use hvdef::HvAarch64RegisterPage;
37use hvdef::HvAllArchRegisterName;
38#[cfg(guest_arch = "aarch64")]
39use hvdef::HvArm64RegisterName;
40use hvdef::HvError;
41use hvdef::HvMapGpaFlags;
42use hvdef::HvMessage;
43use hvdef::HvRegisterName;
44use hvdef::HvRegisterValue;
45use hvdef::HvRegisterVsmPartitionConfig;
46use hvdef::HvStatus;
47use hvdef::HvX64RegisterName;
48use hvdef::HvX64RegisterPage;
49use hvdef::HypercallCode;
50use hvdef::Vtl;
51use hvdef::hypercall::AssertVirtualInterrupt;
52use hvdef::hypercall::HostVisibilityType;
53use hvdef::hypercall::HvGpaRange;
54use hvdef::hypercall::HvGpaRangeExtended;
55use hvdef::hypercall::HvInputVtl;
56use hvdef::hypercall::HvInterceptParameters;
57use hvdef::hypercall::HvInterceptType;
58use hvdef::hypercall::HvRegisterAssoc;
59use hvdef::hypercall::HypercallOutput;
60use hvdef::hypercall::InitialVpContextX64;
61use hvdef::hypercall::ModifyHostVisibility;
62use memory_range::MemoryRange;
63use pal::unix::pthread::*;
64use parking_lot::Mutex;
65use private::BackingPrivate;
66use sidecar_client::NewSidecarClientError;
67use sidecar_client::SidecarClient;
68use sidecar_client::SidecarRun;
69use sidecar_client::SidecarVp;
70use std::cell::UnsafeCell;
71use std::fmt::Debug;
72use std::fs::File;
73use std::io;
74use std::os::unix::prelude::*;
75use std::sync::Arc;
76use std::sync::Once;
77use std::sync::atomic::AtomicU8;
78use std::sync::atomic::AtomicU32;
79use std::sync::atomic::Ordering;
80use thiserror::Error;
81use user_driver::DmaClient;
82use user_driver::memory::MemoryBlock;
83use x86defs::snp::SevVmsa;
84use x86defs::tdx::TdCallResultCode;
85use x86defs::vmx::ApicPage;
86use zerocopy::FromBytes;
87use zerocopy::FromZeros;
88use zerocopy::Immutable;
89use zerocopy::IntoBytes;
90use zerocopy::KnownLayout;
91
92/// Error returned by HCL operations.
93#[derive(Error, Debug)]
94#[expect(missing_docs)]
95pub enum Error {
96    #[error("failed to open mshv device")]
97    OpenMshv(#[source] io::Error),
98    #[error("failed to open hvcall device")]
99    OpenHvcall(#[source] io::Error),
100    #[error("failed to open lower VTL memory device")]
101    OpenGpa(#[source] io::Error),
102    #[error("ReturnToLowerVtl")]
103    ReturnToLowerVtl(#[source] nix::Error),
104    #[error("AddVtl0Memory")]
105    AddVtl0Memory(#[source] nix::Error),
106    #[error("hcl_set_vp_register")]
107    SetVpRegister(#[source] nix::Error),
108    #[error("hcl_get_vp_register")]
109    GetVpRegister(#[source] nix::Error),
110    #[error("failed to get VP register {reg:#x?} from hypercall")]
111    GetVpRegisterHypercall {
112        #[cfg(guest_arch = "x86_64")]
113        reg: HvX64RegisterName,
114        #[cfg(guest_arch = "aarch64")]
115        reg: HvArm64RegisterName,
116        #[source]
117        err: HvError,
118    },
119    #[error("hcl_request_interrupt")]
120    RequestInterrupt(#[source] HvError),
121    #[error("hcl_cancel_vp failed")]
122    CancelVp(#[source] nix::Error),
123    #[error("failed to signal event")]
124    SignalEvent(#[source] HvError),
125    #[error("failed to post message")]
126    PostMessage(#[source] HvError),
127    #[error("failed to mmap the vp context {:?}", .1.map(|vtl| format!("for VTL {:?}", vtl)).unwrap_or("".to_string()))]
128    MmapVp(#[source] io::Error, Option<Vtl>),
129    #[error("failed to set the poll file")]
130    SetPollFile(#[source] nix::Error),
131    #[error("failed to check hcl capabilities")]
132    CheckExtensions(#[source] nix::Error),
133    #[error("failed to mmap the register page")]
134    MmapRegPage(#[source] io::Error),
135    #[error("invalid num signal events")]
136    NumSignalEvent(#[source] io::Error),
137    #[error("failed to create vtl")]
138    CreateVTL(#[source] nix::Error),
139    #[error("Gva to gpa translation failed")]
140    TranslateGvaToGpa(#[source] TranslateGvaToGpaError),
141    #[error("gpa failed vtl access check")]
142    CheckVtlAccess(#[source] HvError),
143    #[error("failed to set registers using set_vp_registers hypercall")]
144    SetRegisters(#[source] HvError),
145    #[error("Unknown register name: {0:x}")]
146    UnknownRegisterName(u32),
147    #[error("Invalid register value")]
148    InvalidRegisterValue,
149    #[error("failed to set host visibility")]
150    SetHostVisibility(#[source] nix::Error),
151    #[error("failed to allocate host overlay page")]
152    HostOverlayPageExhausted,
153    #[error("sidecar error")]
154    Sidecar(#[source] sidecar_client::SidecarError),
155    #[error("failed to open sidecar")]
156    OpenSidecar(#[source] NewSidecarClientError),
157    #[error(
158        "mismatch between requested isolation type {requested:?} and supported isolation type {supported:?}"
159    )]
160    MismatchedIsolation {
161        supported: IsolationType,
162        requested: IsolationType,
163    },
164    #[error("private page pool allocator missing, required for requested isolation type")]
165    MissingPrivateMemory,
166    #[error("failed to allocate pages for vp")]
167    AllocVp(#[source] anyhow::Error),
168}
169
170/// Error for IOCTL errors specifically.
171#[derive(Debug, Error)]
172#[error("hcl request failed")]
173pub struct IoctlError(#[source] pub(crate) nix::Error);
174
175/// Error returned when issuing hypercalls.
176#[derive(Debug, Error)]
177#[expect(missing_docs)]
178pub enum HypercallError {
179    #[error("hypercall failed with {0:?}")]
180    Hypervisor(HvError),
181    #[error("ioctl failed")]
182    Ioctl(#[source] IoctlError),
183}
184
185impl HypercallError {
186    pub(crate) fn check(r: Result<i32, nix::Error>) -> Result<(), Self> {
187        match r {
188            Ok(n) => HvStatus(n.try_into().expect("hypervisor result out of range"))
189                .result()
190                .map_err(Self::Hypervisor),
191            Err(err) => Err(Self::Ioctl(IoctlError(err))),
192        }
193    }
194}
195
196/// Errors when issuing hypercalls via the kernel direct interface.
197#[derive(Error, Debug)]
198#[expect(missing_docs)]
199pub enum HvcallError {
200    #[error(
201        "kernel rejected the hypercall, most likely due to the hypercall code not being allowed via set_allowed_hypercalls"
202    )]
203    HypercallIoctlFailed(#[source] nix::Error),
204    #[error("input parameters are larger than a page")]
205    InputParametersTooLarge,
206    #[error("output parameters are larger than a page")]
207    OutputParametersTooLarge,
208    #[error("output and input list lengths do not match")]
209    InputOutputRepListMismatch,
210}
211
212/// Error applying VTL protections.
213// TODO: move to `underhill_mem`.
214#[derive(Error, Debug)]
215#[expect(missing_docs)]
216pub enum ApplyVtlProtectionsError {
217    #[error("hypervisor failed with {output:?} when protecting pages {range} for vtl {vtl:?}")]
218    Hypervisor {
219        range: MemoryRange,
220        output: HypercallOutput,
221        #[source]
222        hv_error: HvError,
223        vtl: HvInputVtl,
224    },
225    #[error("snp failure to protect pages {range} with {permissions:x?} for vtl {vtl:?}")]
226    Snp {
227        #[source]
228        failed_operation: snp::SnpPageError,
229        range: MemoryRange,
230        permissions: x86defs::snp::SevRmpAdjust,
231        vtl: HvInputVtl,
232    },
233    #[error(
234        "tdcall failed with {error:?} when protecting pages {range} with permissions {permissions:x?} for vtl {vtl:?}"
235    )]
236    Tdx {
237        error: TdCallResultCode,
238        range: MemoryRange,
239        permissions: x86defs::tdx::TdgMemPageGpaAttr,
240        vtl: HvInputVtl,
241    },
242    #[error("no valid protections for vtl {0:?}")]
243    InvalidVtl(Vtl),
244}
245
246/// Error setting guest VSM configuration.
247#[derive(Error, Debug)]
248#[expect(missing_docs)]
249pub enum SetGuestVsmConfigError {
250    #[error("hypervisor failed to configure guest vsm to {enable_guest_vsm}")]
251    Hypervisor {
252        enable_guest_vsm: bool,
253        #[source]
254        hv_error: HvError,
255    },
256}
257
258/// Error getting the VP index from an APIC ID.
259#[derive(Error, Debug)]
260#[expect(missing_docs)]
261pub enum GetVpIndexFromApicIdError {
262    #[error("hypervisor failed when querying vp index for {apic_id}")]
263    Hypervisor {
264        #[source]
265        hv_error: HvError,
266        apic_id: u32,
267    },
268}
269
270/// Error setting VSM partition configuration.
271#[derive(Error, Debug)]
272#[expect(missing_docs)]
273pub enum SetVsmPartitionConfigError {
274    #[error("hypervisor failed when configuring vsm partition config {config:?}")]
275    Hypervisor {
276        config: HvRegisterVsmPartitionConfig,
277        #[source]
278        hv_error: HvError,
279    },
280}
281
282/// Error translating a GVA to a GPA.
283#[derive(Error, Debug)]
284#[expect(missing_docs)]
285pub enum TranslateGvaToGpaError {
286    #[error("hypervisor failed when translating gva {gva:#x}")]
287    Hypervisor {
288        gva: u64,
289        #[source]
290        hv_error: HvError,
291    },
292    #[error("sidecar kernel failed when translating gva {gva:#x}")]
293    Sidecar {
294        gva: u64,
295        #[source]
296        error: sidecar_client::SidecarError,
297    },
298}
299
300/// Result from [`Hcl::check_vtl_access`] if vtl permissions were violated
301#[derive(Debug)]
302pub struct CheckVtlAccessResult {
303    /// The intercepting VTL.
304    pub vtl: Vtl,
305    /// The flags that were denied.
306    pub denied_flags: HvMapGpaFlags,
307}
308
309/// Error accepting pages.
310// TODO: move to `underhill_mem`.
311#[derive(Error, Debug)]
312#[expect(missing_docs)]
313pub enum AcceptPagesError {
314    #[error("hypervisor failed to accept pages {range} with {output:?}")]
315    Hypervisor {
316        range: MemoryRange,
317        output: HypercallOutput,
318        #[source]
319        hv_error: HvError,
320    },
321    #[error("snp failure to protect pages {range}")]
322    Snp {
323        #[source]
324        failed_operation: snp::SnpPageError,
325        range: MemoryRange,
326    },
327    #[error("tdcall failure when accepting pages {range}")]
328    Tdx {
329        #[source]
330        error: tdcall::AcceptPagesError,
331        range: MemoryRange,
332    },
333}
334
335// Action translation (to HVCALL) for pin/unpin GPA range.
336#[derive(Debug, Copy, Clone)]
337enum GpaPinUnpinAction {
338    PinGpaRange,
339    UnpinGpaRange,
340}
341
342/// Error pinning or unpinning a GPA range.
343#[derive(Error, Debug)]
344#[error("partial success: {ranges_processed} operations succeeded, but encountered an error")]
345struct PinUnpinError {
346    ranges_processed: usize,
347    #[source]
348    error: HvError,
349}
350
351/// Result of the translate GVA hypercall from [`Hcl`].
352pub struct TranslateResult {
353    /// The GPA that the GVA translated to.
354    pub gpa_page: u64,
355    /// Whether the page was an overlay page.
356    pub overlay_page: bool, // Note: hardcoded to false on WHP
357}
358
359/// Possible types for rep hypercalls
360enum HvcallRepInput<'a, T> {
361    /// The actual elements to rep over
362    Elements(&'a [T]),
363    /// The elements for the rep are implied and only a count is needed
364    Count(u16),
365}
366
367mod ioctls {
368    #![allow(non_camel_case_types)]
369
370    use crate::protocol;
371    use hvdef::hypercall::HvRegisterAssoc;
372    use nix::ioctl_none;
373    use nix::ioctl_read;
374    use nix::ioctl_readwrite;
375    use nix::ioctl_write_ptr;
376
377    // The unsafe interface to the `mshv` kernel module comprises
378    // the following IOCTLs.
379    const MSHV_IOCTL: u8 = 0xb8;
380    const MSHV_VTL_RETURN_TO_LOWER_VTL: u16 = 0x27;
381    const MSHV_SET_VP_REGISTERS: u16 = 0x6;
382    const MSHV_GET_VP_REGISTERS: u16 = 0x5;
383    const MSHV_HVCALL_SETUP: u16 = 0x1E;
384    const MSHV_HVCALL: u16 = 0x1F;
385    const MSHV_VTL_ADD_VTL0_MEMORY: u16 = 0x21;
386    const MSHV_VTL_SET_POLL_FILE: u16 = 0x25;
387    const MSHV_CREATE_VTL: u16 = 0x1D;
388    const MSHV_CHECK_EXTENSION: u16 = 0x00;
389    const MSHV_VTL_PVALIDATE: u16 = 0x28;
390    const MSHV_VTL_RMPADJUST: u16 = 0x29;
391    const MSHV_VTL_TDCALL: u16 = 0x32;
392    const MSHV_VTL_READ_VMX_CR4_FIXED1: u16 = 0x33;
393    const MSHV_VTL_GUEST_VSM_VMSA_PFN: u16 = 0x34;
394    const MSHV_VTL_RMPQUERY: u16 = 0x35;
395    const MSHV_INVLPGB: u16 = 0x36;
396    const MSHV_TLBSYNC: u16 = 0x37;
397    const MSHV_KICKCPUS: u16 = 0x38;
398
399    #[repr(C)]
400    #[derive(Copy, Clone)]
401    pub struct mshv_vp_registers {
402        pub count: ::std::os::raw::c_int,
403        pub regs: *mut HvRegisterAssoc,
404    }
405
406    #[repr(C, packed)]
407    #[derive(Copy, Clone)]
408    pub struct mshv_pvalidate {
409        /// Execute the pvalidate instruction on the set of memory pages specified
410        pub start_pfn: ::std::os::raw::c_ulonglong,
411        pub page_count: ::std::os::raw::c_ulonglong,
412        pub validate: ::std::os::raw::c_uchar,
413        pub terminate_on_failure: ::std::os::raw::c_uchar,
414        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
415        /// it's device memory.
416        pub ram: u8,
417        pub padding: [::std::os::raw::c_uchar; 1],
418    }
419
420    #[repr(C, packed)]
421    #[derive(Copy, Clone)]
422    pub struct mshv_rmpadjust {
423        /// Execute the rmpadjust instruction on the set of memory pages specified
424        pub start_pfn: ::std::os::raw::c_ulonglong,
425        pub page_count: ::std::os::raw::c_ulonglong,
426        pub value: ::std::os::raw::c_ulonglong,
427        pub terminate_on_failure: ::std::os::raw::c_uchar,
428        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
429        /// it's device memory.
430        pub ram: u8,
431        pub padding: [::std::os::raw::c_uchar; 6],
432    }
433
434    #[repr(C, packed)]
435    #[derive(Copy, Clone)]
436    pub struct mshv_rmpquery {
437        /// Execute the rmpquery instruction on the set of memory pages specified
438        pub start_pfn: ::std::os::raw::c_ulonglong,
439        pub page_count: ::std::os::raw::c_ulonglong,
440        pub terminate_on_failure: ::std::os::raw::c_uchar,
441        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
442        /// it's device memory.
443        pub ram: u8,
444        pub padding: [::std::os::raw::c_uchar; 6],
445        /// Output array for the flags, must have at least `page_count` entries.
446        pub flags: *mut ::std::os::raw::c_ulonglong,
447        /// Output array for the page sizes, must have at least `page_count` entries.
448        pub page_size: *mut ::std::os::raw::c_ulonglong,
449        /// Output for the amount of pages processed, a scalar.
450        pub pages_processed: *mut ::std::os::raw::c_ulonglong,
451    }
452
453    #[repr(C, packed)]
454    #[derive(Copy, Clone)]
455    pub struct mshv_tdcall {
456        pub rax: u64, // Call code and returned status
457        pub rcx: u64,
458        pub rdx: u64,
459        pub r8: u64,
460        pub r9: u64,
461        pub r10_out: u64, // only supported as output
462        pub r11_out: u64, // only supported as output
463    }
464
465    ioctl_none!(
466        /// Relinquish the processor to VTL0.
467        hcl_return_to_lower_vtl,
468        MSHV_IOCTL,
469        MSHV_VTL_RETURN_TO_LOWER_VTL
470    );
471
472    ioctl_write_ptr!(
473        /// Set a VTL0 register for the current processor of the current
474        /// partition.
475        /// It is not allowed to set registers for other processors or
476        /// other partitions for security and coherency reasons.
477        hcl_set_vp_register,
478        MSHV_IOCTL,
479        MSHV_SET_VP_REGISTERS,
480        mshv_vp_registers
481    );
482
483    ioctl_readwrite!(
484        /// Get a VTL0 register for the current processor of the current
485        /// partition.
486        /// It is not allowed to get registers of other processors or
487        /// other partitions for security and coherency reasons.
488        hcl_get_vp_register,
489        MSHV_IOCTL,
490        MSHV_GET_VP_REGISTERS,
491        mshv_vp_registers
492    );
493
494    ioctl_write_ptr!(
495        /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support
496        /// DMA from the guest.
497        hcl_add_vtl0_memory,
498        MSHV_IOCTL,
499        MSHV_VTL_ADD_VTL0_MEMORY,
500        protocol::hcl_pfn_range_t
501    );
502
503    ioctl_write_ptr!(
504        /// Sets the file to be polled while running a VP in VTL0. If the file
505        /// becomes readable, then the VP run will be cancelled.
506        hcl_set_poll_file,
507        MSHV_IOCTL,
508        MSHV_VTL_SET_POLL_FILE,
509        protocol::hcl_set_poll_file
510    );
511
512    ioctl_write_ptr!(
513        /// Sets up the hypercall allow map. Allowed once
514        /// per fd.
515        hcl_hvcall_setup,
516        MSHV_IOCTL,
517        MSHV_HVCALL_SETUP,
518        protocol::hcl_hvcall_setup
519    );
520
521    ioctl_readwrite!(
522        /// Performs a hypercall from user mode.
523        hcl_hvcall,
524        MSHV_IOCTL,
525        MSHV_HVCALL,
526        protocol::hcl_hvcall
527    );
528
529    ioctl_write_ptr!(
530        /// Executes the pvalidate instruction on a page range.
531        hcl_pvalidate_pages,
532        MSHV_IOCTL,
533        MSHV_VTL_PVALIDATE,
534        mshv_pvalidate
535    );
536
537    ioctl_write_ptr!(
538        /// Executes the rmpadjust instruction on a page range.
539        hcl_rmpadjust_pages,
540        MSHV_IOCTL,
541        MSHV_VTL_RMPADJUST,
542        mshv_rmpadjust
543    );
544
545    ioctl_write_ptr!(
546        /// Executes the rmpquery instruction on a page range.
547        hcl_rmpquery_pages,
548        MSHV_IOCTL,
549        MSHV_VTL_RMPQUERY,
550        mshv_rmpquery
551    );
552
553    ioctl_readwrite!(
554        /// Executes a tdcall.
555        hcl_tdcall,
556        MSHV_IOCTL,
557        MSHV_VTL_TDCALL,
558        mshv_tdcall
559    );
560
561    ioctl_read!(
562        hcl_read_vmx_cr4_fixed1,
563        MSHV_IOCTL,
564        MSHV_VTL_READ_VMX_CR4_FIXED1,
565        u64
566    );
567
568    ioctl_readwrite!(
569        hcl_read_guest_vsm_page_pfn,
570        MSHV_IOCTL,
571        MSHV_VTL_GUEST_VSM_VMSA_PFN,
572        u64
573    );
574
575    pub const HCL_CAP_REGISTER_PAGE: u32 = 1;
576    pub const HCL_CAP_VTL_RETURN_ACTION: u32 = 2;
577    pub const HCL_CAP_DR6_SHARED: u32 = 3;
578
579    ioctl_write_ptr!(
580        /// Check for the presence of an extension capability.
581        hcl_check_extension,
582        MSHV_IOCTL,
583        MSHV_CHECK_EXTENSION,
584        u32
585    );
586
587    ioctl_read!(mshv_create_vtl, MSHV_IOCTL, MSHV_CREATE_VTL, u8);
588
589    #[repr(C)]
590    pub struct mshv_invlpgb {
591        pub rax: u64,
592        pub _pad0: u32,
593        pub edx: u32,
594        pub _pad1: u32,
595        pub ecx: u32,
596    }
597
598    ioctl_write_ptr!(
599        /// Issue an INVLPGB instruction.
600        hcl_invlpgb,
601        MSHV_IOCTL,
602        MSHV_INVLPGB,
603        mshv_invlpgb
604    );
605
606    ioctl_none!(
607        /// Issue a TLBSYNC instruction.
608        hcl_tlbsync,
609        MSHV_IOCTL,
610        MSHV_TLBSYNC
611    );
612
613    ioctl_write_ptr!(
614        /// Kick CPUs.
615        hcl_kickcpus,
616        MSHV_IOCTL,
617        MSHV_KICKCPUS,
618        protocol::hcl_kick_cpus
619    );
620}
621
622/// The `/dev/mshv_vtl_low` device for accessing VTL0 memory.
623pub struct MshvVtlLow {
624    file: File,
625}
626
627impl MshvVtlLow {
628    /// Opens the device.
629    pub fn new() -> Result<Self, Error> {
630        let file = fs_err::OpenOptions::new()
631            .read(true)
632            .write(true)
633            .open("/dev/mshv_vtl_low")
634            .map_err(Error::OpenGpa)?;
635
636        Ok(Self { file: file.into() })
637    }
638
639    /// Gets the device file.
640    pub fn get(&self) -> &File {
641        &self.file
642    }
643
644    /// The flag to set in the file offset to map guest memory as shared instead
645    /// of private.
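    ///
    /// As a rough sketch (not a prescriptive recipe), mapping guest memory as
    /// shared would OR this flag into the page-aligned file offset passed to
    /// `mmap`; `gpa_offset` and `len` below are placeholders, and the `libc`
    /// crate is used purely for illustration:
    ///
    /// ```ignore
    /// let dev = MshvVtlLow::new()?;
    /// let offset = gpa_offset | MshvVtlLow::SHARED_MEMORY_FLAG;
    /// // SAFETY: example only; the caller must choose a valid offset and length.
    /// let ptr = unsafe {
    ///     libc::mmap(
    ///         std::ptr::null_mut(),
    ///         len,
    ///         libc::PROT_READ | libc::PROT_WRITE,
    ///         libc::MAP_SHARED,
    ///         dev.get().as_raw_fd(),
    ///         offset as libc::off_t,
    ///     )
    /// };
    /// ```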
646    pub const SHARED_MEMORY_FLAG: u64 = 1 << 63;
647}
648
649/// An open `/dev/mshv` device file.
650pub struct Mshv {
651    file: File,
652}
653
654impl Mshv {
655    /// Opens the mshv device.
656    pub fn new() -> Result<Self, Error> {
657        let file = fs_err::OpenOptions::new()
658            .read(true)
659            .write(true)
660            .open("/dev/mshv")
661            .map_err(Error::OpenMshv)?;
662
663        Ok(Self { file: file.into() })
664    }
665
666    fn check_extension(&self, cap: u32) -> Result<bool, Error> {
667        // SAFETY: calling IOCTL as documented, with no special requirements.
668        let supported = unsafe {
669            hcl_check_extension(self.file.as_raw_fd(), &cap).map_err(Error::CheckExtensions)?
670        };
671        Ok(supported != 0)
672    }
673
674    /// Opens an mshv_vtl device file.
675    pub fn create_vtl(&self) -> Result<MshvVtl, Error> {
676        let cap = &mut 0_u8;
677        // SAFETY: calling IOCTL as documented, with no special requirements.
678        let supported =
679            unsafe { mshv_create_vtl(self.file.as_raw_fd(), cap).map_err(Error::CreateVTL)? };
680        // SAFETY: calling IOCTL as documented, with no special requirements.
681        let vtl_file = unsafe { File::from_raw_fd(supported) };
682        Ok(MshvVtl { file: vtl_file })
683    }
684}
685
686/// An open mshv_vtl device file.
687#[derive(Debug)]
688pub struct MshvVtl {
689    file: File,
690}
691
692impl MshvVtl {
693    /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support DMA from the guest.
694    pub fn add_vtl0_memory(&self, mem_range: MemoryRange, shared: bool) -> Result<(), Error> {
695        let flags = if shared {
696            MshvVtlLow::SHARED_MEMORY_FLAG / HV_PAGE_SIZE
697        } else {
698            0
699        };
700        let ram_disposition = protocol::hcl_pfn_range_t {
701            start_pfn: mem_range.start_4k_gpn() | flags,
702            last_pfn: mem_range.end_4k_gpn(),
703        };
704
705        // SAFETY: calling IOCTL as documented, with no special requirements.
706        unsafe {
707            hcl_add_vtl0_memory(self.file.as_raw_fd(), &ram_disposition)
708                .map_err(Error::AddVtl0Memory)?;
709        }
710
711        Ok(())
712    }
713}
714
715#[cfg(guest_arch = "x86_64")]
716fn is_vtl_shared_mtrr(reg: HvX64RegisterName) -> bool {
717    matches!(
718        reg,
719        HvX64RegisterName::MsrMtrrCap
720            | HvX64RegisterName::MsrMtrrDefType
721            | HvX64RegisterName::MsrMtrrPhysBase0
722            | HvX64RegisterName::MsrMtrrPhysBase1
723            | HvX64RegisterName::MsrMtrrPhysBase2
724            | HvX64RegisterName::MsrMtrrPhysBase3
725            | HvX64RegisterName::MsrMtrrPhysBase4
726            | HvX64RegisterName::MsrMtrrPhysBase5
727            | HvX64RegisterName::MsrMtrrPhysBase6
728            | HvX64RegisterName::MsrMtrrPhysBase7
729            | HvX64RegisterName::MsrMtrrPhysBase8
730            | HvX64RegisterName::MsrMtrrPhysBase9
731            | HvX64RegisterName::MsrMtrrPhysBaseA
732            | HvX64RegisterName::MsrMtrrPhysBaseB
733            | HvX64RegisterName::MsrMtrrPhysBaseC
734            | HvX64RegisterName::MsrMtrrPhysBaseD
735            | HvX64RegisterName::MsrMtrrPhysBaseE
736            | HvX64RegisterName::MsrMtrrPhysBaseF
737            | HvX64RegisterName::MsrMtrrPhysMask0
738            | HvX64RegisterName::MsrMtrrPhysMask1
739            | HvX64RegisterName::MsrMtrrPhysMask2
740            | HvX64RegisterName::MsrMtrrPhysMask3
741            | HvX64RegisterName::MsrMtrrPhysMask4
742            | HvX64RegisterName::MsrMtrrPhysMask5
743            | HvX64RegisterName::MsrMtrrPhysMask6
744            | HvX64RegisterName::MsrMtrrPhysMask7
745            | HvX64RegisterName::MsrMtrrPhysMask8
746            | HvX64RegisterName::MsrMtrrPhysMask9
747            | HvX64RegisterName::MsrMtrrPhysMaskA
748            | HvX64RegisterName::MsrMtrrPhysMaskB
749            | HvX64RegisterName::MsrMtrrPhysMaskC
750            | HvX64RegisterName::MsrMtrrPhysMaskD
751            | HvX64RegisterName::MsrMtrrPhysMaskE
752            | HvX64RegisterName::MsrMtrrPhysMaskF
753            | HvX64RegisterName::MsrMtrrFix64k00000
754            | HvX64RegisterName::MsrMtrrFix16k80000
755            | HvX64RegisterName::MsrMtrrFix16kA0000
756            | HvX64RegisterName::MsrMtrrFix4kC0000
757            | HvX64RegisterName::MsrMtrrFix4kC8000
758            | HvX64RegisterName::MsrMtrrFix4kD0000
759            | HvX64RegisterName::MsrMtrrFix4kD8000
760            | HvX64RegisterName::MsrMtrrFix4kE0000
761            | HvX64RegisterName::MsrMtrrFix4kE8000
762            | HvX64RegisterName::MsrMtrrFix4kF0000
763            | HvX64RegisterName::MsrMtrrFix4kF8000
764    )
765}
766
767/// Indicates whether `reg` is shared across VTLs.
768///
769/// This function is not complete: DR6 may or may not be shared, depending on
770/// the processor type; the caller needs to check HvRegisterVsmCapabilities.
771/// Some MSRs are not included here as they are not represented in
772/// HvX64RegisterName, including MSR_TSC_FREQUENCY, MSR_MCG_CAP,
773/// MSR_MCG_STATUS, MSR_RESET, MSR_GUEST_IDLE, and MSR_DEBUG_DEVICE_OPTIONS.
774#[cfg(guest_arch = "x86_64")]
775fn is_vtl_shared_reg(reg: HvX64RegisterName) -> bool {
776    is_vtl_shared_mtrr(reg)
777        || matches!(
778            reg,
779            HvX64RegisterName::VpIndex
780                | HvX64RegisterName::VpRuntime
781                | HvX64RegisterName::TimeRefCount
782                | HvX64RegisterName::Rax
783                | HvX64RegisterName::Rbx
784                | HvX64RegisterName::Rcx
785                | HvX64RegisterName::Rdx
786                | HvX64RegisterName::Rsi
787                | HvX64RegisterName::Rdi
788                | HvX64RegisterName::Rbp
789                | HvX64RegisterName::Cr2
790                | HvX64RegisterName::R8
791                | HvX64RegisterName::R9
792                | HvX64RegisterName::R10
793                | HvX64RegisterName::R11
794                | HvX64RegisterName::R12
795                | HvX64RegisterName::R13
796                | HvX64RegisterName::R14
797                | HvX64RegisterName::R15
798                | HvX64RegisterName::Dr0
799                | HvX64RegisterName::Dr1
800                | HvX64RegisterName::Dr2
801                | HvX64RegisterName::Dr3
802                | HvX64RegisterName::Xmm0
803                | HvX64RegisterName::Xmm1
804                | HvX64RegisterName::Xmm2
805                | HvX64RegisterName::Xmm3
806                | HvX64RegisterName::Xmm4
807                | HvX64RegisterName::Xmm5
808                | HvX64RegisterName::Xmm6
809                | HvX64RegisterName::Xmm7
810                | HvX64RegisterName::Xmm8
811                | HvX64RegisterName::Xmm9
812                | HvX64RegisterName::Xmm10
813                | HvX64RegisterName::Xmm11
814                | HvX64RegisterName::Xmm12
815                | HvX64RegisterName::Xmm13
816                | HvX64RegisterName::Xmm14
817                | HvX64RegisterName::Xmm15
818                | HvX64RegisterName::FpMmx0
819                | HvX64RegisterName::FpMmx1
820                | HvX64RegisterName::FpMmx2
821                | HvX64RegisterName::FpMmx3
822                | HvX64RegisterName::FpMmx4
823                | HvX64RegisterName::FpMmx5
824                | HvX64RegisterName::FpMmx6
825                | HvX64RegisterName::FpMmx7
826                | HvX64RegisterName::FpControlStatus
827                | HvX64RegisterName::XmmControlStatus
828                | HvX64RegisterName::Xfem
829        )
830}
831
832/// Indicates whether `reg` is shared across VTLs.
833#[cfg(guest_arch = "aarch64")]
834fn is_vtl_shared_reg(reg: HvArm64RegisterName) -> bool {
835    use hvdef::HvArm64RegisterName;
836
837    matches!(
838        reg,
839        HvArm64RegisterName::X0
840            | HvArm64RegisterName::X1
841            | HvArm64RegisterName::X2
842            | HvArm64RegisterName::X3
843            | HvArm64RegisterName::X4
844            | HvArm64RegisterName::X5
845            | HvArm64RegisterName::X6
846            | HvArm64RegisterName::X7
847            | HvArm64RegisterName::X8
848            | HvArm64RegisterName::X9
849            | HvArm64RegisterName::X10
850            | HvArm64RegisterName::X11
851            | HvArm64RegisterName::X12
852            | HvArm64RegisterName::X13
853            | HvArm64RegisterName::X14
854            | HvArm64RegisterName::X15
855            | HvArm64RegisterName::X16
856            | HvArm64RegisterName::X17
857            | HvArm64RegisterName::X19
858            | HvArm64RegisterName::X20
859            | HvArm64RegisterName::X21
860            | HvArm64RegisterName::X22
861            | HvArm64RegisterName::X23
862            | HvArm64RegisterName::X24
863            | HvArm64RegisterName::X25
864            | HvArm64RegisterName::X26
865            | HvArm64RegisterName::X27
866            | HvArm64RegisterName::X28
867            | HvArm64RegisterName::XFp
868            | HvArm64RegisterName::XLr
869    )
870}
871
872/// The `/dev/mshv_hvcall` device for issuing hypercalls directly to the
873/// hypervisor.
874#[derive(Debug)]
875pub struct MshvHvcall(File);
876
877impl MshvHvcall {
878    /// Opens the device.
879    pub fn new() -> Result<Self, Error> {
880        let file = fs_err::OpenOptions::new()
881            .read(true)
882            .write(true)
883            .open("/dev/mshv_hvcall")
884            .map_err(Error::OpenHvcall)?;
885
886        Ok(Self(file.into()))
887    }
888
889    /// Set allowed hypercalls.
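    ///
    /// A usage sketch (the exact set of allowed codes depends on the caller;
    /// the two codes below appear elsewhere in this module and are only
    /// illustrative):
    ///
    /// ```ignore
    /// let mshv_hvcall = MshvHvcall::new()?;
    /// mshv_hvcall.set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallGetVpRegisters,
    ///     hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
    /// ]);
    /// ```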
890    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
891        type ItemType = u64;
892        let item_size_bytes = size_of::<ItemType>();
893        let item_size_bits = item_size_bytes * 8;
894
895        let mut allow_bitmap = Vec::<ItemType>::new();
896        for &code in codes {
897            let map_index = (code.0 as usize) / item_size_bits;
898            if map_index >= allow_bitmap.len() {
899                allow_bitmap.resize(map_index + 1, 0);
900            }
901            allow_bitmap[map_index] |= (1 as ItemType) << (code.0 % item_size_bits as u16);
902        }
903
904        let hvcall_setup = protocol::hcl_hvcall_setup {
905            allow_bitmap_size: (allow_bitmap.len() * item_size_bytes) as u64,
906            allow_bitmap_ptr: allow_bitmap.as_ptr(),
907        };
908
909        // SAFETY: following the IOCTL definition.
910        unsafe {
911            hcl_hvcall_setup(self.0.as_raw_fd(), &hvcall_setup)
912                .expect("Hypercall setup IOCTL must be supported");
913        }
914    }
915
916    /// Accepts VTL 0 pages with no host visibility.
917    ///
918    /// [`HypercallCode::HvCallAcceptGpaPages`] must be allowed.
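    ///
    /// A call sketch (`range` and `memory_type` are placeholders chosen by the
    /// caller):
    ///
    /// ```ignore
    /// mshv_hvcall.set_allowed_hypercalls(&[hvdef::HypercallCode::HvCallAcceptGpaPages]);
    /// mshv_hvcall.accept_gpa_pages(range, memory_type)?;
    /// ```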
919    pub fn accept_gpa_pages(
920        &self,
921        range: MemoryRange,
922        memory_type: hvdef::hypercall::AcceptMemoryType,
923    ) -> Result<(), AcceptPagesError> {
924        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
925            - size_of::<hvdef::hypercall::AcceptGpaPages>())
926            / size_of::<u64>();
927
928        let span = tracing::info_span!("accept_pages", CVM_ALLOWED, ?range);
929        let _enter = span.enter();
930
931        let mut current_page = range.start() / HV_PAGE_SIZE;
932        let end = range.end() / HV_PAGE_SIZE;
933
934        while current_page < end {
935            let header = hvdef::hypercall::AcceptGpaPages {
936                partition_id: HV_PARTITION_ID_SELF,
937                page_attributes: hvdef::hypercall::AcceptPagesAttributes::new()
938                    .with_memory_type(memory_type.0)
939                    .with_host_visibility(HostVisibilityType::PRIVATE)
940                    .with_vtl_set(0), // vtl protections cannot be applied for VTL 0 memory
941                vtl_permission_set: hvdef::hypercall::VtlPermissionSet {
942                    vtl_permission_from_1: [0; hvdef::hypercall::HV_VTL_PERMISSION_SET_SIZE],
943                },
944                gpa_page_base: current_page,
945            };
946
947            let remaining_pages = end - current_page;
948            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
949
950            // SAFETY: The input header and rep slice are the correct types for
951            //         this hypercall. A dummy type of u8 is provided to satisfy
952            //         the compiler for input and output rep type. The given
953            //         input and slices are valid references while this function
954            //         is called.
955            //
956            //         The hypercall output is validated right after the hypercall is issued.
957            let output = unsafe {
958                self.hvcall_rep::<hvdef::hypercall::AcceptGpaPages, u8, u8>(
959                    HypercallCode::HvCallAcceptGpaPages,
960                    &header,
961                    HvcallRepInput::Count(count as u16),
962                    None,
963                )
964                .expect("kernel hypercall submission should always succeed")
965            };
966
967            output
968                .result()
969                .map_err(|err| AcceptPagesError::Hypervisor {
970                    range: MemoryRange::from_4k_gpn_range(current_page..current_page + count),
971                    output,
972                    hv_error: err,
973                })?;
974
975            current_page += count;
976
977            assert_eq!(output.elements_processed() as u64, count);
978        }
979        Ok(())
980    }
981
982    /// Modifies the host visibility of the given pages.
983    ///
984    /// [`HypercallCode::HvCallModifySparseGpaPageHostVisibility`] must be
985    /// allowed.
986    ///
987    /// Returns on error, the hypervisor error and the number of pages
988    /// processed.
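    ///
    /// A minimal sketch, assuming `gpns` is a caller-provided list of VTL0
    /// guest page numbers:
    ///
    /// ```ignore
    /// if let Err((err, processed)) =
    ///     mshv_hvcall.modify_gpa_visibility(HostVisibilityType::PRIVATE, &gpns)
    /// {
    ///     // `processed` pages had their visibility changed before the failure.
    ///     return Err(err);
    /// }
    /// ```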
989    //
990    // TODO SNP: this isn't really safe. Probably this should be an IOCTL in the
991    // kernel so that it can validate the page ranges are VTL0 memory.
992    pub fn modify_gpa_visibility(
993        &self,
994        host_visibility: HostVisibilityType,
995        mut gpns: &[u64],
996    ) -> Result<(), (HvError, usize)> {
997        const GPNS_PER_CALL: usize = (HV_PAGE_SIZE as usize
998            - size_of::<hvdef::hypercall::ModifySparsePageVisibility>())
999            / size_of::<u64>();
1000
1001        while !gpns.is_empty() {
1002            let n = gpns.len().min(GPNS_PER_CALL);
1003            // SAFETY: The input header and rep slice are the correct types for this hypercall.
1004            //         The hypercall output is validated right after the hypercall is issued.
1005            let result = unsafe {
1006                self.hvcall_rep(
1007                    HypercallCode::HvCallModifySparseGpaPageHostVisibility,
1008                    &hvdef::hypercall::ModifySparsePageVisibility {
1009                        partition_id: HV_PARTITION_ID_SELF,
1010                        host_visibility: ModifyHostVisibility::new()
1011                            .with_host_visibility(host_visibility),
1012                        reserved: 0,
1013                    },
1014                    HvcallRepInput::Elements(&gpns[..n]),
1015                    None::<&mut [u8]>,
1016                )
1017                .unwrap()
1018            };
1019
1020            match result.result() {
1021                Ok(()) => {
1022                    assert_eq!({ result.elements_processed() }, n);
1023                }
1024                Err(HvError::Timeout) => {}
1025                Err(e) => return Err((e, result.elements_processed())),
1026            }
1027            gpns = &gpns[result.elements_processed()..];
1028        }
1029        Ok(())
1030    }
1031
1032    /// Given a constructed hcl_hvcall protocol object, issues an IOCTL to invoke a hypercall via
1033    /// the direct hypercall kernel interface. This function retries the hypercall if the
1034    /// hypervisor reports that it timed out.
1035    ///
1036    /// Input and output data are referenced as pointers in the call object.
1037    ///
1038    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A
1039    /// caller must check the return value for the result of the hypercall.
1040    ///
1041    /// Before invoking hypercalls, a list of hypercalls that are allowed
1042    /// has to be set with `Hcl::set_allowed_hypercalls`:
1043    /// ```ignore
1044    /// set_allowed_hypercalls(&[
1045    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
1046    ///     hvdef::HypercallCode::HvCallInstallIntercept,
1047    /// ]);
1048    /// ```
1049    /// # Safety
1050    /// This function makes no guarantees that the given input header, input and output types are
1051    /// valid for the given hypercall. It is the caller's responsibility to use the correct types
1052    /// with the specified hypercall.
1053    ///
1054    /// The caller must ensure that the input and output data are valid for the lifetime of this
1055    /// call.
1056    ///
1057    /// A caller must check the returned [HypercallOutput] for success or failure from the
1058    /// hypervisor.
1059    ///
1060    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be
1061    /// validated by the caller if needed.
1062    unsafe fn invoke_hvcall_ioctl(
1063        &self,
1064        mut call_object: protocol::hcl_hvcall,
1065    ) -> Result<HypercallOutput, HvcallError> {
1066        loop {
1067            // SAFETY: following the IOCTL definition. The data referenced in the call
1068            // lives as long as `self` does thus the lifetime elision doesn't contradict
1069            // the compiler's invariants.
1070            //
1071            // The hypervisor is trusted to fill out the output page with a valid
1072            // representation of an instance the output type, except in the case of hardware
1073            // isolated VMs where the caller must validate output as needed.
1074            unsafe {
1075                hcl_hvcall(self.0.as_raw_fd(), &mut call_object)
1076                    .map_err(HvcallError::HypercallIoctlFailed)?;
1077            }
1078
1079            if call_object.status.call_status() == Err(HvError::Timeout).into() {
1080                // Any hypercall can timeout, even one that doesn't have reps. Continue processing
1081                // from wherever the hypervisor left off.  The rep start index isn't checked for
1082                // validity, since it is only being used as an input to the untrusted hypervisor.
1083                // This applies to both simple and rep hypercalls.
1084                call_object
1085                    .control
1086                    .set_rep_start(call_object.status.elements_processed());
1087            } else {
1088                if call_object.control.rep_count() == 0 {
1089                    // For non-rep hypercalls, the elements processed field should be 0.
1090                    assert_eq!(call_object.status.elements_processed(), 0);
1091                } else {
1092                    // Hardware isolated VMs cannot trust output from the hypervisor, but check for
1093                    // consistency between the number of elements processed and the expected count. A
1094                    // violation of this assertion indicates a buggy or malicious hypervisor.
1095                    assert!(
1096                        (call_object.status.result().is_ok()
1097                            && call_object.control.rep_count()
1098                                == call_object.status.elements_processed())
1099                            || (call_object.status.result().is_err()
1100                                && call_object.control.rep_count()
1101                                    > call_object.status.elements_processed())
1102                    );
1103                }
1104
1105                return Ok(call_object.status);
1106            }
1107        }
1108    }
1109
1110    /// Issues a non-rep hypercall to the hypervisor via the direct hypercall kernel interface.
1111    /// This is not intended to be used directly by external callers; rather, it is meant for writing safe hypercall wrappers.
1112    /// This call constructs the appropriate hypercall input control from the described parameters.
1113    ///
1114    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
1115    /// return value for the result of the hypercall.
1116    ///
1117    /// `code` is the hypercall code.
1118    /// `input` is the input type required by the hypercall.
1119    /// `output` is the output type required by the hypercall.
1120    ///
1121    /// Before invoking hypercalls, a list of hypercalls that are allowed
1122    /// has to be set with `Hcl::set_allowed_hypercalls`:
1123    /// ```ignore
1124    /// set_allowed_hypercalls(&[
1125    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
1126    ///     hvdef::HypercallCode::HvCallInstallIntercept,
1127    /// ]);
1128    /// ```
1129    /// # Safety
1130    /// This function makes no guarantees that the given input header, input and output types are valid for the
1131    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
1132    ///
1133    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
1134    ///
1135    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
1136    /// caller if needed.
1137    unsafe fn hvcall<I, O>(
1138        &self,
1139        code: HypercallCode,
1140        input: &I,
1141        output: &mut O,
1142    ) -> Result<HypercallOutput, HvcallError>
1143    where
1144        I: IntoBytes + Sized + Immutable + KnownLayout,
1145        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1146    {
1147        const fn assert_size<I, O>()
1148        where
1149            I: Sized,
1150            O: Sized,
1151        {
1152            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
1153            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
1154        }
1155        assert_size::<I, O>();
1156
1157        let control = hvdef::hypercall::Control::new().with_code(code.0);
1158
1159        let call_object = protocol::hcl_hvcall {
1160            control,
1161            input_data: input.as_bytes().as_ptr().cast(),
1162            input_size: size_of::<I>(),
1163            status: FromZeros::new_zeroed(),
1164            output_data: output.as_bytes().as_ptr().cast(),
1165            output_size: size_of::<O>(),
1166        };
1167
1168        // SAFETY: The data referenced in the call lives as long as `self` does.
1169        unsafe { self.invoke_hvcall_ioctl(call_object) }
1170    }
1171
1172    /// Issues a rep hypercall to the hypervisor via the direct hypercall kernel
1173    /// interface. Like the non-rep version, this is not intended to be used
1174    /// externally other than to construct safe wrappers. This call constructs
1175    /// the appropriate hypercall input control from the described parameters.
1176    ///
1177    /// `Ok(HypercallOutput)` is returned if the kernel was successful in
1178    /// issuing the hypercall. A caller must check the return value for the
1179    /// result of the hypercall.
1180    ///
1181    /// `code` is the hypercall code. `input_header` is the hypercall fixed
1182    /// length input header. Variable length headers are not supported.
1183    /// `input_rep` is the list of input elements. The length of the slice is
1184    /// used as the rep count.
1185    ///
1186    /// `output_rep` is the optional output rep list. A caller must check the
1187    /// returned [HypercallOutput] for the number of valid elements in this
1188    /// list.
1189    ///
1190    /// # Safety
1191    /// This function makes no guarantees that the given input header, input rep
1192    /// and output rep types are valid for the given hypercall. It is the
1193    /// caller's responsibility to use the correct types with the specified
1194    /// hypercall.
1195    ///
1196    /// A caller must check the returned [HypercallOutput] for success or
1197    /// failure from the hypervisor and processed rep count.
1198    ///
1199    /// Hardware isolated VMs cannot trust output from the hypervisor. This
1200    /// routine will ensure that the hypervisor either returns success with all
1201    /// elements processed, or returns failure with an incomplete number of
1202    /// elements processed. Actual validation of the output elements is the
1203    /// responsibility of the caller.
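    ///
    /// For illustration, a rep invocation in this module looks roughly like
    /// the following (a sketch mirroring `get_vp_register_for_vtl_inner`
    /// below):
    ///
    /// ```ignore
    /// let status = unsafe {
    ///     self.hvcall_rep(
    ///         HypercallCode::HvCallGetVpRegisters,
    ///         &header,
    ///         HvcallRepInput::Elements(&[name]),
    ///         Some(&mut output),
    ///     )
    ///     .expect("kernel hypercall submission should always succeed")
    /// };
    /// status.result()?;
    /// assert_eq!(status.elements_processed(), 1);
    /// ```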
1204    unsafe fn hvcall_rep<InputHeader, InputRep, O>(
1205        &self,
1206        code: HypercallCode,
1207        input_header: &InputHeader,
1208        input_rep: HvcallRepInput<'_, InputRep>,
1209        output_rep: Option<&mut [O]>,
1210    ) -> Result<HypercallOutput, HvcallError>
1211    where
1212        InputHeader: IntoBytes + Sized + Immutable + KnownLayout,
1213        InputRep: IntoBytes + Sized + Immutable + KnownLayout,
1214        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1215    {
1216        // Construct input buffer.
1217        let (input, count) = match input_rep {
1218            HvcallRepInput::Elements(e) => {
1219                ([input_header.as_bytes(), e.as_bytes()].concat(), e.len())
1220            }
1221            HvcallRepInput::Count(c) => (input_header.as_bytes().to_vec(), c.into()),
1222        };
1223
1224        if input.len() > HV_PAGE_SIZE as usize {
1225            return Err(HvcallError::InputParametersTooLarge);
1226        }
1227
1228        if let Some(output_rep) = &output_rep {
1229            if output_rep.as_bytes().len() > HV_PAGE_SIZE as usize {
1230                return Err(HvcallError::OutputParametersTooLarge);
1231            }
1232
1233            if count != output_rep.len() {
1234                return Err(HvcallError::InputOutputRepListMismatch);
1235            }
1236        }
1237
1238        let (output_data, output_size) = match output_rep {
1239            Some(output_rep) => (
1240                output_rep.as_bytes().as_ptr().cast(),
1241                output_rep.as_bytes().len(),
1242            ),
1243            None => (std::ptr::null(), 0),
1244        };
1245
1246        let control = hvdef::hypercall::Control::new()
1247            .with_code(code.0)
1248            .with_rep_count(count);
1249
1250        let call_object = protocol::hcl_hvcall {
1251            control,
1252            input_data: input.as_ptr().cast(),
1253            input_size: input.len(),
1254            status: HypercallOutput::new(),
1255            output_data,
1256            output_size,
1257        };
1258
1259        // SAFETY: The data referenced in the call lives as long as `self` does.
1260        unsafe { self.invoke_hvcall_ioctl(call_object) }
1261    }
1262
1263    /// Issues a non-rep hypercall with variable input to the hypervisor via the direct hypercall kernel interface.
1264    /// This is not intended to be used directly by external callers; rather, it is meant for writing safe hypercall wrappers.
1265    /// This call constructs the appropriate hypercall input control from the described parameters.
1266    ///
1267    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
1268    /// return value for the result of the hypercall.
1269    ///
1270    /// `code` is the hypercall code.
1271    /// `input` is the input type required by the hypercall.
1272    /// `output` is the output type required by the hypercall.
1273    /// `variable_input` is the contents of the variable input to the hypercall. The length must be a multiple of 8 bytes.
1274    ///
1275    /// # Safety
1276    /// This function makes no guarantees that the given input header, input and output types are valid for the
1277    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
1278    ///
1279    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
1280    ///
1281    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
1282    /// caller if needed.
1283    unsafe fn hvcall_var<I, O>(
1284        &self,
1285        code: HypercallCode,
1286        input: &I,
1287        variable_input: &[u8],
1288        output: &mut O,
1289    ) -> Result<HypercallOutput, HvcallError>
1290    where
1291        I: IntoBytes + Sized + Immutable + KnownLayout,
1292        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1293    {
1294        const fn assert_size<I, O>()
1295        where
1296            I: Sized,
1297            O: Sized,
1298        {
1299            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
1300            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
1301        }
1302        assert_size::<I, O>();
1303        assert!(variable_input.len() % 8 == 0);
1304
1305        let input = [input.as_bytes(), variable_input].concat();
1306        if input.len() > HV_PAGE_SIZE as usize {
1307            return Err(HvcallError::InputParametersTooLarge);
1308        }
1309
1310        let control = hvdef::hypercall::Control::new()
1311            .with_code(code.0)
1312            .with_variable_header_size(variable_input.len() / 8);
1313
1314        let call_object = protocol::hcl_hvcall {
1315            control,
1316            input_data: input.as_bytes().as_ptr().cast(),
1317            input_size: input.len(),
1318            status: FromZeros::new_zeroed(),
1319            output_data: output.as_bytes().as_ptr().cast(),
1320            output_size: size_of::<O>(),
1321        };
1322
1323        // SAFETY: The data referenced in the call lives as long as `self` does.
1324        unsafe { self.invoke_hvcall_ioctl(call_object) }
1325    }
1326
1327    /// Sets the VTL protection mask for the specified memory range.
1328    ///
1329    /// [`HypercallCode::HvCallModifyVtlProtectionMask`] must be allowed.
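    ///
    /// A call sketch (`range`, `map_flags`, and `target_vtl` are placeholders
    /// supplied by the caller):
    ///
    /// ```ignore
    /// mshv_hvcall.set_allowed_hypercalls(&[hvdef::HypercallCode::HvCallModifyVtlProtectionMask]);
    /// mshv_hvcall.modify_vtl_protection_mask(range, map_flags, target_vtl)?;
    /// ```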
1330    pub fn modify_vtl_protection_mask(
1331        &self,
1332        range: MemoryRange,
1333        map_flags: HvMapGpaFlags,
1334        target_vtl: HvInputVtl,
1335    ) -> Result<(), ApplyVtlProtectionsError> {
1336        let header = hvdef::hypercall::ModifyVtlProtectionMask {
1337            partition_id: HV_PARTITION_ID_SELF,
1338            map_flags,
1339            target_vtl,
1340            reserved: [0; 3],
1341        };
1342
1343        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
1344            - size_of::<hvdef::hypercall::ModifyVtlProtectionMask>())
1345            / size_of::<u64>();
1346
1347        let span = tracing::info_span!("modify_vtl_protection_mask", CVM_ALLOWED, ?range);
1348        let _enter = span.enter();
1349
1350        let start = range.start() / HV_PAGE_SIZE;
1351        let end = range.end() / HV_PAGE_SIZE;
1352
1353        // Reuse the same vector for every hypercall.
1354        let mut pages = Vec::new();
1355        for current_page in (start..end).step_by(MAX_INPUT_ELEMENTS) {
1356            let remaining_pages = end - current_page;
1357            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
1358            pages.clear();
1359            pages.extend(current_page..current_page + count);
1360
1361            // SAFETY: The input header and rep slice are the correct types for this hypercall. A dummy type of u8 is
1362            //         provided to satisfy the compiler for output rep type. The given input and slices are valid
1363            //         references while this function is called.
1364            //
1365            //         The hypercall output is validated right after the hypercall is issued.
1366            let output = unsafe {
1367                self.hvcall_rep::<hvdef::hypercall::ModifyVtlProtectionMask, u64, u8>(
1368                    HypercallCode::HvCallModifyVtlProtectionMask,
1369                    &header,
1370                    HvcallRepInput::Elements(pages.as_slice()),
1371                    None,
1372                )
1373                .expect("kernel hypercall submission should always succeed")
1374            };
1375
1376            output.result().map_err(|err| {
1377                let page_range =
1378                    *pages.first().expect("not empty")..*pages.last().expect("not empty") + 1;
1379                ApplyVtlProtectionsError::Hypervisor {
1380                    range: MemoryRange::from_4k_gpn_range(page_range),
1381                    output,
1382                    hv_error: err,
1383                    vtl: target_vtl,
1384                }
1385            })?;
1386
1387            assert_eq!(output.elements_processed() as u64, count);
1388        }
1389
1390        Ok(())
1391    }
1392
1393    /// Get a single VP register for the given VTL via hypercall.
1394    fn get_vp_register_for_vtl_inner(
1395        &self,
1396        target_vtl: HvInputVtl,
1397        name: HvRegisterName,
1398    ) -> Result<HvRegisterValue, Error> {
1399        let header = hvdef::hypercall::GetSetVpRegisters {
1400            partition_id: HV_PARTITION_ID_SELF,
1401            vp_index: HV_VP_INDEX_SELF,
1402            target_vtl,
1403            rsvd: [0; 3],
1404        };
1405        let mut output = [HvRegisterValue::new_zeroed()];
1406
1407        // SAFETY: The input header and rep slice are the correct types for this hypercall.
1408        //         The hypercall output is validated right after the hypercall is issued.
1409        let status = unsafe {
1410            self.hvcall_rep(
1411                HypercallCode::HvCallGetVpRegisters,
1412                &header,
1413                HvcallRepInput::Elements(&[name]),
1414                Some(&mut output),
1415            )
1416            .expect("get_vp_register hypercall should not fail")
1417        };
1418
1419        // Status must be success with 1 rep completed
1420        status
1421            .result()
1422            .map_err(|err| Error::GetVpRegisterHypercall {
1423                reg: name.into(),
1424                err,
1425            })?;
1426        assert_eq!(status.elements_processed(), 1);
1427
1428        Ok(output[0])
1429    }
1430
1431    /// Get a single VP register for the given VTL via hypercall. Only a select
1432    /// set of registers are supported; others will cause a panic.
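    ///
    /// For example (a sketch; `HvCallGetVpRegisters` must have been allowed
    /// first):
    ///
    /// ```ignore
    /// let caps = mshv_hvcall
    ///     .get_vp_register_for_vtl(HvInputVtl::CURRENT_VTL, HvX64RegisterName::VsmCapabilities)?
    ///     .as_u64();
    /// ```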
1433    #[cfg(guest_arch = "x86_64")]
1434    pub fn get_vp_register_for_vtl(
1435        &self,
1436        vtl: HvInputVtl,
1437        name: HvX64RegisterName,
1438    ) -> Result<HvRegisterValue, Error> {
1439        match vtl.target_vtl().unwrap() {
1440            None | Some(Vtl::Vtl2) => {
1441                assert!(matches!(
1442                    name,
1443                    HvX64RegisterName::GuestVsmPartitionConfig
1444                        | HvX64RegisterName::VsmPartitionConfig
1445                        | HvX64RegisterName::VsmPartitionStatus
1446                        | HvX64RegisterName::VsmCapabilities
1447                        | HvX64RegisterName::TimeRefCount
1448                        | HvX64RegisterName::VsmVpSecureConfigVtl0
1449                        | HvX64RegisterName::VsmVpSecureConfigVtl1
1450                ));
1451            }
1452            Some(Vtl::Vtl1) => {
1453                todo!("TODO: allowed registers for VTL1");
1454            }
1455            Some(Vtl::Vtl0) => {
1456                // Only VTL-private registers can go through this path.
1457                // VTL-shared registers have to go through the kernel (either
1458                // via the CPU context page or via the dedicated ioctl), as
1459                // they may require special handling there.
1460                //
1461                // Register access should go through the register page if
1462                // possible (as a performance optimization). In practice,
1463                // registers that are normally available on the register page
1464                // are handled here only when it is unavailable (e.g., running
1465                // in WHP).
1466                assert!(!is_vtl_shared_reg(name));
1467            }
1468        }
1469
1470        self.get_vp_register_for_vtl_inner(vtl, name.into())
1471    }
1472
1473    /// Get a single VP register for the given VTL via hypercall. Only a select
1474    /// set of registers are supported; others will cause a panic.
1475    #[cfg(guest_arch = "aarch64")]
1476    pub fn get_vp_register_for_vtl(
1477        &self,
1478        vtl: HvInputVtl,
1479        name: HvArm64RegisterName,
1480    ) -> Result<HvRegisterValue, Error> {
1481        match vtl.target_vtl().unwrap() {
1482            None | Some(Vtl::Vtl2) => {
1483                assert!(matches!(
1484                    name,
1485                    HvArm64RegisterName::GuestVsmPartitionConfig
1486                        | HvArm64RegisterName::VsmPartitionConfig
1487                        | HvArm64RegisterName::VsmPartitionStatus
1488                        | HvArm64RegisterName::VsmCapabilities
1489                        | HvArm64RegisterName::TimeRefCount
1490                        | HvArm64RegisterName::VsmVpSecureConfigVtl0
1491                        | HvArm64RegisterName::VsmVpSecureConfigVtl1
1492                        | HvArm64RegisterName::PrivilegesAndFeaturesInfo
1493                ));
1494            }
1495            Some(Vtl::Vtl1) => {
1496                // TODO: allowed registers for VTL1
1497                todo!();
1498            }
1499            Some(Vtl::Vtl0) => {
1500                // Only VTL-private registers can go through this path.
1501                // VTL-shared registers have to go through the kernel (either
1502                // via the CPU context page or via the dedicated ioctl), as
1503                // they may require special handling there.
1504                assert!(!is_vtl_shared_reg(name));
1505            }
1506        }
1507
1508        self.get_vp_register_for_vtl_inner(vtl, name.into())
1509    }
1510
1511    /// Invokes the HvCallMemoryMappedIoRead hypercall
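    ///
    /// A minimal usage sketch (the GPA is illustrative; `hvcall` is assumed to
    /// be an open `MshvHvcall` with `HvCallMemoryMappedIoRead` allowed):
    ///
    /// ```ignore
    /// let mut buf = [0u8; 4];
    /// hvcall.mmio_read(0xfee0_00f0, &mut buf)?;
    /// let value = u32::from_le_bytes(buf);
    /// ```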
1512    pub fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> Result<(), HvError> {
1513        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1514
1515        let header = hvdef::hypercall::MemoryMappedIoRead {
1516            gpa,
1517            access_width: data.len() as u32,
1518            reserved_z0: 0,
1519        };
1520
1521        let mut output: hvdef::hypercall::MemoryMappedIoReadOutput = FromZeros::new_zeroed();
1522
1523        // SAFETY: The input header and slice are the correct types for this hypercall.
1524        //         The hypercall output is validated right after the hypercall is issued.
1525        let status = unsafe {
1526            self.hvcall(
1527                HypercallCode::HvCallMemoryMappedIoRead,
1528                &header,
1529                &mut output,
1530            )
1531            .expect("submitting hypercall should not fail")
1532        };
1533
1534        // Only copy the data if the hypercall was successful
1535        if status.result().is_ok() {
1536            data.copy_from_slice(&output.data[..data.len()]);
1537        }
1538
1539        status.result()
1540    }
1541
1542    /// Invokes the HvCallMemoryMappedIoWrite hypercall
1543    pub fn mmio_write(&self, gpa: u64, data: &[u8]) -> Result<(), HvError> {
1544        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1545
1546        let mut header = hvdef::hypercall::MemoryMappedIoWrite {
1547            gpa,
1548            access_width: data.len() as u32,
1549            reserved_z0: 0,
1550            data: [0; hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH],
1551        };
1552
1553        header.data[..data.len()].copy_from_slice(data);
1554
1555        // SAFETY: The input header and slice are the correct types for this hypercall.
1556        //         The hypercall output is validated right after the hypercall is issued.
1557        let status = unsafe {
1558            self.hvcall(HypercallCode::HvCallMemoryMappedIoWrite, &header, &mut ())
1559                .expect("submitting hypercall should not fail")
1560        };
1561
1562        status.result()
1563    }
1564}
1565
1566/// The HCL device and collection of fds.
1567#[derive(Debug)]
1568pub struct Hcl {
1569    mshv_hvcall: MshvHvcall,
1570    mshv_vtl: MshvVtl,
1571    vps: Vec<HclVp>,
1572    supports_vtl_ret_action: bool,
1573    supports_register_page: bool,
1574    dr6_shared: bool,
1575    isolation: IsolationType,
1576    snp_register_bitmap: [u8; 64],
1577    sidecar: Option<SidecarClient>,
1578}
1579
1580/// The isolation type for a partition.
1581// TODO: Add guest_arch cfgs.
1582#[derive(Debug, Copy, Clone, PartialEq, Eq)]
1583pub enum IsolationType {
1584    /// No isolation.
1585    None,
1586    /// Hyper-V software isolation.
1587    Vbs,
1588    /// AMD SNP.
1589    Snp,
1590    /// Intel TDX.
1591    Tdx,
1592}
1593
1594impl IsolationType {
1595    /// Returns true if the isolation type is not `None`.
1596    pub fn is_isolated(&self) -> bool {
1597        !matches!(self, Self::None)
1598    }
1599
1600    /// Returns whether the isolation type is hardware-backed.
1601    pub fn is_hardware_isolated(&self) -> bool {
1602        matches!(self, Self::Snp | Self::Tdx)
1603    }
1604}
1605
1606impl Hcl {
1607    /// Returns true if DR6 is a shared register on this processor.
1608    pub fn dr6_shared(&self) -> bool {
1609        self.dr6_shared
1610    }
1611}
1612
1613#[derive(Debug)]
1614struct HclVp {
1615    state: Mutex<VpState>,
1616    run: MappedPage<hcl_run>,
1617    backing: BackingState,
1618}
1619
1620#[derive(Debug)]
1621enum BackingState {
1622    MshvAarch64 {
1623        reg_page: Option<MappedPage<HvAarch64RegisterPage>>,
1624    },
1625    MshvX64 {
1626        reg_page: Option<MappedPage<HvX64RegisterPage>>,
1627    },
1628    Snp {
1629        vmsa: VtlArray<MappedPage<SevVmsa>, 2>,
1630    },
1631    Tdx {
1632        vtl0_apic_page: MappedPage<ApicPage>,
1633        vtl1_apic_page: MemoryBlock,
1634    },
1635}
1636
1637#[derive(Debug)]
1638enum VpState {
1639    Running(Pthread),
1640    NotRunning,
1641}
1642
1643impl HclVp {
1644    fn new(
1645        hcl: &Hcl,
1646        vp: u32,
1647        map_reg_page: bool,
1648        isolation_type: IsolationType,
1649        private_dma_client: Option<&Arc<dyn DmaClient>>,
1650    ) -> Result<Self, Error> {
1651        let fd = &hcl.mshv_vtl.file;
1652        let run: MappedPage<hcl_run> =
1653            MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
1654        // Block proxied interrupts on all vectors by default. The mask will be
1655        // relaxed as the guest runs.
1656        //
1657        // This is only used on CVMs. Skip it otherwise, since run page accesses
1658        // will fault on VPs that are still in the sidecar kernel.
1659        if isolation_type.is_hardware_isolated() {
1660            // SAFETY: `proxy_irr_blocked` is not accessed by any other VPs/kernel at this point (`HclVp` creation)
1661            // so we know we have exclusive access.
1662            let proxy_irr_blocked = unsafe { &mut (*run.as_ptr()).proxy_irr_blocked };
1663            proxy_irr_blocked.fill(!0);
1664        }
1665
1666        let backing = match isolation_type {
1667            IsolationType::None | IsolationType::Vbs if cfg!(guest_arch = "aarch64") => {
1668                BackingState::MshvAarch64 {
1669                    reg_page: if map_reg_page {
1670                        Some(
1671                            MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1672                                .map_err(Error::MmapRegPage)?,
1673                        )
1674                    } else {
1675                        None
1676                    },
1677                }
1678            }
1679            IsolationType::None | IsolationType::Vbs => BackingState::MshvX64 {
1680                reg_page: if map_reg_page {
1681                    Some(
1682                        MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1683                            .map_err(Error::MmapRegPage)?,
1684                    )
1685                } else {
1686                    None
1687                },
1688            },
1689            IsolationType::Snp => {
1690                let vmsa_vtl0 = MappedPage::new(fd, HCL_VMSA_PAGE_OFFSET | vp as i64)
1691                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?;
1692                let vmsa_vtl1 = MappedPage::new(fd, HCL_VMSA_GUEST_VSM_PAGE_OFFSET | vp as i64)
1693                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl1)))?;
1694                BackingState::Snp {
1695                    vmsa: [vmsa_vtl0, vmsa_vtl1].into(),
1696                }
1697            }
1698            IsolationType::Tdx => BackingState::Tdx {
1699                vtl0_apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
1700                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?,
1701                vtl1_apic_page: private_dma_client
1702                    .ok_or(Error::MissingPrivateMemory)?
1703                    .allocate_dma_buffer(HV_PAGE_SIZE as usize)
1704                    .map_err(Error::AllocVp)?,
1705            },
1706        };
1707
1708        Ok(Self {
1709            state: Mutex::new(VpState::NotRunning),
1710            run,
1711            backing,
1712        })
1713    }
1714}
1715
1716/// Object used to run and to access state for a specific VP.
1717pub struct ProcessorRunner<'a, T: Backing<'a>> {
1718    hcl: &'a Hcl,
1719    vp: &'a HclVp,
1720    sidecar: Option<SidecarVp<'a>>,
1721    deferred_actions: Option<RegisteredDeferredActions<'a>>,
1722    run: &'a UnsafeCell<hcl_run>,
1723    intercept_message: &'a UnsafeCell<HvMessage>,
1724    state: T,
1725}
1726
1727/// An error returned by [`Hcl::runner`].
1728#[derive(Debug, Error)]
1729pub enum NoRunner {
1730    /// The partition is for a different isolation type.
1731    #[error("mismatched isolation type")]
1732    MismatchedIsolation,
1733    /// A sidecar VP was requested, but no sidecar was provided.
1734    #[error("missing sidecar")]
1735    MissingSidecar,
1736    /// The sidecar VP could not be contacted.
1737    #[error("sidecar communication error")]
1738    Sidecar(#[source] sidecar_client::SidecarError),
1739}
1740
1741/// An isolation-type-specific backing for a processor runner.
1742#[expect(private_bounds)]
1743pub trait Backing<'a>: BackingPrivate<'a> {}
1744
1745impl<'a, T: BackingPrivate<'a>> Backing<'a> for T {}
1746
1747mod private {
1748    use super::Error;
1749    use super::Hcl;
1750    use super::HclVp;
1751    use super::NoRunner;
1752    use super::ProcessorRunner;
1753    use crate::GuestVtl;
1754    use hvdef::HvRegisterName;
1755    use hvdef::HvRegisterValue;
1756    use sidecar_client::SidecarVp;
1757
1758    pub(super) trait BackingPrivate<'a>: Sized {
1759        fn new(vp: &'a HclVp, sidecar: Option<&SidecarVp<'a>>, hcl: &Hcl)
1760        -> Result<Self, NoRunner>;
1761
1762        fn try_set_reg(
1763            runner: &mut ProcessorRunner<'a, Self>,
1764            vtl: GuestVtl,
1765            name: HvRegisterName,
1766            value: HvRegisterValue,
1767        ) -> Result<bool, Error>;
1768
1769        fn must_flush_regs_on(runner: &ProcessorRunner<'a, Self>, name: HvRegisterName) -> bool;
1770
1771        fn try_get_reg(
1772            runner: &ProcessorRunner<'a, Self>,
1773            vtl: GuestVtl,
1774            name: HvRegisterName,
1775        ) -> Result<Option<HvRegisterValue>, Error>;
1776
1777        fn flush_register_page(runner: &mut ProcessorRunner<'a, Self>);
1778    }
1779}
1780
1781impl<'a, T: Backing<'a>> Drop for ProcessorRunner<'a, T> {
1782    fn drop(&mut self) {
1783        self.flush_deferred_state();
1784        drop(self.deferred_actions.take());
1785        let old_state = std::mem::replace(&mut *self.vp.state.lock(), VpState::NotRunning);
1786        assert!(matches!(old_state, VpState::Running(thread) if thread == Pthread::current()));
1787    }
1788}
1789
1790impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
1791    /// Flushes any deferred state. Must be called if preparing the partition
1792    /// for save/restore (servicing).
1793    pub fn flush_deferred_state(&mut self) {
1794        T::flush_register_page(self);
1795        if let Some(actions) = &mut self.deferred_actions {
1796            actions.flush();
1797        }
1798    }
1799}
1800
1801impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
1802    // Registers that are shared between VTLs need to be handled by the kernel
1803    // as they may require special handling there. set_reg and get_reg will
1804    // handle these registers using a dedicated ioctl, instead of the general-
1805    // purpose Set/GetVpRegisters hypercalls.
1806    #[cfg(guest_arch = "x86_64")]
1807    fn is_kernel_managed(&self, name: HvX64RegisterName) -> bool {
1808        if name == HvX64RegisterName::Dr6 {
1809            self.hcl.dr6_shared()
1810        } else {
1811            is_vtl_shared_reg(name)
1812        }
1813    }
1814
1815    #[cfg(guest_arch = "aarch64")]
1816    fn is_kernel_managed(&self, name: HvArm64RegisterName) -> bool {
1817        is_vtl_shared_reg(name)
1818    }
1819
1820    fn set_reg(&mut self, vtl: GuestVtl, regs: &[HvRegisterAssoc]) -> Result<(), Error> {
1821        if regs.is_empty() {
1822            return Ok(());
1823        }
1824
1825        if let Some(sidecar) = &mut self.sidecar {
1826            sidecar
1827                .set_vp_registers(vtl.into(), regs)
1828                .map_err(Error::Sidecar)?;
1829        } else {
1830            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1831            // currently has a bug where it only supports one register at a
1832            // time. Once that's fixed, this code could set a group of
1833            // registers in one ioctl.
1834            for reg in regs {
1835                let hc_regs = &mut [HvRegisterAssoc {
1836                    name: reg.name,
1837                    pad: [0; 3],
1838                    value: reg.value,
1839                }];
1840
1841                if self.is_kernel_managed(reg.name.into()) {
1842                    let hv_vp_register_args = mshv_vp_registers {
1843                        count: 1,
1844                        regs: hc_regs.as_mut_ptr(),
1845                    };
1846                    // SAFETY: ioctl call with correct types.
1847                    unsafe {
1848                        hcl_set_vp_register(
1849                            self.hcl.mshv_vtl.file.as_raw_fd(),
1850                            &hv_vp_register_args,
1851                        )
1852                        .map_err(Error::SetVpRegister)?;
1853                    }
1854                } else {
1855                    let hc_regs = [HvRegisterAssoc {
1856                        name: reg.name,
1857                        pad: [0; 3],
1858                        value: reg.value,
1859                    }];
1860                    self.set_vp_registers_hvcall_inner(vtl.into(), &hc_regs)
1861                        .map_err(Error::SetRegisters)?;
1862                }
1863            }
1864        }
1865        Ok(())
1866    }
1867
1868    fn get_reg(&mut self, vtl: GuestVtl, regs: &mut [HvRegisterAssoc]) -> Result<(), Error> {
1869        if regs.is_empty() {
1870            return Ok(());
1871        }
1872
1873        if let Some(sidecar) = &mut self.sidecar {
1874            sidecar
1875                .get_vp_registers(vtl.into(), regs)
1876                .map_err(Error::Sidecar)?;
1877        } else {
1878            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1879            // currently has a bug where it only supports one register at a
1880            // time. Once that's fixed, this code could get a group of
1881            // registers in one ioctl.
1882            for reg in regs {
1883                if self.is_kernel_managed(reg.name.into()) {
1884                    let mut mshv_vp_register_args = mshv_vp_registers {
1885                        count: 1,
1886                        regs: reg,
1887                    };
1888                    // SAFETY: we know that our file is a vCPU fd, we know the kernel will only read the
1889                    // correct amount of memory from our pointer, and we verify the return result.
1890                    unsafe {
1891                        hcl_get_vp_register(
1892                            self.hcl.mshv_vtl.file.as_raw_fd(),
1893                            &mut mshv_vp_register_args,
1894                        )
1895                        .map_err(Error::GetVpRegister)?;
1896                    }
1897                } else {
1898                    reg.value = self
1899                        .hcl
1900                        .mshv_hvcall
1901                        .get_vp_register_for_vtl(vtl.into(), reg.name.into())?;
1902                }
1903            }
1904        }
1905        Ok(())
1906    }
1907
1908    /// Clears the cancel flag so that the VP can be run again.
1909    pub fn clear_cancel(&mut self) {
1910        if !self.is_sidecar() {
1911            // SAFETY: self.run is mapped, and the cancel field is atomically
1912            // accessed by everyone.
1913            let cancel = unsafe { &*(&raw mut (*self.run.get()).cancel).cast::<AtomicU32>() };
1914            cancel.store(0, Ordering::SeqCst);
1915        }
1916    }
1917
1918    /// Set the halted state of the VP. If `true`, then `run()` will not
1919    /// actually run the VP but will just wait for a cancel request or signal.
1920    pub fn set_halted(&mut self, halted: bool) {
1921        // SAFETY: the `flags` field of the run page will not be concurrently
1922        // updated.
1923        let flags = unsafe { &mut (*self.run.get()).flags };
1924        if halted {
1925            *flags |= protocol::MSHV_VTL_RUN_FLAG_HALTED
1926        } else {
1927            *flags &= !protocol::MSHV_VTL_RUN_FLAG_HALTED
1928        }
1929    }
1930
1931    /// Gets the proxied interrupt request bitmap for VTL 0 from the hypervisor.
1932    pub fn proxy_irr_vtl0(&mut self) -> Option<[u32; 8]> {
1933        // SAFETY: the `scan_proxy_irr` and `proxy_irr` fields of the run page
1934        // are concurrently updated by the kernel on multiple processors. They
1935        // are accessed atomically everywhere.
1936        unsafe {
1937            let scan_proxy_irr = &*((&raw mut (*self.run.get()).scan_proxy_irr).cast::<AtomicU8>());
1938            let proxy_irr = &*((&raw mut (*self.run.get()).proxy_irr).cast::<[AtomicU32; 8]>());
1939            if scan_proxy_irr.load(Ordering::Acquire) == 0 {
1940                return None;
1941            }
1942
1943            scan_proxy_irr.store(0, Ordering::SeqCst);
1944            let mut r = [0; 8];
1945            for (irr, r) in proxy_irr.iter().zip(r.iter_mut()) {
1946                if irr.load(Ordering::Relaxed) != 0 {
1947                    *r = irr.swap(0, Ordering::Relaxed);
1948                }
1949            }
1950            Some(r)
1951        }
1952    }
1953
1954    /// Update the `proxy_irr_blocked` for VTL 0 in the run page
1955    pub fn update_proxy_irr_filter_vtl0(&mut self, irr_filter: &[u32; 8]) {
1956        // SAFETY: `proxy_irr_blocked` is accessed only by the current VP, but it
1957        // may also be accessed concurrently by the kernel, so it is accessed atomically.
1958        let proxy_irr_blocked = unsafe {
1959            &mut *((&raw mut (*self.run.get()).proxy_irr_blocked).cast::<[AtomicU32; 8]>())
1960        };
1961
1962        // The `irr_filter` bitmap has a bit set for every allowed vector (i.e. SINTs and device interrupts).
1963        // Replace the current `proxy_irr_blocked` with the inverse of the given `irr_filter` bitmap:
1964        // block everything by default (all bits set), and allow (clear) only the vectors set in `irr_filter`.
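        //
        // For example (a hypothetical filter value): if `irr_filter[1]` is
        // 0x0000_0001 (only vector 32 allowed), the stored blocked mask is
        // !0x0000_0001 = 0xFFFF_FFFE, so every other vector in that word
        // stays blocked.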
1965        for (filter, irr) in proxy_irr_blocked.iter_mut().zip(irr_filter.iter()) {
1966            filter.store(!irr, Ordering::Relaxed);
1967            tracing::debug!(irr, "update_proxy_irr_filter");
1968        }
1969    }
1970
1971    /// Gets the `proxy_irr_exit` bitmask for VTL 0. Interrupts whose vectors
1972    /// are set in this mask always exit to user space and are never injected
1973    /// by the kernel. Interrupts matching this condition are left pending in
1974    /// the `proxy_irr` field.
1975    pub fn proxy_irr_exit_mut_vtl0(&mut self) -> &mut [u32; 8] {
1976        // SAFETY: The `proxy_irr_exit` field of the run page will not be concurrently updated.
1977        unsafe { &mut (*self.run.get()).proxy_irr_exit }
1978    }
1979
1980    /// Gets the current offload_flags from the run page.
1981    pub fn offload_flags_mut(&mut self) -> &mut hcl_intr_offload_flags {
1982        // SAFETY: The `offload_flags` field of the run page will not be concurrently updated.
1983        unsafe { &mut (*self.run.get()).offload_flags }
1984    }
1985
1986    /// Runs the VP via the sidecar kernel.
1987    pub fn run_sidecar(&mut self) -> Result<SidecarRun<'_, 'a>, Error> {
1988        self.sidecar.as_mut().unwrap().run().map_err(Error::Sidecar)
1989    }
1990
1991    /// Runs this VP until an exit, error, or interrupt (cancel or
1992    /// signal) occurs.
1993    ///
1994    /// Returns `Ok(true)` if there is an exit to process, `Ok(false)` if there
1995    /// was a signal or cancel request.
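    ///
    /// A minimal run-loop sketch (error handling and exit processing elided;
    /// `runner` is assumed to be a non-sidecar `ProcessorRunner`):
    ///
    /// ```ignore
    /// while runner.run()? {
    ///     // An intercept is available; inspect it before running again.
    ///     let message = runner.exit_message();
    ///     // ... handle `message` ...
    /// }
    /// ```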
1996    pub fn run(&mut self) -> Result<bool, Error> {
1997        assert!(self.sidecar.is_none());
1998        // Apply any deferred actions to the run page.
1999        if let Some(actions) = &mut self.deferred_actions {
2000            debug_assert!(self.hcl.supports_vtl_ret_action);
2001            // SAFETY: there are no concurrent accesses to the deferred action
2002            // slots.
2003            let mut slots = unsafe { DeferredActionSlots::new(self.run) };
2004            actions.move_to_slots(&mut slots);
2005        };
2006
2007        // N.B. cpu_context and exit_context are mutated by this call.
2008        //
2009        // SAFETY: no safety requirements for this ioctl.
2010        let r = unsafe { hcl_return_to_lower_vtl(self.hcl.mshv_vtl.file.as_raw_fd()) };
2011
2012        let has_intercept = match r {
2013            Ok(_) => true,
2014            Err(nix::errno::Errno::EINTR) => false,
2015            Err(err) => return Err(Error::ReturnToLowerVtl(err)),
2016        };
2017        Ok(has_intercept)
2018    }
2019
2020    /// Gets a reference to enter mode value, used by the kernel to specify the
2021    /// mode used when entering a lower VTL.
2022    pub fn enter_mode(&mut self) -> Option<&mut EnterModes> {
2023        if self.sidecar.is_some() {
2024            None
2025        } else {
2026            // SAFETY: self.run is mapped, and the mode field can only be mutated or accessed by
2027            // this object (or the kernel while `run` is called).
2028            Some(unsafe { &mut (*self.run.get()).mode })
2029        }
2030    }
2031
2032    /// Returns a reference to the exit message from the last exit.
2033    pub fn exit_message(&self) -> &HvMessage {
2034        // SAFETY: the exit message will not be concurrently accessed by the
2035        // kernel while this VP is in VTL2.
2036        unsafe { &*self.intercept_message.get() }
2037    }
2038
2039    /// Returns whether this is a sidecar VP.
2040    pub fn is_sidecar(&self) -> bool {
2041        self.sidecar.is_some()
2042    }
2043}
2044
2045impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
2046    fn get_vp_registers_inner<R: Copy + Into<HvRegisterName>>(
2047        &mut self,
2048        vtl: GuestVtl,
2049        names: &[R],
2050        values: &mut [HvRegisterValue],
2051    ) -> Result<(), Error> {
2052        assert_eq!(names.len(), values.len());
2053        let mut assoc = Vec::new();
2054        let mut offset = Vec::new();
2055        for (i, (&name, value)) in names.iter().zip(values.iter_mut()).enumerate() {
2056            if let Some(v) = T::try_get_reg(self, vtl, name.into())? {
2057                *value = v;
2058            } else {
2059                assoc.push(HvRegisterAssoc {
2060                    name: name.into(),
2061                    pad: Default::default(),
2062                    value: FromZeros::new_zeroed(),
2063                });
2064                offset.push(i);
2065            }
2066        }
2067
2068        self.get_reg(vtl, &mut assoc)?;
2069        for (&i, assoc) in offset.iter().zip(&assoc) {
2070            values[i] = assoc.value;
2071        }
2072        Ok(())
2073    }
2074
2075    /// Gets the given register on the current VP.
2076    ///
2077    /// This will fail for registers that are in the mmapped CPU context, i.e.
2078    /// registers that are shared between VTL0 and VTL2.
2079    pub fn get_vp_register(
2080        &mut self,
2081        vtl: GuestVtl,
2082        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2083        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2084    ) -> Result<HvRegisterValue, Error> {
2085        let mut value = [0u64.into(); 1];
2086        self.get_vp_registers_inner(vtl, &[name], &mut value)?;
2087        Ok(value[0])
2088    }
2089
2090    /// Gets the given VP registers on the current VP.
2091    ///
2092    /// # Panics
2093    /// Panics if `names.len() != values.len()`.
2094    pub fn get_vp_registers(
2095        &mut self,
2096        vtl: GuestVtl,
2097        #[cfg(guest_arch = "x86_64")] names: &[HvX64RegisterName],
2098        #[cfg(guest_arch = "aarch64")] names: &[HvArm64RegisterName],
2099        values: &mut [HvRegisterValue],
2100    ) -> Result<(), Error> {
2101        self.get_vp_registers_inner(vtl, names, values)
2102    }
2103
2104    /// Sets the given register on the current VP.
2105    ///
2106    /// This will fail for registers that are in the mmapped CPU context, i.e.
2107    /// registers that are shared between VTL0 and VTL2.
2108    pub fn set_vp_register(
2109        &mut self,
2110        vtl: GuestVtl,
2111        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2112        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2113        value: HvRegisterValue,
2114    ) -> Result<(), Error> {
2115        self.set_vp_registers(vtl, [(name, value)])
2116    }
2117
2118    /// Sets a set of VP registers.
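    ///
    /// A minimal usage sketch (the register names and values are illustrative,
    /// not required by this API):
    ///
    /// ```ignore
    /// runner.set_vp_registers(
    ///     GuestVtl::Vtl0,
    ///     [
    ///         (HvX64RegisterName::Rax, HvRegisterValue::from(0u64)),
    ///         (HvX64RegisterName::Rip, HvRegisterValue::from(0xfff0u64)),
    ///     ],
    /// )?;
    /// ```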
2119    pub fn set_vp_registers<I>(&mut self, vtl: GuestVtl, values: I) -> Result<(), Error>
2120    where
2121        I: IntoIterator,
2122        I::Item: Into<HvRegisterAssoc> + Clone,
2123    {
2124        let mut assoc = Vec::new();
2125        for HvRegisterAssoc { name, value, .. } in values.into_iter().map(Into::into) {
2126            if !assoc.is_empty() && T::must_flush_regs_on(self, name) {
2127                self.set_reg(vtl, &assoc)?;
2128                assoc.clear();
2129            }
2130            if !T::try_set_reg(self, vtl, name, value)? {
2131                assoc.push(HvRegisterAssoc {
2132                    name,
2133                    pad: Default::default(),
2134                    value,
2135                });
2136            }
2137        }
2138        if !assoc.is_empty() {
2139            self.set_reg(vtl, &assoc)?;
2140        }
2141        Ok(())
2142    }
2143
2144    fn set_vp_registers_hvcall_inner(
2145        &mut self,
2146        vtl: Vtl,
2147        registers: &[HvRegisterAssoc],
2148    ) -> Result<(), HvError> {
2149        let header = hvdef::hypercall::GetSetVpRegisters {
2150            partition_id: HV_PARTITION_ID_SELF,
2151            vp_index: HV_VP_INDEX_SELF,
2152            target_vtl: vtl.into(),
2153            rsvd: [0; 3],
2154        };
2155
2156        tracing::trace!(?registers, "HvCallSetVpRegisters rep");
2157
2158        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2159        //         The hypercall output is validated right after the hypercall is issued.
2160        let status = unsafe {
2161            self.hcl
2162                .mshv_hvcall
2163                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2164                    HypercallCode::HvCallSetVpRegisters,
2165                    &header,
2166                    HvcallRepInput::Elements(registers),
2167                    None,
2168                )
2169                .expect("set_vp_registers hypercall should not fail")
2170        };
2171
2172        // Status must be success
2173        status.result()?;
2174        Ok(())
2175    }
2176
2177    /// Sets the given registers on the current VP and given VTL using a
2178    /// direct hypercall.
2179    ///
2180    /// This should not be used on the fast path. Therefore only a select set of
2181    /// registers are supported, and others will cause a panic.
2182    ///
2183    /// This function can be used with VTL2 as a target.
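    ///
    /// A minimal usage sketch (assuming `config` is an already-built
    /// `HvRegisterValue` for the partition config; the register choice is
    /// illustrative):
    ///
    /// ```ignore
    /// runner.set_vp_registers_hvcall(
    ///     Vtl::Vtl2,
    ///     [(HvX64RegisterName::VsmPartitionConfig, config)],
    /// )?;
    /// ```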
2184    pub fn set_vp_registers_hvcall<I>(&mut self, vtl: Vtl, values: I) -> Result<(), HvError>
2185    where
2186        I: IntoIterator,
2187        I::Item: Into<HvRegisterAssoc> + Clone,
2188    {
2189        let registers: Vec<HvRegisterAssoc> = values.into_iter().map(Into::into).collect();
2190
2191        assert!(registers.iter().all(
2192            |HvRegisterAssoc {
2193                 name,
2194                 pad: _,
2195                 value: _,
2196             }| matches!(
2197                (*name).into(),
2198                HvX64RegisterName::PendingEvent0
2199                    | HvX64RegisterName::PendingEvent1
2200                    | HvX64RegisterName::Sipp
2201                    | HvX64RegisterName::Sifp
2202                    | HvX64RegisterName::Ghcb
2203                    | HvX64RegisterName::VsmPartitionConfig
2204                    | HvX64RegisterName::VsmVpWaitForTlbLock
2205                    | HvX64RegisterName::VsmVpSecureConfigVtl0
2206                    | HvX64RegisterName::VsmVpSecureConfigVtl1
2207                    | HvX64RegisterName::CrInterceptControl
2208            )
2209        ));
2210        self.set_vp_registers_hvcall_inner(vtl, &registers)
2211    }
2212
2213    /// Sets the VTL that should be returned to when underhill exits
2214    pub fn set_exit_vtl(&mut self, vtl: GuestVtl) {
2215        // SAFETY: self.run is mapped, and the target_vtl field can only be
2216        // mutated or accessed by this object and only before the kernel is
2217        // invoked during `run`
2218        unsafe { (*self.run.get()).target_vtl = vtl.into() }
2219    }
2220}
2221
2222impl Hcl {
2223    /// Returns a new HCL instance.
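    ///
    /// A minimal construction sketch (no sidecar, software isolation;
    /// `MyBacking` stands in for a concrete [`Backing`] implementation and is
    /// purely illustrative):
    ///
    /// ```ignore
    /// let mut hcl = Hcl::new(IsolationType::None, None)?;
    /// hcl.add_vps(1, None)?;
    /// let runner = hcl.runner::<MyBacking>(0, false)?;
    /// ```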
2224    pub fn new(isolation: IsolationType, sidecar: Option<SidecarClient>) -> Result<Hcl, Error> {
2225        static SIGNAL_HANDLER_INIT: Once = Once::new();
2226        // SAFETY: The signal handler does not perform any actions that are forbidden
2227        // in signal handlers, since it does nothing at all.
2228        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
2229            signal_hook::low_level::register(libc::SIGRTMIN(), || {
2230                // Do nothing, the ioctl will now return with EINTR.
2231            })
2232            .unwrap();
2233        });
2234
2235        // Open both mshv fds
2236        let mshv_fd = Mshv::new()?;
2237
2238        // Validate the hypervisor's advertised isolation type matches the
2239        // requested isolation type. In CVM scenarios, this is not trusted, so
2240        // we still need the isolation type from the caller.
2241        //
2242        // FUTURE: the kernel driver should probably tell us this, especially
2243        // since the kernel ABI is different for different isolation types.
2244        let supported_isolation = if cfg!(guest_arch = "x86_64") {
2245            // xtask-fmt allow-target-arch cpu-intrinsic
2246            #[cfg(target_arch = "x86_64")]
2247            {
2248                let result = safe_intrinsics::cpuid(
2249                    hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION,
2250                    0,
2251                );
2252                match result.ebx & 0xF {
2253                    0 => IsolationType::None,
2254                    1 => IsolationType::Vbs,
2255                    2 => IsolationType::Snp,
2256                    3 => IsolationType::Tdx,
2257                    ty => panic!("unknown isolation type {ty:#x}"),
2258                }
2259            }
2260            // xtask-fmt allow-target-arch cpu-intrinsic
2261            #[cfg(not(target_arch = "x86_64"))]
2262            {
2263                unreachable!()
2264            }
2265        } else {
2266            IsolationType::None
2267        };
2268
2269        if isolation != supported_isolation {
2270            return Err(Error::MismatchedIsolation {
2271                supported: supported_isolation,
2272                requested: isolation,
2273            });
2274        }
2275
2276        let supports_vtl_ret_action = mshv_fd.check_extension(HCL_CAP_VTL_RETURN_ACTION)?;
2277        let supports_register_page = mshv_fd.check_extension(HCL_CAP_REGISTER_PAGE)?;
2278        let dr6_shared = mshv_fd.check_extension(HCL_CAP_DR6_SHARED)?;
2279        tracing::debug!(
2280            supports_vtl_ret_action,
2281            supports_register_page,
2282            "HCL capabilities",
2283        );
2284
2285        let vtl_fd = mshv_fd.create_vtl()?;
2286
2287        // Open the hypercall pseudo-device
2288        let mshv_hvcall = MshvHvcall::new()?;
2289
2290        // Override certain features for hardware isolated VMs.
2291        // TODO: VTL return actions are inhibited for hardware-isolated VMs because they are
2292        // currently a pessimization: interrupt handling (and synic handling) is done entirely
2293        // within VTL2. Future VTL return actions may be different, requiring granular handling.
2294        let supports_vtl_ret_action = supports_vtl_ret_action && !isolation.is_hardware_isolated();
2295        let supports_register_page = supports_register_page && !isolation.is_hardware_isolated();
2296        let snp_register_bitmap = [0u8; 64];
2297
2298        Ok(Hcl {
2299            mshv_hvcall,
2300            mshv_vtl: vtl_fd,
2301            vps: Vec::new(),
2302            supports_vtl_ret_action,
2303            supports_register_page,
2304            dr6_shared,
2305            isolation,
2306            snp_register_bitmap,
2307            sidecar,
2308        })
2309    }
2310
2311    /// Set allowed hypercalls.
2312    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
2313        self.mshv_hvcall.set_allowed_hypercalls(codes)
2314    }
2315
2316    /// Initializes SNP register tweak bitmap
2317    pub fn set_snp_register_bitmap(&mut self, register_bitmap: [u8; 64]) {
2318        self.snp_register_bitmap = register_bitmap;
2319    }
2320
2321    /// Adds `vp_count` VPs.
2322    pub fn add_vps(
2323        &mut self,
2324        vp_count: u32,
2325        private_pool: Option<&Arc<dyn DmaClient>>,
2326    ) -> Result<(), Error> {
2327        self.vps = (0..vp_count)
2328            .map(|vp| {
2329                HclVp::new(
2330                    self,
2331                    vp,
2332                    self.supports_register_page,
2333                    self.isolation,
2334                    private_pool,
2335                )
2336            })
2337            .collect::<Result<_, _>>()?;
2338
2339        Ok(())
2340    }
2341
2342    /// Registers with the hypervisor for an intercept.
2343    pub fn register_intercept(
2344        &self,
2345        intercept_type: HvInterceptType,
2346        access_type_mask: u32,
2347        intercept_parameters: HvInterceptParameters,
2348    ) -> Result<(), HvError> {
2349        let intercept_info = hvdef::hypercall::InstallIntercept {
2350            partition_id: HV_PARTITION_ID_SELF,
2351            access_type_mask,
2352            intercept_type,
2353            intercept_parameters,
2354        };
2355
2356        // SAFETY: calling hypercall with appropriate input and output.
2357        unsafe {
2358            self.mshv_hvcall
2359                .hvcall(
2360                    HypercallCode::HvCallInstallIntercept,
2361                    &intercept_info,
2362                    &mut (),
2363                )
2364                .unwrap()
2365                .result()
2366        }
2367    }
2368
2369    /// Returns the base CPU that manages the given sidecar VP.
2370    pub fn sidecar_base_cpu(&self, vp_index: u32) -> Option<u32> {
2371        Some(self.sidecar.as_ref()?.base_cpu(vp_index))
2372    }
2373
2374    /// Create a VP runner for the given partition.
2375    pub fn runner<'a, T: Backing<'a>>(
2376        &'a self,
2377        vp_index: u32,
2378        use_sidecar: bool,
2379    ) -> Result<ProcessorRunner<'a, T>, NoRunner> {
2380        let vp = &self.vps[vp_index as usize];
2381
2382        let sidecar = if use_sidecar {
2383            Some(
2384                self.sidecar
2385                    .as_ref()
2386                    .ok_or(NoRunner::MissingSidecar)?
2387                    .vp(vp_index),
2388            )
2389        } else {
2390            None
2391        };
2392
2393        let state = T::new(vp, sidecar.as_ref(), self)?;
2394
2395        // Set this thread as the runner.
2396        let VpState::NotRunning =
2397            std::mem::replace(&mut *vp.state.lock(), VpState::Running(Pthread::current()))
2398        else {
2399            panic!("another runner already exists")
2400        };
2401
2402        let actions = if sidecar.is_none() && self.supports_vtl_ret_action {
2403            Some(register_deferred_actions(self))
2404        } else {
2405            None
2406        };
2407
2408        // SAFETY: The run page is guaranteed to be mapped and valid.
2409        // While the exit message might not be filled in yet we're only computing its address.
2410        let intercept_message = unsafe {
2411            &*sidecar.as_ref().map_or(
2412                std::ptr::addr_of!((*vp.run.as_ptr()).exit_message).cast(),
2413                |s| s.intercept_message().cast(),
2414            )
2415        };
2416
2417        Ok(ProcessorRunner {
2418            hcl: self,
2419            vp,
2420            deferred_actions: actions,
2421            run: vp.run.as_ref(),
2422            intercept_message,
2423            state,
2424            sidecar,
2425        })
2426    }
2427
2428    /// Triggers the given interrupt request.
2429    pub fn request_interrupt(
2430        &self,
2431        interrupt_control: hvdef::HvInterruptControl,
2432        destination_address: u64,
2433        requested_vector: u32,
2434        target_vtl: GuestVtl,
2435    ) -> Result<(), Error> {
2436        tracing::trace!(
2437            ?interrupt_control,
2438            destination_address,
2439            requested_vector,
2440            "requesting interrupt"
2441        );
2442
2443        assert!(!self.isolation.is_hardware_isolated());
2444
2445        let request = AssertVirtualInterrupt {
2446            partition_id: HV_PARTITION_ID_SELF,
2447            interrupt_control,
2448            destination_address,
2449            requested_vector,
2450            target_vtl: target_vtl as u8,
2451            rsvd0: 0,
2452            rsvd1: 0,
2453        };
2454
2455        // SAFETY: calling the hypercall with correct input buffer.
2456        let output = unsafe {
2457            self.mshv_hvcall.hvcall(
2458                HypercallCode::HvCallAssertVirtualInterrupt,
2459                &request,
2460                &mut (),
2461            )
2462        }
2463        .unwrap();
2464
2465        output.result().map_err(Error::RequestInterrupt)
2466    }
2467
2468    /// Attempts to signal a given vp/sint/flag combo using HvSignalEventDirect.
2469    ///
2470    /// No result is returned because this request may be deferred until the
2471    /// hypervisor is returning to a lower VTL.
2472    pub fn signal_event_direct(&self, vp: u32, sint: u8, flag: u16) {
2473        tracing::trace!(vp, sint, flag, "signaling event");
2474        push_deferred_action(self, DeferredAction::SignalEvent { vp, sint, flag });
2475    }
2476
2477    fn hvcall_signal_event_direct(&self, vp: u32, sint: u8, flag: u16) -> Result<bool, Error> {
2478        let signal_event_input = hvdef::hypercall::SignalEventDirect {
2479            target_partition: HV_PARTITION_ID_SELF,
2480            target_vp: vp,
2481            target_vtl: Vtl::Vtl0 as u8,
2482            target_sint: sint,
2483            flag_number: flag,
2484        };
2485        let mut signal_event_output = hvdef::hypercall::SignalEventDirectOutput {
2486            newly_signaled: 0,
2487            rsvd: [0; 7],
2488        };
2489
2490        // SAFETY: calling the hypercall with correct input buffer.
2491        let output = unsafe {
2492            self.mshv_hvcall.hvcall(
2493                HypercallCode::HvCallSignalEventDirect,
2494                &signal_event_input,
2495                &mut signal_event_output,
2496            )
2497        }
2498        .unwrap();
2499
2500        output
2501            .result()
2502            .map(|_| signal_event_output.newly_signaled != 0)
2503            .map_err(Error::SignalEvent)
2504    }
2505
2506    /// Attempts to post a given message to a vp/sint combo using HvPostMessageDirect.
2507    pub fn post_message_direct(
2508        &self,
2509        vp: u32,
2510        sint: u8,
2511        message: &HvMessage,
2512    ) -> Result<(), HvError> {
2513        tracing::trace!(vp, sint, "posting message");
2514
2515        let post_message = hvdef::hypercall::PostMessageDirect {
2516            partition_id: HV_PARTITION_ID_SELF,
2517            vp_index: vp,
2518            vtl: Vtl::Vtl0 as u8,
2519            padding0: [0; 3],
2520            sint,
2521            padding1: [0; 3],
2522            message: zerocopy::Unalign::new(*message),
2523            padding2: 0,
2524        };
2525
2526        // SAFETY: calling the hypercall with correct input buffer.
2527        let output = unsafe {
2528            self.mshv_hvcall.hvcall(
2529                HypercallCode::HvCallPostMessageDirect,
2530                &post_message,
2531                &mut (),
2532            )
2533        }
2534        .unwrap();
2535
2536        output.result()
2537    }
2538
2539    /// Sets a file to poll during run. When the file's poll state changes, the
2540    /// run will be automatically cancelled.
2541    pub fn set_poll_file(&self, vp: u32, file: RawFd) -> Result<(), Error> {
2542        // SAFETY: calling the IOCTL as defined. This is safe even if the caller
2543        // does not own `file` since all this does is register the file for
2544        // polling.
2545        unsafe {
2546            hcl_set_poll_file(
2547                self.mshv_vtl.file.as_raw_fd(),
2548                &protocol::hcl_set_poll_file {
2549                    cpu: vp as i32,
2550                    fd: file,
2551                },
2552            )
2553            .map_err(Error::SetPollFile)?;
2554        }
2555        Ok(())
2556    }
2557
2558    /// Gets the current hypervisor reference time.
2559    pub fn reference_time(&self) -> Result<u64, Error> {
2560        Ok(self
2561            .get_vp_register(HvAllArchRegisterName::TimeRefCount, HvInputVtl::CURRENT_VTL)?
2562            .as_u64())
2563    }
2564
2565    /// Get a single VP register for the given VTL via hypercall. Only a select
2566    /// set of registers are supported; others will cause a panic.
2567    #[cfg(guest_arch = "x86_64")]
2568    pub fn get_vp_register(
2569        &self,
2570        name: impl Into<HvX64RegisterName>,
2571        vtl: HvInputVtl,
2572    ) -> Result<HvRegisterValue, Error> {
2573        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2574    }
2575
2576    /// Get a single VP register for the given VTL via hypercall. Only a select
2577    /// set of registers are supported; others will cause a panic.
2578    #[cfg(guest_arch = "aarch64")]
2579    pub fn get_vp_register(
2580        &self,
2581        name: impl Into<HvArm64RegisterName>,
2582        vtl: HvInputVtl,
2583    ) -> Result<HvRegisterValue, Error> {
2584        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2585    }
2586
2587    /// Sets a single VP register via hypercall as VTL2. Only a select set of
2588    /// registers is supported; others will cause a panic.
2589    fn set_vp_register(
2590        &self,
2591        name: HvRegisterName,
2592        value: HvRegisterValue,
2593        vtl: HvInputVtl,
2594    ) -> Result<(), HvError> {
2595        match vtl.target_vtl().unwrap() {
2596            None | Some(Vtl::Vtl2) => {
2597                #[cfg(guest_arch = "x86_64")]
2598                assert!(matches!(
2599                    name.into(),
2600                    HvX64RegisterName::GuestVsmPartitionConfig
2601                        | HvX64RegisterName::VsmPartitionConfig
2602                        | HvX64RegisterName::PmTimerAssist
2603                ));
2604
2605                #[cfg(guest_arch = "aarch64")]
2606                assert!(matches!(
2607                    name.into(),
2608                    HvArm64RegisterName::GuestVsmPartitionConfig
2609                        | HvArm64RegisterName::VsmPartitionConfig
2610                ));
2611            }
2612            Some(Vtl::Vtl1) => {
2613                // TODO: allowed registers for VTL1
2614                todo!();
2615            }
2616            Some(Vtl::Vtl0) => {
2617                // TODO: allowed registers for VTL0
2618                todo!();
2619            }
2620        }
2621
2622        let header = hvdef::hypercall::GetSetVpRegisters {
2623            partition_id: HV_PARTITION_ID_SELF,
2624            vp_index: HV_VP_INDEX_SELF,
2625            target_vtl: HvInputVtl::CURRENT_VTL,
2626            rsvd: [0; 3],
2627        };
2628
2629        let input = HvRegisterAssoc {
2630            name,
2631            pad: Default::default(),
2632            value,
2633        };
2634
2635        tracing::trace!(?name, register = ?value, "HvCallSetVpRegisters");
2636
2637        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2638        //         The hypercall output is validated right after the hypercall is issued.
2639        let output = unsafe {
2640            self.mshv_hvcall
2641                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2642                    HypercallCode::HvCallSetVpRegisters,
2643                    &header,
2644                    HvcallRepInput::Elements(&[input]),
2645                    None,
2646                )
2647                .expect("set_vp_registers hypercall should not fail")
2648        };
2649
2650        output.result()?;
2651
2652        // hypercall must succeed with 1 rep completed
2653        assert_eq!(output.elements_processed(), 1);
2654        Ok(())
2655    }
2656
2657    /// Translates the given GVA to a GPA page.
2658    ///
2659    /// The caller must ensure `control_flags.input_vtl()` is set to a specific
2660    /// VTL.
2661    #[cfg(guest_arch = "aarch64")]
2662    pub fn translate_gva_to_gpa(
2663        &self,
2664        gva: u64,
2665        control_flags: hvdef::hypercall::TranslateGvaControlFlagsArm64,
2666    ) -> Result<Result<TranslateResult, aarch64::TranslateErrorAarch64>, TranslateGvaToGpaError>
2667    {
2668        use hvdef::hypercall;
2669
2670        assert!(!self.isolation.is_hardware_isolated());
2671        assert!(
2672            control_flags.input_vtl().use_target_vtl(),
2673            "did not specify a target VTL"
2674        );
2675
2676        let header = hypercall::TranslateVirtualAddressArm64 {
2677            partition_id: HV_PARTITION_ID_SELF,
2678            vp_index: HV_VP_INDEX_SELF,
2679            reserved: 0,
2680            control_flags,
2681            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
2682        };
2683
2684        let mut output: hypercall::TranslateVirtualAddressExOutputArm64 = FromZeros::new_zeroed();
2685
2686        // SAFETY: The input header and slice are the correct types for this hypercall.
2687        //         The hypercall output is validated right after the hypercall is issued.
2688        let status = unsafe {
2689            self.mshv_hvcall
2690                .hvcall(
2691                    HypercallCode::HvCallTranslateVirtualAddressEx,
2692                    &header,
2693                    &mut output,
2694                )
2695                .expect("translate can never fail")
2696        };
2697
2698        status
2699            .result()
2700            .map_err(|hv_error| TranslateGvaToGpaError::Hypervisor { gva, hv_error })?;
2701
2702        // Note: WHP doesn't currently support TranslateVirtualAddressEx, so overlay_page, cache_type,
2703        // event_info aren't trustworthy values if the results came from WHP.
2704        match output.translation_result.result.result_code() {
2705            c if c == hypercall::TranslateGvaResultCode::SUCCESS.0 => Ok(Ok(TranslateResult {
2706                gpa_page: output.gpa_page,
2707                overlay_page: output.translation_result.result.overlay_page(),
2708            })),
2709            x => Ok(Err(aarch64::TranslateErrorAarch64 { code: x })),
2710        }
2711    }
2712
2713    fn to_hv_gpa_range_array(gpa_memory_ranges: &[MemoryRange]) -> Vec<HvGpaRange> {
2714        const PAGES_PER_ENTRY: u64 = 2048;
2715        const PAGE_SIZE: u64 = HV_PAGE_SIZE;
2716
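        // Each `HvGpaRange` entry can describe up to PAGES_PER_ENTRY contiguous
        // pages; a hypothetical 10 MiB range, for instance, spans 2560 4 KiB
        // pages and is encoded as ceil(2560 / 2048) = 2 entries (2048 + 512 pages).
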
2717        // Estimate the total number of pages across all memory ranges
2718        let estimated_size: usize = gpa_memory_ranges
2719            .iter()
2720            .map(|memory_range| {
2721                let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2722                total_pages.div_ceil(PAGES_PER_ENTRY)
2723            })
2724            .sum::<u64>() as usize;
2725
2726        // Create a vector with the estimated size
2727        let mut hv_gpa_ranges = Vec::with_capacity(estimated_size);
2728
2729        for memory_range in gpa_memory_ranges {
2730            // Calculate the total number of pages in the memory range
2731            let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2732
2733            // Convert start address to page number
2734            let start_page = memory_range.start_4k_gpn();
2735
2736            // Generate the ranges and append them to the vector
2737            hv_gpa_ranges.extend(
2738                (0..total_pages)
2739                    .step_by(PAGES_PER_ENTRY as usize)
2740                    .map(|start| {
2741                        let end = std::cmp::min(total_pages, start + PAGES_PER_ENTRY);
2742                        let pages_in_this_range = end - start;
2743                        let gpa_page_number = start_page + start;
2744
2745                        let extended = HvGpaRangeExtended::new()
2746                            .with_additional_pages(pages_in_this_range - 1)
2747                            .with_large_page(false) // Assuming not a large page
2748                            .with_gpa_page_number(gpa_page_number);
2749
2750                        HvGpaRange(extended.into_bits())
2751                    }),
2752            );
2753        }
2754
2755        hv_gpa_ranges // Return the vector at the end
2756    }
2757
2758    fn pin_unpin_gpa_ranges_internal(
2759        &self,
2760        gpa_ranges: &[HvGpaRange],
2761        action: GpaPinUnpinAction,
2762    ) -> Result<(), PinUnpinError> {
2763        const PIN_REQUEST_HEADER_SIZE: usize =
2764            size_of::<hvdef::hypercall::PinUnpinGpaPageRangesHeader>();
2765        const MAX_INPUT_ELEMENTS: usize =
2766            (HV_PAGE_SIZE as usize - PIN_REQUEST_HEADER_SIZE) / size_of::<u64>();
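        // For instance, if the header occupies 8 bytes, this works out to
        // (4096 - 8) / 8 = 511 range entries per hypercall input page.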
2767
2768        let header = hvdef::hypercall::PinUnpinGpaPageRangesHeader { reserved: 0 };
2769        let mut ranges_processed = 0;
2770
2771        for chunk in gpa_ranges.chunks(MAX_INPUT_ELEMENTS) {
2772            // SAFETY: This unsafe block is valid because:
2773            // 1. The hypercall code and header match the expected input for the hypercall.
2774            //
2775            // 2. The hypercall result is checked right after the hypercall is issued.
2776            //
2777            let output = unsafe {
2778                self.mshv_hvcall
2779                    .hvcall_rep(
2780                        match action {
2781                            GpaPinUnpinAction::PinGpaRange => HypercallCode::HvCallPinGpaPageRanges,
2782                            GpaPinUnpinAction::UnpinGpaRange => {
2783                                HypercallCode::HvCallUnpinGpaPageRanges
2784                            }
2785                        },
2786                        &header,
2787                        HvcallRepInput::Elements(chunk),
2788                        None::<&mut [u8]>,
2789                    )
2790                    .expect("submitting pin/unpin hypercall should not fail")
2791            };
2792
2793            ranges_processed += output.elements_processed();
2794
2795            output.result().map_err(|e| PinUnpinError {
2796                ranges_processed,
2797                error: e,
2798            })?;
2799        }
2800
2801        // At the end, all the ranges should have been processed
2802        if ranges_processed == gpa_ranges.len() {
2803            Ok(())
2804        } else {
2805            Err(PinUnpinError {
2806                ranges_processed,
2807                error: HvError::OperationFailed,
2808            })
2809        }
2810    }
2811
2812    fn perform_pin_unpin_gpa_ranges(
2813        &self,
2814        gpa_ranges: &[MemoryRange],
2815        action: GpaPinUnpinAction,
2816        rollback_action: GpaPinUnpinAction,
2817    ) -> Result<(), HvError> {
2818        let hv_gpa_ranges: Vec<HvGpaRange> = Self::to_hv_gpa_range_array(gpa_ranges);
2819
2820        // Attempt to pin/unpin the ranges
2821        match self.pin_unpin_gpa_ranges_internal(&hv_gpa_ranges, action) {
2822            Ok(_) => Ok(()),
2823            Err(PinUnpinError {
2824                error,
2825                ranges_processed,
2826            }) => {
2827                // Roll back the ranges that were successfully processed
2828                let pinned_ranges = &hv_gpa_ranges[..ranges_processed];
2829                if let Err(rollback_error) =
2830                    self.pin_unpin_gpa_ranges_internal(pinned_ranges, rollback_action)
2831                {
2832                    // Panic if the rollback fails
2833                    panic!(
2834                        "Failed to perform action {:?} on ranges. Error: {:?}. \
2835                        Attempted to roll back {:?} ranges out of {:?}. Rollback error: {:?}",
2836                        action,
2837                        error,
2838                        ranges_processed,
2839                        gpa_ranges.len(),
2840                        rollback_error
2841                    );
2842                }
2843                // Surface the original error
2844                Err(error)
2845            }
2846        }
2847    }
2848
2849    /// Pins the specified guest physical address ranges in the hypervisor.
2850    /// The memory ranges passed to this function must be VA backed memory.
2851    /// If a partial failure occurs (i.e., some but not all the ranges were successfully pinned),
2852    /// the function will automatically attempt to unpin any successfully pinned ranges.
2853    /// This "rollback" behavior ensures that no partially pinned state remains, which
2854    /// could otherwise lead to inconsistencies.
2855    ///
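    /// A minimal usage sketch (the address range is illustrative; real callers
    /// pass ranges that are known to be VA-backed):
    ///
    /// ```ignore
    /// let ranges = [MemoryRange::new(0x1000_0000..0x1010_0000)];
    /// hcl.pin_gpa_ranges(&ranges)?;
    /// // ... perform the device DMA that required the pages to stay resident ...
    /// hcl.unpin_gpa_ranges(&ranges)?;
    /// ```
    ///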
2856    pub fn pin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2857        self.perform_pin_unpin_gpa_ranges(
2858            ranges,
2859            GpaPinUnpinAction::PinGpaRange,
2860            GpaPinUnpinAction::UnpinGpaRange,
2861        )
2862    }
2863
2864    /// Unpins the specified guest physical address ranges in the hypervisor.
2865    /// The memory ranges passed to this function must be VA-backed memory.
2866    /// If a partial failure occurs (i.e., some but not all of the ranges were successfully unpinned),
2867    /// the function will automatically attempt to pin any successfully unpinned ranges. This "rollback"
2868    /// behavior ensures that no partially unpinned state remains, which could otherwise lead to inconsistencies.
2869    ///
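    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); assumes `pinned_ranges` holds
    /// ranges that were previously pinned with [`Self::pin_gpa_ranges`]:
    ///
    /// ```ignore
    /// hcl.unpin_gpa_ranges(&pinned_ranges)?;
    /// ```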
2870    pub fn unpin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2871        self.perform_pin_unpin_gpa_ranges(
2872            ranges,
2873            GpaPinUnpinAction::UnpinGpaRange,
2874            GpaPinUnpinAction::PinGpaRange,
2875        )
2876    }
2877
2878    /// Read the VSM capabilities register for VTL2.
2879    pub fn get_vsm_capabilities(&self) -> Result<hvdef::HvRegisterVsmCapabilities, Error> {
2880        let caps = hvdef::HvRegisterVsmCapabilities::from(
2881            self.get_vp_register(
2882                HvAllArchRegisterName::VsmCapabilities,
2883                HvInputVtl::CURRENT_VTL,
2884            )?
2885            .as_u64(),
2886        );
2887
2888        let caps = match self.isolation {
2889            IsolationType::None | IsolationType::Vbs => caps,
2890            // TODO SNP: Return actions may be useful, but with alternate injection many of these
2891            // cannot actually be processed by the hypervisor without returning to VTL2.
2892            // Filter them out for now.
2893            IsolationType::Snp => hvdef::HvRegisterVsmCapabilities::new()
2894                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2895                .with_intercept_page_available(caps.intercept_page_available()),
2896            IsolationType::Tdx => hvdef::HvRegisterVsmCapabilities::new()
2897                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2898                .with_intercept_page_available(caps.intercept_page_available())
2899                .with_dr6_shared(true),
2900        };
2901
2902        assert_eq!(caps.dr6_shared(), self.dr6_shared());
2903
2904        Ok(caps)
2905    }
2906
2907    /// Set the [`hvdef::HvRegisterVsmPartitionConfig`] register.
2908    pub fn set_vtl2_vsm_partition_config(
2909        &self,
2910        vsm_config: HvRegisterVsmPartitionConfig,
2911    ) -> Result<(), SetVsmPartitionConfigError> {
2912        self.set_vp_register(
2913            HvAllArchRegisterName::VsmPartitionConfig.into(),
2914            HvRegisterValue::from(u64::from(vsm_config)),
2915            HvInputVtl::CURRENT_VTL,
2916        )
2917        .map_err(|e| SetVsmPartitionConfigError::Hypervisor {
2918            config: vsm_config,
2919            hv_error: e,
2920        })
2921    }
2922
2923    /// Get the [`hvdef::HvRegisterGuestVsmPartitionConfig`] register.
2924    pub fn get_guest_vsm_partition_config(
2925        &self,
2926    ) -> Result<hvdef::HvRegisterGuestVsmPartitionConfig, Error> {
2927        Ok(hvdef::HvRegisterGuestVsmPartitionConfig::from(
2928            self.get_vp_register(
2929                HvAllArchRegisterName::GuestVsmPartitionConfig,
2930                HvInputVtl::CURRENT_VTL,
2931            )?
2932            .as_u64(),
2933        ))
2934    }
2935
2936    /// Configure guest VSM.
2937    /// The only configuration attribute currently supported is changing the maximum
2938    /// guest-visible virtual trust level for the partition (i.e., enabling VTL 1).
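    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); raises the maximum guest-visible
    /// VTL to 1 on a partition that is not hardware isolated:
    ///
    /// ```ignore
    /// hcl.set_guest_vsm_partition_config(true)?;
    /// ```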
2939    pub fn set_guest_vsm_partition_config(
2940        &self,
2941        enable_guest_vsm: bool,
2942    ) -> Result<(), SetGuestVsmConfigError> {
2943        let register_value = hvdef::HvRegisterGuestVsmPartitionConfig::new()
2944            .with_maximum_vtl(if enable_guest_vsm { 1 } else { 0 })
2945            .with_reserved(0);
2946
2947        tracing::trace!(enable_guest_vsm, "set_guest_vsm_partition_config");
2948        if self.isolation.is_hardware_isolated() {
2949            unimplemented!("set_guest_vsm_partition_config");
2950        }
2951
2952        self.set_vp_register(
2953            HvAllArchRegisterName::GuestVsmPartitionConfig.into(),
2954            HvRegisterValue::from(u64::from(register_value)),
2955            HvInputVtl::CURRENT_VTL,
2956        )
2957        .map_err(|e| SetGuestVsmConfigError::Hypervisor {
2958            enable_guest_vsm,
2959            hv_error: e,
2960        })
2961    }
2962
2963    /// Sets the Power Management Timer assist in the hypervisor.
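    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); the port value is illustrative
    /// and would normally be the PM timer I/O port advertised to the guest:
    ///
    /// ```ignore
    /// hcl.set_pm_timer_assist(Some(0x408))?; // enable the assist on this port
    /// hcl.set_pm_timer_assist(None)?;        // disable it again
    /// ```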
2964    #[cfg(guest_arch = "x86_64")]
2965    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
2966        tracing::debug!(?port, "set_pm_timer_assist");
2967        if self.isolation.is_hardware_isolated() {
2968            if port.is_some() {
2969                unimplemented!("set_pm_timer_assist");
2970            }
2971        }
2972
2973        let val = HvRegisterValue::from(u64::from(match port {
2974            Some(p) => hvdef::HvPmTimerInfo::new()
2975                .with_port(p)
2976                .with_enabled(true)
2977                .with_width_24(false),
2978            None => 0.into(),
2979        }));
2980
2981        self.set_vp_register(
2982            HvX64RegisterName::PmTimerAssist.into(),
2983            val,
2984            HvInputVtl::CURRENT_VTL,
2985        )
2986    }
2987
2988    /// Sets the Power Management Timer assist in the hypervisor.
2989    #[cfg(guest_arch = "aarch64")]
2990    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
2991        tracing::debug!(?port, "set_pm_timer_assist unimplemented on aarch64");
2992        Err(HvError::UnknownRegisterName)
2993    }
2994
2995    /// Sets the VTL protection mask for the specified memory range.
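    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); `range` and `map_flags` are
    /// assumed to be built elsewhere (`map_flags` from [`HvMapGpaFlags`]), and the
    /// call targets VTL 0:
    ///
    /// ```ignore
    /// hcl.modify_vtl_protection_mask(range, map_flags, GuestVtl::Vtl0.into())?;
    /// ```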
2996    pub fn modify_vtl_protection_mask(
2997        &self,
2998        range: MemoryRange,
2999        map_flags: HvMapGpaFlags,
3000        target_vtl: HvInputVtl,
3001    ) -> Result<(), ApplyVtlProtectionsError> {
3002        if self.isolation.is_hardware_isolated() {
3003            // TODO SNP TODO TDX - required for vmbus relay monitor page support
3004            todo!();
3005        }
3006
3007        self.mshv_hvcall
3008            .modify_vtl_protection_mask(range, map_flags, target_vtl)
3009    }
3010
3011    /// Checks whether the target VTL has the given access permissions for the specified GPA.
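    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); checks whether VTL 0 still holds
    /// the permissions in `flags` (an [`HvMapGpaFlags`] value built elsewhere) for
    /// `gpa`:
    ///
    /// ```ignore
    /// match hcl.check_vtl_access(gpa, GuestVtl::Vtl0, flags)? {
    ///     None => { /* the requested access is allowed */ }
    ///     Some(denied) => {
    ///         // `denied.vtl` is the intercepting VTL and `denied.denied_flags`
    ///         // lists the permissions that were refused.
    ///     }
    /// }
    /// ```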
3012    pub fn check_vtl_access(
3013        &self,
3014        gpa: u64,
3015        target_vtl: GuestVtl,
3016        flags: HvMapGpaFlags,
3017    ) -> Result<Option<CheckVtlAccessResult>, Error> {
3018        assert!(!self.isolation.is_hardware_isolated());
3019
3020        let header = hvdef::hypercall::CheckSparseGpaPageVtlAccess {
3021            partition_id: HV_PARTITION_ID_SELF,
3022            target_vtl: HvInputVtl::from(target_vtl),
3023            desired_access: u32::from(flags) as u8,
3024            reserved0: 0,
3025            reserved1: 0,
3026        };
3027
3028        let mut output = [hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput::new()];
3029
3030        // SAFETY: The input header and rep slice are the correct types for this hypercall.
3031        //         The hypercall output is validated right after the hypercall is issued.
3032        let status = unsafe {
3033            self.mshv_hvcall.hvcall_rep::<hvdef::hypercall::CheckSparseGpaPageVtlAccess, u64, hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput>(
3034                HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
3035                &header,
3036                HvcallRepInput::Elements(&[gpa >> hvdef::HV_PAGE_SHIFT]),
3037                Some(&mut output),
3038            )
3039            .expect("check_vtl_access hypercall should not fail")
3040        };
3041
3042        status.result().map_err(Error::CheckVtlAccess)?;
3043
3044        let access_result = output[0];
3045
3046        if access_result.result_code() as u32
3047            != hvdef::hypercall::CheckGpaPageVtlAccessResultCode::SUCCESS.0
3048        {
3049            return Ok(Some(CheckVtlAccessResult {
3050                vtl: (access_result.intercepting_vtl() as u8)
3051                    .try_into()
3052                    .expect("checking vtl permissions failure should return valid vtl"),
3053                denied_flags: (access_result.denied_access() as u32).into(),
3054            }));
3055        }
3056
3057        assert_eq!(status.elements_processed(), 1);
3058        Ok(None)
3059    }
3060
3061    /// Enables a VTL for the partition.
3062    pub fn enable_partition_vtl(
3063        &self,
3064        vtl: GuestVtl,
3065        flags: hvdef::hypercall::EnablePartitionVtlFlags,
3066    ) -> Result<(), HvError> {
3067        use hvdef::hypercall;
3068
3069        let header = hypercall::EnablePartitionVtl {
3070            partition_id: HV_PARTITION_ID_SELF,
3071            target_vtl: vtl.into(),
3072            flags,
3073            reserved_z0: 0,
3074            reserved_z1: 0,
3075        };
3076
3077        // SAFETY: The input header and slice are the correct types for this hypercall.
3078        //         The hypercall output is validated right after the hypercall is issued.
3079        let status = unsafe {
3080            self.mshv_hvcall
3081                .hvcall(HypercallCode::HvCallEnablePartitionVtl, &header, &mut ())
3082                .expect("submitting hypercall should not fail")
3083        };
3084
3085        status.result()
3086    }
3087
3088    /// Enables a VTL on a VP.
3089    pub fn enable_vp_vtl(
3090        &self,
3091        vp_index: u32,
3092        vtl: GuestVtl,
3093        hv_vp_context: InitialVpContextX64,
3094    ) -> Result<(), HvError> {
3095        use hvdef::hypercall;
3096
3097        let header = hypercall::EnableVpVtlX64 {
3098            partition_id: HV_PARTITION_ID_SELF,
3099            vp_index,
3100            target_vtl: vtl.into(),
3101            reserved: [0; 3],
3102            vp_vtl_context: hv_vp_context,
3103        };
3104
3105        // SAFETY: The input header and slice are the correct types for this hypercall.
3106        //         The hypercall output is validated right after the hypercall is issued.
3107        let status = unsafe {
3108            self.mshv_hvcall
3109                .hvcall(HypercallCode::HvCallEnableVpVtl, &header, &mut ())
3110                .expect("submitting hypercall should not fail")
3111        };
3112
3113        status.result()
3114    }
3115
3116    /// Gets the PFN for the VTL 1 VMSA.
3117    pub fn vtl1_vmsa_pfn(&self, cpu_index: u32) -> u64 {
3118        let mut vp_pfn = cpu_index as u64; // input vp, output pfn
3119
3120        // SAFETY: The ioctl requires no prerequisites other than that the VTL 1
3121        // VMSA is mapped. This ioctl should never fail as long as the VTL 1
3122        // VMSA was mapped.
3123        unsafe {
3124            hcl_read_guest_vsm_page_pfn(self.mshv_vtl.file.as_raw_fd(), &mut vp_pfn)
3125                .expect("should always succeed");
3126        }
3127
3128        vp_pfn
3129    }
3130
3131    /// Returns the isolation type for the partition.
3132    pub fn isolation(&self) -> IsolationType {
3133        self.isolation
3134    }
3135
3136    /// Reads MSR_IA32_VMX_CR4_FIXED1 in kernel mode.
3137    pub fn read_vmx_cr4_fixed1(&self) -> u64 {
3138        let mut value = 0;
3139
3140        // SAFETY: The ioctl requires no prerequisites other than a location to
3141        // write the read MSR. This ioctl should never fail.
3142        unsafe {
3143            hcl_read_vmx_cr4_fixed1(self.mshv_vtl.file.as_raw_fd(), &mut value)
3144                .expect("should always succeed");
3145        }
3146
3147        value
3148    }
3149
3150    /// Invokes the HvCallRetargetDeviceInterrupt hypercall.
3151    /// `target_processors` must be sorted in ascending order.
3152    pub fn retarget_device_interrupt(
3153        &self,
3154        device_id: u64,
3155        entry: hvdef::hypercall::InterruptEntry,
3156        vector: u32,
3157        multicast: bool,
3158        target_processors: ProcessorSet<'_>,
3159    ) -> Result<(), HvError> {
3160        let header = hvdef::hypercall::RetargetDeviceInterrupt {
3161            partition_id: HV_PARTITION_ID_SELF,
3162            device_id,
3163            entry,
3164            rsvd: 0,
3165            target_header: hvdef::hypercall::InterruptTarget {
3166                vector,
3167                flags: hvdef::hypercall::HvInterruptTargetFlags::default()
3168                    .with_multicast(multicast)
3169                    .with_processor_set(true),
3170                // Always use a generic processor set to simplify construction. This hypercall is
3171                // invoked relatively infrequently, so the overhead should be acceptable.
3172                mask_or_format: hvdef::hypercall::HV_GENERIC_SET_SPARSE_4K,
3173            },
3174        };
3175        let processor_set = Vec::from_iter(target_processors.as_generic_set());
3176
3177        // SAFETY: The input header and slice are the correct types for this hypercall.
3178        //         The hypercall output is validated right after the hypercall is issued.
3179        let status = unsafe {
3180            self.mshv_hvcall
3181                .hvcall_var(
3182                    HypercallCode::HvCallRetargetDeviceInterrupt,
3183                    &header,
3184                    processor_set.as_bytes(),
3185                    &mut (),
3186                )
3187                .expect("submitting hypercall should not fail")
3188        };
3189
3190        status.result()
3191    }
3192
3193    /// Gets the permissions for a VTL.
3194    /// Currently unused, but available for debugging purposes.
3195    #[cfg(debug_assertions)]
3196    pub fn rmp_query(&self, gpa: u64, vtl: GuestVtl) -> x86defs::snp::SevRmpAdjust {
3197        use x86defs::snp::SevRmpAdjust;
3198
3199        let page_count = 1u64;
3200        let flags = [u64::from(SevRmpAdjust::new().with_target_vmpl(match vtl {
3201            GuestVtl::Vtl0 => 2,
3202            GuestVtl::Vtl1 => 1,
3203        }))];
3204        let page_size = [0u64];
3205        let pages_processed = 0;
3206
3207        debug_assert!(flags.len() == page_count as usize);
3208        debug_assert!(page_size.len() == page_count as usize);
3209
3210        let query = mshv_rmpquery {
3211            start_pfn: gpa / HV_PAGE_SIZE,
3212            page_count,
3213            terminate_on_failure: 0,
3214            ram: 0,
3215            padding: Default::default(),
3216            flags: flags.as_ptr().cast_mut(),
3217            page_size: page_size.as_ptr().cast_mut(),
3218            pages_processed: core::ptr::from_ref(&pages_processed).cast_mut(),
3219        };
3220
3221        // SAFETY: the input query is the correct type for this ioctl
3222        unsafe {
3223            hcl_rmpquery_pages(self.mshv_vtl.file.as_raw_fd(), &query)
3224                .expect("should always succeed");
3225        }
3226        debug_assert!(pages_processed <= page_count);
3227
3228        SevRmpAdjust::from(flags[0])
3229    }
3230
3231    /// Issues an INVLPGB instruction.
3232    pub fn invlpgb(&self, rax: u64, edx: u32, ecx: u32) {
3233        let data = mshv_invlpgb {
3234            rax,
3235            edx,
3236            ecx,
3237            _pad0: 0,
3238            _pad1: 0,
3239        };
3240        // SAFETY: ioctl has no prerequisites.
3241        unsafe {
3242            hcl_invlpgb(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3243        }
3244    }
3245
3246    /// Issues a TLBSYNC instruction.
3247    pub fn tlbsync(&self) {
3248        // SAFETY: ioctl has no prerequisites.
3249        unsafe {
3250            hcl_tlbsync(self.mshv_vtl.file.as_raw_fd()).expect("should always succeed");
3251        }
3252    }
3253
3254    /// Causes the specified CPUs to be woken out of a lower VTL.
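    ///
    /// # Example
    ///
    /// A minimal usage sketch (not compiled here); kicks VPs 2 and 5 out of the
    /// lower VTL, cancelling their current run without waiting for them to exit:
    ///
    /// ```ignore
    /// hcl.kick_cpus([2, 5], true, false);
    /// ```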
3255    pub fn kick_cpus(
3256        &self,
3257        cpus: impl IntoIterator<Item = u32>,
3258        cancel_run: bool,
3259        wait_for_other_cpus: bool,
3260    ) {
3261        let mut cpu_bitmap: BitVec<u8> = BitVec::from_vec(vec![0; self.vps.len().div_ceil(8)]);
3262        for cpu in cpus {
3263            cpu_bitmap.set(cpu as usize, true);
3264        }
3265
3266        let data = protocol::hcl_kick_cpus {
3267            len: cpu_bitmap.len() as u64,
3268            cpu_mask: cpu_bitmap.as_bitptr().pointer(),
3269            flags: protocol::hcl_kick_cpus_flags::new()
3270                .with_cancel_run(cancel_run)
3271                .with_wait_for_other_cpus(wait_for_other_cpus),
3272        };
3273
3274        // SAFETY: ioctl has no prerequisites.
3275        unsafe {
3276            hcl_kickcpus(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3277        }
3278    }
3279}