hcl/ioctl.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Interface to `mshv_vtl` driver.

mod deferred;

pub mod aarch64;
pub mod snp;
pub mod tdx;
pub mod x64;

use self::deferred::DeferredActionSlots;
use self::ioctls::*;
use crate::GuestVtl;
use crate::ioctl::deferred::DeferredAction;
use crate::mapped_page::MappedPage;
use crate::protocol;
use crate::protocol::EnterModes;
use crate::protocol::HCL_REG_PAGE_OFFSET;
use crate::protocol::HCL_VMSA_GUEST_VSM_PAGE_OFFSET;
use crate::protocol::HCL_VMSA_PAGE_OFFSET;
use crate::protocol::MSHV_APIC_PAGE_OFFSET;
use crate::protocol::hcl_intr_offload_flags;
use crate::protocol::hcl_run;
use bitvec::vec::BitVec;
use cvm_tracing::CVM_ALLOWED;
use deferred::RegisteredDeferredActions;
use deferred::push_deferred_action;
use deferred::register_deferred_actions;
use hv1_structs::ProcessorSet;
use hv1_structs::VtlArray;
use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PARTITION_ID_SELF;
use hvdef::HV_VP_INDEX_SELF;
use hvdef::HvAarch64RegisterPage;
use hvdef::HvAllArchRegisterName;
#[cfg(guest_arch = "aarch64")]
use hvdef::HvArm64RegisterName;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvMessage;
use hvdef::HvRegisterName;
use hvdef::HvRegisterValue;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvStatus;
use hvdef::HvX64RegisterName;
use hvdef::HvX64RegisterPage;
use hvdef::HypercallCode;
use hvdef::Vtl;
use hvdef::hypercall::AssertVirtualInterrupt;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGpaRange;
use hvdef::hypercall::HvGpaRangeExtended;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use hvdef::hypercall::HvRegisterAssoc;
use hvdef::hypercall::HypercallOutput;
use hvdef::hypercall::InitialVpContextX64;
use hvdef::hypercall::ModifyHostVisibility;
use memory_range::MemoryRange;
use pal::unix::pthread::*;
use parking_lot::Mutex;
use private::BackingPrivate;
use sidecar_client::NewSidecarClientError;
use sidecar_client::SidecarClient;
use sidecar_client::SidecarRun;
use sidecar_client::SidecarVp;
use std::cell::UnsafeCell;
use std::fmt::Debug;
use std::fs::File;
use std::io;
use std::os::unix::prelude::*;
use std::sync::Arc;
use std::sync::Once;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use thiserror::Error;
use user_driver::DmaClient;
use user_driver::memory::MemoryBlock;
use x86defs::snp::SevVmsa;
use x86defs::tdx::TdCallResultCode;
use x86defs::vmx::ApicPage;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

// TODO: Chunk this up into smaller per-interface errors.
/// Error returned by HCL operations.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("failed to open mshv device")]
    OpenMshv(#[source] io::Error),
    #[error("failed to open hvcall device")]
    OpenHvcall(#[source] io::Error),
    #[error("failed to open lower VTL memory device")]
    OpenGpa(#[source] io::Error),
    #[error("ReturnToLowerVtl")]
    ReturnToLowerVtl(#[source] nix::Error),
    #[error("AddVtl0Memory")]
    AddVtl0Memory(#[source] nix::Error),
    #[error("hcl_set_vp_register")]
    SetVpRegister(#[source] nix::Error),
    #[error("hcl_get_vp_register")]
    GetVpRegister(#[source] nix::Error),
    #[error("failed to get VP register {reg:#x?} from hypercall")]
    GetVpRegisterHypercall {
        #[cfg(guest_arch = "x86_64")]
        reg: HvX64RegisterName,
        #[cfg(guest_arch = "aarch64")]
        reg: HvArm64RegisterName,
        #[source]
        err: HvError,
    },
    #[error("hcl_request_interrupt")]
    RequestInterrupt(#[source] HvError),
    #[error("hcl_cancel_vp failed")]
    CancelVp(#[source] nix::Error),
    #[error("failed to signal event")]
    SignalEvent(#[source] HvError),
    #[error("failed to post message")]
    PostMessage(#[source] HvError),
    #[error("failed to mmap the vp context {:?}", .1.map(|vtl| format!("for VTL {:?}", vtl)).unwrap_or("".to_string()))]
    MmapVp(#[source] io::Error, Option<Vtl>),
    #[error("failed to set the poll file")]
    SetPollFile(#[source] nix::Error),
    #[error("failed to check hcl capabilities")]
    CheckExtensions(#[source] nix::Error),
    #[error("failed to mmap the register page")]
    MmapRegPage(#[source] io::Error),
    #[error("invalid num signal events")]
    NumSignalEvent(#[source] io::Error),
    #[error("failed to create vtl")]
    CreateVTL(#[source] nix::Error),
    #[error("gpa failed vtl access check")]
    CheckVtlAccess(#[source] HvError),
    #[error("failed to set registers using set_vp_registers hypercall")]
    SetRegisters(#[source] HvError),
    #[error("Unknown register name: {0:x}")]
    UnknownRegisterName(u32),
    #[error("Invalid register value")]
    InvalidRegisterValue,
    #[error("failed to set host visibility")]
    SetHostVisibility(#[source] nix::Error),
    #[error("failed to allocate host overlay page")]
    HostOverlayPageExhausted,
    #[error("sidecar error")]
    Sidecar(#[source] sidecar_client::SidecarError),
    #[error("failed to open sidecar")]
    OpenSidecar(#[source] NewSidecarClientError),
    #[error(
        "mismatch between requested isolation type {requested:?} and supported isolation type {supported:?}"
    )]
    MismatchedIsolation {
        supported: IsolationType,
        requested: IsolationType,
    },
    #[error("private page pool allocator missing, required for requested isolation type")]
    MissingPrivateMemory,
    #[error("failed to allocate pages for vp")]
    AllocVp(#[source] anyhow::Error),
}

/// Error for IOCTL errors specifically.
#[derive(Debug, Error)]
#[error("hcl request failed")]
pub struct IoctlError(#[source] pub(crate) nix::Error);

/// Error returned when issuing hypercalls.
#[derive(Debug, Error)]
#[expect(missing_docs)]
pub enum HypercallError {
    #[error("hypercall failed with {0:?}")]
    Hypervisor(HvError),
    #[error("ioctl failed")]
    Ioctl(#[source] IoctlError),
}

impl HypercallError {
    pub(crate) fn check(r: Result<i32, nix::Error>) -> Result<(), Self> {
        match r {
            Ok(n) => HvStatus(n.try_into().expect("hypervisor result out of range"))
                .result()
                .map_err(Self::Hypervisor),
            Err(err) => Err(Self::Ioctl(IoctlError(err))),
        }
    }
}

/// Errors when issuing hypercalls via the kernel direct interface.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum HvcallError {
    #[error(
        "kernel rejected the hypercall, most likely due to the hypercall code not being allowed via set_allowed_hypercalls"
    )]
    HypercallIoctlFailed(#[source] nix::Error),
    #[error("input parameters are larger than a page")]
    InputParametersTooLarge,
    #[error("output parameters are larger than a page")]
    OutputParametersTooLarge,
    #[error("output and input list lengths do not match")]
    InputOutputRepListMismatch,
}

/// Error applying VTL protections.
// TODO: move to `underhill_mem`.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum ApplyVtlProtectionsError {
    #[error("hypervisor failed with {output:?} when protecting pages {range} for vtl {vtl:?}")]
    Hypervisor {
        range: MemoryRange,
        output: HypercallOutput,
        #[source]
        hv_error: HvError,
        vtl: HvInputVtl,
    },
    #[error("snp failure to protect pages {range} with {permissions:x?} for vtl {vtl:?}")]
    Snp {
        #[source]
        failed_operation: snp::SnpPageError,
        range: MemoryRange,
        permissions: x86defs::snp::SevRmpAdjust,
        vtl: HvInputVtl,
    },
    #[error(
        "tdcall failed with {error:?} when protecting pages {range} with permissions {permissions:x?} for vtl {vtl:?}"
    )]
    Tdx {
        error: TdCallResultCode,
        range: MemoryRange,
        permissions: x86defs::tdx::TdgMemPageGpaAttr,
        vtl: HvInputVtl,
    },
    #[error("no valid protections for vtl {0:?}")]
    InvalidVtl(Vtl),
}

/// Error setting guest VSM configuration.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum SetGuestVsmConfigError {
    #[error("hypervisor failed to configure guest vsm to {enable_guest_vsm}")]
    Hypervisor {
        enable_guest_vsm: bool,
        #[source]
        hv_error: HvError,
    },
}

/// Error getting the VP index from an APIC ID.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum GetVpIndexFromApicIdError {
    #[error("hypervisor failed when querying vp index for {apic_id}")]
    Hypervisor {
        #[source]
        hv_error: HvError,
        apic_id: u32,
    },
}

/// Error setting VSM partition configuration.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum SetVsmPartitionConfigError {
    #[error("hypervisor failed when configuring vsm partition config {config:?}")]
    Hypervisor {
        config: HvRegisterVsmPartitionConfig,
        #[source]
        hv_error: HvError,
    },
}

/// Error translating a GVA to a GPA.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum TranslateGvaToGpaError {
    #[error("hypervisor failed when translating gva {gva:#x}")]
    Hypervisor {
        gva: u64,
        #[source]
        hv_error: HvError,
    },
    #[error("sidecar kernel failed when translating gva {gva:#x}")]
    Sidecar {
        gva: u64,
        #[source]
        error: sidecar_client::SidecarError,
    },
}

/// Result from [`Hcl::check_vtl_access`] if vtl permissions were violated
#[derive(Debug)]
pub struct CheckVtlAccessResult {
    /// The intercepting VTL.
    pub vtl: Vtl,
    /// The flags that were denied.
    pub denied_flags: HvMapGpaFlags,
}

/// Error accepting pages.
// TODO: move to `underhill_mem`.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum AcceptPagesError {
    #[error("hypervisor failed to accept pages {range} with {output:?}")]
    Hypervisor {
        range: MemoryRange,
        output: HypercallOutput,
        #[source]
        hv_error: HvError,
    },
    #[error("snp failure to protect pages {range}")]
    Snp {
        #[source]
        failed_operation: snp::SnpPageError,
        range: MemoryRange,
    },
    #[error("tdcall failure when accepting pages {range}")]
    Tdx {
        #[source]
        error: tdcall::AcceptPagesError,
        range: MemoryRange,
    },
}

// Action translation (to HVCALL) for pin/unpin GPA range.
#[derive(Debug, Copy, Clone)]
enum GpaPinUnpinAction {
    PinGpaRange,
    UnpinGpaRange,
}

/// Error pinning a GPA.
#[derive(Error, Debug)]
#[error("partial success: {ranges_processed} operations succeeded, but encountered an error")]
struct PinUnpinError {
    ranges_processed: usize,
    #[source]
    error: HvError,
}

/// Result of translate gva hypercall from [`Hcl`]
pub struct TranslateResult {
    /// The GPA that the GVA translated to.
    pub gpa_page: u64,
    /// Whether the page was an overlay page.
    pub overlay_page: bool, // Note: hardcoded to false on WHP
}

/// Possible types for rep hypercalls
enum HvcallRepInput<'a, T> {
    /// The actual elements to rep over
    Elements(&'a [T]),
    /// The elements for the rep are implied and only a count is needed
    Count(u16),
}

mod ioctls {
    #![allow(non_camel_case_types)]

    use crate::protocol;
    use hvdef::hypercall::HvRegisterAssoc;
    use nix::ioctl_none;
    use nix::ioctl_read;
    use nix::ioctl_readwrite;
    use nix::ioctl_write_ptr;

    // The unsafe interface to the `mshv` kernel module comprises
    // the following IOCTLs.
    const MSHV_IOCTL: u8 = 0xb8;
    const MSHV_VTL_RETURN_TO_LOWER_VTL: u16 = 0x27;
    const MSHV_SET_VP_REGISTERS: u16 = 0x6;
    const MSHV_GET_VP_REGISTERS: u16 = 0x5;
    const MSHV_HVCALL_SETUP: u16 = 0x1E;
    const MSHV_HVCALL: u16 = 0x1F;
    const MSHV_VTL_ADD_VTL0_MEMORY: u16 = 0x21;
    const MSHV_VTL_SET_POLL_FILE: u16 = 0x25;
    const MSHV_CREATE_VTL: u16 = 0x1D;
    const MSHV_CHECK_EXTENSION: u16 = 0x00;
    const MSHV_VTL_PVALIDATE: u16 = 0x28;
    const MSHV_VTL_RMPADJUST: u16 = 0x29;
    const MSHV_VTL_TDCALL: u16 = 0x32;
    const MSHV_VTL_READ_VMX_CR4_FIXED1: u16 = 0x33;
    const MSHV_VTL_GUEST_VSM_VMSA_PFN: u16 = 0x34;
    const MSHV_VTL_RMPQUERY: u16 = 0x35;
    const MSHV_INVLPGB: u16 = 0x36;
    const MSHV_TLBSYNC: u16 = 0x37;
    const MSHV_KICKCPUS: u16 = 0x38;

    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct mshv_vp_registers {
        pub count: ::std::os::raw::c_int,
        pub regs: *mut HvRegisterAssoc,
    }

    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_pvalidate {
        /// Execute the pvalidate instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub validate: ::std::os::raw::c_uchar,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 1],
    }

    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_rmpadjust {
        /// Execute the rmpadjust instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub value: ::std::os::raw::c_ulonglong,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 6],
    }

    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_rmpquery {
        /// Execute the rmpquery instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 6],
        /// Output array for the flags, must have at least `page_count` entries.
        pub flags: *mut ::std::os::raw::c_ulonglong,
        /// Output array for the page sizes, must have at least `page_count` entries.
        pub page_size: *mut ::std::os::raw::c_ulonglong,
        /// Output for the amount of pages processed, a scalar.
        pub pages_processed: *mut ::std::os::raw::c_ulonglong,
    }

    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_tdcall {
        pub rax: u64, // Call code and returned status
        pub rcx: u64,
        pub rdx: u64,
        pub r8: u64,
        pub r9: u64,
        pub r10_out: u64, // only supported as output
        pub r11_out: u64, // only supported as output
    }

    ioctl_none!(
        /// Relinquish the processor to VTL0.
        hcl_return_to_lower_vtl,
        MSHV_IOCTL,
        MSHV_VTL_RETURN_TO_LOWER_VTL
    );

    ioctl_write_ptr!(
        /// Set a VTL0 register for the current processor of the current
        /// partition.
        /// It is not allowed to set registers for other processors or
        /// other partitions, for security and coherency reasons.
        hcl_set_vp_register,
        MSHV_IOCTL,
        MSHV_SET_VP_REGISTERS,
        mshv_vp_registers
    );

    ioctl_readwrite!(
        /// Get a VTL0 register for the current processor of the current
        /// partition.
        /// It is not allowed to get registers of other processors or
        /// other partitions, for security and coherency reasons.
        hcl_get_vp_register,
        MSHV_IOCTL,
        MSHV_GET_VP_REGISTERS,
        mshv_vp_registers
    );

    ioctl_write_ptr!(
        /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support
        /// DMA from the guest.
        hcl_add_vtl0_memory,
        MSHV_IOCTL,
        MSHV_VTL_ADD_VTL0_MEMORY,
        protocol::hcl_pfn_range_t
    );

    ioctl_write_ptr!(
        /// Sets the file to be polled while running a VP in VTL0. If the file
        /// becomes readable, then the VP run will be cancelled.
        hcl_set_poll_file,
        MSHV_IOCTL,
        MSHV_VTL_SET_POLL_FILE,
        protocol::hcl_set_poll_file
    );

    ioctl_write_ptr!(
        /// Sets up the hypercall allow map. Allowed once
        /// per fd.
        hcl_hvcall_setup,
        MSHV_IOCTL,
        MSHV_HVCALL_SETUP,
        protocol::hcl_hvcall_setup
    );

    ioctl_readwrite!(
        /// Performs a hypercall from user mode.
        hcl_hvcall,
        MSHV_IOCTL,
        MSHV_HVCALL,
        protocol::hcl_hvcall
    );

    ioctl_write_ptr!(
        /// Executes the pvalidate instruction on a page range.
        hcl_pvalidate_pages,
        MSHV_IOCTL,
        MSHV_VTL_PVALIDATE,
        mshv_pvalidate
    );

    ioctl_write_ptr!(
        /// Executes the rmpadjust instruction on a page range.
        hcl_rmpadjust_pages,
        MSHV_IOCTL,
        MSHV_VTL_RMPADJUST,
        mshv_rmpadjust
    );

    ioctl_write_ptr!(
        /// Executes the rmpquery instruction on a page range.
        hcl_rmpquery_pages,
        MSHV_IOCTL,
        MSHV_VTL_RMPQUERY,
        mshv_rmpquery
    );

    ioctl_readwrite!(
        /// Executes a tdcall.
        hcl_tdcall,
        MSHV_IOCTL,
        MSHV_VTL_TDCALL,
        mshv_tdcall
    );

    ioctl_read!(
        hcl_read_vmx_cr4_fixed1,
        MSHV_IOCTL,
        MSHV_VTL_READ_VMX_CR4_FIXED1,
        u64
    );

    ioctl_readwrite!(
        hcl_read_guest_vsm_page_pfn,
        MSHV_IOCTL,
        MSHV_VTL_GUEST_VSM_VMSA_PFN,
        u64
    );

    pub const HCL_CAP_REGISTER_PAGE: u32 = 1;
    pub const HCL_CAP_VTL_RETURN_ACTION: u32 = 2;
    pub const HCL_CAP_DR6_SHARED: u32 = 3;

    ioctl_write_ptr!(
        /// Check for the presence of an extension capability.
        hcl_check_extension,
        MSHV_IOCTL,
        MSHV_CHECK_EXTENSION,
        u32
    );

    ioctl_read!(mshv_create_vtl, MSHV_IOCTL, MSHV_CREATE_VTL, u8);

    #[repr(C)]
    pub struct mshv_invlpgb {
        pub rax: u64,
        pub _pad0: u32,
        pub edx: u32,
        pub _pad1: u32,
        pub ecx: u32,
    }

    ioctl_write_ptr!(
        /// Issue an INVLPGB instruction.
        hcl_invlpgb,
        MSHV_IOCTL,
        MSHV_INVLPGB,
        mshv_invlpgb
    );

    ioctl_none!(
        /// Issue a TLBSYNC instruction.
        hcl_tlbsync,
        MSHV_IOCTL,
        MSHV_TLBSYNC
    );

    ioctl_write_ptr!(
        /// Kick CPUs.
        hcl_kickcpus,
        MSHV_IOCTL,
        MSHV_KICKCPUS,
        protocol::hcl_kick_cpus
    );
}

/// The `/dev/mshv_vtl_low` device for accessing VTL0 memory.
pub struct MshvVtlLow {
    file: File,
}

impl MshvVtlLow {
    /// Opens the device.
    pub fn new() -> Result<Self, Error> {
        let file = fs_err::OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/mshv_vtl_low")
            .map_err(Error::OpenGpa)?;

        Ok(Self { file: file.into() })
    }

    /// Gets the device file.
    pub fn get(&self) -> &File {
        &self.file
    }

    /// The flag to set in the file offset to map guest memory as shared instead
    /// of private.
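    ///
    /// A minimal sketch of combining this flag with a guest physical address
    /// in the mmap file offset (the mapping flow and the example GPA below are
    /// illustrative assumptions, not taken from this crate):
    /// ```ignore
    /// use std::os::unix::prelude::*;
    ///
    /// let dev = MshvVtlLow::new()?;
    /// let gpa: u64 = 0x1_0000;
    /// // Map one page of guest memory as shared by setting bit 63 of the offset.
    /// let mapping = unsafe {
    ///     libc::mmap(
    ///         std::ptr::null_mut(),
    ///         4096,
    ///         libc::PROT_READ | libc::PROT_WRITE,
    ///         libc::MAP_SHARED,
    ///         dev.get().as_raw_fd(),
    ///         (gpa | MshvVtlLow::SHARED_MEMORY_FLAG) as libc::off_t,
    ///     )
    /// };
    /// ```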
    pub const SHARED_MEMORY_FLAG: u64 = 1 << 63;
}

/// An open `/dev/mshv` device file.
pub struct Mshv {
    file: File,
}

impl Mshv {
    /// Opens the mshv device.
    pub fn new() -> Result<Self, Error> {
        let file = fs_err::OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/mshv")
            .map_err(Error::OpenMshv)?;

        Ok(Self { file: file.into() })
    }

    fn check_extension(&self, cap: u32) -> Result<bool, Error> {
        // SAFETY: calling IOCTL as documented, with no special requirements.
        let supported = unsafe {
            hcl_check_extension(self.file.as_raw_fd(), &cap).map_err(Error::CheckExtensions)?
        };
        Ok(supported != 0)
    }

    /// Opens an mshv_vtl device file.
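    ///
    /// A brief usage sketch (error handling elided; not taken from this crate):
    /// ```ignore
    /// let mshv = Mshv::new()?;
    /// let mshv_vtl = mshv.create_vtl()?;
    /// ```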
    pub fn create_vtl(&self) -> Result<MshvVtl, Error> {
        let cap = &mut 0_u8;
        // SAFETY: calling IOCTL as documented, with no special requirements.
        let supported =
            unsafe { mshv_create_vtl(self.file.as_raw_fd(), cap).map_err(Error::CreateVTL)? };
        // SAFETY: calling IOCTL as documented, with no special requirements.
        let vtl_file = unsafe { File::from_raw_fd(supported) };
        Ok(MshvVtl { file: vtl_file })
    }
}

/// An open mshv_vtl device file.
#[derive(Debug)]
pub struct MshvVtl {
    file: File,
}

impl MshvVtl {
    /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support DMA from the guest.
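    ///
    /// A hedged usage sketch (the range below is an arbitrary example):
    /// ```ignore
    /// // Register a 1 MiB private (non-shared) VTL0 range for DMA.
    /// mshv_vtl.add_vtl0_memory(MemoryRange::new(0x100000..0x200000), false)?;
    /// ```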
    pub fn add_vtl0_memory(&self, mem_range: MemoryRange, shared: bool) -> Result<(), Error> {
        let flags = if shared {
            MshvVtlLow::SHARED_MEMORY_FLAG / HV_PAGE_SIZE
        } else {
            0
        };
        let ram_disposition = protocol::hcl_pfn_range_t {
            start_pfn: mem_range.start_4k_gpn() | flags,
            last_pfn: mem_range.end_4k_gpn(),
        };

        // SAFETY: calling IOCTL as documented, with no special requirements.
        unsafe {
            hcl_add_vtl0_memory(self.file.as_raw_fd(), &ram_disposition)
                .map_err(Error::AddVtl0Memory)?;
        }

        Ok(())
    }
}

#[cfg(guest_arch = "x86_64")]
fn is_vtl_shared_mtrr(reg: HvX64RegisterName) -> bool {
    matches!(
        reg,
        HvX64RegisterName::MsrMtrrCap
            | HvX64RegisterName::MsrMtrrDefType
            | HvX64RegisterName::MsrMtrrPhysBase0
            | HvX64RegisterName::MsrMtrrPhysBase1
            | HvX64RegisterName::MsrMtrrPhysBase2
            | HvX64RegisterName::MsrMtrrPhysBase3
            | HvX64RegisterName::MsrMtrrPhysBase4
            | HvX64RegisterName::MsrMtrrPhysBase5
            | HvX64RegisterName::MsrMtrrPhysBase6
            | HvX64RegisterName::MsrMtrrPhysBase7
            | HvX64RegisterName::MsrMtrrPhysBase8
            | HvX64RegisterName::MsrMtrrPhysBase9
            | HvX64RegisterName::MsrMtrrPhysBaseA
            | HvX64RegisterName::MsrMtrrPhysBaseB
            | HvX64RegisterName::MsrMtrrPhysBaseC
            | HvX64RegisterName::MsrMtrrPhysBaseD
            | HvX64RegisterName::MsrMtrrPhysBaseE
            | HvX64RegisterName::MsrMtrrPhysBaseF
            | HvX64RegisterName::MsrMtrrPhysMask0
            | HvX64RegisterName::MsrMtrrPhysMask1
            | HvX64RegisterName::MsrMtrrPhysMask2
            | HvX64RegisterName::MsrMtrrPhysMask3
            | HvX64RegisterName::MsrMtrrPhysMask4
            | HvX64RegisterName::MsrMtrrPhysMask5
            | HvX64RegisterName::MsrMtrrPhysMask6
            | HvX64RegisterName::MsrMtrrPhysMask7
            | HvX64RegisterName::MsrMtrrPhysMask8
            | HvX64RegisterName::MsrMtrrPhysMask9
            | HvX64RegisterName::MsrMtrrPhysMaskA
            | HvX64RegisterName::MsrMtrrPhysMaskB
            | HvX64RegisterName::MsrMtrrPhysMaskC
            | HvX64RegisterName::MsrMtrrPhysMaskD
            | HvX64RegisterName::MsrMtrrPhysMaskE
            | HvX64RegisterName::MsrMtrrPhysMaskF
            | HvX64RegisterName::MsrMtrrFix64k00000
            | HvX64RegisterName::MsrMtrrFix16k80000
            | HvX64RegisterName::MsrMtrrFix16kA0000
            | HvX64RegisterName::MsrMtrrFix4kC0000
            | HvX64RegisterName::MsrMtrrFix4kC8000
            | HvX64RegisterName::MsrMtrrFix4kD0000
            | HvX64RegisterName::MsrMtrrFix4kD8000
            | HvX64RegisterName::MsrMtrrFix4kE0000
            | HvX64RegisterName::MsrMtrrFix4kE8000
            | HvX64RegisterName::MsrMtrrFix4kF0000
            | HvX64RegisterName::MsrMtrrFix4kF8000
    )
}

/// Indicate whether reg is shared across VTLs.
///
/// This function is not complete: DR6 may or may not be shared, depending on
/// the processor type; the caller needs to check HvRegisterVsmCapabilities.
/// Some MSRs are not included here as they are not represented in
/// HvX64RegisterName, including MSR_TSC_FREQUENCY, MSR_MCG_CAP,
/// MSR_MCG_STATUS, MSR_RESET, MSR_GUEST_IDLE, and MSR_DEBUG_DEVICE_OPTIONS.
#[cfg(guest_arch = "x86_64")]
fn is_vtl_shared_reg(reg: HvX64RegisterName) -> bool {
    is_vtl_shared_mtrr(reg)
        || matches!(
            reg,
            HvX64RegisterName::VpIndex
                | HvX64RegisterName::VpRuntime
                | HvX64RegisterName::TimeRefCount
                | HvX64RegisterName::Rax
                | HvX64RegisterName::Rbx
                | HvX64RegisterName::Rcx
                | HvX64RegisterName::Rdx
                | HvX64RegisterName::Rsi
                | HvX64RegisterName::Rdi
                | HvX64RegisterName::Rbp
                | HvX64RegisterName::Cr2
                | HvX64RegisterName::R8
                | HvX64RegisterName::R9
                | HvX64RegisterName::R10
                | HvX64RegisterName::R11
                | HvX64RegisterName::R12
                | HvX64RegisterName::R13
                | HvX64RegisterName::R14
                | HvX64RegisterName::R15
                | HvX64RegisterName::Dr0
                | HvX64RegisterName::Dr1
                | HvX64RegisterName::Dr2
                | HvX64RegisterName::Dr3
                | HvX64RegisterName::Xmm0
                | HvX64RegisterName::Xmm1
                | HvX64RegisterName::Xmm2
                | HvX64RegisterName::Xmm3
                | HvX64RegisterName::Xmm4
                | HvX64RegisterName::Xmm5
                | HvX64RegisterName::Xmm6
                | HvX64RegisterName::Xmm7
                | HvX64RegisterName::Xmm8
                | HvX64RegisterName::Xmm9
                | HvX64RegisterName::Xmm10
                | HvX64RegisterName::Xmm11
                | HvX64RegisterName::Xmm12
                | HvX64RegisterName::Xmm13
                | HvX64RegisterName::Xmm14
                | HvX64RegisterName::Xmm15
                | HvX64RegisterName::FpMmx0
                | HvX64RegisterName::FpMmx1
                | HvX64RegisterName::FpMmx2
                | HvX64RegisterName::FpMmx3
                | HvX64RegisterName::FpMmx4
                | HvX64RegisterName::FpMmx5
                | HvX64RegisterName::FpMmx6
                | HvX64RegisterName::FpMmx7
                | HvX64RegisterName::FpControlStatus
                | HvX64RegisterName::XmmControlStatus
                | HvX64RegisterName::Xfem
        )
}

/// Indicate whether reg is shared across VTLs.
#[cfg(guest_arch = "aarch64")]
fn is_vtl_shared_reg(reg: HvArm64RegisterName) -> bool {
    use hvdef::HvArm64RegisterName;

    matches!(
        reg,
        HvArm64RegisterName::X0
            | HvArm64RegisterName::X1
            | HvArm64RegisterName::X2
            | HvArm64RegisterName::X3
            | HvArm64RegisterName::X4
            | HvArm64RegisterName::X5
            | HvArm64RegisterName::X6
            | HvArm64RegisterName::X7
            | HvArm64RegisterName::X8
            | HvArm64RegisterName::X9
            | HvArm64RegisterName::X10
            | HvArm64RegisterName::X11
            | HvArm64RegisterName::X12
            | HvArm64RegisterName::X13
            | HvArm64RegisterName::X14
            | HvArm64RegisterName::X15
            | HvArm64RegisterName::X16
            | HvArm64RegisterName::X17
            | HvArm64RegisterName::X19
            | HvArm64RegisterName::X20
            | HvArm64RegisterName::X21
            | HvArm64RegisterName::X22
            | HvArm64RegisterName::X23
            | HvArm64RegisterName::X24
            | HvArm64RegisterName::X25
            | HvArm64RegisterName::X26
            | HvArm64RegisterName::X27
            | HvArm64RegisterName::X28
            | HvArm64RegisterName::XFp
            | HvArm64RegisterName::XLr
    )
}

/// The `/dev/mshv_hvcall` device for issuing hypercalls directly to the
/// hypervisor.
#[derive(Debug)]
pub struct MshvHvcall(File);

impl MshvHvcall {
    /// Opens the device.
    pub fn new() -> Result<Self, Error> {
        let file = fs_err::OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/mshv_hvcall")
            .map_err(Error::OpenHvcall)?;

        Ok(Self(file.into()))
    }

    /// Set allowed hypercalls.
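    ///
    /// The allow list is encoded as a bitmap indexed by hypercall code. A
    /// typical usage sketch (the specific codes are illustrative):
    /// ```ignore
    /// mshv_hvcall.set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallGetVpRegisters,
    ///     hvdef::HypercallCode::HvCallSetVpRegisters,
    /// ]);
    /// ```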
    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
        type ItemType = u64;
        let item_size_bytes = size_of::<ItemType>();
        let item_size_bits = item_size_bytes * 8;

        let mut allow_bitmap = Vec::<ItemType>::new();
        for &code in codes {
            let map_index = (code.0 as usize) / item_size_bits;
            if map_index >= allow_bitmap.len() {
                allow_bitmap.resize(map_index + 1, 0);
            }
            allow_bitmap[map_index] |= (1 as ItemType) << (code.0 % item_size_bits as u16);
        }

        let hvcall_setup = protocol::hcl_hvcall_setup {
            allow_bitmap_size: (allow_bitmap.len() * item_size_bytes) as u64,
            allow_bitmap_ptr: allow_bitmap.as_ptr(),
        };

        // SAFETY: following the IOCTL definition.
        unsafe {
            hcl_hvcall_setup(self.0.as_raw_fd(), &hvcall_setup)
                .expect("Hypercall setup IOCTL must be supported");
        }
    }

    /// Accepts VTL 0 pages with no host visibility.
    ///
    /// [`HypercallCode::HvCallAcceptGpaPages`] must be allowed.
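    ///
    /// A hedged usage sketch (the range is an arbitrary example and
    /// `memory_type` is whatever [`hvdef::hypercall::AcceptMemoryType`] the
    /// caller needs):
    /// ```ignore
    /// mshv_hvcall.accept_gpa_pages(MemoryRange::new(0x100000..0x200000), memory_type)?;
    /// ```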
    pub fn accept_gpa_pages(
        &self,
        range: MemoryRange,
        memory_type: hvdef::hypercall::AcceptMemoryType,
    ) -> Result<(), AcceptPagesError> {
        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
            - size_of::<hvdef::hypercall::AcceptGpaPages>())
            / size_of::<u64>();

        let span = tracing::info_span!("accept_pages", CVM_ALLOWED, ?range);
        let _enter = span.enter();

        let mut current_page = range.start() / HV_PAGE_SIZE;
        let end = range.end() / HV_PAGE_SIZE;

        while current_page < end {
            let header = hvdef::hypercall::AcceptGpaPages {
                partition_id: HV_PARTITION_ID_SELF,
                page_attributes: hvdef::hypercall::AcceptPagesAttributes::new()
                    .with_memory_type(memory_type.0)
                    .with_host_visibility(HostVisibilityType::PRIVATE)
                    .with_vtl_set(0), // vtl protections cannot be applied for VTL 0 memory
                vtl_permission_set: hvdef::hypercall::VtlPermissionSet {
                    vtl_permission_from_1: [0; hvdef::hypercall::HV_VTL_PERMISSION_SET_SIZE],
                },
                gpa_page_base: current_page,
            };

            let remaining_pages = end - current_page;
            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);

            // SAFETY: The input header and rep slice are the correct types for
            //         this hypercall. A dummy type of u8 is provided to satisfy
            //         the compiler for input and output rep type. The given
            //         input and slices are valid references while this function
            //         is called.
            //
            //         The hypercall output is validated right after the hypercall is issued.
            let output = unsafe {
                self.hvcall_rep::<hvdef::hypercall::AcceptGpaPages, u8, u8>(
                    HypercallCode::HvCallAcceptGpaPages,
                    &header,
                    HvcallRepInput::Count(count as u16),
                    None,
                )
                .expect("kernel hypercall submission should always succeed")
            };

            output
                .result()
                .map_err(|err| AcceptPagesError::Hypervisor {
                    range: MemoryRange::from_4k_gpn_range(current_page..current_page + count),
                    output,
                    hv_error: err,
                })?;

            current_page += count;

            assert_eq!(output.elements_processed() as u64, count);
        }
        Ok(())
    }

    /// Modifies the host visibility of the given pages.
    ///
    /// [`HypercallCode::HvCallModifySparseGpaPageHostVisibility`] must be
    /// allowed.
    ///
    /// On error, returns the hypervisor error and the number of pages
    /// processed.
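    ///
    /// A hedged usage sketch (`host_visibility` and the GPN list are
    /// illustrative):
    /// ```ignore
    /// if let Err((err, processed)) = mshv_hvcall.modify_gpa_visibility(host_visibility, &[0x100, 0x101]) {
    ///     // `processed` pages changed visibility before `err` was returned.
    /// }
    /// ```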
    //
    // TODO SNP: this isn't really safe. Probably this should be an IOCTL in the
    // kernel so that it can validate the page ranges are VTL0 memory.
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        mut gpns: &[u64],
    ) -> Result<(), (HvError, usize)> {
        const GPNS_PER_CALL: usize = (HV_PAGE_SIZE as usize
            - size_of::<hvdef::hypercall::ModifySparsePageVisibility>())
            / size_of::<u64>();

        while !gpns.is_empty() {
            let n = gpns.len().min(GPNS_PER_CALL);
            // SAFETY: The input header and rep slice are the correct types for this hypercall.
            //         The hypercall output is validated right after the hypercall is issued.
            let result = unsafe {
                self.hvcall_rep(
                    HypercallCode::HvCallModifySparseGpaPageHostVisibility,
                    &hvdef::hypercall::ModifySparsePageVisibility {
                        partition_id: HV_PARTITION_ID_SELF,
                        host_visibility: ModifyHostVisibility::new()
                            .with_host_visibility(host_visibility),
                        reserved: 0,
                    },
                    HvcallRepInput::Elements(&gpns[..n]),
                    None::<&mut [u8]>,
                )
                .unwrap()
            };

            match result.result() {
                Ok(()) => {
                    assert_eq!({ result.elements_processed() }, n);
                }
                Err(HvError::Timeout) => {}
                Err(e) => return Err((e, result.elements_processed())),
            }
            gpns = &gpns[result.elements_processed()..];
        }
        Ok(())
    }

    /// Given a constructed hcl_hvcall protocol object, issues an IOCTL to invoke a hypercall via
    /// the direct hypercall kernel interface. This function will retry hypercalls if the hypervisor
    /// times out the hypercall.
    ///
    /// Input and output data are referenced as pointers in the call object.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A
    /// caller must check the return value for the result of the hypercall.
    ///
    /// Before invoking hypercalls, a list of hypercalls that are allowed
    /// has to be set with `Hcl::set_allowed_hypercalls`:
    /// ```ignore
    /// set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
    ///     hvdef::HypercallCode::HvCallInstallIntercept,
    /// ]);
    /// ```
    /// # Safety
    /// This function makes no guarantees that the given input header, input and output types are
    /// valid for the given hypercall. It is the caller's responsibility to use the correct types
    /// with the specified hypercall.
    ///
    /// The caller must ensure that the input and output data are valid for the lifetime of this
    /// call.
    ///
    /// A caller must check the returned [HypercallOutput] for success or failure from the
    /// hypervisor.
    ///
    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be
    /// validated by the caller if needed.
    unsafe fn invoke_hvcall_ioctl(
        &self,
        mut call_object: protocol::hcl_hvcall,
    ) -> Result<HypercallOutput, HvcallError> {
        loop {
            // SAFETY: following the IOCTL definition. The data referenced in the call
            // lives as long as `self` does thus the lifetime elision doesn't contradict
            // the compiler's invariants.
            //
            // The hypervisor is trusted to fill out the output page with a valid
            // representation of an instance of the output type, except in the case of hardware
            // isolated VMs where the caller must validate output as needed.
            unsafe {
                hcl_hvcall(self.0.as_raw_fd(), &mut call_object)
                    .map_err(HvcallError::HypercallIoctlFailed)?;
            }

            if call_object.status.call_status() == Err(HvError::Timeout).into() {
                // Any hypercall can timeout, even one that doesn't have reps. Continue processing
                // from wherever the hypervisor left off. The rep start index isn't checked for
                // validity, since it is only being used as an input to the untrusted hypervisor.
                // This applies to both simple and rep hypercalls.
                call_object
                    .control
                    .set_rep_start(call_object.status.elements_processed());
            } else {
                if call_object.control.rep_count() == 0 {
                    // For non-rep hypercalls, the elements processed field should be 0.
                    assert_eq!(call_object.status.elements_processed(), 0);
                } else {
                    // Hardware isolated VMs cannot trust output from the hypervisor, but check for
                    // consistency between the number of elements processed and the expected count. A
                    // violation of this assertion indicates a buggy or malicious hypervisor.
                    assert!(
                        (call_object.status.result().is_ok()
                            && call_object.control.rep_count()
                                == call_object.status.elements_processed())
                            || (call_object.status.result().is_err()
                                && call_object.control.rep_count()
                                    > call_object.status.elements_processed())
                    );
                }

                return Ok(call_object.status);
            }
        }
    }

    /// Issues a non-rep hypercall to the hypervisor via the direct hypercall kernel interface.
    /// This is not intended to be used directly by external callers, but rather for writing safe hypercall wrappers.
    /// This call constructs the appropriate hypercall input control from the described parameters.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
    /// return value for the result of the hypercall.
    ///
    /// `code` is the hypercall code.
    /// `input` is the input type required by the hypercall.
    /// `output` is the output type required by the hypercall.
    ///
    /// Before invoking hypercalls, a list of hypercalls that are allowed
    /// has to be set with `Hcl::set_allowed_hypercalls`:
    /// ```ignore
    /// set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
    ///     hvdef::HypercallCode::HvCallInstallIntercept,
    /// ]);
    /// ```
    /// # Safety
    /// This function makes no guarantees that the given input header, input and output types are valid for the
    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
    ///
    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
    ///
    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
    /// caller if needed.
    unsafe fn hvcall<I, O>(
        &self,
        code: HypercallCode,
        input: &I,
        output: &mut O,
    ) -> Result<HypercallOutput, HvcallError>
    where
        I: IntoBytes + Sized + Immutable + KnownLayout,
        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
    {
        const fn assert_size<I, O>()
        where
            I: Sized,
            O: Sized,
        {
            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
        }
        assert_size::<I, O>();

        let control = hvdef::hypercall::Control::new().with_code(code.0);

        let call_object = protocol::hcl_hvcall {
            control,
            input_data: input.as_bytes().as_ptr().cast(),
            input_size: size_of::<I>(),
            status: FromZeros::new_zeroed(),
            output_data: output.as_bytes().as_ptr().cast(),
            output_size: size_of::<O>(),
        };

        // SAFETY: The data referenced in the call lives as long as `self` does.
        unsafe { self.invoke_hvcall_ioctl(call_object) }
    }

    /// Issues a rep hypercall to the hypervisor via the direct hypercall kernel
    /// interface. Like the non-rep version, this is not intended to be used
    /// externally other than to construct safe wrappers. This call constructs
    /// the appropriate hypercall input control from the described parameters.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in
    /// issuing the hypercall. A caller must check the return value for the
    /// result of the hypercall.
    ///
    /// `code` is the hypercall code. `input_header` is the hypercall fixed
    /// length input header. Variable length headers are not supported.
    /// `input_rep` is the list of input elements. The length of the slice is
    /// used as the rep count.
    ///
    /// `output_rep` is the optional output rep list. A caller must check the
    /// returned [HypercallOutput] for the number of valid elements in this
    /// list.
    ///
    /// # Safety
    /// This function makes no guarantees that the given input header, input rep
    /// and output rep types are valid for the given hypercall. It is the
    /// caller's responsibility to use the correct types with the specified
    /// hypercall.
    ///
    /// A caller must check the returned [HypercallOutput] for success or
    /// failure from the hypervisor and processed rep count.
    ///
    /// Hardware isolated VMs cannot trust output from the hypervisor. This
    /// routine will ensure that the hypervisor either returns success with all
    /// elements processed, or returns failure with an incomplete number of
    /// elements processed. Actual validation of the output elements is the
    /// responsibility of the caller.
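    ///
    /// A hedged internal sketch of a rep invocation (the header type, hypercall
    /// code, and data below are placeholders, not from this crate):
    /// ```ignore
    /// let output = unsafe {
    ///     self.hvcall_rep::<SomeHeader, u64, u8>(
    ///         HypercallCode::HvCallSomeRepHypercall,
    ///         &header,
    ///         HvcallRepInput::Elements(&gpns),
    ///         None,
    ///     )?
    /// };
    /// output.result()?;
    /// assert_eq!(output.elements_processed(), gpns.len());
    /// ```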
    unsafe fn hvcall_rep<InputHeader, InputRep, O>(
        &self,
        code: HypercallCode,
        input_header: &InputHeader,
        input_rep: HvcallRepInput<'_, InputRep>,
        output_rep: Option<&mut [O]>,
    ) -> Result<HypercallOutput, HvcallError>
    where
        InputHeader: IntoBytes + Sized + Immutable + KnownLayout,
        InputRep: IntoBytes + Sized + Immutable + KnownLayout,
        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
    {
        // Construct input buffer.
        let (input, count) = match input_rep {
            HvcallRepInput::Elements(e) => {
                ([input_header.as_bytes(), e.as_bytes()].concat(), e.len())
            }
            HvcallRepInput::Count(c) => (input_header.as_bytes().to_vec(), c.into()),
        };

        if input.len() > HV_PAGE_SIZE as usize {
            return Err(HvcallError::InputParametersTooLarge);
        }

        if let Some(output_rep) = &output_rep {
            if output_rep.as_bytes().len() > HV_PAGE_SIZE as usize {
                return Err(HvcallError::OutputParametersTooLarge);
            }

            if count != output_rep.len() {
                return Err(HvcallError::InputOutputRepListMismatch);
            }
        }

        let (output_data, output_size) = match output_rep {
            Some(output_rep) => (
                output_rep.as_bytes().as_ptr().cast(),
                output_rep.as_bytes().len(),
            ),
            None => (std::ptr::null(), 0),
        };

        let control = hvdef::hypercall::Control::new()
            .with_code(code.0)
            .with_rep_count(count);

        let call_object = protocol::hcl_hvcall {
            control,
            input_data: input.as_ptr().cast(),
            input_size: input.len(),
            status: HypercallOutput::new(),
            output_data,
            output_size,
        };

        // SAFETY: The data referenced in the call lives as long as `self` does.
        unsafe { self.invoke_hvcall_ioctl(call_object) }
    }

    /// Issues a non-rep hypercall with variable input to the hypervisor via the direct hypercall kernel interface.
    /// This is not intended to be used directly by external callers, but rather for writing safe hypercall wrappers.
    /// This call constructs the appropriate hypercall input control from the described parameters.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
    /// return value for the result of the hypercall.
    ///
    /// `code` is the hypercall code.
    /// `input` is the input type required by the hypercall.
    /// `output` is the output type required by the hypercall.
    /// `variable_input` is the contents of the variable input to the hypercall. The length must be a multiple of 8 bytes.
    ///
    /// # Safety
    /// This function makes no guarantees that the given input header, input and output types are valid for the
    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
    ///
    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
    ///
    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
    /// caller if needed.
    unsafe fn hvcall_var<I, O>(
        &self,
        code: HypercallCode,
        input: &I,
        variable_input: &[u8],
        output: &mut O,
    ) -> Result<HypercallOutput, HvcallError>
    where
        I: IntoBytes + Sized + Immutable + KnownLayout,
        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
    {
        const fn assert_size<I, O>()
        where
            I: Sized,
            O: Sized,
        {
            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
        }
        assert_size::<I, O>();
        assert!(variable_input.len() % 8 == 0);

        let input = [input.as_bytes(), variable_input].concat();
        if input.len() > HV_PAGE_SIZE as usize {
            return Err(HvcallError::InputParametersTooLarge);
        }

        let control = hvdef::hypercall::Control::new()
            .with_code(code.0)
            .with_variable_header_size(variable_input.len() / 8);

        let call_object = protocol::hcl_hvcall {
            control,
            input_data: input.as_bytes().as_ptr().cast(),
            input_size: input.len(),
            status: FromZeros::new_zeroed(),
            output_data: output.as_bytes().as_ptr().cast(),
            output_size: size_of::<O>(),
        };

        // SAFETY: The data referenced in the call lives as long as `self` does.
        unsafe { self.invoke_hvcall_ioctl(call_object) }
    }

    /// Sets the VTL protection mask for the specified memory range.
    ///
    /// [`HypercallCode::HvCallModifyVtlProtectionMask`] must be allowed.
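    ///
    /// A hedged usage sketch (the range, permissions, and target VTL are
    /// illustrative):
    /// ```ignore
    /// mshv_hvcall.modify_vtl_protection_mask(
    ///     MemoryRange::new(0x100000..0x200000),
    ///     hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
    ///     HvInputVtl::CURRENT_VTL,
    /// )?;
    /// ```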
1329    pub fn modify_vtl_protection_mask(
1330        &self,
1331        range: MemoryRange,
1332        map_flags: HvMapGpaFlags,
1333        target_vtl: HvInputVtl,
1334    ) -> Result<(), ApplyVtlProtectionsError> {
1335        let header = hvdef::hypercall::ModifyVtlProtectionMask {
1336            partition_id: HV_PARTITION_ID_SELF,
1337            map_flags,
1338            target_vtl,
1339            reserved: [0; 3],
1340        };
1341
1342        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
1343            - size_of::<hvdef::hypercall::ModifyVtlProtectionMask>())
1344            / size_of::<u64>();
1345
1346        let span = tracing::info_span!("modify_vtl_protection_mask", CVM_ALLOWED, ?range);
1347        let _enter = span.enter();
1348
1349        let start = range.start() / HV_PAGE_SIZE;
1350        let end = range.end() / HV_PAGE_SIZE;
1351
1352        // Reuse the same vector for every hypercall.
1353        let mut pages = Vec::new();
1354        for current_page in (start..end).step_by(MAX_INPUT_ELEMENTS) {
1355            let remaining_pages = end - current_page;
1356            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
1357            pages.clear();
1358            pages.extend(current_page..current_page + count);
1359
1360            // SAFETY: The input header and rep slice are the correct types for this hypercall. A dummy type of u8 is
1361            //         provided to satisfy the compiler for output rep type. The given input and slices are valid
1362            //         references while this function is called.
1363            //
1364            //         The hypercall output is validated right after the hypercall is issued.
1365            let output = unsafe {
1366                self.hvcall_rep::<hvdef::hypercall::ModifyVtlProtectionMask, u64, u8>(
1367                    HypercallCode::HvCallModifyVtlProtectionMask,
1368                    &header,
1369                    HvcallRepInput::Elements(pages.as_slice()),
1370                    None,
1371                )
1372                .expect("kernel hypercall submission should always succeed")
1373            };
1374
1375            output.result().map_err(|err| {
1376                let page_range =
1377                    *pages.first().expect("not empty")..*pages.last().expect("not empty") + 1;
1378                ApplyVtlProtectionsError::Hypervisor {
1379                    range: MemoryRange::from_4k_gpn_range(page_range),
1380                    output,
1381                    hv_error: err,
1382                    vtl: target_vtl,
1383                }
1384            })?;
1385
1386            assert_eq!(output.elements_processed() as u64, count);
1387        }
1388
1389        Ok(())
1390    }
1391
1392    /// Get a single VP register for the given VTL via hypercall.
1393    fn get_vp_register_for_vtl_inner(
1394        &self,
1395        target_vtl: HvInputVtl,
1396        name: HvRegisterName,
1397    ) -> Result<HvRegisterValue, Error> {
1398        let header = hvdef::hypercall::GetSetVpRegisters {
1399            partition_id: HV_PARTITION_ID_SELF,
1400            vp_index: HV_VP_INDEX_SELF,
1401            target_vtl,
1402            rsvd: [0; 3],
1403        };
1404        let mut output = [HvRegisterValue::new_zeroed()];
1405
1406        // SAFETY: The input header and rep slice are the correct types for this hypercall.
1407        //         The hypercall output is validated right after the hypercall is issued.
1408        let status = unsafe {
1409            self.hvcall_rep(
1410                HypercallCode::HvCallGetVpRegisters,
1411                &header,
1412                HvcallRepInput::Elements(&[name]),
1413                Some(&mut output),
1414            )
1415            .expect("get_vp_register hypercall should not fail")
1416        };
1417
1418        // Status must be success with 1 rep completed
1419        status
1420            .result()
1421            .map_err(|err| Error::GetVpRegisterHypercall {
1422                reg: name.into(),
1423                err,
1424            })?;
1425        assert_eq!(status.elements_processed(), 1);
1426
1427        Ok(output[0])
1428    }
1429
1430    /// Get a single VP register for the given VTL via hypercall. Only a select
1431    /// set of registers are supported; others will cause a panic.
1432    #[cfg(guest_arch = "x86_64")]
1433    fn get_vp_register_for_vtl(
1434        &self,
1435        vtl: HvInputVtl,
1436        name: HvX64RegisterName,
1437    ) -> Result<HvRegisterValue, Error> {
1438        match vtl.target_vtl().unwrap() {
1439            None | Some(Vtl::Vtl2) => {
1440                assert!(matches!(
1441                    name,
1442                    HvX64RegisterName::GuestVsmPartitionConfig
1443                        | HvX64RegisterName::VsmPartitionConfig
1444                        | HvX64RegisterName::VsmPartitionStatus
1445                        | HvX64RegisterName::VsmCapabilities
1446                        | HvX64RegisterName::TimeRefCount
1447                        | HvX64RegisterName::VsmVpSecureConfigVtl0
1448                        | HvX64RegisterName::VsmVpSecureConfigVtl1
1449                ));
1450            }
1451            Some(Vtl::Vtl1) => {
1452                todo!("TODO: allowed registers for VTL1");
1453            }
1454            Some(Vtl::Vtl0) => {
1455                // Only VTL-private registers can go through this path.
1456                // VTL-shared registers have to go through the kernel (either
1457                // via the CPU context page or via the dedicated ioctl), as
1458                // they may require special handling there.
1459                //
1460                // Register access should go through the register page if
1461                // possible (as a performance optimization). In practice,
1462                // registers that are normally available on the register page
1463                // are handled here only when it is unavailable (e.g., running
1464                // in WHP).
1465                assert!(!is_vtl_shared_reg(name));
1466            }
1467        }
1468
1469        self.get_vp_register_for_vtl_inner(vtl, name.into())
1470    }
1471
1472    /// Get a single VP register for the given VTL via hypercall. Only a select
1473    /// set of registers are supported; others will cause a panic.
1474    #[cfg(guest_arch = "aarch64")]
1475    fn get_vp_register_for_vtl(
1476        &self,
1477        vtl: HvInputVtl,
1478        name: HvArm64RegisterName,
1479    ) -> Result<HvRegisterValue, Error> {
1480        match vtl.target_vtl().unwrap() {
1481            None | Some(Vtl::Vtl2) => {
1482                assert!(matches!(
1483                    name,
1484                    HvArm64RegisterName::GuestVsmPartitionConfig
1485                        | HvArm64RegisterName::VsmPartitionConfig
1486                        | HvArm64RegisterName::VsmPartitionStatus
1487                        | HvArm64RegisterName::VsmCapabilities
1488                        | HvArm64RegisterName::TimeRefCount
1489                        | HvArm64RegisterName::VsmVpSecureConfigVtl0
1490                        | HvArm64RegisterName::VsmVpSecureConfigVtl1
1491                        | HvArm64RegisterName::PrivilegesAndFeaturesInfo
1492                ));
1493            }
1494            Some(Vtl::Vtl1) => {
1495                // TODO: allowed registers for VTL1
1496                todo!();
1497            }
1498            Some(Vtl::Vtl0) => {
1499                // Only VTL-private registers can go through this path.
1500                // VTL-shared registers have to go through the kernel (either
1501                // via the CPU context page or via the dedicated ioctl), as
1502                // they may require special handling there.
1503                assert!(!is_vtl_shared_reg(name));
1504            }
1505        }
1506
1507        self.get_vp_register_for_vtl_inner(vtl, name.into())
1508    }
1509
1510    /// Invokes the HvCallMemoryMappedIoRead hypercall
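    ///
    /// A minimal usage sketch (illustrative only): `hvcall` is assumed to be an
    /// already-opened `MshvHvcall` handle and `mmio_gpa` an address the caller
    /// knows to be guest MMIO; the buffer must not exceed
    /// `HV_HYPERCALL_MMIO_MAX_DATA_LENGTH` bytes.
    ///
    /// ```ignore
    /// let mut buf = [0u8; 4];
    /// hvcall.mmio_read(mmio_gpa, &mut buf)?;
    /// let value = u32::from_le_bytes(buf);
    /// ```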
1511    pub fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> Result<(), HvError> {
1512        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1513
1514        let header = hvdef::hypercall::MemoryMappedIoRead {
1515            gpa,
1516            access_width: data.len() as u32,
1517            reserved_z0: 0,
1518        };
1519
1520        let mut output: hvdef::hypercall::MemoryMappedIoReadOutput = FromZeros::new_zeroed();
1521
1522        // SAFETY: The input header and slice are the correct types for this hypercall.
1523        //         The hypercall output is validated right after the hypercall is issued.
1524        let status = unsafe {
1525            self.hvcall(
1526                HypercallCode::HvCallMemoryMappedIoRead,
1527                &header,
1528                &mut output,
1529            )
1530            .expect("submitting hypercall should not fail")
1531        };
1532
1533        // Only copy the data if the hypercall was successful
1534        if status.result().is_ok() {
1535            data.copy_from_slice(&output.data[..data.len()]);
1536        }
1537
1538        status.result()
1539    }
1540
1541    /// Invokes the HvCallMemoryMappedIoWrite hypercall
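    ///
    /// Usage sketch (illustrative only; `hvcall` and `mmio_gpa` are assumed as
    /// in the read example above):
    ///
    /// ```ignore
    /// hvcall.mmio_write(mmio_gpa, &1u32.to_le_bytes())?;
    /// ```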
1542    pub fn mmio_write(&self, gpa: u64, data: &[u8]) -> Result<(), HvError> {
1543        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1544
1545        let mut header = hvdef::hypercall::MemoryMappedIoWrite {
1546            gpa,
1547            access_width: data.len() as u32,
1548            reserved_z0: 0,
1549            data: [0; hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH],
1550        };
1551
1552        header.data[..data.len()].copy_from_slice(data);
1553
1554        // SAFETY: The input header and slice are the correct types for this hypercall.
1555        //         The hypercall output is validated right after the hypercall is issued.
1556        let status = unsafe {
1557            self.hvcall(HypercallCode::HvCallMemoryMappedIoWrite, &header, &mut ())
1558                .expect("submitting hypercall should not fail")
1559        };
1560
1561        status.result()
1562    }
1563
1564    /// Request a VBS VM report from the host VSM.
1565    ///
1566    /// # Arguments
1567    /// - `report_data`: The data to include in the report.
1568    ///
1569    /// Returns a result containing the report or an error.
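    ///
    /// Usage sketch (illustrative only; `hvcall` is an assumed `MshvHvcall`
    /// handle and the report data here is arbitrary):
    ///
    /// ```ignore
    /// let report_data = [0u8; hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE];
    /// let report = hvcall.vbs_vm_call_report(&report_data)?;
    /// ```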
1570    pub fn vbs_vm_call_report(
1571        &self,
1572        report_data: &[u8],
1573    ) -> Result<[u8; hvdef::hypercall::VBS_VM_MAX_REPORT_SIZE], HvError> {
1574        if report_data.len() > hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE {
1575            return Err(HvError::InvalidParameter);
1576        }
1577
1578        let mut header = hvdef::hypercall::VbsVmCallReport {
1579            report_data: [0; hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE],
1580        };
1581
1582        header.report_data[..report_data.len()].copy_from_slice(report_data);
1583
1584        let mut output: hvdef::hypercall::VbsVmCallReportOutput = FromZeros::new_zeroed();
1585
1586        // SAFETY: The input header and slice are the correct types for this hypercall.
1587        //         The hypercall output is validated right after the hypercall is issued.
1588        let status = unsafe {
1589            self.hvcall(HypercallCode::HvCallVbsVmCallReport, &header, &mut output)
1590                .expect("submitting hypercall should not fail")
1591        };
1592
1593        status.result()?;
1594        Ok(output.report)
1598    }
1599}
1600
1601/// The HCL device and collection of fds.
1602#[derive(Debug)]
1603pub struct Hcl {
1604    mshv_hvcall: MshvHvcall,
1605    mshv_vtl: MshvVtl,
1606    vps: Vec<HclVp>,
1607    supports_vtl_ret_action: bool,
1608    supports_register_page: bool,
1609    dr6_shared: bool,
1610    isolation: IsolationType,
1611    snp_register_bitmap: [u8; 64],
1612    sidecar: Option<SidecarClient>,
1613}
1614
1615/// The isolation type for a partition.
1616// TODO: Add guest_arch cfgs.
1617#[derive(Debug, Copy, Clone, PartialEq, Eq)]
1618pub enum IsolationType {
1619    /// No isolation.
1620    None,
1621    /// Hyper-V software isolation.
1622    Vbs,
1623    /// AMD SNP.
1624    Snp,
1625    /// Intel TDX.
1626    Tdx,
1627}
1628
1629impl IsolationType {
1630    /// Returns true if the isolation type is not `None`.
1631    pub fn is_isolated(&self) -> bool {
1632        !matches!(self, Self::None)
1633    }
1634
1635    /// Returns whether the isolation type is hardware-backed.
1636    pub fn is_hardware_isolated(&self) -> bool {
1637        matches!(self, Self::Snp | Self::Tdx)
1638    }
1639}
1640
1641impl Hcl {
1642    /// Returns true if DR6 is a shared register on this processor.
1643    pub fn dr6_shared(&self) -> bool {
1644        self.dr6_shared
1645    }
1646}
1647
1648#[derive(Debug)]
1649struct HclVp {
1650    state: Mutex<VpState>,
1651    run: MappedPage<hcl_run>,
1652    backing: BackingState,
1653}
1654
1655#[derive(Debug)]
1656enum BackingState {
1657    MshvAarch64 {
1658        reg_page: Option<MappedPage<HvAarch64RegisterPage>>,
1659    },
1660    MshvX64 {
1661        reg_page: Option<MappedPage<HvX64RegisterPage>>,
1662    },
1663    Snp {
1664        vmsa: VtlArray<MappedPage<SevVmsa>, 2>,
1665    },
1666    Tdx {
1667        vtl0_apic_page: MappedPage<ApicPage>,
1668        vtl1_apic_page: MemoryBlock,
1669    },
1670}
1671
1672#[derive(Debug)]
1673enum VpState {
1674    Running(Pthread),
1675    NotRunning,
1676}
1677
1678impl HclVp {
1679    fn new(
1680        hcl: &Hcl,
1681        vp: u32,
1682        map_reg_page: bool,
1683        isolation_type: IsolationType,
1684        private_dma_client: Option<&Arc<dyn DmaClient>>,
1685    ) -> Result<Self, Error> {
1686        let fd = &hcl.mshv_vtl.file;
1687        let run: MappedPage<hcl_run> =
1688            MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
1689        // Block proxied interrupts on all vectors by default. The mask will be
1690        // relaxed as the guest runs.
1691        //
1692        // This is only used on CVMs. Skip it otherwise, since run page accesses
1693        // will fault on VPs that are still in the sidecar kernel.
1694        if isolation_type.is_hardware_isolated() {
1695            // SAFETY: `proxy_irr_blocked` is not accessed by any other VPs/kernel at this point (`HclVp` creation)
1696            // so we know we have exclusive access.
1697            let proxy_irr_blocked = unsafe { &mut (*run.as_ptr()).proxy_irr_blocked };
1698            proxy_irr_blocked.fill(!0);
1699        }
1700
1701        let backing = match isolation_type {
1702            IsolationType::None | IsolationType::Vbs if cfg!(guest_arch = "aarch64") => {
1703                BackingState::MshvAarch64 {
1704                    reg_page: if map_reg_page {
1705                        Some(
1706                            MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1707                                .map_err(Error::MmapRegPage)?,
1708                        )
1709                    } else {
1710                        None
1711                    },
1712                }
1713            }
1714            IsolationType::None | IsolationType::Vbs => BackingState::MshvX64 {
1715                reg_page: if map_reg_page {
1716                    Some(
1717                        MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1718                            .map_err(Error::MmapRegPage)?,
1719                    )
1720                } else {
1721                    None
1722                },
1723            },
1724            IsolationType::Snp => {
1725                let vmsa_vtl0 = MappedPage::new(fd, HCL_VMSA_PAGE_OFFSET | vp as i64)
1726                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?;
1727                let vmsa_vtl1 = MappedPage::new(fd, HCL_VMSA_GUEST_VSM_PAGE_OFFSET | vp as i64)
1728                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl1)))?;
1729                BackingState::Snp {
1730                    vmsa: [vmsa_vtl0, vmsa_vtl1].into(),
1731                }
1732            }
1733            IsolationType::Tdx => BackingState::Tdx {
1734                vtl0_apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
1735                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?,
1736                vtl1_apic_page: private_dma_client
1737                    .ok_or(Error::MissingPrivateMemory)?
1738                    .allocate_dma_buffer(HV_PAGE_SIZE as usize)
1739                    .map_err(Error::AllocVp)?,
1740            },
1741        };
1742
1743        Ok(Self {
1744            state: Mutex::new(VpState::NotRunning),
1745            run,
1746            backing,
1747        })
1748    }
1749}
1750
1751/// Object used to run and to access state for a specific VP.
1752pub struct ProcessorRunner<'a, T: Backing<'a>> {
1753    hcl: &'a Hcl,
1754    vp: &'a HclVp,
1755    sidecar: Option<SidecarVp<'a>>,
1756    deferred_actions: Option<RegisteredDeferredActions<'a>>,
1757    run: &'a UnsafeCell<hcl_run>,
1758    intercept_message: &'a UnsafeCell<HvMessage>,
1759    state: T,
1760}
1761
1762/// An error returned by [`Hcl::runner`].
1763#[derive(Debug, Error)]
1764pub enum NoRunner {
1765    /// The partition is for a different isolation type.
1766    #[error("mismatched isolation type")]
1767    MismatchedIsolation,
1768    /// A sidecar VP was requested, but no sidecar was provided.
1769    #[error("missing sidecar")]
1770    MissingSidecar,
1771    /// The sidecar VP could not be contacted.
1772    #[error("sidecar communication error")]
1773    Sidecar(#[source] sidecar_client::SidecarError),
1774}
1775
1776/// An isolation-type-specific backing for a processor runner.
1777#[expect(private_bounds)]
1778pub trait Backing<'a>: BackingPrivate<'a> {}
1779
1780impl<'a, T: BackingPrivate<'a>> Backing<'a> for T {}
1781
1782mod private {
1783    use super::Error;
1784    use super::Hcl;
1785    use super::HclVp;
1786    use super::NoRunner;
1787    use super::ProcessorRunner;
1788    use crate::GuestVtl;
1789    use hvdef::HvRegisterName;
1790    use hvdef::HvRegisterValue;
1791    use sidecar_client::SidecarVp;
1792
1793    pub(super) trait BackingPrivate<'a>: Sized {
1794        fn new(vp: &'a HclVp, sidecar: Option<&SidecarVp<'a>>, hcl: &Hcl)
1795        -> Result<Self, NoRunner>;
1796
1797        fn try_set_reg(
1798            runner: &mut ProcessorRunner<'a, Self>,
1799            vtl: GuestVtl,
1800            name: HvRegisterName,
1801            value: HvRegisterValue,
1802        ) -> Result<bool, Error>;
1803
1804        fn must_flush_regs_on(runner: &ProcessorRunner<'a, Self>, name: HvRegisterName) -> bool;
1805
1806        fn try_get_reg(
1807            runner: &ProcessorRunner<'a, Self>,
1808            vtl: GuestVtl,
1809            name: HvRegisterName,
1810        ) -> Result<Option<HvRegisterValue>, Error>;
1811
1812        fn flush_register_page(runner: &mut ProcessorRunner<'a, Self>);
1813    }
1814}
1815
1816impl<'a, T: Backing<'a>> Drop for ProcessorRunner<'a, T> {
1817    fn drop(&mut self) {
1818        self.flush_deferred_state();
1819        drop(self.deferred_actions.take());
1820        let old_state = std::mem::replace(&mut *self.vp.state.lock(), VpState::NotRunning);
1821        assert!(matches!(old_state, VpState::Running(thread) if thread == Pthread::current()));
1822    }
1823}
1824
1825impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
1826    /// Flushes any deferred state. Must be called if preparing the partition
1827    /// for save/restore (servicing).
1828    pub fn flush_deferred_state(&mut self) {
1829        T::flush_register_page(self);
1830        if let Some(actions) = &mut self.deferred_actions {
1831            actions.flush();
1832        }
1833    }
1834
1835    // Registers that are shared between VTLs need to be handled by the kernel
1836    // as they may require special handling there. set_reg and get_reg will
1837    // handle these registers using a dedicated ioctl, instead of the general-
1838    // purpose Set/GetVpRegisters hypercalls.
1839    #[cfg(guest_arch = "x86_64")]
1840    fn is_kernel_managed(&self, name: HvX64RegisterName) -> bool {
1841        if name == HvX64RegisterName::Dr6 {
1842            self.hcl.dr6_shared()
1843        } else {
1844            is_vtl_shared_reg(name)
1845        }
1846    }
1847
1848    #[cfg(guest_arch = "aarch64")]
1849    fn is_kernel_managed(&self, name: HvArm64RegisterName) -> bool {
1850        is_vtl_shared_reg(name)
1851    }
1852
1853    fn set_reg(&mut self, vtl: GuestVtl, regs: &[HvRegisterAssoc]) -> Result<(), Error> {
1854        if regs.is_empty() {
1855            return Ok(());
1856        }
1857
1858        if let Some(sidecar) = &mut self.sidecar {
1859            sidecar
1860                .set_vp_registers(vtl.into(), regs)
1861                .map_err(Error::Sidecar)?;
1862        } else {
1863            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1864            // currently has a bug where it only supports one register at a
1865            // time. Once that's fixed, this code could set a group of
1866            // registers in one ioctl.
1867            for reg in regs {
1868                let hc_regs = &mut [HvRegisterAssoc {
1869                    name: reg.name,
1870                    pad: [0; 3],
1871                    value: reg.value,
1872                }];
1873
1874                if self.is_kernel_managed(reg.name.into()) {
1875                    let hv_vp_register_args = mshv_vp_registers {
1876                        count: 1,
1877                        regs: hc_regs.as_mut_ptr(),
1878                    };
1879                    // SAFETY: ioctl call with correct types.
1880                    unsafe {
1881                        hcl_set_vp_register(
1882                            self.hcl.mshv_vtl.file.as_raw_fd(),
1883                            &hv_vp_register_args,
1884                        )
1885                        .map_err(Error::SetVpRegister)?;
1886                    }
1887                } else {
1888                    let hc_regs = [HvRegisterAssoc {
1889                        name: reg.name,
1890                        pad: [0; 3],
1891                        value: reg.value,
1892                    }];
1893                    self.set_vp_registers_hvcall_inner(vtl.into(), &hc_regs)
1894                        .map_err(Error::SetRegisters)?;
1895                }
1896            }
1897        }
1898        Ok(())
1899    }
1900
1901    fn get_reg(&mut self, vtl: Vtl, regs: &mut [HvRegisterAssoc]) -> Result<(), Error> {
1902        if regs.is_empty() {
1903            return Ok(());
1904        }
1905
1906        if let Some(sidecar) = &mut self.sidecar {
1907            sidecar
1908                .get_vp_registers(vtl.into(), regs)
1909                .map_err(Error::Sidecar)?;
1910        } else {
1911            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1912            // currently has a bug where it only supports one register at a
1913            // time. Once that's fixed, this code could get a group of
1914            // registers in one ioctl.
1915            for reg in regs {
1916                if self.is_kernel_managed(reg.name.into()) {
1917                    let mut mshv_vp_register_args = mshv_vp_registers {
1918                        count: 1,
1919                        regs: reg,
1920                    };
1921                    // SAFETY: we know that our file is a vCPU fd, we know the kernel will only read the
1922                    // correct amount of memory from our pointer, and we verify the return result.
1923                    unsafe {
1924                        hcl_get_vp_register(
1925                            self.hcl.mshv_vtl.file.as_raw_fd(),
1926                            &mut mshv_vp_register_args,
1927                        )
1928                        .map_err(Error::GetVpRegister)?;
1929                    }
1930                } else {
1931                    reg.value = self
1932                        .hcl
1933                        .mshv_hvcall
1934                        .get_vp_register_for_vtl(vtl.into(), reg.name.into())?;
1935                }
1936            }
1937        }
1938        Ok(())
1939    }
1940
1941    /// Clears the cancel flag so that the VP can be run again.
1942    pub fn clear_cancel(&mut self) {
1943        if !self.is_sidecar() {
1944            // SAFETY: self.run is mapped, and the cancel field is atomically
1945            // accessed by everyone.
1946            let cancel = unsafe { &*(&raw mut (*self.run.get()).cancel).cast::<AtomicU32>() };
1947            cancel.store(0, Ordering::SeqCst);
1948        }
1949    }
1950
1951    /// Set the halted state of the VP. If `true`, then `run()` will not
1952    /// actually run the VP but will just wait for a cancel request or signal.
1953    pub fn set_halted(&mut self, halted: bool) {
1954        // SAFETY: the `flags` field of the run page will not be concurrently
1955        // updated.
1956        let flags = unsafe { &mut (*self.run.get()).flags };
1957        if halted {
1958            *flags |= protocol::MSHV_VTL_RUN_FLAG_HALTED
1959        } else {
1960            *flags &= !protocol::MSHV_VTL_RUN_FLAG_HALTED
1961        }
1962    }
1963
1964    /// Gets the proxied interrupt request bitmap for VTL 0 from the hypervisor.
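    ///
    /// Sketch of draining the returned bitmap (illustrative only; `runner` is
    /// an assumed `ProcessorRunner` and `handle_vector` a hypothetical
    /// callback):
    ///
    /// ```ignore
    /// if let Some(irr) = runner.proxy_irr_vtl0() {
    ///     for (word, mut bits) in irr.into_iter().enumerate() {
    ///         while bits != 0 {
    ///             let vector = word as u32 * 32 + bits.trailing_zeros();
    ///             handle_vector(vector as u8);
    ///             bits &= bits - 1; // clear the lowest set bit
    ///         }
    ///     }
    /// }
    /// ```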
1965    pub fn proxy_irr_vtl0(&mut self) -> Option<[u32; 8]> {
1966        // SAFETY: the `scan_proxy_irr` and `proxy_irr` fields of the run page
1967        // are concurrently updated by the kernel on multiple processors. They
1968        // are accessed atomically everywhere.
1969        unsafe {
1970            let scan_proxy_irr = &*((&raw mut (*self.run.get()).scan_proxy_irr).cast::<AtomicU8>());
1971            let proxy_irr = &*((&raw mut (*self.run.get()).proxy_irr).cast::<[AtomicU32; 8]>());
1972            if scan_proxy_irr.load(Ordering::Acquire) == 0 {
1973                return None;
1974            }
1975
1976            scan_proxy_irr.store(0, Ordering::SeqCst);
1977            let mut r = [0; 8];
1978            for (irr, r) in proxy_irr.iter().zip(r.iter_mut()) {
1979                if irr.load(Ordering::Relaxed) != 0 {
1980                    *r = irr.swap(0, Ordering::Relaxed);
1981                }
1982            }
1983            Some(r)
1984        }
1985    }
1986
1987    /// Update the `proxy_irr_blocked` for VTL 0 in the run page
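    ///
    /// Sketch of building the allow-list bitmap (illustrative only; the vectors
    /// below are arbitrary examples, not required values):
    ///
    /// ```ignore
    /// let mut irr_filter = [0u32; 8];
    /// for vector in [0x30u8, 0x51u8] {
    ///     // Bit set => vector allowed; everything else stays blocked.
    ///     irr_filter[(vector >> 5) as usize] |= 1u32 << (vector & 31);
    /// }
    /// runner.update_proxy_irr_filter_vtl0(&irr_filter);
    /// ```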
1988    pub fn update_proxy_irr_filter_vtl0(&mut self, irr_filter: &[u32; 8]) {
1989        // SAFETY: `proxy_irr_blocked` is accessed by current VP only, but could
1990        // be concurrently accessed by kernel too, hence accessing as Atomic
1991        let proxy_irr_blocked = unsafe {
1992            &mut *((&raw mut (*self.run.get()).proxy_irr_blocked).cast::<[AtomicU32; 8]>())
1993        };
1994
1995        // `irr_filter` bitmap has bits set for all allowed vectors (i.e. SINT and device interrupts)
1996        // Replace current `proxy_irr_blocked` with the given `irr_filter` bitmap.
1997        // By default block all (i.e. set all), and only allow (unset) given vectors from `irr_filter`.
1998        for (filter, irr) in proxy_irr_blocked.iter_mut().zip(irr_filter.iter()) {
1999            filter.store(!irr, Ordering::Relaxed);
2000            tracing::debug!(irr, "update_proxy_irr_filter");
2001        }
2002    }
2003
2004    /// Gets the proxy_irr_exit bitmask for VTL 0. Interrupt vectors set in
2005    /// this mask always exit to user space and cannot be injected by the
2006    /// kernel. Interrupts matching this condition will be left pending on
2007    /// the proxy_irr field.
2008    pub fn proxy_irr_exit_mut_vtl0(&mut self) -> &mut [u32; 8] {
2009        // SAFETY: The `proxy_irr_exit` field of the run page will not be concurrently updated.
2010        unsafe { &mut (*self.run.get()).proxy_irr_exit }
2011    }
2012
2013    /// Gets the current offload_flags from the run page.
2014    pub fn offload_flags_mut(&mut self) -> &mut hcl_intr_offload_flags {
2015        // SAFETY: The `offload_flags` field of the run page will not be concurrently updated.
2016        unsafe { &mut (*self.run.get()).offload_flags }
2017    }
2018
2019    /// Runs the VP via the sidecar kernel.
2020    pub fn run_sidecar(&mut self) -> Result<SidecarRun<'_, 'a>, Error> {
2021        self.sidecar.as_mut().unwrap().run().map_err(Error::Sidecar)
2022    }
2023
2024    /// Runs this VP until an exit, error, or interrupt (cancel or
2025    /// signal) occurs.
2026    ///
2027    /// Returns `Ok(true)` if there is an exit to process, `Ok(false)` if there
2028    /// was a signal or cancel request.
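    ///
    /// Sketch of a typical run loop (illustrative only; `runner` is an assumed
    /// non-sidecar `ProcessorRunner` and exit handling is elided):
    ///
    /// ```ignore
    /// loop {
    ///     runner.clear_cancel();
    ///     if runner.run()? {
    ///         let message = runner.exit_message();
    ///         // ... dispatch on the intercept message type ...
    ///     } else {
    ///         // Cancelled or signalled; let the caller decide whether to resume.
    ///         break;
    ///     }
    /// }
    /// ```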
2029    pub fn run(&mut self) -> Result<bool, Error> {
2030        assert!(self.sidecar.is_none());
2031        // Apply any deferred actions to the run page.
2032        if let Some(actions) = &mut self.deferred_actions {
2033            debug_assert!(self.hcl.supports_vtl_ret_action);
2034            // SAFETY: there are no concurrent accesses to the deferred action
2035            // slots.
2036            let mut slots = unsafe { DeferredActionSlots::new(self.run) };
2037            actions.move_to_slots(&mut slots);
2038        };
2039
2040        // N.B. cpu_context and exit_context are mutated by this call.
2041        //
2042        // SAFETY: no safety requirements for this ioctl.
2043        let r = unsafe { hcl_return_to_lower_vtl(self.hcl.mshv_vtl.file.as_raw_fd()) };
2044
2045        let has_intercept = match r {
2046            Ok(_) => true,
2047            Err(nix::errno::Errno::EINTR) => false,
2048            Err(err) => return Err(Error::ReturnToLowerVtl(err)),
2049        };
2050        Ok(has_intercept)
2051    }
2052
2053    /// Gets a reference to the enter mode value, used by the kernel to specify the
2054    /// mode used when entering a lower VTL.
2055    pub fn enter_mode(&mut self) -> Option<&mut EnterModes> {
2056        if self.sidecar.is_some() {
2057            None
2058        } else {
2059            // SAFETY: self.run is mapped, and the mode field can only be mutated or accessed by
2060            // this object (or the kernel while `run` is called).
2061            Some(unsafe { &mut (*self.run.get()).mode })
2062        }
2063    }
2064
2065    /// Returns a reference to the exit message from the last exit.
2066    pub fn exit_message(&self) -> &HvMessage {
2067        // SAFETY: the exit message will not be concurrently accessed by the
2068        // kernel while this VP is in VTL2.
2069        unsafe { &*self.intercept_message.get() }
2070    }
2071
2072    /// Returns whether this is a sidecar VP.
2073    pub fn is_sidecar(&self) -> bool {
2074        self.sidecar.is_some()
2075    }
2076
2077    fn get_vp_registers_inner<R: Copy + Into<HvRegisterName>>(
2078        &mut self,
2079        vtl: GuestVtl,
2080        names: &[R],
2081        values: &mut [HvRegisterValue],
2082    ) -> Result<(), Error> {
2083        assert_eq!(names.len(), values.len());
2084        let mut assoc = Vec::new();
2085        let mut offset = Vec::new();
2086        for (i, (&name, value)) in names.iter().zip(values.iter_mut()).enumerate() {
2087            if let Some(v) = T::try_get_reg(self, vtl, name.into())? {
2088                *value = v;
2089            } else {
2090                assoc.push(HvRegisterAssoc {
2091                    name: name.into(),
2092                    pad: Default::default(),
2093                    value: FromZeros::new_zeroed(),
2094                });
2095                offset.push(i);
2096            }
2097        }
2098
2099        self.get_reg(vtl.into(), &mut assoc)?;
2100        for (&i, assoc) in offset.iter().zip(&assoc) {
2101            values[i] = assoc.value;
2102        }
2103        Ok(())
2104    }
2105
2106    /// Gets the given register on the current VP.
2107    ///
2108    /// This will fail for registers that are in the mmapped CPU context, i.e.
2109    /// registers that are shared between VTL0 and VTL2.
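    ///
    /// Usage sketch (illustrative only, x86_64; `runner` is an assumed
    /// `ProcessorRunner` and the register chosen here is just an example of a
    /// VTL-private register):
    ///
    /// ```ignore
    /// let sint0 = runner
    ///     .get_vp_register(GuestVtl::Vtl0, HvX64RegisterName::Sint0)?
    ///     .as_u64();
    /// ```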
2110    pub fn get_vp_register(
2111        &mut self,
2112        vtl: GuestVtl,
2113        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2114        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2115    ) -> Result<HvRegisterValue, Error> {
2116        let mut value = [0u64.into(); 1];
2117        self.get_vp_registers_inner(vtl, &[name], &mut value)?;
2118        Ok(value[0])
2119    }
2120
2121    /// Gets the given register on the current VP for VTL 2.
2122    ///
2123    /// This will fail for registers that are in the mmapped CPU context, i.e.
2124    /// registers that are shared between VTL0 and VTL2.
2125    pub fn get_vp_vtl2_register(
2126        &mut self,
2127        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2128        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2129    ) -> Result<HvRegisterValue, Error> {
2130        let mut assoc = [HvRegisterAssoc {
2131            name: name.into(),
2132            pad: Default::default(),
2133            value: FromZeros::new_zeroed(),
2134        }];
2135        self.get_reg(Vtl::Vtl2, &mut assoc)?;
2136        Ok(assoc[0].value)
2137    }
2138
2139    /// Gets the given registers on the current VP.
2140    ///
2141    /// # Panics
2142    /// Panics if `names.len() != values.len()`.
2143    pub fn get_vp_registers(
2144        &mut self,
2145        vtl: GuestVtl,
2146        #[cfg(guest_arch = "x86_64")] names: &[HvX64RegisterName],
2147        #[cfg(guest_arch = "aarch64")] names: &[HvArm64RegisterName],
2148        values: &mut [HvRegisterValue],
2149    ) -> Result<(), Error> {
2150        self.get_vp_registers_inner(vtl, names, values)
2151    }
2152
2153    /// Sets the given register on the current VP.
2154    ///
2155    /// This will fail for registers that are in the mmapped CPU context, i.e.
2156    /// registers that are shared between VTL0 and VTL2.
2157    pub fn set_vp_register(
2158        &mut self,
2159        vtl: GuestVtl,
2160        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2161        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2162        value: HvRegisterValue,
2163    ) -> Result<(), Error> {
2164        self.set_vp_registers(vtl, [(name, value)])
2165    }
2166
2167    /// Sets a set of VP registers.
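    ///
    /// Usage sketch (illustrative only, x86_64; the registers and values below
    /// are arbitrary examples):
    ///
    /// ```ignore
    /// runner.set_vp_registers(
    ///     GuestVtl::Vtl0,
    ///     [
    ///         (HvX64RegisterName::Sint0, HvRegisterValue::from(0u64)),
    ///         (HvX64RegisterName::Sint1, HvRegisterValue::from(0u64)),
    ///     ],
    /// )?;
    /// ```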
2168    pub fn set_vp_registers<I>(&mut self, vtl: GuestVtl, values: I) -> Result<(), Error>
2169    where
2170        I: IntoIterator,
2171        I::Item: Into<HvRegisterAssoc> + Clone,
2172    {
2173        let mut assoc = Vec::new();
2174        for HvRegisterAssoc { name, value, .. } in values.into_iter().map(Into::into) {
2175            if !assoc.is_empty() && T::must_flush_regs_on(self, name) {
2176                self.set_reg(vtl, &assoc)?;
2177                assoc.clear();
2178            }
2179            if !T::try_set_reg(self, vtl, name, value)? {
2180                assoc.push(HvRegisterAssoc {
2181                    name,
2182                    pad: Default::default(),
2183                    value,
2184                });
2185            }
2186        }
2187        if !assoc.is_empty() {
2188            self.set_reg(vtl, &assoc)?;
2189        }
2190        Ok(())
2191    }
2192
2193    fn set_vp_registers_hvcall_inner(
2194        &mut self,
2195        vtl: Vtl,
2196        registers: &[HvRegisterAssoc],
2197    ) -> Result<(), HvError> {
2198        let header = hvdef::hypercall::GetSetVpRegisters {
2199            partition_id: HV_PARTITION_ID_SELF,
2200            vp_index: HV_VP_INDEX_SELF,
2201            target_vtl: vtl.into(),
2202            rsvd: [0; 3],
2203        };
2204
2205        tracing::trace!(?registers, "HvCallSetVpRegisters rep");
2206
2207        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2208        //         The hypercall output is validated right after the hypercall is issued.
2209        let status = unsafe {
2210            self.hcl
2211                .mshv_hvcall
2212                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2213                    HypercallCode::HvCallSetVpRegisters,
2214                    &header,
2215                    HvcallRepInput::Elements(registers),
2216                    None,
2217                )
2218                .expect("set_vp_registers hypercall should not fail")
2219        };
2220
2221        // Status must be success
2222        status.result()?;
2223        Ok(())
2224    }
2225
2226    /// Sets the given registers on the current VP and given VTL using a
2227    /// direct hypercall.
2228    ///
2229    /// This should not be used on the fast path. Therefore only a select set of
2230    /// registers is supported; others will cause a panic.
2231    ///
2232    /// This function can be used with VTL2 as a target.
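    ///
    /// Usage sketch (illustrative only; the register, VTL, and value below are
    /// arbitrary examples drawn from the allowed set):
    ///
    /// ```ignore
    /// runner.set_vp_registers_hvcall(
    ///     Vtl::Vtl0,
    ///     [(HvX64RegisterName::VsmVpWaitForTlbLock, HvRegisterValue::from(1u64))],
    /// )?;
    /// ```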
2233    pub fn set_vp_registers_hvcall<I>(&mut self, vtl: Vtl, values: I) -> Result<(), HvError>
2234    where
2235        I: IntoIterator,
2236        I::Item: Into<HvRegisterAssoc> + Clone,
2237    {
2238        let registers: Vec<HvRegisterAssoc> = values.into_iter().map(Into::into).collect();
2239
2240        assert!(registers.iter().all(
2241            |HvRegisterAssoc {
2242                 name,
2243                 pad: _,
2244                 value: _,
2245             }| matches!(
2246                (*name).into(),
2247                HvX64RegisterName::PendingEvent0
2248                    | HvX64RegisterName::PendingEvent1
2249                    | HvX64RegisterName::Sipp
2250                    | HvX64RegisterName::Sifp
2251                    | HvX64RegisterName::Ghcb
2252                    | HvX64RegisterName::VsmPartitionConfig
2253                    | HvX64RegisterName::VsmVpWaitForTlbLock
2254                    | HvX64RegisterName::VsmVpSecureConfigVtl0
2255                    | HvX64RegisterName::VsmVpSecureConfigVtl1
2256                    | HvX64RegisterName::CrInterceptControl
2257            )
2258        ));
2259        self.set_vp_registers_hvcall_inner(vtl, &registers)
2260    }
2261
2262    /// Sets the VTL that should be returned to when underhill exits.
2263    pub fn set_exit_vtl(&mut self, vtl: GuestVtl) {
2264        // SAFETY: self.run is mapped, and the target_vtl field can only be
2265        // mutated or accessed by this object and only before the kernel is
2266        // invoked during `run`
2267        unsafe { (*self.run.get()).target_vtl = vtl.into() }
2268    }
2269}
2270
2271impl Hcl {
2272    /// Returns a new HCL instance.
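    ///
    /// Construction sketch (illustrative only; a non-isolated partition with no
    /// sidecar is assumed, and `vp_count` is supplied by the caller):
    ///
    /// ```ignore
    /// let mut hcl = Hcl::new(IsolationType::None, None)?;
    /// hcl.add_vps(vp_count, None)?;
    /// ```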
2273    pub fn new(isolation: IsolationType, sidecar: Option<SidecarClient>) -> Result<Hcl, Error> {
2274        static SIGNAL_HANDLER_INIT: Once = Once::new();
2275        // SAFETY: The signal handler does not perform any actions that are forbidden
2276        // in signal handlers, since it does nothing at all.
2277        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
2278            signal_hook::low_level::register(libc::SIGRTMIN(), || {
2279                // Do nothing, the ioctl will now return with EINTR.
2280            })
2281            .unwrap();
2282        });
2283
2284        // Open both mshv fds
2285        let mshv_fd = Mshv::new()?;
2286
2287        // Validate the hypervisor's advertised isolation type matches the
2288        // requested isolation type. In CVM scenarios, this is not trusted, so
2289        // we still need the isolation type from the caller.
2290        //
2291        // FUTURE: the kernel driver should probably tell us this, especially
2292        // since the kernel ABI is different for different isolation types.
2293        let supported_isolation = if cfg!(guest_arch = "x86_64") {
2294            // xtask-fmt allow-target-arch cpu-intrinsic
2295            #[cfg(target_arch = "x86_64")]
2296            {
2297                let result = safe_intrinsics::cpuid(
2298                    hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION,
2299                    0,
2300                );
2301                match result.ebx & 0xF {
2302                    0 => IsolationType::None,
2303                    1 => IsolationType::Vbs,
2304                    2 => IsolationType::Snp,
2305                    3 => IsolationType::Tdx,
2306                    ty => panic!("unknown isolation type {ty:#x}"),
2307                }
2308            }
2309            // xtask-fmt allow-target-arch cpu-intrinsic
2310            #[cfg(not(target_arch = "x86_64"))]
2311            {
2312                unreachable!()
2313            }
2314        } else {
2315            IsolationType::None
2316        };
2317
2318        if isolation != supported_isolation {
2319            return Err(Error::MismatchedIsolation {
2320                supported: supported_isolation,
2321                requested: isolation,
2322            });
2323        }
2324
2325        let supports_vtl_ret_action = mshv_fd.check_extension(HCL_CAP_VTL_RETURN_ACTION)?;
2326        let supports_register_page = mshv_fd.check_extension(HCL_CAP_REGISTER_PAGE)?;
2327        let dr6_shared = mshv_fd.check_extension(HCL_CAP_DR6_SHARED)?;
2328        tracing::debug!(
2329            supports_vtl_ret_action,
2330            supports_register_page,
2331            "HCL capabilities",
2332        );
2333
2334        let vtl_fd = mshv_fd.create_vtl()?;
2335
2336        // Open the hypercall pseudo-device
2337        let mshv_hvcall = MshvHvcall::new()?;
2338
2339        // Override certain features for hardware isolated VMs.
2340        // TODO: vtl return actions are inhibited for hardware isolated VMs because they currently
2341        // are a pessimization since interrupt handling (and synic handling) are all done from
2342        // within VTL2. Future vtl return actions may be different, requiring granular handling.
2343        let supports_vtl_ret_action = supports_vtl_ret_action && !isolation.is_hardware_isolated();
2344        let supports_register_page = supports_register_page && !isolation.is_hardware_isolated();
2345        let snp_register_bitmap = [0u8; 64];
2346
2347        Ok(Hcl {
2348            mshv_hvcall,
2349            mshv_vtl: vtl_fd,
2350            vps: Vec::new(),
2351            supports_vtl_ret_action,
2352            supports_register_page,
2353            dr6_shared,
2354            isolation,
2355            snp_register_bitmap,
2356            sidecar,
2357        })
2358    }
2359
2360    /// Set allowed hypercalls.
2361    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
2362        self.mshv_hvcall.set_allowed_hypercalls(codes)
2363    }
2364
2365    /// Initializes the SNP register tweak bitmap.
2366    pub fn set_snp_register_bitmap(&mut self, register_bitmap: [u8; 64]) {
2367        self.snp_register_bitmap = register_bitmap;
2368    }
2369
2370    /// Adds `vp_count` VPs.
2371    pub fn add_vps(
2372        &mut self,
2373        vp_count: u32,
2374        private_pool: Option<&Arc<dyn DmaClient>>,
2375    ) -> Result<(), Error> {
2376        self.vps = (0..vp_count)
2377            .map(|vp| {
2378                HclVp::new(
2379                    self,
2380                    vp,
2381                    self.supports_register_page,
2382                    self.isolation,
2383                    private_pool,
2384                )
2385            })
2386            .collect::<Result<_, _>>()?;
2387
2388        Ok(())
2389    }
2390
2391    /// Registers with the hypervisor for an intercept.
2392    pub fn register_intercept(
2393        &self,
2394        intercept_type: HvInterceptType,
2395        access_type_mask: u32,
2396        intercept_parameters: HvInterceptParameters,
2397    ) -> Result<(), HvError> {
2398        let intercept_info = hvdef::hypercall::InstallIntercept {
2399            partition_id: HV_PARTITION_ID_SELF,
2400            access_type_mask,
2401            intercept_type,
2402            intercept_parameters,
2403        };
2404
2405        // SAFETY: calling hypercall with appropriate input and output.
2406        unsafe {
2407            self.mshv_hvcall
2408                .hvcall(
2409                    HypercallCode::HvCallInstallIntercept,
2410                    &intercept_info,
2411                    &mut (),
2412                )
2413                .unwrap()
2414                .result()
2415        }
2416    }
2417
2418    /// Returns the base CPU that manages the given sidecar VP.
2419    pub fn sidecar_base_cpu(&self, vp_index: u32) -> Option<u32> {
2420        Some(self.sidecar.as_ref()?.base_cpu(vp_index))
2421    }
2422
2423    /// Creates a VP runner for the given VP index.
2424    pub fn runner<'a, T: Backing<'a>>(
2425        &'a self,
2426        vp_index: u32,
2427        use_sidecar: bool,
2428    ) -> Result<ProcessorRunner<'a, T>, NoRunner> {
2429        let vp = &self.vps[vp_index as usize];
2430
2431        let sidecar = if use_sidecar {
2432            Some(
2433                self.sidecar
2434                    .as_ref()
2435                    .ok_or(NoRunner::MissingSidecar)?
2436                    .vp(vp_index),
2437            )
2438        } else {
2439            None
2440        };
2441
2442        let state = T::new(vp, sidecar.as_ref(), self)?;
2443
2444        // Set this thread as the runner.
2445        let VpState::NotRunning =
2446            std::mem::replace(&mut *vp.state.lock(), VpState::Running(Pthread::current()))
2447        else {
2448            panic!("another runner already exists")
2449        };
2450
2451        let actions = if sidecar.is_none() && self.supports_vtl_ret_action {
2452            Some(register_deferred_actions(self))
2453        } else {
2454            None
2455        };
2456
2457        // SAFETY: The run page is guaranteed to be mapped and valid.
2458        // While the exit message might not be filled in yet we're only computing its address.
2459        let intercept_message = unsafe {
2460            &*sidecar.as_ref().map_or(
2461                std::ptr::addr_of!((*vp.run.as_ptr()).exit_message).cast(),
2462                |s| s.intercept_message().cast(),
2463            )
2464        };
2465
2466        Ok(ProcessorRunner {
2467            hcl: self,
2468            vp,
2469            deferred_actions: actions,
2470            run: vp.run.as_ref(),
2471            intercept_message,
2472            state,
2473            sidecar,
2474        })
2475    }
2476
2477    /// Triggers the given interrupt request.
2478    pub fn request_interrupt(
2479        &self,
2480        interrupt_control: hvdef::HvInterruptControl,
2481        destination_address: u64,
2482        requested_vector: u32,
2483        target_vtl: GuestVtl,
2484    ) -> Result<(), Error> {
2485        tracing::trace!(
2486            ?interrupt_control,
2487            destination_address,
2488            requested_vector,
2489            "requesting interrupt"
2490        );
2491
2492        assert!(!self.isolation.is_hardware_isolated());
2493
2494        let request = AssertVirtualInterrupt {
2495            partition_id: HV_PARTITION_ID_SELF,
2496            interrupt_control,
2497            destination_address,
2498            requested_vector,
2499            target_vtl: target_vtl as u8,
2500            rsvd0: 0,
2501            rsvd1: 0,
2502        };
2503
2504        // SAFETY: calling the hypercall with correct input buffer.
2505        let output = unsafe {
2506            self.mshv_hvcall.hvcall(
2507                HypercallCode::HvCallAssertVirtualInterrupt,
2508                &request,
2509                &mut (),
2510            )
2511        }
2512        .unwrap();
2513
2514        output.result().map_err(Error::RequestInterrupt)
2515    }
2516
2517    /// Attempts to signal a given vp/sint/flag combo using HvSignalEventDirect.
2518    ///
2519    /// No result is returned because this request may be deferred until the
2520    /// hypervisor is returning to a lower VTL.
2521    pub fn signal_event_direct(&self, vp: u32, sint: u8, flag: u16) {
2522        tracing::trace!(vp, sint, flag, "signaling event");
2523        push_deferred_action(self, DeferredAction::SignalEvent { vp, sint, flag });
2524    }
2525
2526    fn hvcall_signal_event_direct(&self, vp: u32, sint: u8, flag: u16) -> Result<bool, Error> {
2527        let signal_event_input = hvdef::hypercall::SignalEventDirect {
2528            target_partition: HV_PARTITION_ID_SELF,
2529            target_vp: vp,
2530            target_vtl: Vtl::Vtl0 as u8,
2531            target_sint: sint,
2532            flag_number: flag,
2533        };
2534        let mut signal_event_output = hvdef::hypercall::SignalEventDirectOutput {
2535            newly_signaled: 0,
2536            rsvd: [0; 7],
2537        };
2538
2539        // SAFETY: calling the hypercall with correct input buffer.
2540        let output = unsafe {
2541            self.mshv_hvcall.hvcall(
2542                HypercallCode::HvCallSignalEventDirect,
2543                &signal_event_input,
2544                &mut signal_event_output,
2545            )
2546        }
2547        .unwrap();
2548
2549        output
2550            .result()
2551            .map(|_| signal_event_output.newly_signaled != 0)
2552            .map_err(Error::SignalEvent)
2553    }
2554
2555    /// Attempts to post a given message to a vp/sint combo using HvPostMessageDirect.
2556    pub fn post_message_direct(
2557        &self,
2558        vp: u32,
2559        sint: u8,
2560        message: &HvMessage,
2561    ) -> Result<(), HvError> {
2562        tracing::trace!(vp, sint, "posting message");
2563
2564        let post_message = hvdef::hypercall::PostMessageDirect {
2565            partition_id: HV_PARTITION_ID_SELF,
2566            vp_index: vp,
2567            vtl: Vtl::Vtl0 as u8,
2568            padding0: [0; 3],
2569            sint,
2570            padding1: [0; 3],
2571            message: zerocopy::Unalign::new(*message),
2572            padding2: 0,
2573        };
2574
2575        // SAFETY: calling the hypercall with correct input buffer.
2576        let output = unsafe {
2577            self.mshv_hvcall.hvcall(
2578                HypercallCode::HvCallPostMessageDirect,
2579                &post_message,
2580                &mut (),
2581            )
2582        }
2583        .unwrap();
2584
2585        output.result()
2586    }
2587
2588    /// Sets a file to poll during run. When the file's poll state changes, the
2589    /// run will be automatically cancelled.
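    ///
    /// Usage sketch (illustrative only; `cancel_event` stands in for any
    /// pollable fd the caller owns, such as an eventfd):
    ///
    /// ```ignore
    /// hcl.set_poll_file(vp_index, cancel_event.as_raw_fd())?;
    /// ```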
2590    pub fn set_poll_file(&self, vp: u32, file: RawFd) -> Result<(), Error> {
2591        // SAFETY: calling the IOCTL as defined. This is safe even if the caller
2592        // does not own `file` since all this does is register the file for
2593        // polling.
2594        unsafe {
2595            hcl_set_poll_file(
2596                self.mshv_vtl.file.as_raw_fd(),
2597                &protocol::hcl_set_poll_file {
2598                    cpu: vp as i32,
2599                    fd: file,
2600                },
2601            )
2602            .map_err(Error::SetPollFile)?;
2603        }
2604        Ok(())
2605    }
2606
2607    /// Gets the current hypervisor reference time.
2608    pub fn reference_time(&self) -> Result<u64, Error> {
2609        Ok(self
2610            .get_vp_register(HvAllArchRegisterName::TimeRefCount, HvInputVtl::CURRENT_VTL)?
2611            .as_u64())
2612    }
2613
2614    /// Get a single VP register for the given VTL via hypercall. Only a select
2615    /// set of registers are supported; others will cause a panic.
2616    #[cfg(guest_arch = "x86_64")]
2617    fn get_vp_register(
2618        &self,
2619        name: impl Into<HvX64RegisterName>,
2620        vtl: HvInputVtl,
2621    ) -> Result<HvRegisterValue, Error> {
2622        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2623    }
2624
2625    /// Get a single VP register for the given VTL via hypercall. Only a select
2626    /// set of registers are supported; others will cause a panic.
2627    #[cfg(guest_arch = "aarch64")]
2628    fn get_vp_register(
2629        &self,
2630        name: impl Into<HvArm64RegisterName>,
2631        vtl: HvInputVtl,
2632    ) -> Result<HvRegisterValue, Error> {
2633        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2634    }
2635
2636    /// Sets a single VP register via hypercall as VTL2. Only a select set of registers is
2637    /// supported; others will cause a panic.
2638    fn set_vp_register(
2639        &self,
2640        name: HvRegisterName,
2641        value: HvRegisterValue,
2642        vtl: HvInputVtl,
2643    ) -> Result<(), HvError> {
2644        match vtl.target_vtl().unwrap() {
2645            None | Some(Vtl::Vtl2) => {
2646                #[cfg(guest_arch = "x86_64")]
2647                assert!(matches!(
2648                    name.into(),
2649                    HvX64RegisterName::GuestVsmPartitionConfig
2650                        | HvX64RegisterName::VsmPartitionConfig
2651                        | HvX64RegisterName::PmTimerAssist
2652                ));
2653
2654                #[cfg(guest_arch = "aarch64")]
2655                assert!(matches!(
2656                    name.into(),
2657                    HvArm64RegisterName::GuestVsmPartitionConfig
2658                        | HvArm64RegisterName::VsmPartitionConfig
2659                ));
2660            }
2661            Some(Vtl::Vtl1) => {
2662                // TODO: allowed registers for VTL1
2663                todo!();
2664            }
2665            Some(Vtl::Vtl0) => {
2666                // TODO: allowed registers for VTL0
2667                todo!();
2668            }
2669        }
2670
2671        let header = hvdef::hypercall::GetSetVpRegisters {
2672            partition_id: HV_PARTITION_ID_SELF,
2673            vp_index: HV_VP_INDEX_SELF,
2674            target_vtl: HvInputVtl::CURRENT_VTL,
2675            rsvd: [0; 3],
2676        };
2677
2678        let input = HvRegisterAssoc {
2679            name,
2680            pad: Default::default(),
2681            value,
2682        };
2683
2684        tracing::trace!(?name, register = ?value, "HvCallSetVpRegisters");
2685
2686        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2687        //         The hypercall output is validated right after the hypercall is issued.
2688        let output = unsafe {
2689            self.mshv_hvcall
2690                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2691                    HypercallCode::HvCallSetVpRegisters,
2692                    &header,
2693                    HvcallRepInput::Elements(&[input]),
2694                    None,
2695                )
2696                .expect("set_vp_registers hypercall should not fail")
2697        };
2698
2699        output.result()?;
2700
2701        // hypercall must succeed with 1 rep completed
2702        assert_eq!(output.elements_processed(), 1);
2703        Ok(())
2704    }
2705
2706    /// Translates the given GVA to a GPA page.
2707    ///
2708    /// The caller must ensure `control_flags.input_vtl()` is set to a specific
2709    /// VTL.
2710    #[cfg(guest_arch = "aarch64")]
2711    pub fn translate_gva_to_gpa(
2712        &self,
2713        gva: u64,
2714        control_flags: hvdef::hypercall::TranslateGvaControlFlagsArm64,
2715    ) -> Result<Result<TranslateResult, aarch64::TranslateErrorAarch64>, TranslateGvaToGpaError>
2716    {
2717        use hvdef::hypercall;
2718
2719        assert!(!self.isolation.is_hardware_isolated());
2720        assert!(
2721            control_flags.input_vtl().use_target_vtl(),
2722            "did not specify a target VTL"
2723        );
2724
2725        let header = hypercall::TranslateVirtualAddressArm64 {
2726            partition_id: HV_PARTITION_ID_SELF,
2727            vp_index: HV_VP_INDEX_SELF,
2728            reserved: 0,
2729            control_flags,
2730            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
2731        };
2732
2733        let mut output: hypercall::TranslateVirtualAddressExOutputArm64 = FromZeros::new_zeroed();
2734
2735        // SAFETY: The input header and slice are the correct types for this hypercall.
2736        //         The hypercall output is validated right after the hypercall is issued.
2737        let status = unsafe {
2738            self.mshv_hvcall
2739                .hvcall(
2740                    HypercallCode::HvCallTranslateVirtualAddressEx,
2741                    &header,
2742                    &mut output,
2743                )
2744                .expect("translate can never fail")
2745        };
2746
2747        status
2748            .result()
2749            .map_err(|hv_error| TranslateGvaToGpaError::Hypervisor { gva, hv_error })?;
2750
2751        // Note: WHP doesn't currently support TranslateVirtualAddressEx, so overlay_page, cache_type,
2752        // event_info aren't trustworthy values if the results came from WHP.
2753        match output.translation_result.result.result_code() {
2754            c if c == hypercall::TranslateGvaResultCode::SUCCESS.0 => Ok(Ok(TranslateResult {
2755                gpa_page: output.gpa_page,
2756                overlay_page: output.translation_result.result.overlay_page(),
2757            })),
2758            x => Ok(Err(aarch64::TranslateErrorAarch64 { code: x })),
2759        }
2760    }
2761
2762    fn to_hv_gpa_range_array(gpa_memory_ranges: &[MemoryRange]) -> Vec<HvGpaRange> {
2763        const PAGES_PER_ENTRY: u64 = 2048;
2764        const PAGE_SIZE: u64 = HV_PAGE_SIZE;
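        // Worked example (illustrative): a 16 MiB range spans 4096 4 KiB pages;
        // 4096.div_ceil(2048) == 2, so it is encoded as two HvGpaRange entries of
        // 2048 pages each, while a single 4 KiB page becomes one entry of 1 page.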
2765
2766        // Estimate the total number of HvGpaRange entries needed across all memory ranges
2767        let estimated_size: usize = gpa_memory_ranges
2768            .iter()
2769            .map(|memory_range| {
2770                let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2771                total_pages.div_ceil(PAGES_PER_ENTRY)
2772            })
2773            .sum::<u64>() as usize;
2774
2775        // Create a vector with the estimated size
2776        let mut hv_gpa_ranges = Vec::with_capacity(estimated_size);
2777
2778        for memory_range in gpa_memory_ranges {
2779            // Calculate the total number of pages in the memory range
2780            let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2781
2782            // Convert start address to page number
2783            let start_page = memory_range.start_4k_gpn();
2784
2785            // Generate the ranges and append them to the vector
2786            hv_gpa_ranges.extend(
2787                (0..total_pages)
2788                    .step_by(PAGES_PER_ENTRY as usize)
2789                    .map(|start| {
2790                        let end = std::cmp::min(total_pages, start + PAGES_PER_ENTRY);
2791                        let pages_in_this_range = end - start;
2792                        let gpa_page_number = start_page + start;
2793
2794                        let extended = HvGpaRangeExtended::new()
2795                            .with_additional_pages(pages_in_this_range - 1)
2796                            .with_large_page(false) // Assuming not a large page
2797                            .with_gpa_page_number(gpa_page_number);
2798
2799                        HvGpaRange(extended.into_bits())
2800                    }),
2801            );
2802        }
2803
2804        hv_gpa_ranges // Return the vector at the end
2805    }
2806
2807    fn pin_unpin_gpa_ranges_internal(
2808        &self,
2809        gpa_ranges: &[HvGpaRange],
2810        action: GpaPinUnpinAction,
2811    ) -> Result<(), PinUnpinError> {
2812        const PIN_REQUEST_HEADER_SIZE: usize =
2813            size_of::<hvdef::hypercall::PinUnpinGpaPageRangesHeader>();
2814        const MAX_INPUT_ELEMENTS: usize =
2815            (HV_PAGE_SIZE as usize - PIN_REQUEST_HEADER_SIZE) / size_of::<u64>();
2816
2817        let header = hvdef::hypercall::PinUnpinGpaPageRangesHeader { reserved: 0 };
2818        let mut ranges_processed = 0;
2819
2820        for chunk in gpa_ranges.chunks(MAX_INPUT_ELEMENTS) {
2821            // SAFETY: This unsafe block is valid because:
2822            // 1. The hypercall code and header match the expected input for the hypercall.
2823            //
2824            // 2. The hypercall result is checked right after the hypercall is issued.
2825            //
2826            let output = unsafe {
2827                self.mshv_hvcall
2828                    .hvcall_rep(
2829                        match action {
2830                            GpaPinUnpinAction::PinGpaRange => HypercallCode::HvCallPinGpaPageRanges,
2831                            GpaPinUnpinAction::UnpinGpaRange => {
2832                                HypercallCode::HvCallUnpinGpaPageRanges
2833                            }
2834                        },
2835                        &header,
2836                        HvcallRepInput::Elements(chunk),
2837                        None::<&mut [u8]>,
2838                    )
2839                    .expect("submitting pin/unpin hypercall should not fail")
2840            };
2841
2842            ranges_processed += output.elements_processed();
2843
2844            output.result().map_err(|e| PinUnpinError {
2845                ranges_processed,
2846                error: e,
2847            })?;
2848        }
2849
2850        // At the end, all ranges should have been processed
2851        if ranges_processed == gpa_ranges.len() {
2852            Ok(())
2853        } else {
2854            Err(PinUnpinError {
2855                ranges_processed,
2856                error: HvError::OperationFailed,
2857            })
2858        }
2859    }
2860
2861    fn perform_pin_unpin_gpa_ranges(
2862        &self,
2863        gpa_ranges: &[MemoryRange],
2864        action: GpaPinUnpinAction,
2865        rollback_action: GpaPinUnpinAction,
2866    ) -> Result<(), HvError> {
2867        let hv_gpa_ranges: Vec<HvGpaRange> = Self::to_hv_gpa_range_array(gpa_ranges);
2868
2869        // Attempt to pin/unpin the ranges
2870        match self.pin_unpin_gpa_ranges_internal(&hv_gpa_ranges, action) {
2871            Ok(_) => Ok(()),
2872            Err(PinUnpinError {
2873                error,
2874                ranges_processed,
2875            }) => {
2876                // Roll back the ranges that were successfully processed.
2877                let pinned_ranges = &hv_gpa_ranges[..ranges_processed];
2878                if let Err(rollback_error) =
2879                    self.pin_unpin_gpa_ranges_internal(pinned_ranges, rollback_action)
2880                {
2881                    // Panic if the rollback fails.
2882                    panic!(
2883                        "Failed to perform action {:?} on ranges. Error: {:?}. \
2884                        Attempted to roll back {:?} ranges out of {:?}.\nrollback error: {:?}",
2885                        action,
2886                        error,
2887                        ranges_processed,
2888                        gpa_ranges.len(),
2889                        rollback_error
2890                    );
2891                }
2892                // Surface the original error
2893                Err(error)
2894            }
2895        }
2896    }
2897
2898    /// Pins the specified guest physical address ranges in the hypervisor.
2899    /// The memory ranges passed to this function must be VA-backed memory.
2900    /// If a partial failure occurs (i.e., some but not all the ranges were successfully pinned),
2901    /// the function will automatically attempt to unpin any successfully pinned ranges.
2902    /// This "rollback" behavior ensures that no partially pinned state remains, which
2903    /// could otherwise lead to inconsistencies.
2904    ///
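    /// # Example
    ///
    /// An illustrative sketch (not compiled; assumes `hcl` is an existing
    /// handle to this interface and `range` is a VA-backed [`MemoryRange`]):
    ///
    /// ```ignore
    /// hcl.pin_gpa_ranges(&[range])?;
    /// // ... the pages backing `range` are now pinned in the hypervisor ...
    /// hcl.unpin_gpa_ranges(&[range])?;
    /// ```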
2905    pub fn pin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2906        self.perform_pin_unpin_gpa_ranges(
2907            ranges,
2908            GpaPinUnpinAction::PinGpaRange,
2909            GpaPinUnpinAction::UnpinGpaRange,
2910        )
2911    }
2912
2913    /// Unpins the specified guest physical address ranges in the hypervisor.
2914    /// The memory ranges passed to this function must be VA-backed memory.
2915    /// If a partial failure occurs (i.e., some but not all the ranges were successfully unpinned),
2916    /// the function will automatically attempt to pin any successfully unpinned ranges. This "rollback"
2917    /// behavior ensures that no partially unpinned state remains, which could otherwise lead to inconsistencies.
2918    ///
2919    pub fn unpin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2920        self.perform_pin_unpin_gpa_ranges(
2921            ranges,
2922            GpaPinUnpinAction::UnpinGpaRange,
2923            GpaPinUnpinAction::PinGpaRange,
2924        )
2925    }
2926
2927    /// Read the VSM capabilities register for VTL2.
2928    pub fn get_vsm_capabilities(&self) -> Result<hvdef::HvRegisterVsmCapabilities, Error> {
2929        let caps = hvdef::HvRegisterVsmCapabilities::from(
2930            self.get_vp_register(
2931                HvAllArchRegisterName::VsmCapabilities,
2932                HvInputVtl::CURRENT_VTL,
2933            )?
2934            .as_u64(),
2935        );
2936
2937        let caps = match self.isolation {
2938            IsolationType::None | IsolationType::Vbs => caps,
2939            // TODO SNP: Return actions may be useful, but with alternate injection many of these
2940            // cannot actually be processed by the hypervisor without returning to VTL2.
2941            // Filter them out for now.
2942            IsolationType::Snp => hvdef::HvRegisterVsmCapabilities::new()
2943                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2944                .with_intercept_page_available(caps.intercept_page_available()),
2945            IsolationType::Tdx => hvdef::HvRegisterVsmCapabilities::new()
2946                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2947                .with_intercept_page_available(caps.intercept_page_available())
2948                .with_dr6_shared(true),
2949        };
2950
2951        assert_eq!(caps.dr6_shared(), self.dr6_shared());
2952
2953        Ok(caps)
2954    }
2955
2956    /// Set the [`hvdef::HvRegisterVsmPartitionConfig`] register.
2957    pub fn set_vtl2_vsm_partition_config(
2958        &self,
2959        vsm_config: HvRegisterVsmPartitionConfig,
2960    ) -> Result<(), SetVsmPartitionConfigError> {
2961        self.set_vp_register(
2962            HvAllArchRegisterName::VsmPartitionConfig.into(),
2963            HvRegisterValue::from(u64::from(vsm_config)),
2964            HvInputVtl::CURRENT_VTL,
2965        )
2966        .map_err(|e| SetVsmPartitionConfigError::Hypervisor {
2967            config: vsm_config,
2968            hv_error: e,
2969        })
2970    }
2971
2972    /// Get the [`hvdef::HvRegisterGuestVsmPartitionConfig`] register
2973    pub fn get_guest_vsm_partition_config(
2974        &self,
2975    ) -> Result<hvdef::HvRegisterGuestVsmPartitionConfig, Error> {
2976        Ok(hvdef::HvRegisterGuestVsmPartitionConfig::from(
2977            self.get_vp_register(
2978                HvAllArchRegisterName::GuestVsmPartitionConfig,
2979                HvInputVtl::CURRENT_VTL,
2980            )?
2981            .as_u64(),
2982        ))
2983    }
2984
2985    /// Get the [`hvdef::HvRegisterVsmPartitionStatus`] register
2986    pub fn get_vsm_partition_status(&self) -> Result<hvdef::HvRegisterVsmPartitionStatus, Error> {
2987        Ok(hvdef::HvRegisterVsmPartitionStatus::from(
2988            self.get_vp_register(
2989                HvAllArchRegisterName::VsmPartitionStatus,
2990                HvInputVtl::CURRENT_VTL,
2991            )?
2992            .as_u64(),
2993        ))
2994    }
2995
2996    #[cfg(guest_arch = "aarch64")]
2997    /// Get the [`hvdef::HvPartitionPrivilege`] register
2998    pub fn get_privileges_and_features_info(&self) -> Result<hvdef::HvPartitionPrivilege, Error> {
2999        Ok(hvdef::HvPartitionPrivilege::from(
3000            self.get_vp_register(
3001                HvArm64RegisterName::PrivilegesAndFeaturesInfo,
3002                HvInputVtl::CURRENT_VTL,
3003            )?
3004            .as_u64(),
3005        ))
3006    }
3007
3008    /// Get the [`hvdef::hypercall::HvGuestOsId`] register for the given VTL.
3009    pub fn get_guest_os_id(&self, vtl: Vtl) -> Result<hvdef::hypercall::HvGuestOsId, Error> {
3010        Ok(hvdef::hypercall::HvGuestOsId::from(
3011            self.get_vp_register(HvAllArchRegisterName::GuestOsId, vtl.into())?
3012                .as_u64(),
3013        ))
3014    }
3015
3016    /// Configure guest VSM.
3017    /// The only configuration attribute currently supported is changing the maximum number of
3018    /// guest-visible virtual trust levels for the partition (i.e., whether VTL 1 is enabled).
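    ///
    /// An illustrative sketch (not compiled; assumes `hcl` is an existing
    /// handle to this interface):
    ///
    /// ```ignore
    /// // Enable guest VSM by exposing VTL 1 to the guest.
    /// hcl.set_guest_vsm_partition_config(true)?;
    /// ```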
3019    pub fn set_guest_vsm_partition_config(
3020        &self,
3021        enable_guest_vsm: bool,
3022    ) -> Result<(), SetGuestVsmConfigError> {
3023        let register_value = hvdef::HvRegisterGuestVsmPartitionConfig::new()
3024            .with_maximum_vtl(if enable_guest_vsm { 1 } else { 0 })
3025            .with_reserved(0);
3026
3027        tracing::trace!(enable_guest_vsm, "set_guest_vsm_partition_config");
3028        if self.isolation.is_hardware_isolated() {
3029            unimplemented!("set_guest_vsm_partition_config");
3030        }
3031
3032        self.set_vp_register(
3033            HvAllArchRegisterName::GuestVsmPartitionConfig.into(),
3034            HvRegisterValue::from(u64::from(register_value)),
3035            HvInputVtl::CURRENT_VTL,
3036        )
3037        .map_err(|e| SetGuestVsmConfigError::Hypervisor {
3038            enable_guest_vsm,
3039            hv_error: e,
3040        })
3041    }
3042
3043    /// Sets the Power Management Timer assist in the hypervisor.
3044    #[cfg(guest_arch = "x86_64")]
3045    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
3046        tracing::debug!(?port, "set_pm_timer_assist");
3047        if self.isolation.is_hardware_isolated() {
3048            if port.is_some() {
3049                unimplemented!("set_pm_timer_assist");
3050            }
3051        }
3052
3053        let val = HvRegisterValue::from(u64::from(match port {
3054            Some(p) => hvdef::HvPmTimerInfo::new()
3055                .with_port(p)
3056                .with_enabled(true)
3057                .with_width_24(false),
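            // `None` disables the assist by writing an all-zero register value.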
3058            None => 0.into(),
3059        }));
3060
3061        self.set_vp_register(
3062            HvX64RegisterName::PmTimerAssist.into(),
3063            val,
3064            HvInputVtl::CURRENT_VTL,
3065        )
3066    }
3067
3068    /// Sets the Power Management Timer assist in the hypervisor.
3069    #[cfg(guest_arch = "aarch64")]
3070    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
3071        tracing::debug!(?port, "set_pm_timer_assist unimplemented on aarch64");
3072        Err(HvError::UnknownRegisterName)
3073    }
3074
3075    /// Sets the VTL protection mask for the specified memory range.
3076    pub fn modify_vtl_protection_mask(
3077        &self,
3078        range: MemoryRange,
3079        map_flags: HvMapGpaFlags,
3080        target_vtl: HvInputVtl,
3081    ) -> Result<(), ApplyVtlProtectionsError> {
3082        if self.isolation.is_hardware_isolated() {
3083            // TODO SNP TODO TDX - required for vmbus relay monitor page support
3084            todo!();
3085        }
3086
3087        self.mshv_hvcall
3088            .modify_vtl_protection_mask(range, map_flags, target_vtl)
3089    }
3090
3091    /// Checks whether the target VTL has the requested access permissions for the given GPA.
3092    pub fn check_vtl_access(
3093        &self,
3094        gpa: u64,
3095        target_vtl: GuestVtl,
3096        flags: HvMapGpaFlags,
3097    ) -> Result<Option<CheckVtlAccessResult>, Error> {
3098        assert!(!self.isolation.is_hardware_isolated());
3099
3100        let header = hvdef::hypercall::CheckSparseGpaPageVtlAccess {
3101            partition_id: HV_PARTITION_ID_SELF,
3102            target_vtl: HvInputVtl::from(target_vtl),
3103            desired_access: u32::from(flags) as u8,
3104            reserved0: 0,
3105            reserved1: 0,
3106        };
3107
3108        let mut output = [hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput::new()];
3109
3110        // SAFETY: The input header and rep slice are the correct types for this hypercall.
3111        //         The hypercall output is validated right after the hypercall is issued.
3112        let status = unsafe {
3113            self.mshv_hvcall.hvcall_rep::<hvdef::hypercall::CheckSparseGpaPageVtlAccess, u64, hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput>(
3114                HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
3115                &header,
3116                HvcallRepInput::Elements(&[gpa >> hvdef::HV_PAGE_SHIFT]),
3117                Some(&mut output),
3118            )
3119            .expect("check_vtl_access hypercall should not fail")
3120        };
3121
3122        status.result().map_err(Error::CheckVtlAccess)?;
3123
3124        let access_result = output[0];
3125
3126        if access_result.result_code() as u32
3127            != hvdef::hypercall::CheckGpaPageVtlAccessResultCode::SUCCESS.0
3128        {
3129            return Ok(Some(CheckVtlAccessResult {
3130                vtl: (access_result.intercepting_vtl() as u8)
3131                    .try_into()
3132                    .expect("checking vtl permissions failure should return valid vtl"),
3133                denied_flags: (access_result.denied_access() as u32).into(),
3134            }));
3135        }
3136
3137        assert_eq!(status.elements_processed(), 1);
3138        Ok(None)
3139    }
3140
3141    /// Enables a VTL for the partition.
3142    pub fn enable_partition_vtl(
3143        &self,
3144        vtl: GuestVtl,
3145        flags: hvdef::hypercall::EnablePartitionVtlFlags,
3146    ) -> Result<(), HvError> {
3147        use hvdef::hypercall;
3148
3149        let header = hypercall::EnablePartitionVtl {
3150            partition_id: HV_PARTITION_ID_SELF,
3151            target_vtl: vtl.into(),
3152            flags,
3153            reserved_z0: 0,
3154            reserved_z1: 0,
3155        };
3156
3157        // SAFETY: The input header and slice are the correct types for this hypercall.
3158        //         The hypercall output is validated right after the hypercall is issued.
3159        let status = unsafe {
3160            self.mshv_hvcall
3161                .hvcall(HypercallCode::HvCallEnablePartitionVtl, &header, &mut ())
3162                .expect("submitting hypercall should not fail")
3163        };
3164
3165        status.result()
3166    }
3167
3168    /// Enables a VTL on a VP.
3169    pub fn enable_vp_vtl(
3170        &self,
3171        vp_index: u32,
3172        vtl: GuestVtl,
3173        hv_vp_context: InitialVpContextX64,
3174    ) -> Result<(), HvError> {
3175        use hvdef::hypercall;
3176
3177        let header = hypercall::EnableVpVtlX64 {
3178            partition_id: HV_PARTITION_ID_SELF,
3179            vp_index,
3180            target_vtl: vtl.into(),
3181            reserved: [0; 3],
3182            vp_vtl_context: hv_vp_context,
3183        };
3184
3185        // SAFETY: The input header and slice are the correct types for this hypercall.
3186        //         The hypercall output is validated right after the hypercall is issued.
3187        let status = unsafe {
3188            self.mshv_hvcall
3189                .hvcall(HypercallCode::HvCallEnableVpVtl, &header, &mut ())
3190                .expect("submitting hypercall should not fail")
3191        };
3192
3193        status.result()
3194    }
3195
3196    /// Gets the PFN for the VTL 1 VMSA
3197    pub fn vtl1_vmsa_pfn(&self, cpu_index: u32) -> u64 {
3198        let mut vp_pfn = cpu_index as u64; // input vp, output pfn
3199
3200        // SAFETY: The ioctl requires no prerequisites other than that the
3201        // VTL 1 VMSA is mapped. This ioctl should never fail as long as the
3202        // VTL 1 VMSA was mapped.
3203        unsafe {
3204            hcl_read_guest_vsm_page_pfn(self.mshv_vtl.file.as_raw_fd(), &mut vp_pfn)
3205                .expect("should always succeed");
3206        }
3207
3208        vp_pfn
3209    }
3210
3211    /// Returns the isolation type for the partition.
3212    pub fn isolation(&self) -> IsolationType {
3213        self.isolation
3214    }
3215
3216    /// Reads MSR_IA32_VMX_CR4_FIXED1 in kernel mode.
3217    pub fn read_vmx_cr4_fixed1(&self) -> u64 {
3218        let mut value = 0;
3219
3220        // SAFETY: The ioctl requires no prerequisites other than a location to
3221        // write the read MSR. This ioctl should never fail.
3222        unsafe {
3223            hcl_read_vmx_cr4_fixed1(self.mshv_vtl.file.as_raw_fd(), &mut value)
3224                .expect("should always succeed");
3225        }
3226
3227        value
3228    }
3229
3230    /// Invokes the HvCallRetargetDeviceInterrupt hypercall.
3231    /// `target_processors` must be sorted in ascending order.
3232    pub fn retarget_device_interrupt(
3233        &self,
3234        device_id: u64,
3235        entry: hvdef::hypercall::InterruptEntry,
3236        vector: u32,
3237        multicast: bool,
3238        target_processors: ProcessorSet<'_>,
3239    ) -> Result<(), HvError> {
3240        let header = hvdef::hypercall::RetargetDeviceInterrupt {
3241            partition_id: HV_PARTITION_ID_SELF,
3242            device_id,
3243            entry,
3244            rsvd: 0,
3245            target_header: hvdef::hypercall::InterruptTarget {
3246                vector,
3247                flags: hvdef::hypercall::HvInterruptTargetFlags::default()
3248                    .with_multicast(multicast)
3249                    .with_processor_set(true),
3250                // Always use a generic processor set to simplify construction. This hypercall is
3251                // invoked relatively infrequently, so the overhead should be acceptable.
3252                mask_or_format: hvdef::hypercall::HV_GENERIC_SET_SPARSE_4K,
3253            },
3254        };
3255        let processor_set = Vec::from_iter(target_processors.as_generic_set());
3256
3257        // SAFETY: The input header and slice are the correct types for this hypercall.
3258        //         The hypercall output is validated right after the hypercall is issued.
3259        let status = unsafe {
3260            self.mshv_hvcall
3261                .hvcall_var(
3262                    HypercallCode::HvCallRetargetDeviceInterrupt,
3263                    &header,
3264                    processor_set.as_bytes(),
3265                    &mut (),
3266                )
3267                .expect("submitting hypercall should not fail")
3268        };
3269
3270        status.result()
3271    }
3272
3273    /// Gets the permissions for a VTL.
3274    /// Currently unused, but available for debugging purposes.
3275    #[cfg(debug_assertions)]
3276    pub fn rmp_query(&self, gpa: u64, vtl: GuestVtl) -> x86defs::snp::SevRmpAdjust {
3277        use x86defs::snp::SevRmpAdjust;
3278
3279        let page_count = 1u64;
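        // Query the RMP entry for the VMPL corresponding to the target VTL:
        // VTL 0 runs at VMPL 2 and VTL 1 at VMPL 1.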
3280        let flags = [u64::from(SevRmpAdjust::new().with_target_vmpl(match vtl {
3281            GuestVtl::Vtl0 => 2,
3282            GuestVtl::Vtl1 => 1,
3283        }))];
3284        let page_size = [0u64];
3285        let pages_processed = 0;
3286
3287        debug_assert!(flags.len() == page_count as usize);
3288        debug_assert!(page_size.len() == page_count as usize);
3289
3290        let query = mshv_rmpquery {
3291            start_pfn: gpa / HV_PAGE_SIZE,
3292            page_count,
3293            terminate_on_failure: 0,
3294            ram: 0,
3295            padding: Default::default(),
3296            flags: flags.as_ptr().cast_mut(),
3297            page_size: page_size.as_ptr().cast_mut(),
3298            pages_processed: core::ptr::from_ref(&pages_processed).cast_mut(),
3299        };
3300
3301        // SAFETY: the input query is the correct type for this ioctl
3302        unsafe {
3303            hcl_rmpquery_pages(self.mshv_vtl.file.as_raw_fd(), &query)
3304                .expect("should always succeed");
3305        }
3306        debug_assert!(pages_processed <= page_count);
3307
3308        SevRmpAdjust::from(flags[0])
3309    }
3310
3311    /// Issues an INVLPGB instruction.
3312    pub fn invlpgb(&self, rax: u64, edx: u32, ecx: u32) {
3313        let data = mshv_invlpgb {
3314            rax,
3315            edx,
3316            ecx,
3317            _pad0: 0,
3318            _pad1: 0,
3319        };
3320        // SAFETY: ioctl has no prerequisites.
3321        unsafe {
3322            hcl_invlpgb(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3323        }
3324    }
3325
3326    /// Issues a TLBSYNC instruction.
3327    pub fn tlbsync(&self) {
3328        // SAFETY: ioctl has no prerequisites.
3329        unsafe {
3330            hcl_tlbsync(self.mshv_vtl.file.as_raw_fd()).expect("should always succeed");
3331        }
3332    }
3333
3334    /// Causes the specified CPUs to be woken out of a lower VTL.
3335    pub fn kick_cpus(
3336        &self,
3337        cpus: impl IntoIterator<Item = u32>,
3338        cancel_run: bool,
3339        wait_for_other_cpus: bool,
3340    ) {
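        // Build a bitmap with one bit per VP in the partition; set bits mark the
        // CPUs to kick.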
3341        let mut cpu_bitmap: BitVec<u8> = BitVec::from_vec(vec![0; self.vps.len().div_ceil(8)]);
3342        for cpu in cpus {
3343            cpu_bitmap.set(cpu as usize, true);
3344        }
3345
3346        let data = protocol::hcl_kick_cpus {
3347            len: cpu_bitmap.len() as u64,
3348            cpu_mask: cpu_bitmap.as_bitptr().pointer(),
3349            flags: protocol::hcl_kick_cpus_flags::new()
3350                .with_cancel_run(cancel_run)
3351                .with_wait_for_other_cpus(wait_for_other_cpus),
3352        };
3353
3354        // SAFETY: ioctl has no prerequisites.
3355        unsafe {
3356            hcl_kickcpus(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3357        }
3358    }
3359}