hcl/ioctl.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Interface to `mshv_vtl` driver.
5
6mod deferred;
7
8pub mod aarch64;
9pub mod snp;
10pub mod tdx;
11pub mod x64;
12
13use self::deferred::DeferredActionSlots;
14use self::ioctls::*;
15use crate::GuestVtl;
16use crate::ioctl::deferred::DeferredAction;
17use crate::mapped_page::MappedPage;
18use crate::protocol;
19use crate::protocol::EnterModes;
20use crate::protocol::HCL_REG_PAGE_OFFSET;
21use crate::protocol::HCL_VMSA_GUEST_VSM_PAGE_OFFSET;
22use crate::protocol::HCL_VMSA_PAGE_OFFSET;
23use crate::protocol::MSHV_APIC_PAGE_OFFSET;
24use crate::protocol::hcl_intr_offload_flags;
25use crate::protocol::hcl_run;
26use bitvec::vec::BitVec;
27use cfg_if::cfg_if;
28use cvm_tracing::CVM_ALLOWED;
29use deferred::RegisteredDeferredActions;
30use deferred::push_deferred_action;
31use deferred::register_deferred_actions;
32use hv1_structs::ProcessorSet;
33use hv1_structs::VtlArray;
34use hvdef::HV_PAGE_SIZE;
35use hvdef::HV_PARTITION_ID_SELF;
36use hvdef::HV_VP_INDEX_SELF;
37use hvdef::HvAarch64RegisterPage;
38use hvdef::HvAllArchRegisterName;
39#[cfg(guest_arch = "aarch64")]
40use hvdef::HvArm64RegisterName;
41use hvdef::HvError;
42use hvdef::HvMapGpaFlags;
43use hvdef::HvMessage;
44use hvdef::HvRegisterName;
45use hvdef::HvRegisterValue;
46use hvdef::HvRegisterVsmPartitionConfig;
47use hvdef::HvStatus;
48use hvdef::HvX64RegisterName;
49use hvdef::HvX64RegisterPage;
50use hvdef::HypercallCode;
51use hvdef::Vtl;
52use hvdef::hypercall::AssertVirtualInterrupt;
53use hvdef::hypercall::HostVisibilityType;
54use hvdef::hypercall::HvGpaRange;
55use hvdef::hypercall::HvGpaRangeExtended;
56use hvdef::hypercall::HvInputVtl;
57use hvdef::hypercall::HvInterceptParameters;
58use hvdef::hypercall::HvInterceptType;
59use hvdef::hypercall::HvRegisterAssoc;
60use hvdef::hypercall::HypercallOutput;
61use hvdef::hypercall::InitialVpContextX64;
62use hvdef::hypercall::ModifyHostVisibility;
63use memory_range::MemoryRange;
64use pal::unix::pthread::*;
65use parking_lot::Mutex;
66use private::BackingPrivate;
67use sidecar_client::NewSidecarClientError;
68use sidecar_client::SidecarClient;
69use sidecar_client::SidecarRun;
70use sidecar_client::SidecarVp;
71use std::cell::UnsafeCell;
72use std::fmt::Debug;
73use std::fs::File;
74use std::io;
75use std::os::unix::prelude::*;
76use std::sync::Arc;
77use std::sync::Once;
78use std::sync::atomic::AtomicU8;
79use std::sync::atomic::AtomicU32;
80use std::sync::atomic::Ordering;
81use thiserror::Error;
82use user_driver::DmaClient;
83use user_driver::memory::MemoryBlock;
84use x86defs::snp::SevVmsa;
85use x86defs::tdx::TdCallResultCode;
86use x86defs::vmx::ApicPage;
87use zerocopy::FromBytes;
88use zerocopy::FromZeros;
89use zerocopy::Immutable;
90use zerocopy::IntoBytes;
91use zerocopy::KnownLayout;
92
93// TODO: Chunk this up into smaller per-interface errors.
94/// Error returned by HCL operations.
95#[derive(Error, Debug)]
96#[expect(missing_docs)]
97pub enum Error {
98    #[error("failed to open mshv device")]
99    OpenMshv(#[source] io::Error),
100    #[error("failed to open hvcall device")]
101    OpenHvcall(#[source] io::Error),
102    #[error("failed to open lower VTL memory device")]
103    OpenGpa(#[source] io::Error),
104    #[error("ReturnToLowerVtl")]
105    ReturnToLowerVtl(#[source] nix::Error),
106    #[error("AddVtl0Memory")]
107    AddVtl0Memory(#[source] nix::Error),
108    #[error("hcl_set_vp_register")]
109    SetVpRegister(#[source] nix::Error),
110    #[error("hcl_get_vp_register")]
111    GetVpRegister(#[source] nix::Error),
112    #[error("failed to get VP register {reg:#x?} from hypercall")]
113    GetVpRegisterHypercall {
114        #[cfg(guest_arch = "x86_64")]
115        reg: HvX64RegisterName,
116        #[cfg(guest_arch = "aarch64")]
117        reg: HvArm64RegisterName,
118        #[source]
119        err: HvError,
120    },
121    #[error("hcl_request_interrupt")]
122    RequestInterrupt(#[source] HvError),
123    #[error("hcl_cancel_vp failed")]
124    CancelVp(#[source] nix::Error),
125    #[error("failed to signal event")]
126    SignalEvent(#[source] HvError),
127    #[error("failed to post message")]
128    PostMessage(#[source] HvError),
129    #[error("failed to mmap the vp context {:?}", .1.map(|vtl| format!("for VTL {:?}", vtl)).unwrap_or("".to_string()))]
130    MmapVp(#[source] io::Error, Option<Vtl>),
131    #[error("failed to set the poll file")]
132    SetPollFile(#[source] nix::Error),
133    #[error("failed to check hcl capabilities")]
134    CheckExtensions(#[source] nix::Error),
135    #[error("failed to mmap the register page")]
136    MmapRegPage(#[source] io::Error),
137    #[error("invalid num signal events")]
138    NumSignalEvent(#[source] io::Error),
139    #[error("failed to create vtl")]
140    CreateVTL(#[source] nix::Error),
141    #[error("gpa failed vtl access check")]
142    CheckVtlAccess(#[source] HvError),
143    #[error("failed to set registers using set_vp_registers hypercall")]
144    SetRegisters(#[source] HvError),
145    #[error("Unknown register name: {0:x}")]
146    UnknownRegisterName(u32),
147    #[error("Invalid register value")]
148    InvalidRegisterValue,
149    #[error("failed to set host visibility")]
150    SetHostVisibility(#[source] nix::Error),
151    #[error("failed to allocate host overlay page")]
152    HostOverlayPageExhausted,
153    #[error("sidecar error")]
154    Sidecar(#[source] sidecar_client::SidecarError),
155    #[error("failed to open sidecar")]
156    OpenSidecar(#[source] NewSidecarClientError),
157    #[error(
158        "mismatch between requested isolation type {requested:?} and supported isolation type {supported:?}"
159    )]
160    MismatchedIsolation {
161        supported: IsolationType,
162        requested: IsolationType,
163    },
164    #[error("private page pool allocator missing, required for requested isolation type")]
165    MissingPrivateMemory,
166    #[error("failed to allocate pages for vp")]
167    AllocVp(#[source] anyhow::Error),
168    #[error("failed to map or unmap redirected device interrupt")]
169    MapRedirectedDeviceInterrupt(#[source] nix::Error),
170}
171
172/// Error for IOCTL errors specifically.
173#[derive(Debug, Error)]
174#[error("hcl request failed")]
175pub struct IoctlError(#[source] pub(crate) nix::Error);
176
177/// Error returned when issuing hypercalls.
178#[derive(Debug, Error)]
179#[expect(missing_docs)]
180pub enum HypercallError {
181    #[error("hypercall failed with {0:?}")]
182    Hypervisor(HvError),
183    #[error("ioctl failed")]
184    Ioctl(#[source] IoctlError),
185}
186
187impl HypercallError {
188    pub(crate) fn check(r: Result<i32, nix::Error>) -> Result<(), Self> {
189        match r {
190            Ok(n) => HvStatus(n.try_into().expect("hypervisor result out of range"))
191                .result()
192                .map_err(Self::Hypervisor),
193            Err(err) => Err(Self::Ioctl(IoctlError(err))),
194        }
195    }
196}
197
198/// Errors when issuing hypercalls via the kernel direct interface.
199#[derive(Error, Debug)]
200#[expect(missing_docs)]
201pub enum HvcallError {
202    #[error(
203        "kernel rejected the hypercall, most likely due to the hypercall code not being allowed via set_allowed_hypercalls"
204    )]
205    HypercallIoctlFailed(#[source] nix::Error),
206    #[error("input parameters are larger than a page")]
207    InputParametersTooLarge,
208    #[error("output parameters are larger than a page")]
209    OutputParametersTooLarge,
210    #[error("output and input list lengths do not match")]
211    InputOutputRepListMismatch,
212}
213
214/// Error applying VTL protections.
215// TODO: move to `underhill_mem`.
216#[derive(Error, Debug)]
217#[expect(missing_docs)]
218pub enum ApplyVtlProtectionsError {
219    #[error("hypervisor failed with {output:?} when protecting pages {range} for vtl {vtl:?}")]
220    Hypervisor {
221        range: MemoryRange,
222        output: HypercallOutput,
223        #[source]
224        hv_error: HvError,
225        vtl: HvInputVtl,
226    },
227    #[error("snp failure to protect pages {range} with {permissions:x?} for vtl {vtl:?}")]
228    Snp {
229        #[source]
230        failed_operation: snp::SnpPageError,
231        range: MemoryRange,
232        permissions: x86defs::snp::SevRmpAdjust,
233        vtl: HvInputVtl,
234    },
235    #[error(
236        "tdcall failed with {error:?} when protecting pages {range} with permissions {permissions:x?} for vtl {vtl:?}"
237    )]
238    Tdx {
239        error: TdCallResultCode,
240        range: MemoryRange,
241        permissions: x86defs::tdx::TdgMemPageGpaAttr,
242        vtl: HvInputVtl,
243    },
244    #[error("no valid protections for vtl {0:?}")]
245    InvalidVtl(Vtl),
246}
247
248/// Error setting guest VSM configuration.
249#[derive(Error, Debug)]
250#[expect(missing_docs)]
251pub enum SetGuestVsmConfigError {
252    #[error("hypervisor failed to configure guest vsm to {enable_guest_vsm}")]
253    Hypervisor {
254        enable_guest_vsm: bool,
255        #[source]
256        hv_error: HvError,
257    },
258}
259
260/// Error getting the VP index from an APIC ID.
261#[derive(Error, Debug)]
262#[expect(missing_docs)]
263pub enum GetVpIndexFromApicIdError {
264    #[error("hypervisor failed when querying vp index for {apic_id}")]
265    Hypervisor {
266        #[source]
267        hv_error: HvError,
268        apic_id: u32,
269    },
270}
271
272/// Error setting VSM partition configuration.
273#[derive(Error, Debug)]
274#[expect(missing_docs)]
275pub enum SetVsmPartitionConfigError {
276    #[error("hypervisor failed when configuring vsm partition config {config:?}")]
277    Hypervisor {
278        config: HvRegisterVsmPartitionConfig,
279        #[source]
280        hv_error: HvError,
281    },
282}
283
284/// Error translating a GVA to a GPA.
285#[derive(Error, Debug)]
286#[expect(missing_docs)]
287pub enum TranslateGvaToGpaError {
288    #[error("hypervisor failed when translating gva {gva:#x}")]
289    Hypervisor {
290        gva: u64,
291        #[source]
292        hv_error: HvError,
293    },
294    #[error("sidecar kernel failed when translating gva {gva:#x}")]
295    Sidecar {
296        gva: u64,
297        #[source]
298        error: sidecar_client::SidecarError,
299    },
300}
301
302/// Result from [`Hcl::check_vtl_access`] if vtl permissions were violated
303#[derive(Debug)]
304pub struct CheckVtlAccessResult {
305    /// The intercepting VTL.
306    pub vtl: Vtl,
307    /// The flags that were denied.
308    pub denied_flags: HvMapGpaFlags,
309}
310
311/// Error accepting pages.
312// TODO: move to `underhill_mem`.
313#[derive(Error, Debug)]
314#[expect(missing_docs)]
315pub enum AcceptPagesError {
316    #[error("hypervisor failed to accept pages {range} with {output:?}")]
317    Hypervisor {
318        range: MemoryRange,
319        output: HypercallOutput,
320        #[source]
321        hv_error: HvError,
322    },
323    #[error("snp failure to protect pages {range}")]
324    Snp {
325        #[source]
326        failed_operation: snp::SnpPageError,
327        range: MemoryRange,
328    },
329    #[error("tdcall failure when accepting pages {range}")]
330    Tdx {
331        #[source]
332        error: tdcall::AcceptPagesError,
333        range: MemoryRange,
334    },
335}
336
337// Action translation (to HVCALL) for pin/unpin GPA range.
338#[derive(Debug, Copy, Clone)]
339enum GpaPinUnpinAction {
340    PinGpaRange,
341    UnpinGpaRange,
342}
343
344/// Error pinning a GPA.
345#[derive(Error, Debug)]
346#[error("partial success: {ranges_processed} operations succeeded, but encountered an error")]
347struct PinUnpinError {
348    ranges_processed: usize,
349    #[source]
350    error: HvError,
351}
352
353/// Result of translate gva hypercall from [`Hcl`]
354pub struct TranslateResult {
355    /// The GPA that the GVA translated to.
356    pub gpa_page: u64,
357    /// Whether the page was an overlay page.
358    pub overlay_page: bool, // Note: hardcoded to false on WHP
359}
360
361/// Possible types for rep hypercalls
362enum HvcallRepInput<'a, T> {
363    /// The actual elements to rep over
364    Elements(&'a [T]),
365    /// The elements for the rep are implied and only a count is needed
366    Count(u16),
367}
368
369mod ioctls {
370    #![allow(non_camel_case_types)]
371
372    use crate::protocol;
373    use hvdef::hypercall::HvRegisterAssoc;
374    use nix::ioctl_none;
375    use nix::ioctl_read;
376    use nix::ioctl_readwrite;
377    use nix::ioctl_write_ptr;
378
379    // The unsafe interface to the `mshv` kernel module comprises
380    // the following IOCTLs.
381    const MSHV_IOCTL: u8 = 0xb8;
382    const MSHV_VTL_RETURN_TO_LOWER_VTL: u16 = 0x27;
383    const MSHV_SET_VP_REGISTERS: u16 = 0x6;
384    const MSHV_GET_VP_REGISTERS: u16 = 0x5;
385    const MSHV_HVCALL_SETUP: u16 = 0x1E;
386    const MSHV_HVCALL: u16 = 0x1F;
387    const MSHV_VTL_ADD_VTL0_MEMORY: u16 = 0x21;
388    const MSHV_VTL_SET_POLL_FILE: u16 = 0x25;
389    const MSHV_CREATE_VTL: u16 = 0x1D;
390    const MSHV_CHECK_EXTENSION: u16 = 0x00;
391    const MSHV_VTL_PVALIDATE: u16 = 0x28;
392    const MSHV_VTL_RMPADJUST: u16 = 0x29;
393    const MSHV_VTL_TDCALL: u16 = 0x32;
394    const MSHV_VTL_READ_VMX_CR4_FIXED1: u16 = 0x33;
395    const MSHV_VTL_GUEST_VSM_VMSA_PFN: u16 = 0x34;
396    const MSHV_VTL_RMPQUERY: u16 = 0x35;
397    const MSHV_INVLPGB: u16 = 0x36;
398    const MSHV_TLBSYNC: u16 = 0x37;
399    const MSHV_KICKCPUS: u16 = 0x38;
400    const MSHV_MAP_REDIRECTED_DEVICE_INTERRUPT: u16 = 0x39;
401
402    #[repr(C)]
403    #[derive(Copy, Clone)]
404    pub struct mshv_vp_registers {
405        pub count: ::std::os::raw::c_int,
406        pub regs: *mut HvRegisterAssoc,
407    }
408
409    #[repr(C, packed)]
410    #[derive(Copy, Clone)]
411    pub struct mshv_pvalidate {
412        /// Execute the pvalidate instruction on the set of memory pages specified
413        pub start_pfn: ::std::os::raw::c_ulonglong,
414        pub page_count: ::std::os::raw::c_ulonglong,
415        pub validate: ::std::os::raw::c_uchar,
416        pub terminate_on_failure: ::std::os::raw::c_uchar,
417        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
418        /// it's device memory.
419        pub ram: u8,
420        pub padding: [::std::os::raw::c_uchar; 1],
421    }
422
423    #[repr(C, packed)]
424    #[derive(Copy, Clone)]
425    pub struct mshv_rmpadjust {
426        /// Execute the rmpadjust instruction on the set of memory pages specified
427        pub start_pfn: ::std::os::raw::c_ulonglong,
428        pub page_count: ::std::os::raw::c_ulonglong,
429        pub value: ::std::os::raw::c_ulonglong,
430        pub terminate_on_failure: ::std::os::raw::c_uchar,
431        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
432        /// it's device memory.
433        pub ram: u8,
434        pub padding: [::std::os::raw::c_uchar; 6],
435    }
436
437    #[repr(C, packed)]
438    #[derive(Copy, Clone)]
439    pub struct mshv_rmpquery {
440        /// Execute the rmpquery instruction on the set of memory pages specified
441        pub start_pfn: ::std::os::raw::c_ulonglong,
442        pub page_count: ::std::os::raw::c_ulonglong,
443        pub terminate_on_failure: ::std::os::raw::c_uchar,
444        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
445        /// it's device memory.
446        pub ram: u8,
447        pub padding: [::std::os::raw::c_uchar; 6],
448        /// Output array for the flags, must have at least `page_count` entries.
449        pub flags: *mut ::std::os::raw::c_ulonglong,
450        /// Output array for the page sizes, must have at least `page_count` entries.
451        pub page_size: *mut ::std::os::raw::c_ulonglong,
452        /// Output for the amount of pages processed, a scalar.
453        pub pages_processed: *mut ::std::os::raw::c_ulonglong,
454    }
455
456    #[repr(C, packed)]
457    #[derive(Copy, Clone)]
458    pub struct mshv_tdcall {
459        pub rax: u64, // Call code and returned status
460        pub rcx: u64,
461        pub rdx: u64,
462        pub r8: u64,
463        pub r9: u64,
464        pub r10_out: u64, // only supported as output
465        pub r11_out: u64, // only supported as output
466    }
467
468    #[repr(C)]
469    #[derive(Copy, Clone)]
470    pub struct mshv_map_device_int {
471        pub vector: u32,
472        pub apic_id: u32,
473        pub create_mapping: u8,
474        pub padding: [u8; 7],
475    }
476
477    ioctl_none!(
478        /// Relinquish the processor to VTL0.
479        hcl_return_to_lower_vtl,
480        MSHV_IOCTL,
481        MSHV_VTL_RETURN_TO_LOWER_VTL
482    );
483
484    ioctl_write_ptr!(
485        /// Set a VTL0 register for the current processor of the current
486        /// partition.
487        /// It is not allowed to set registers of other processors or
488        /// other partitions, for security and coherency reasons.
489        hcl_set_vp_register,
490        MSHV_IOCTL,
491        MSHV_SET_VP_REGISTERS,
492        mshv_vp_registers
493    );
494
495    ioctl_readwrite!(
496        /// Get a VTL0 register for the current processor of the current
497        /// partition.
498        /// It is not allowed to get registers of other processors or
499        /// other partitions, for security and coherency reasons.
500        hcl_get_vp_register,
501        MSHV_IOCTL,
502        MSHV_GET_VP_REGISTERS,
503        mshv_vp_registers
504    );
505
506    ioctl_write_ptr!(
507        /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support
508        /// DMA from the guest.
509        hcl_add_vtl0_memory,
510        MSHV_IOCTL,
511        MSHV_VTL_ADD_VTL0_MEMORY,
512        protocol::hcl_pfn_range_t
513    );
514
515    ioctl_write_ptr!(
516        /// Sets the file to be polled while running a VP in VTL0. If the file
517        /// becomes readable, then the VP run will be cancelled.
518        hcl_set_poll_file,
519        MSHV_IOCTL,
520        MSHV_VTL_SET_POLL_FILE,
521        protocol::hcl_set_poll_file
522    );
523
524    ioctl_write_ptr!(
525        /// Sets up the hypercall allow map. Allowed once
526        /// per fd.
527        hcl_hvcall_setup,
528        MSHV_IOCTL,
529        MSHV_HVCALL_SETUP,
530        protocol::hcl_hvcall_setup
531    );
532
533    ioctl_readwrite!(
534        /// Performs a hypercall from user mode.
535        hcl_hvcall,
536        MSHV_IOCTL,
537        MSHV_HVCALL,
538        protocol::hcl_hvcall
539    );
540
541    ioctl_write_ptr!(
542        /// Executes the pvalidate instruction on a page range.
543        hcl_pvalidate_pages,
544        MSHV_IOCTL,
545        MSHV_VTL_PVALIDATE,
546        mshv_pvalidate
547    );
548
549    ioctl_write_ptr!(
550        /// Executes the rmpadjust instruction on a page range.
551        hcl_rmpadjust_pages,
552        MSHV_IOCTL,
553        MSHV_VTL_RMPADJUST,
554        mshv_rmpadjust
555    );
556
557    ioctl_write_ptr!(
558        /// Executes the rmpquery instruction on a page range.
559        hcl_rmpquery_pages,
560        MSHV_IOCTL,
561        MSHV_VTL_RMPQUERY,
562        mshv_rmpquery
563    );
564
565    ioctl_readwrite!(
566        /// Executes a tdcall.
567        hcl_tdcall,
568        MSHV_IOCTL,
569        MSHV_VTL_TDCALL,
570        mshv_tdcall
571    );
572
573    ioctl_read!(
574        hcl_read_vmx_cr4_fixed1,
575        MSHV_IOCTL,
576        MSHV_VTL_READ_VMX_CR4_FIXED1,
577        u64
578    );
579
580    ioctl_readwrite!(
581        hcl_read_guest_vsm_page_pfn,
582        MSHV_IOCTL,
583        MSHV_VTL_GUEST_VSM_VMSA_PFN,
584        u64
585    );
586
587    pub const HCL_CAP_REGISTER_PAGE: u32 = 1;
588    pub const HCL_CAP_VTL_RETURN_ACTION: u32 = 2;
589    pub const HCL_CAP_DR6_SHARED: u32 = 3;
590
591    ioctl_write_ptr!(
592        /// Check for the presence of an extension capability.
593        hcl_check_extension,
594        MSHV_IOCTL,
595        MSHV_CHECK_EXTENSION,
596        u32
597    );
598
599    ioctl_read!(mshv_create_vtl, MSHV_IOCTL, MSHV_CREATE_VTL, u8);
600
601    #[repr(C)]
602    pub struct mshv_invlpgb {
603        pub rax: u64,
604        pub _pad0: u32,
605        pub edx: u32,
606        pub _pad1: u32,
607        pub ecx: u32,
608    }
609
610    ioctl_write_ptr!(
611        /// Issue an INVLPGB instruction.
612        hcl_invlpgb,
613        MSHV_IOCTL,
614        MSHV_INVLPGB,
615        mshv_invlpgb
616    );
617
618    ioctl_none!(
619        /// Issue a TLBSYNC instruction.
620        hcl_tlbsync,
621        MSHV_IOCTL,
622        MSHV_TLBSYNC
623    );
624
625    ioctl_write_ptr!(
626        /// Kick CPUs.
627        hcl_kickcpus,
628        MSHV_IOCTL,
629        MSHV_KICKCPUS,
630        protocol::hcl_kick_cpus
631    );
632
633    ioctl_readwrite!(
634        /// Map or unmap VTL0 device interrupt in VTL2.
635        hcl_map_redirected_device_interrupt,
636        MSHV_IOCTL,
637        MSHV_MAP_REDIRECTED_DEVICE_INTERRUPT,
638        mshv_map_device_int
639    );
640}
641
642/// The `/dev/mshv_vtl_low` device for accessing VTL0 memory.
643pub struct MshvVtlLow {
644    file: File,
645}
646
647impl MshvVtlLow {
648    /// Opens the device.
649    pub fn new() -> Result<Self, Error> {
650        let file = fs_err::OpenOptions::new()
651            .read(true)
652            .write(true)
653            .open("/dev/mshv_vtl_low")
654            .map_err(Error::OpenGpa)?;
655
656        Ok(Self { file: file.into() })
657    }
658
659    /// Gets the device file.
660    pub fn get(&self) -> &File {
661        &self.file
662    }
663
664    /// The flag to set in the file offset to map guest memory as shared instead
665    /// of private.
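    ///
    /// A hedged sketch of how a caller might build a mapping file offset with
    /// this flag (`gpa`, `shared`, and the mapping helper are illustrative, not
    /// part of this crate):
    ///
    /// ```ignore
    /// let offset = if shared { gpa | MshvVtlLow::SHARED_MEMORY_FLAG } else { gpa };
    /// // Pass `offset` as the file offset when mmapping `dev.get()`.
    /// ```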
666    pub const SHARED_MEMORY_FLAG: u64 = 1 << 63;
667}
668
669/// An open `/dev/mshv` device file.
670pub struct Mshv {
671    file: File,
672}
673
674impl Mshv {
675    /// Opens the mshv device.
676    pub fn new() -> Result<Self, Error> {
677        let file = fs_err::OpenOptions::new()
678            .read(true)
679            .write(true)
680            .open("/dev/mshv")
681            .map_err(Error::OpenMshv)?;
682
683        Ok(Self { file: file.into() })
684    }
685
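    /// Checks for the presence of an HCL extension capability.
    ///
    /// A minimal sketch of crate-internal usage:
    ///
    /// ```ignore
    /// let has_reg_page = mshv.check_extension(HCL_CAP_REGISTER_PAGE)?;
    /// ```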
686    fn check_extension(&self, cap: u32) -> Result<bool, Error> {
687        // SAFETY: calling IOCTL as documented, with no special requirements.
688        let supported = unsafe {
689            hcl_check_extension(self.file.as_raw_fd(), &cap).map_err(Error::CheckExtensions)?
690        };
691        Ok(supported != 0)
692    }
693
694    /// Opens an mshv_vtl device file.
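    ///
    /// A minimal usage sketch:
    ///
    /// ```ignore
    /// let mshv = Mshv::new()?;
    /// let vtl: MshvVtl = mshv.create_vtl()?;
    /// ```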
695    pub fn create_vtl(&self) -> Result<MshvVtl, Error> {
696        let cap = &mut 0_u8;
697        // SAFETY: calling IOCTL as documented, with no special requirements.
698        let supported =
699            unsafe { mshv_create_vtl(self.file.as_raw_fd(), cap).map_err(Error::CreateVTL)? };
700        // SAFETY: calling IOCTL as documented, with no special requirements.
701        let vtl_file = unsafe { File::from_raw_fd(supported) };
702        Ok(MshvVtl { file: vtl_file })
703    }
704}
705
706/// An open mshv_vtl device file.
707#[derive(Debug)]
708pub struct MshvVtl {
709    file: File,
710}
711
712impl MshvVtl {
713    /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support DMA from the guest.
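    ///
    /// A hedged sketch of a call site (the range is illustrative):
    ///
    /// ```ignore
    /// // Register a 2 MiB shared range starting at GPA 0x1000_0000.
    /// vtl.add_vtl0_memory(MemoryRange::from_4k_gpn_range(0x10000..0x10200), true)?;
    /// ```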
714    pub fn add_vtl0_memory(&self, mem_range: MemoryRange, shared: bool) -> Result<(), Error> {
715        let flags = if shared {
716            MshvVtlLow::SHARED_MEMORY_FLAG / HV_PAGE_SIZE
717        } else {
718            0
719        };
720        let ram_disposition = protocol::hcl_pfn_range_t {
721            start_pfn: mem_range.start_4k_gpn() | flags,
722            last_pfn: mem_range.end_4k_gpn(),
723        };
724
725        // SAFETY: calling IOCTL as documented, with no special requirements.
726        unsafe {
727            hcl_add_vtl0_memory(self.file.as_raw_fd(), &ram_disposition)
728                .map_err(Error::AddVtl0Memory)?;
729        }
730
731        Ok(())
732    }
733}
734
735#[cfg(guest_arch = "x86_64")]
736fn is_vtl_shared_mtrr(reg: HvX64RegisterName) -> bool {
737    matches!(
738        reg,
739        HvX64RegisterName::MsrMtrrCap
740            | HvX64RegisterName::MsrMtrrDefType
741            | HvX64RegisterName::MsrMtrrPhysBase0
742            | HvX64RegisterName::MsrMtrrPhysBase1
743            | HvX64RegisterName::MsrMtrrPhysBase2
744            | HvX64RegisterName::MsrMtrrPhysBase3
745            | HvX64RegisterName::MsrMtrrPhysBase4
746            | HvX64RegisterName::MsrMtrrPhysBase5
747            | HvX64RegisterName::MsrMtrrPhysBase6
748            | HvX64RegisterName::MsrMtrrPhysBase7
749            | HvX64RegisterName::MsrMtrrPhysBase8
750            | HvX64RegisterName::MsrMtrrPhysBase9
751            | HvX64RegisterName::MsrMtrrPhysBaseA
752            | HvX64RegisterName::MsrMtrrPhysBaseB
753            | HvX64RegisterName::MsrMtrrPhysBaseC
754            | HvX64RegisterName::MsrMtrrPhysBaseD
755            | HvX64RegisterName::MsrMtrrPhysBaseE
756            | HvX64RegisterName::MsrMtrrPhysBaseF
757            | HvX64RegisterName::MsrMtrrPhysMask0
758            | HvX64RegisterName::MsrMtrrPhysMask1
759            | HvX64RegisterName::MsrMtrrPhysMask2
760            | HvX64RegisterName::MsrMtrrPhysMask3
761            | HvX64RegisterName::MsrMtrrPhysMask4
762            | HvX64RegisterName::MsrMtrrPhysMask5
763            | HvX64RegisterName::MsrMtrrPhysMask6
764            | HvX64RegisterName::MsrMtrrPhysMask7
765            | HvX64RegisterName::MsrMtrrPhysMask8
766            | HvX64RegisterName::MsrMtrrPhysMask9
767            | HvX64RegisterName::MsrMtrrPhysMaskA
768            | HvX64RegisterName::MsrMtrrPhysMaskB
769            | HvX64RegisterName::MsrMtrrPhysMaskC
770            | HvX64RegisterName::MsrMtrrPhysMaskD
771            | HvX64RegisterName::MsrMtrrPhysMaskE
772            | HvX64RegisterName::MsrMtrrPhysMaskF
773            | HvX64RegisterName::MsrMtrrFix64k00000
774            | HvX64RegisterName::MsrMtrrFix16k80000
775            | HvX64RegisterName::MsrMtrrFix16kA0000
776            | HvX64RegisterName::MsrMtrrFix4kC0000
777            | HvX64RegisterName::MsrMtrrFix4kC8000
778            | HvX64RegisterName::MsrMtrrFix4kD0000
779            | HvX64RegisterName::MsrMtrrFix4kD8000
780            | HvX64RegisterName::MsrMtrrFix4kE0000
781            | HvX64RegisterName::MsrMtrrFix4kE8000
782            | HvX64RegisterName::MsrMtrrFix4kF0000
783            | HvX64RegisterName::MsrMtrrFix4kF8000
784    )
785}
786
787/// Indicates whether `reg` is shared across VTLs.
788///
789/// This function is not complete: DR6 may or may not be shared, depending on
790/// the processor type; the caller needs to check HvRegisterVsmCapabilities.
791/// Some MSRs are not included here as they are not represented in
792/// HvX64RegisterName, including MSR_TSC_FREQUENCY, MSR_MCG_CAP,
793/// MSR_MCG_STATUS, MSR_RESET, MSR_GUEST_IDLE, and MSR_DEBUG_DEVICE_OPTIONS.
794#[cfg(guest_arch = "x86_64")]
795fn is_vtl_shared_reg(reg: HvX64RegisterName) -> bool {
796    is_vtl_shared_mtrr(reg)
797        || matches!(
798            reg,
799            HvX64RegisterName::VpIndex
800                | HvX64RegisterName::VpRuntime
801                | HvX64RegisterName::TimeRefCount
802                | HvX64RegisterName::Rax
803                | HvX64RegisterName::Rbx
804                | HvX64RegisterName::Rcx
805                | HvX64RegisterName::Rdx
806                | HvX64RegisterName::Rsi
807                | HvX64RegisterName::Rdi
808                | HvX64RegisterName::Rbp
809                | HvX64RegisterName::Cr2
810                | HvX64RegisterName::R8
811                | HvX64RegisterName::R9
812                | HvX64RegisterName::R10
813                | HvX64RegisterName::R11
814                | HvX64RegisterName::R12
815                | HvX64RegisterName::R13
816                | HvX64RegisterName::R14
817                | HvX64RegisterName::R15
818                | HvX64RegisterName::Dr0
819                | HvX64RegisterName::Dr1
820                | HvX64RegisterName::Dr2
821                | HvX64RegisterName::Dr3
822                | HvX64RegisterName::Xmm0
823                | HvX64RegisterName::Xmm1
824                | HvX64RegisterName::Xmm2
825                | HvX64RegisterName::Xmm3
826                | HvX64RegisterName::Xmm4
827                | HvX64RegisterName::Xmm5
828                | HvX64RegisterName::Xmm6
829                | HvX64RegisterName::Xmm7
830                | HvX64RegisterName::Xmm8
831                | HvX64RegisterName::Xmm9
832                | HvX64RegisterName::Xmm10
833                | HvX64RegisterName::Xmm11
834                | HvX64RegisterName::Xmm12
835                | HvX64RegisterName::Xmm13
836                | HvX64RegisterName::Xmm14
837                | HvX64RegisterName::Xmm15
838                | HvX64RegisterName::FpMmx0
839                | HvX64RegisterName::FpMmx1
840                | HvX64RegisterName::FpMmx2
841                | HvX64RegisterName::FpMmx3
842                | HvX64RegisterName::FpMmx4
843                | HvX64RegisterName::FpMmx5
844                | HvX64RegisterName::FpMmx6
845                | HvX64RegisterName::FpMmx7
846                | HvX64RegisterName::FpControlStatus
847                | HvX64RegisterName::XmmControlStatus
848                | HvX64RegisterName::Xfem
849        )
850}
851
852/// Indicates whether `reg` is shared across VTLs.
853#[cfg(guest_arch = "aarch64")]
854fn is_vtl_shared_reg(reg: HvArm64RegisterName) -> bool {
855    use hvdef::HvArm64RegisterName;
856
857    matches!(
858        reg,
859        HvArm64RegisterName::X0
860            | HvArm64RegisterName::X1
861            | HvArm64RegisterName::X2
862            | HvArm64RegisterName::X3
863            | HvArm64RegisterName::X4
864            | HvArm64RegisterName::X5
865            | HvArm64RegisterName::X6
866            | HvArm64RegisterName::X7
867            | HvArm64RegisterName::X8
868            | HvArm64RegisterName::X9
869            | HvArm64RegisterName::X10
870            | HvArm64RegisterName::X11
871            | HvArm64RegisterName::X12
872            | HvArm64RegisterName::X13
873            | HvArm64RegisterName::X14
874            | HvArm64RegisterName::X15
875            | HvArm64RegisterName::X16
876            | HvArm64RegisterName::X17
877            | HvArm64RegisterName::X19
878            | HvArm64RegisterName::X20
879            | HvArm64RegisterName::X21
880            | HvArm64RegisterName::X22
881            | HvArm64RegisterName::X23
882            | HvArm64RegisterName::X24
883            | HvArm64RegisterName::X25
884            | HvArm64RegisterName::X26
885            | HvArm64RegisterName::X27
886            | HvArm64RegisterName::X28
887            | HvArm64RegisterName::XFp
888            | HvArm64RegisterName::XLr
889    )
890}
891
892/// The `/dev/mshv_hvcall` device for issuing hypercalls directly to the
893/// hypervisor.
894#[derive(Debug)]
895pub struct MshvHvcall(File);
896
897impl MshvHvcall {
898    /// Opens the device.
899    pub fn new() -> Result<Self, Error> {
900        let file = fs_err::OpenOptions::new()
901            .read(true)
902            .write(true)
903            .open("/dev/mshv_hvcall")
904            .map_err(Error::OpenHvcall)?;
905
906        Ok(Self(file.into()))
907    }
908
909    /// Set allowed hypercalls.
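    ///
    /// A minimal sketch (the specific codes shown are illustrative; allow only
    /// what the caller actually needs):
    ///
    /// ```ignore
    /// let hvcall = MshvHvcall::new()?;
    /// hvcall.set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallGetVpRegisters,
    ///     hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
    /// ]);
    /// ```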
910    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
911        type ItemType = u64;
912        let item_size_bytes = size_of::<ItemType>();
913        let item_size_bits = item_size_bytes * 8;
914
915        let mut allow_bitmap = Vec::<ItemType>::new();
916        for &code in codes {
917            let map_index = (code.0 as usize) / item_size_bits;
918            if map_index >= allow_bitmap.len() {
919                allow_bitmap.resize(map_index + 1, 0);
920            }
921            allow_bitmap[map_index] |= (1 as ItemType) << (code.0 % item_size_bits as u16);
922        }
923
924        let hvcall_setup = protocol::hcl_hvcall_setup {
925            allow_bitmap_size: (allow_bitmap.len() * item_size_bytes) as u64,
926            allow_bitmap_ptr: allow_bitmap.as_ptr(),
927        };
928
929        // SAFETY: following the IOCTL definition.
930        unsafe {
931            hcl_hvcall_setup(self.0.as_raw_fd(), &hvcall_setup)
932                .expect("Hypercall setup IOCTL must be supported");
933        }
934    }
935
936    /// Accepts VTL 0 pages with no host visibility.
937    ///
938    /// [`HypercallCode::HvCallAcceptGpaPages`] must be allowed.
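    ///
    /// A hedged usage sketch (the range and `memory_type` value are
    /// illustrative):
    ///
    /// ```ignore
    /// hvcall.set_allowed_hypercalls(&[hvdef::HypercallCode::HvCallAcceptGpaPages]);
    /// hvcall.accept_gpa_pages(MemoryRange::from_4k_gpn_range(0x100..0x200), memory_type)?;
    /// ```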
939    pub fn accept_gpa_pages(
940        &self,
941        range: MemoryRange,
942        memory_type: hvdef::hypercall::AcceptMemoryType,
943    ) -> Result<(), AcceptPagesError> {
944        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
945            - size_of::<hvdef::hypercall::AcceptGpaPages>())
946            / size_of::<u64>();
947
948        let span = tracing::info_span!("accept_pages", CVM_ALLOWED, ?range);
949        let _enter = span.enter();
950
951        let mut current_page = range.start() / HV_PAGE_SIZE;
952        let end = range.end() / HV_PAGE_SIZE;
953
954        while current_page < end {
955            let header = hvdef::hypercall::AcceptGpaPages {
956                partition_id: HV_PARTITION_ID_SELF,
957                page_attributes: hvdef::hypercall::AcceptPagesAttributes::new()
958                    .with_memory_type(memory_type.0)
959                    .with_host_visibility(HostVisibilityType::PRIVATE)
960                    .with_vtl_set(0), // vtl protections cannot be applied for VTL 0 memory
961                vtl_permission_set: hvdef::hypercall::VtlPermissionSet {
962                    vtl_permission_from_1: [0; hvdef::hypercall::HV_VTL_PERMISSION_SET_SIZE],
963                },
964                gpa_page_base: current_page,
965            };
966
967            let remaining_pages = end - current_page;
968            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
969
970            // SAFETY: The input header and rep slice are the correct types for
971            //         this hypercall. A dummy type of u8 is provided to satisfy
972            //         the compiler for input and output rep type. The given
973            //         input and slices are valid references while this function
974            //         is called.
975            //
976            //         The hypercall output is validated right after the hypercall is issued.
977            let output = unsafe {
978                self.hvcall_rep::<hvdef::hypercall::AcceptGpaPages, u8, u8>(
979                    HypercallCode::HvCallAcceptGpaPages,
980                    &header,
981                    HvcallRepInput::Count(count as u16),
982                    None,
983                )
984                .expect("kernel hypercall submission should always succeed")
985            };
986
987            output
988                .result()
989                .map_err(|err| AcceptPagesError::Hypervisor {
990                    range: MemoryRange::from_4k_gpn_range(current_page..current_page + count),
991                    output,
992                    hv_error: err,
993                })?;
994
995            current_page += count;
996
997            assert_eq!(output.elements_processed() as u64, count);
998        }
999        Ok(())
1000    }
1001
1002    /// Modifies the host visibility of the given pages.
1003    ///
1004    /// [`HypercallCode::HvCallModifySparseGpaPageHostVisibility`] must be
1005    /// allowed.
1006    ///
1007    /// On error, returns the hypervisor error and the number of pages
1008    /// processed so far.
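    ///
    /// A hedged sketch of a call site (the GPNs are illustrative, and
    /// `HostVisibilityType::SHARED` is assumed to be the shared-visibility
    /// value):
    ///
    /// ```ignore
    /// let gpns = [0x100u64, 0x101];
    /// hvcall.modify_gpa_visibility(HostVisibilityType::SHARED, &gpns)
    ///     .map_err(|(err, _pages_processed)| err)?;
    /// ```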
1009    //
1010    // TODO SNP: this isn't really safe. Probably this should be an IOCTL in the
1011    // kernel so that it can validate the page ranges are VTL0 memory.
1012    pub fn modify_gpa_visibility(
1013        &self,
1014        host_visibility: HostVisibilityType,
1015        mut gpns: &[u64],
1016    ) -> Result<(), (HvError, usize)> {
1017        const GPNS_PER_CALL: usize = (HV_PAGE_SIZE as usize
1018            - size_of::<hvdef::hypercall::ModifySparsePageVisibility>())
1019            / size_of::<u64>();
1020
1021        while !gpns.is_empty() {
1022            let n = gpns.len().min(GPNS_PER_CALL);
1023            // SAFETY: The input header and rep slice are the correct types for this hypercall.
1024            //         The hypercall output is validated right after the hypercall is issued.
1025            let result = unsafe {
1026                self.hvcall_rep(
1027                    HypercallCode::HvCallModifySparseGpaPageHostVisibility,
1028                    &hvdef::hypercall::ModifySparsePageVisibility {
1029                        partition_id: HV_PARTITION_ID_SELF,
1030                        host_visibility: ModifyHostVisibility::new()
1031                            .with_host_visibility(host_visibility),
1032                        reserved: 0,
1033                    },
1034                    HvcallRepInput::Elements(&gpns[..n]),
1035                    None::<&mut [u8]>,
1036                )
1037                .unwrap()
1038            };
1039
1040            match result.result() {
1041                Ok(()) => {
1042                    assert_eq!({ result.elements_processed() }, n);
1043                }
1044                Err(HvError::Timeout) => {}
1045                Err(e) => return Err((e, result.elements_processed())),
1046            }
1047            gpns = &gpns[result.elements_processed()..];
1048        }
1049        Ok(())
1050    }
1051
1052    /// Given a constructed hcl_hvcall protocol object, issues an IOCTL to invoke a hypercall via
1053    /// the direct hypercall kernel interface. This function will retry hypercalls if the hypervisor
1054    /// times out the hypercall.
1055    ///
1056    /// Input and output data are referenced as pointers in the call object.
1057    ///
1058    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A
1059    /// caller must check the return value for the result of the hypercall.
1060    ///
1061    /// Before invoking hypercalls, a list of hypercalls that are allowed
1062    /// has to be set with `Hcl::set_allowed_hypercalls`:
1063    /// ```ignore
1064    /// set_allowed_hypercalls(&[
1065    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
1066    ///     hvdef::HypercallCode::HvCallInstallIntercept,
1067    /// ]);
1068    /// ```
1069    /// # Safety
1070    /// This function makes no guarantees that the given input header, input and output types are
1071    /// valid for the given hypercall. It is the caller's responsibility to use the correct types
1072    /// with the specified hypercall.
1073    ///
1074    /// The caller must ensure that the input and output data are valid for the lifetime of this
1075    /// call.
1076    ///
1077    /// A caller must check the returned [HypercallOutput] for success or failure from the
1078    /// hypervisor.
1079    ///
1080    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be
1081    /// validated by the caller if needed.
1082    unsafe fn invoke_hvcall_ioctl(
1083        &self,
1084        mut call_object: protocol::hcl_hvcall,
1085    ) -> Result<HypercallOutput, HvcallError> {
1086        loop {
1087            // SAFETY: following the IOCTL definition. The data referenced in the call
1088            // lives as long as `self` does thus the lifetime elision doesn't contradict
1089            // the compiler's invariants.
1090            //
1091            // The hypervisor is trusted to fill out the output page with a valid
1092            // representation of an instance the output type, except in the case of hardware
1093            // isolated VMs where the caller must validate output as needed.
1094            unsafe {
1095                hcl_hvcall(self.0.as_raw_fd(), &mut call_object)
1096                    .map_err(HvcallError::HypercallIoctlFailed)?;
1097            }
1098
1099            if call_object.status.call_status() == Err(HvError::Timeout).into() {
1100                // Any hypercall can timeout, even one that doesn't have reps. Continue processing
1101                // from wherever the hypervisor left off.  The rep start index isn't checked for
1102                // validity, since it is only being used as an input to the untrusted hypervisor.
1103                // This applies to both simple and rep hypercalls.
1104                call_object
1105                    .control
1106                    .set_rep_start(call_object.status.elements_processed());
1107            } else {
1108                if call_object.control.rep_count() == 0 {
1109                    // For non-rep hypercalls, the elements processed field should be 0.
1110                    assert_eq!(call_object.status.elements_processed(), 0);
1111                } else {
1112                    // Hardware isolated VMs cannot trust output from the hypervisor, but check for
1113                    // consistency between the number of elements processed and the expected count. A
1114                    // violation of this assertion indicates a buggy or malicious hypervisor.
1115                    assert!(
1116                        (call_object.status.result().is_ok()
1117                            && call_object.control.rep_count()
1118                                == call_object.status.elements_processed())
1119                            || (call_object.status.result().is_err()
1120                                && call_object.control.rep_count()
1121                                    > call_object.status.elements_processed())
1122                    );
1123                }
1124
1125                return Ok(call_object.status);
1126            }
1127        }
1128    }
1129
1130    /// Issues a non-rep hypercall to the hypervisor via the direct hypercall kernel interface.
1131    /// This is not intended to be used directly by external callers; rather, it should be wrapped in safe hypercall wrappers.
1132    /// This call constructs the appropriate hypercall input control from the described parameters.
1133    ///
1134    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
1135    /// return value for the result of the hypercall.
1136    ///
1137    /// `code` is the hypercall code.
1138    /// `input` is the input type required by the hypercall.
1139    /// `output` is the output type required by the hypercall.
1140    ///
1141    /// Before invoking hypercalls, a list of hypercalls that are allowed
1142    /// has to be set with `Hcl::set_allowed_hypercalls`:
1143    /// ```ignore
1144    /// set_allowed_hypercalls(&[
1145    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
1146    ///     hvdef::HypercallCode::HvCallInstallIntercept,
1147    /// ]);
1148    /// ```
1149    /// # Safety
1150    /// This function makes no guarantees that the given input header, input and output types are valid for the
1151    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
1152    ///
1153    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
1154    ///
1155    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
1156    /// caller if needed.
1157    unsafe fn hvcall<I, O>(
1158        &self,
1159        code: HypercallCode,
1160        input: &I,
1161        output: &mut O,
1162    ) -> Result<HypercallOutput, HvcallError>
1163    where
1164        I: IntoBytes + Sized + Immutable + KnownLayout,
1165        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1166    {
1167        const fn assert_size<I, O>()
1168        where
1169            I: Sized,
1170            O: Sized,
1171        {
1172            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
1173            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
1174        }
1175        assert_size::<I, O>();
1176
1177        let control = hvdef::hypercall::Control::new().with_code(code.0);
1178
1179        let call_object = protocol::hcl_hvcall {
1180            control,
1181            input_data: input.as_bytes().as_ptr().cast(),
1182            input_size: size_of::<I>(),
1183            status: FromZeros::new_zeroed(),
1184            output_data: output.as_bytes().as_ptr().cast(),
1185            output_size: size_of::<O>(),
1186        };
1187
1188        // SAFETY: The data referenced in the call lives as long as `self` does.
1189        unsafe { self.invoke_hvcall_ioctl(call_object) }
1190    }
1191
1192    /// Issues a rep hypercall to the hypervisor via the direct hypercall kernel
1193    /// interface. Like the non-rep version, this is not intended to be used
1194    /// externally other than to construct safe wrappers. This call constructs
1195    /// the appropriate hypercall input control from the described parameters.
1196    ///
1197    /// `Ok(HypercallOutput)` is returned if the kernel was successful in
1198    /// issuing the hypercall. A caller must check the return value for the
1199    /// result of the hypercall.
1200    ///
1201    /// `code` is the hypercall code. `input_header` is the hypercall fixed
1202    /// length input header. Variable length headers are not supported.
1203    /// `input_rep` is the list of input elements. The length of the slice is
1204    /// used as the rep count.
1205    ///
1206    /// `output_rep` is the optional output rep list. A caller must check the
1207    /// returned [HypercallOutput] for the number of valid elements in this
1208    /// list.
1209    ///
1210    /// # Safety
1211    /// This function makes no guarantees that the given input header, input rep
1212    /// and output rep types are valid for the given hypercall. It is the
1213    /// caller's responsibility to use the correct types with the specified
1214    /// hypercall.
1215    ///
1216    /// A caller must check the returned [HypercallOutput] for success or
1217    /// failure from the hypervisor and processed rep count.
1218    ///
1219    /// Hardware isolated VMs cannot trust output from the hypervisor. This
1220    /// routine will ensure that the hypervisor either returns success with all
1221    /// elements processed, or returns failure with an incomplete number of
1222    /// elements processed. Actual validation of the output elements is the
1223    /// responsibility of the caller.
1224    unsafe fn hvcall_rep<InputHeader, InputRep, O>(
1225        &self,
1226        code: HypercallCode,
1227        input_header: &InputHeader,
1228        input_rep: HvcallRepInput<'_, InputRep>,
1229        output_rep: Option<&mut [O]>,
1230    ) -> Result<HypercallOutput, HvcallError>
1231    where
1232        InputHeader: IntoBytes + Sized + Immutable + KnownLayout,
1233        InputRep: IntoBytes + Sized + Immutable + KnownLayout,
1234        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1235    {
1236        // Construct input buffer.
1237        let (input, count) = match input_rep {
1238            HvcallRepInput::Elements(e) => {
1239                ([input_header.as_bytes(), e.as_bytes()].concat(), e.len())
1240            }
1241            HvcallRepInput::Count(c) => (input_header.as_bytes().to_vec(), c.into()),
1242        };
1243
1244        if input.len() > HV_PAGE_SIZE as usize {
1245            return Err(HvcallError::InputParametersTooLarge);
1246        }
1247
1248        if let Some(output_rep) = &output_rep {
1249            if output_rep.as_bytes().len() > HV_PAGE_SIZE as usize {
1250                return Err(HvcallError::OutputParametersTooLarge);
1251            }
1252
1253            if count != output_rep.len() {
1254                return Err(HvcallError::InputOutputRepListMismatch);
1255            }
1256        }
1257
1258        let (output_data, output_size) = match output_rep {
1259            Some(output_rep) => (
1260                output_rep.as_bytes().as_ptr().cast(),
1261                output_rep.as_bytes().len(),
1262            ),
1263            None => (std::ptr::null(), 0),
1264        };
1265
1266        let control = hvdef::hypercall::Control::new()
1267            .with_code(code.0)
1268            .with_rep_count(count);
1269
1270        let call_object = protocol::hcl_hvcall {
1271            control,
1272            input_data: input.as_ptr().cast(),
1273            input_size: input.len(),
1274            status: HypercallOutput::new(),
1275            output_data,
1276            output_size,
1277        };
1278
1279        // SAFETY: The data referenced in the call lives as long as `self` does.
1280        unsafe { self.invoke_hvcall_ioctl(call_object) }
1281    }
1282
1283    /// Issues a non-rep hypercall with variable input to the hypervisor via the direct hypercall kernel interface.
1284    /// This is not intended to be used directly by external callers; rather, it should be wrapped in safe hypercall wrappers.
1285    /// This call constructs the appropriate hypercall input control from the described parameters.
1286    ///
1287    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
1288    /// return value for the result of the hypercall.
1289    ///
1290    /// `code` is the hypercall code.
1291    /// `input` is the input type required by the hypercall.
1292    /// `output` is the output type required by the hypercall.
1293    /// `variable_input` is the contents of the variable input to the hypercall. The length must be a multiple of 8 bytes.
1294    ///
1295    /// # Safety
1296    /// This function makes no guarantees that the given input header, input and output types are valid for the
1297    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
1298    ///
1299    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
1300    ///
1301    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
1302    /// caller if needed.
1303    unsafe fn hvcall_var<I, O>(
1304        &self,
1305        code: HypercallCode,
1306        input: &I,
1307        variable_input: &[u8],
1308        output: &mut O,
1309    ) -> Result<HypercallOutput, HvcallError>
1310    where
1311        I: IntoBytes + Sized + Immutable + KnownLayout,
1312        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1313    {
1314        const fn assert_size<I, O>()
1315        where
1316            I: Sized,
1317            O: Sized,
1318        {
1319            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
1320            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
1321        }
1322        assert_size::<I, O>();
1323        assert!(variable_input.len().is_multiple_of(8));
1324
1325        let input = [input.as_bytes(), variable_input].concat();
1326        if input.len() > HV_PAGE_SIZE as usize {
1327            return Err(HvcallError::InputParametersTooLarge);
1328        }
1329
1330        let control = hvdef::hypercall::Control::new()
1331            .with_code(code.0)
1332            .with_variable_header_size(variable_input.len() / 8);
1333
1334        let call_object = protocol::hcl_hvcall {
1335            control,
1336            input_data: input.as_bytes().as_ptr().cast(),
1337            input_size: input.len(),
1338            status: FromZeros::new_zeroed(),
1339            output_data: output.as_bytes().as_ptr().cast(),
1340            output_size: size_of::<O>(),
1341        };
1342
1343        // SAFETY: The data referenced in the call lives as long as `self` does.
1344        unsafe { self.invoke_hvcall_ioctl(call_object) }
1345    }
1346
1347    /// Sets the VTL protection mask for the specified memory range.
1348    ///
1349    /// [`HypercallCode::HvCallModifyVtlProtectionMask`] must be allowed.
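    ///
    /// A hedged sketch (the flag constructors and target-VTL conversion shown
    /// are assumptions, chosen for illustration only):
    ///
    /// ```ignore
    /// hvcall.modify_vtl_protection_mask(
    ///     MemoryRange::from_4k_gpn_range(0x100..0x200),
    ///     HvMapGpaFlags::new().with_readable(true).with_writable(true), // assumed builder methods
    ///     HvInputVtl::from(Vtl::Vtl0), // assumed conversion for targeting VTL 0
    /// )?;
    /// ```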
1350    pub fn modify_vtl_protection_mask(
1351        &self,
1352        range: MemoryRange,
1353        map_flags: HvMapGpaFlags,
1354        target_vtl: HvInputVtl,
1355    ) -> Result<(), ApplyVtlProtectionsError> {
1356        let header = hvdef::hypercall::ModifyVtlProtectionMask {
1357            partition_id: HV_PARTITION_ID_SELF,
1358            map_flags,
1359            target_vtl,
1360            reserved: [0; 3],
1361        };
1362
1363        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
1364            - size_of::<hvdef::hypercall::ModifyVtlProtectionMask>())
1365            / size_of::<u64>();
1366
1367        let span = tracing::info_span!("modify_vtl_protection_mask", CVM_ALLOWED, ?range);
1368        let _enter = span.enter();
1369
1370        let start = range.start() / HV_PAGE_SIZE;
1371        let end = range.end() / HV_PAGE_SIZE;
1372
1373        // Reuse the same vector for every hypercall.
1374        let mut pages = Vec::new();
1375        for current_page in (start..end).step_by(MAX_INPUT_ELEMENTS) {
1376            let remaining_pages = end - current_page;
1377            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
1378            pages.clear();
1379            pages.extend(current_page..current_page + count);
1380
1381            // SAFETY: The input header and rep slice are the correct types for this hypercall. A dummy type of u8 is
1382            //         provided to satisfy the compiler for output rep type. The given input and slices are valid
1383            //         references while this function is called.
1384            //
1385            //         The hypercall output is validated right after the hypercall is issued.
1386            let output = unsafe {
1387                self.hvcall_rep::<hvdef::hypercall::ModifyVtlProtectionMask, u64, u8>(
1388                    HypercallCode::HvCallModifyVtlProtectionMask,
1389                    &header,
1390                    HvcallRepInput::Elements(pages.as_slice()),
1391                    None,
1392                )
1393                .expect("kernel hypercall submission should always succeed")
1394            };
1395
1396            output.result().map_err(|err| {
1397                let page_range =
1398                    *pages.first().expect("not empty")..*pages.last().expect("not empty") + 1;
1399                ApplyVtlProtectionsError::Hypervisor {
1400                    range: MemoryRange::from_4k_gpn_range(page_range),
1401                    output,
1402                    hv_error: err,
1403                    vtl: target_vtl,
1404                }
1405            })?;
1406
1407            assert_eq!(output.elements_processed() as u64, count);
1408        }
1409
1410        Ok(())
1411    }
1412
1413    /// Get a single VP register for the given VTL via hypercall.
1414    fn get_vp_register_for_vtl_inner(
1415        &self,
1416        target_vtl: HvInputVtl,
1417        name: HvRegisterName,
1418    ) -> Result<HvRegisterValue, Error> {
1419        let header = hvdef::hypercall::GetSetVpRegisters {
1420            partition_id: HV_PARTITION_ID_SELF,
1421            vp_index: HV_VP_INDEX_SELF,
1422            target_vtl,
1423            rsvd: [0; 3],
1424        };
1425        let mut output = [HvRegisterValue::new_zeroed()];
1426
1427        // SAFETY: The input header and rep slice are the correct types for this hypercall.
1428        //         The hypercall output is validated right after the hypercall is issued.
1429        let status = unsafe {
1430            self.hvcall_rep(
1431                HypercallCode::HvCallGetVpRegisters,
1432                &header,
1433                HvcallRepInput::Elements(&[name]),
1434                Some(&mut output),
1435            )
1436            .expect("get_vp_register hypercall should not fail")
1437        };
1438
1439        // Status must be success with 1 rep completed
1440        status
1441            .result()
1442            .map_err(|err| Error::GetVpRegisterHypercall {
1443                reg: name.into(),
1444                err,
1445            })?;
1446        assert_eq!(status.elements_processed(), 1);
1447
1448        Ok(output[0])
1449    }
1450
1451    /// Get a single VP register for the given VTL via hypercall. Only a select
1452    /// set of registers are supported; others will cause a panic.
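    ///
    /// A hedged sketch (`HvInputVtl::CURRENT_VTL` is an assumed way to address
    /// the current VTL):
    ///
    /// ```ignore
    /// let config = hvcall.get_vp_register_for_vtl(
    ///     HvInputVtl::CURRENT_VTL,
    ///     HvX64RegisterName::VsmPartitionConfig,
    /// )?;
    /// ```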
1453    #[cfg(guest_arch = "x86_64")]
1454    fn get_vp_register_for_vtl(
1455        &self,
1456        vtl: HvInputVtl,
1457        name: HvX64RegisterName,
1458    ) -> Result<HvRegisterValue, Error> {
1459        match vtl.target_vtl().unwrap() {
1460            None | Some(Vtl::Vtl2) => {
1461                assert!(matches!(
1462                    name,
1463                    HvX64RegisterName::GuestVsmPartitionConfig
1464                        | HvX64RegisterName::VsmPartitionConfig
1465                        | HvX64RegisterName::VsmPartitionStatus
1466                        | HvX64RegisterName::VsmCapabilities
1467                        | HvX64RegisterName::TimeRefCount
1468                        | HvX64RegisterName::VsmVpSecureConfigVtl0
1469                        | HvX64RegisterName::VsmVpSecureConfigVtl1
1470                ));
1471            }
1472            Some(Vtl::Vtl1) | Some(Vtl::Vtl0) => {
1473                // Only VTL-private registers can go through this path.
1474                // VTL-shared registers have to go through the kernel (either
1475                // via the CPU context page or via the dedicated ioctl), as
1476                // they may require special handling there.
1477                //
1478                // Register access should go through the register page if
1479                // possible (as a performance optimization). In practice,
1480                // registers that are normally available on the register page
1481                // are handled here only when it is unavailable (e.g., running
1482                // in WHP).
1483                assert!(!is_vtl_shared_reg(name));
1484            }
1485        }
1486
1487        self.get_vp_register_for_vtl_inner(vtl, name.into())
1488    }
1489
1490    /// Get a single VP register for the given VTL via hypercall. Only a select
1491    /// set of registers are supported; others will cause a panic.
1492    #[cfg(guest_arch = "aarch64")]
1493    fn get_vp_register_for_vtl(
1494        &self,
1495        vtl: HvInputVtl,
1496        name: HvArm64RegisterName,
1497    ) -> Result<HvRegisterValue, Error> {
1498        match vtl.target_vtl().unwrap() {
1499            None | Some(Vtl::Vtl2) => {
1500                assert!(matches!(
1501                    name,
1502                    HvArm64RegisterName::GuestVsmPartitionConfig
1503                        | HvArm64RegisterName::VsmPartitionConfig
1504                        | HvArm64RegisterName::VsmPartitionStatus
1505                        | HvArm64RegisterName::VsmCapabilities
1506                        | HvArm64RegisterName::TimeRefCount
1507                        | HvArm64RegisterName::VsmVpSecureConfigVtl0
1508                        | HvArm64RegisterName::VsmVpSecureConfigVtl1
1509                        | HvArm64RegisterName::PrivilegesAndFeaturesInfo
1510                ));
1511            }
1512            Some(Vtl::Vtl1) | Some(Vtl::Vtl0) => {
1513                // Only VTL-private registers can go through this path.
1514                // VTL-shared registers have to go through the kernel (either
1515                // via the CPU context page or via the dedicated ioctl), as
1516                // they may require special handling there.
1517                assert!(!is_vtl_shared_reg(name));
1518            }
1519        }
1520
1521        self.get_vp_register_for_vtl_inner(vtl, name.into())
1522    }
1523
1524    /// Invokes the HvCallMemoryMappedIoRead hypercall
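    ///
    /// A minimal usage sketch (illustrative only; `gpa` and the 4-byte access
    /// width are assumptions, not requirements of this interface):
    ///
    /// ```ignore
    /// let mut buf = [0u8; 4];
    /// hvcall.mmio_read(gpa, &mut buf)?;
    /// let value = u32::from_le_bytes(buf);
    /// ```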
1525    pub fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> Result<(), HvError> {
1526        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1527
1528        let header = hvdef::hypercall::MemoryMappedIoRead {
1529            gpa,
1530            access_width: data.len() as u32,
1531            reserved_z0: 0,
1532        };
1533
1534        let mut output: hvdef::hypercall::MemoryMappedIoReadOutput = FromZeros::new_zeroed();
1535
1536        // SAFETY: The input header and slice are the correct types for this hypercall.
1537        //         The hypercall output is validated right after the hypercall is issued.
1538        let status = unsafe {
1539            self.hvcall(
1540                HypercallCode::HvCallMemoryMappedIoRead,
1541                &header,
1542                &mut output,
1543            )
1544            .expect("submitting hypercall should not fail")
1545        };
1546
1547        // Only copy the data out if the hypercall succeeded.
1548        if status.result().is_ok() {
1549            data.copy_from_slice(&output.data[..data.len()]);
1550        }
1551
1552        status.result()
1553    }
1554
1555    /// Invokes the HvCallMemoryMappedIoWrite hypercall
1556    pub fn mmio_write(&self, gpa: u64, data: &[u8]) -> Result<(), HvError> {
1557        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1558
1559        let mut header = hvdef::hypercall::MemoryMappedIoWrite {
1560            gpa,
1561            access_width: data.len() as u32,
1562            reserved_z0: 0,
1563            data: [0; hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH],
1564        };
1565
1566        header.data[..data.len()].copy_from_slice(data);
1567
1568        // SAFETY: The input header and slice are the correct types for this hypercall.
1569        //         The hypercall output is validated right after the hypercall is issued.
1570        let status = unsafe {
1571            self.hvcall(HypercallCode::HvCallMemoryMappedIoWrite, &header, &mut ())
1572                .expect("submitting hypercall should not fail")
1573        };
1574
1575        status.result()
1576    }
1577
1578    /// Request a VBS VM report from the host VSM.
1579    ///
1580    /// # Arguments
1581    /// - `report_data`: The data to include in the report.
1582    ///
1583    /// Returns a result containing the report or an error.
1584    pub fn vbs_vm_call_report(
1585        &self,
1586        report_data: &[u8],
1587    ) -> Result<[u8; hvdef::hypercall::VBS_VM_MAX_REPORT_SIZE], HvError> {
1588        if report_data.len() > hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE {
1589            return Err(HvError::InvalidParameter);
1590        }
1591
1592        let mut header = hvdef::hypercall::VbsVmCallReport {
1593            report_data: [0; hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE],
1594        };
1595
1596        header.report_data[..report_data.len()].copy_from_slice(report_data);
1597
1598        let mut output: hvdef::hypercall::VbsVmCallReportOutput = FromZeros::new_zeroed();
1599
1600        // SAFETY: The input header and slice are the correct types for this hypercall.
1601        //         The hypercall output is validated right after the hypercall is issued.
1602        let status = unsafe {
1603            self.hvcall(HypercallCode::HvCallVbsVmCallReport, &header, &mut output)
1604                .expect("submitting hypercall should not fail")
1605        };
1606
1607        // Return the report only if the hypercall succeeded.
1608        status.result().map(|()| output.report)
1612    }
1613}
1614
1615/// The HCL device and collection of fds.
1616#[derive(Debug)]
1617pub struct Hcl {
1618    mshv_hvcall: MshvHvcall,
1619    mshv_vtl: MshvVtl,
1620    vps: Vec<HclVp>,
1621    supports_vtl_ret_action: bool,
1622    supports_register_page: bool,
1623    dr6_shared: bool,
1624    isolation: IsolationType,
1625    snp_register_bitmap: [u8; 64],
1626    sidecar: Option<SidecarClient>,
1627}
1628
1629/// The isolation type for a partition.
1630// TODO: Add guest_arch cfgs.
1631#[derive(Debug, Copy, Clone, PartialEq, Eq)]
1632pub enum IsolationType {
1633    /// No isolation.
1634    None,
1635    /// Hyper-V software isolation.
1636    Vbs,
1637    /// AMD SNP.
1638    Snp,
1639    /// Intel TDX.
1640    Tdx,
1641}
1642
1643impl IsolationType {
1644    /// Returns true if the isolation type is not `None`.
1645    pub fn is_isolated(&self) -> bool {
1646        !matches!(self, Self::None)
1647    }
1648
1649    /// Returns whether the isolation type is hardware-backed.
1650    pub fn is_hardware_isolated(&self) -> bool {
1651        matches!(self, Self::Snp | Self::Tdx)
1652    }
1653}
1654
1655impl Hcl {
1656    /// Returns true if DR6 is a shared register on this processor.
1657    pub fn dr6_shared(&self) -> bool {
1658        self.dr6_shared
1659    }
1660}
1661
1662#[derive(Debug)]
1663struct HclVp {
1664    state: Mutex<VpState>,
1665    run: MappedPage<hcl_run>,
1666    backing: BackingState,
1667}
1668
1669#[derive(Debug)]
1670enum BackingState {
1671    MshvAarch64 {
1672        reg_page: Option<MappedPage<HvAarch64RegisterPage>>,
1673    },
1674    MshvX64 {
1675        reg_page: Option<MappedPage<HvX64RegisterPage>>,
1676    },
1677    Snp {
1678        vmsa: VtlArray<MappedPage<SevVmsa>, 2>,
1679    },
1680    Tdx {
1681        vtl0_apic_page: MappedPage<ApicPage>,
1682        vtl1_apic_page: MemoryBlock,
1683    },
1684}
1685
1686#[derive(Debug)]
1687enum VpState {
1688    Running(Pthread),
1689    NotRunning,
1690}
1691
1692impl HclVp {
1693    fn new(
1694        hcl: &Hcl,
1695        vp: u32,
1696        map_reg_page: bool,
1697        isolation_type: IsolationType,
1698        private_dma_client: Option<&Arc<dyn DmaClient>>,
1699    ) -> Result<Self, Error> {
1700        let fd = &hcl.mshv_vtl.file;
1701        let run: MappedPage<hcl_run> =
1702            MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
1703        // Block proxied interrupts on all vectors by default. The mask will be
1704        // relaxed as the guest runs.
1705        //
1706        // This is only used on CVMs. Skip it otherwise, since run page accesses
1707        // will fault on VPs that are still in the sidecar kernel.
1708        if isolation_type.is_hardware_isolated() {
1709            // SAFETY: `proxy_irr_blocked` is not accessed by any other VPs/kernel at this point (`HclVp` creation)
1710            // so we know we have exclusive access.
1711            let proxy_irr_blocked = unsafe { &mut (*run.as_ptr()).proxy_irr_blocked };
1712            proxy_irr_blocked.fill(!0);
1713        }
1714
1715        let backing = match isolation_type {
1716            IsolationType::None | IsolationType::Vbs if cfg!(guest_arch = "aarch64") => {
1717                BackingState::MshvAarch64 {
1718                    reg_page: if map_reg_page {
1719                        Some(
1720                            MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1721                                .map_err(Error::MmapRegPage)?,
1722                        )
1723                    } else {
1724                        None
1725                    },
1726                }
1727            }
1728            IsolationType::None | IsolationType::Vbs => BackingState::MshvX64 {
1729                reg_page: if map_reg_page {
1730                    Some(
1731                        MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
1732                            .map_err(Error::MmapRegPage)?,
1733                    )
1734                } else {
1735                    None
1736                },
1737            },
1738            IsolationType::Snp => {
1739                let vmsa_vtl0 = MappedPage::new(fd, HCL_VMSA_PAGE_OFFSET | vp as i64)
1740                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?;
1741                let vmsa_vtl1 = MappedPage::new(fd, HCL_VMSA_GUEST_VSM_PAGE_OFFSET | vp as i64)
1742                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl1)))?;
1743                BackingState::Snp {
1744                    vmsa: [vmsa_vtl0, vmsa_vtl1].into(),
1745                }
1746            }
1747            IsolationType::Tdx => BackingState::Tdx {
1748                vtl0_apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
1749                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?,
1750                vtl1_apic_page: private_dma_client
1751                    .ok_or(Error::MissingPrivateMemory)?
1752                    .allocate_dma_buffer(HV_PAGE_SIZE as usize)
1753                    .map_err(Error::AllocVp)?,
1754            },
1755        };
1756
1757        Ok(Self {
1758            state: Mutex::new(VpState::NotRunning),
1759            run,
1760            backing,
1761        })
1762    }
1763}
1764
1765/// Object used to run and to access state for a specific VP.
1766pub struct ProcessorRunner<'a, T: Backing<'a>> {
1767    hcl: &'a Hcl,
1768    vp: &'a HclVp,
1769    sidecar: Option<SidecarVp<'a>>,
1770    deferred_actions: Option<RegisteredDeferredActions<'a>>,
1771    run: &'a UnsafeCell<hcl_run>,
1772    intercept_message: &'a UnsafeCell<HvMessage>,
1773    state: T,
1774}
1775
1776/// An error returned by [`Hcl::runner`].
1777#[derive(Debug, Error)]
1778pub enum NoRunner {
1779    /// The partition is for a different isolation type.
1780    #[error("mismatched isolation type")]
1781    MismatchedIsolation,
1782    /// A sidecar VP was requested, but no sidecar was provided.
1783    #[error("missing sidecar")]
1784    MissingSidecar,
1785    /// The sidecar VP could not be contacted.
1786    #[error("sidecar communication error")]
1787    Sidecar(#[source] sidecar_client::SidecarError),
1788}
1789
1790/// An isolation-type-specific backing for a processor runner.
1791#[expect(private_bounds)]
1792pub trait Backing<'a>: BackingPrivate<'a> {}
1793
1794impl<'a, T: BackingPrivate<'a>> Backing<'a> for T {}
1795
1796mod private {
1797    use super::Error;
1798    use super::Hcl;
1799    use super::HclVp;
1800    use super::NoRunner;
1801    use super::ProcessorRunner;
1802    use crate::GuestVtl;
1803    use hvdef::HvRegisterName;
1804    use hvdef::HvRegisterValue;
1805    use sidecar_client::SidecarVp;
1806
1807    pub(super) trait BackingPrivate<'a>: Sized {
1808        fn new(vp: &'a HclVp, sidecar: Option<&SidecarVp<'a>>, hcl: &Hcl)
1809        -> Result<Self, NoRunner>;
1810
1811        fn try_set_reg(
1812            runner: &mut ProcessorRunner<'a, Self>,
1813            vtl: GuestVtl,
1814            name: HvRegisterName,
1815            value: HvRegisterValue,
1816        ) -> Result<bool, Error>;
1817
1818        fn must_flush_regs_on(runner: &ProcessorRunner<'a, Self>, name: HvRegisterName) -> bool;
1819
1820        fn try_get_reg(
1821            runner: &ProcessorRunner<'a, Self>,
1822            vtl: GuestVtl,
1823            name: HvRegisterName,
1824        ) -> Result<Option<HvRegisterValue>, Error>;
1825
1826        fn flush_register_page(runner: &mut ProcessorRunner<'a, Self>);
1827    }
1828}
1829
1830impl<'a, T: Backing<'a>> Drop for ProcessorRunner<'a, T> {
1831    fn drop(&mut self) {
1832        self.flush_deferred_state();
1833        drop(self.deferred_actions.take());
1834        let old_state = std::mem::replace(&mut *self.vp.state.lock(), VpState::NotRunning);
1835        assert!(matches!(old_state, VpState::Running(thread) if thread == Pthread::current()));
1836    }
1837}
1838
1839impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
1840    /// Flushes any deferred state. Must be called if preparing the partition
1841    /// for save/restore (servicing).
1842    pub fn flush_deferred_state(&mut self) {
1843        T::flush_register_page(self);
1844        if let Some(actions) = &mut self.deferred_actions {
1845            actions.flush();
1846        }
1847    }
1848
1849    // Registers that are shared between VTLs need to be handled by the kernel
1850    // as they may require special handling there. set_reg and get_reg will
1851    // handle these registers using a dedicated ioctl, instead of the general-
1852    // purpose Set/GetVpRegisters hypercalls.
1853    #[cfg(guest_arch = "x86_64")]
1854    fn is_kernel_managed(&self, name: HvX64RegisterName) -> bool {
1855        if name == HvX64RegisterName::Dr6 {
1856            self.hcl.dr6_shared()
1857        } else {
1858            is_vtl_shared_reg(name)
1859        }
1860    }
1861
1862    #[cfg(guest_arch = "aarch64")]
1863    fn is_kernel_managed(&self, name: HvArm64RegisterName) -> bool {
1864        is_vtl_shared_reg(name)
1865    }
1866
1867    fn set_reg(&mut self, vtl: GuestVtl, regs: &[HvRegisterAssoc]) -> Result<(), Error> {
1868        if regs.is_empty() {
1869            return Ok(());
1870        }
1871
1872        if let Some(sidecar) = &mut self.sidecar {
1873            sidecar
1874                .set_vp_registers(vtl.into(), regs)
1875                .map_err(Error::Sidecar)?;
1876        } else {
1877            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1878            // currently has a bug where it only supports one register at a
1879            // time. Once that's fixed, this code could set a group of
1880            // registers in one ioctl.
1881            for reg in regs {
1882                let hc_regs = &mut [HvRegisterAssoc {
1883                    name: reg.name,
1884                    pad: [0; 3],
1885                    value: reg.value,
1886                }];
1887
1888                if self.is_kernel_managed(reg.name.into()) {
1889                    let hv_vp_register_args = mshv_vp_registers {
1890                        count: 1,
1891                        regs: hc_regs.as_mut_ptr(),
1892                    };
1893                    // SAFETY: ioctl call with correct types.
1894                    unsafe {
1895                        hcl_set_vp_register(
1896                            self.hcl.mshv_vtl.file.as_raw_fd(),
1897                            &hv_vp_register_args,
1898                        )
1899                        .map_err(Error::SetVpRegister)?;
1900                    }
1901                } else {
1902                    // Reuse the association already built above; the hypercall
1903                    // path only needs a shared slice.
1904                    self.set_vp_registers_hvcall_inner(vtl.into(), &hc_regs[..])
1905                        .map_err(Error::SetRegisters)?;
1906                }
1910            }
1911        }
1912        Ok(())
1913    }
1914
1915    fn get_reg(&mut self, vtl: Vtl, regs: &mut [HvRegisterAssoc]) -> Result<(), Error> {
1916        if regs.is_empty() {
1917            return Ok(());
1918        }
1919
1920        if let Some(sidecar) = &mut self.sidecar {
1921            sidecar
1922                .get_vp_registers(vtl.into(), regs)
1923                .map_err(Error::Sidecar)?;
1924        } else {
1925            // TODO: group up to MSHV_VP_MAX_REGISTERS regs. The kernel
1926            // currently has a bug where it only supports one register at a
1927            // time. Once that's fixed, this code could get a group of
1928            // registers in one ioctl.
1929            for reg in regs {
1930                if self.is_kernel_managed(reg.name.into()) {
1931                    let mut mshv_vp_register_args = mshv_vp_registers {
1932                        count: 1,
1933                        regs: reg,
1934                    };
1935                    // SAFETY: we know that our file is a vCPU fd, we know the kernel will only read the
1936                    // correct amount of memory from our pointer, and we verify the return result.
1937                    unsafe {
1938                        hcl_get_vp_register(
1939                            self.hcl.mshv_vtl.file.as_raw_fd(),
1940                            &mut mshv_vp_register_args,
1941                        )
1942                        .map_err(Error::GetVpRegister)?;
1943                    }
1944                } else {
1945                    reg.value = self
1946                        .hcl
1947                        .mshv_hvcall
1948                        .get_vp_register_for_vtl(vtl.into(), reg.name.into())?;
1949                }
1950            }
1951        }
1952        Ok(())
1953    }
1954
1955    /// Clears the cancel flag so that the VP can be run again.
1956    pub fn clear_cancel(&mut self) {
1957        if !self.is_sidecar() {
1958            // SAFETY: self.run is mapped, and the cancel field is atomically
1959            // accessed by everyone.
1960            let cancel = unsafe { &*(&raw mut (*self.run.get()).cancel).cast::<AtomicU32>() };
1961            cancel.store(0, Ordering::SeqCst);
1962        }
1963    }
1964
1965    /// Set the halted state of the VP. If `true`, then `run()` will not
1966    /// actually run the VP but will just wait for a cancel request or signal.
1967    pub fn set_halted(&mut self, halted: bool) {
1968        // SAFETY: the `flags` field of the run page will not be concurrently
1969        // updated.
1970        let flags = unsafe { &mut (*self.run.get()).flags };
1971        if halted {
1972            *flags |= protocol::MSHV_VTL_RUN_FLAG_HALTED
1973        } else {
1974            *flags &= !protocol::MSHV_VTL_RUN_FLAG_HALTED
1975        }
1976    }
1977
1978    /// Gets the proxied interrupt request bitmap for VTL 0 from the hypervisor.
1979    pub fn proxy_irr_vtl0(&mut self) -> Option<[u32; 8]> {
1980        // SAFETY: the `scan_proxy_irr` and `proxy_irr` fields of the run page
1981        // are concurrently updated by the kernel on multiple processors. They
1982        // are accessed atomically everywhere.
1983        unsafe {
1984            let scan_proxy_irr = &*((&raw mut (*self.run.get()).scan_proxy_irr).cast::<AtomicU8>());
1985            let proxy_irr = &*((&raw mut (*self.run.get()).proxy_irr).cast::<[AtomicU32; 8]>());
1986            if scan_proxy_irr.load(Ordering::Acquire) == 0 {
1987                return None;
1988            }
1989
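            // Consume the pending interrupt bits: clear the scan flag, then
            // atomically swap each non-zero IRR word with zero so every bit is
            // returned to the caller exactly once.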
1990            scan_proxy_irr.store(0, Ordering::SeqCst);
1991            let mut r = [0; 8];
1992            for (irr, r) in proxy_irr.iter().zip(r.iter_mut()) {
1993                if irr.load(Ordering::Relaxed) != 0 {
1994                    *r = irr.swap(0, Ordering::Relaxed);
1995                }
1996            }
1997            Some(r)
1998        }
1999    }
2000
2001    /// Update the `proxy_irr_blocked` for VTL 0 in the run page
2002    pub fn update_proxy_irr_filter_vtl0(&mut self, irr_filter: &[u32; 8]) {
2003        // SAFETY: `proxy_irr_blocked` is only accessed by the current VP, but it
2004        // may also be accessed concurrently by the kernel, so access it atomically.
2005        let proxy_irr_blocked = unsafe {
2006            &mut *((&raw mut (*self.run.get()).proxy_irr_blocked).cast::<[AtomicU32; 8]>())
2007        };
2008
2009        // The `irr_filter` bitmap has a bit set for every allowed vector (i.e. SINTs and device interrupts).
2010        // Replace the current `proxy_irr_blocked` with the complement of `irr_filter`:
2011        // block everything by default (all bits set) and allow (clear) only the given vectors.
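        //
        // For example, if `irr_filter` allows only vector 0x30 (bit 16 of word 1),
        // every word of `proxy_irr_blocked` is stored as !0 except word 1, which
        // becomes !(1 << 16).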
2012        for (filter, irr) in proxy_irr_blocked.iter_mut().zip(irr_filter.iter()) {
2013            filter.store(!irr, Ordering::Relaxed);
2014            tracing::debug!(irr, "update_proxy_irr_filter");
2015        }
2016    }
2017
2018    /// Gets the `proxy_irr_exit` bitmask for VTL 0. Vectors set in this
2019    /// mask always exit to user space and are never injected directly by
2020    /// the kernel; interrupts matching this condition are instead left
2021    /// pending in the `proxy_irr` field.
2022    pub fn proxy_irr_exit_mut_vtl0(&mut self) -> &mut [u32; 8] {
2023        // SAFETY: The `proxy_irr_exit` field of the run page will not be concurrently updated.
2024        unsafe { &mut (*self.run.get()).proxy_irr_exit }
2025    }
2026
2027    /// Gets the current offload_flags from the run page.
2028    pub fn offload_flags_mut(&mut self) -> &mut hcl_intr_offload_flags {
2029        // SAFETY: The `offload_flags` field of the run page will not be concurrently updated.
2030        unsafe { &mut (*self.run.get()).offload_flags }
2031    }
2032
2033    /// Runs the VP via the sidecar kernel.
2034    pub fn run_sidecar(&mut self) -> Result<SidecarRun<'_, 'a>, Error> {
2035        self.sidecar.as_mut().unwrap().run().map_err(Error::Sidecar)
2036    }
2037
2038    /// Runs this VP until an exit, error, or interrupt (cancel or
2039    /// signal) occurs.
2040    ///
2041    /// Returns `Ok(true)` if there is an exit to process, `Ok(false)` if there
2042    /// was a signal or cancel request.
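    ///
    /// A typical (illustrative) call pattern, assuming a runner obtained from
    /// [`Hcl::runner`]:
    ///
    /// ```ignore
    /// if runner.run()? {
    ///     let message = runner.exit_message();
    ///     // ... dispatch on the intercept message type ...
    /// }
    /// ```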
2043    pub fn run(&mut self) -> Result<bool, Error> {
2044        assert!(self.sidecar.is_none());
2045        // Apply any deferred actions to the run page.
2046        if let Some(actions) = &mut self.deferred_actions {
2047            debug_assert!(self.hcl.supports_vtl_ret_action);
2048            // SAFETY: there are no concurrent accesses to the deferred action
2049            // slots.
2050            let mut slots = unsafe { DeferredActionSlots::new(self.run) };
2051            actions.move_to_slots(&mut slots);
2052        };
2053
2054        // N.B. cpu_context and exit_context are mutated by this call.
2055        //
2056        // SAFETY: no safety requirements for this ioctl.
2057        let r = unsafe { hcl_return_to_lower_vtl(self.hcl.mshv_vtl.file.as_raw_fd()) };
2058
2059        let has_intercept = match r {
2060            Ok(_) => true,
2061            Err(nix::errno::Errno::EINTR) => false,
2062            Err(err) => return Err(Error::ReturnToLowerVtl(err)),
2063        };
2064        Ok(has_intercept)
2065    }
2066
2067    /// Gets a reference to the enter mode value, used by the kernel to specify
2068    /// the mode used when entering a lower VTL.
2069    pub fn enter_mode(&mut self) -> Option<&mut EnterModes> {
2070        if self.sidecar.is_some() {
2071            None
2072        } else {
2073            // SAFETY: self.run is mapped, and the mode field can only be mutated or accessed by
2074            // this object (or the kernel while `run` is called).
2075            Some(unsafe { &mut (*self.run.get()).mode })
2076        }
2077    }
2078
2079    /// Returns a reference to the exit message from the last exit.
2080    pub fn exit_message(&self) -> &HvMessage {
2081        // SAFETY: the exit message will not be concurrently accessed by the
2082        // kernel while this VP is in VTL2.
2083        unsafe { &*self.intercept_message.get() }
2084    }
2085
2086    /// Returns whether this is a sidecar VP.
2087    pub fn is_sidecar(&self) -> bool {
2088        self.sidecar.is_some()
2089    }
2090
2091    fn get_vp_registers_inner<R: Copy + Into<HvRegisterName>>(
2092        &mut self,
2093        vtl: GuestVtl,
2094        names: &[R],
2095        values: &mut [HvRegisterValue],
2096    ) -> Result<(), Error> {
2097        assert_eq!(names.len(), values.len());
2098        let mut assoc = Vec::new();
2099        let mut offset = Vec::new();
2100        for (i, (&name, value)) in names.iter().zip(values.iter_mut()).enumerate() {
2101            if let Some(v) = T::try_get_reg(self, vtl, name.into())? {
2102                *value = v;
2103            } else {
2104                assoc.push(HvRegisterAssoc {
2105                    name: name.into(),
2106                    pad: Default::default(),
2107                    value: FromZeros::new_zeroed(),
2108                });
2109                offset.push(i);
2110            }
2111        }
2112
2113        self.get_reg(vtl.into(), &mut assoc)?;
2114        for (&i, assoc) in offset.iter().zip(&assoc) {
2115            values[i] = assoc.value;
2116        }
2117        Ok(())
2118    }
2119
2120    /// Get the following register on the current VP.
2121    ///
2122    /// This will fail for registers that are in the mmapped CPU context, i.e.
2123    /// registers that are shared between VTL0 and VTL2.
2124    pub fn get_vp_register(
2125        &mut self,
2126        vtl: GuestVtl,
2127        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2128        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2129    ) -> Result<HvRegisterValue, Error> {
2130        let mut value = [0u64.into(); 1];
2131        self.get_vp_registers_inner(vtl, &[name], &mut value)?;
2132        Ok(value[0])
2133    }
2134
2135    /// Get the following register on the current VP for VTL 2.
2136    ///
2137    /// This will fail for registers that are in the mmapped CPU context, i.e.
2138    /// registers that are shared between VTL0 and VTL2.
2139    pub fn get_vp_vtl2_register(
2140        &mut self,
2141        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2142        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2143    ) -> Result<HvRegisterValue, Error> {
2144        let mut assoc = [HvRegisterAssoc {
2145            name: name.into(),
2146            pad: Default::default(),
2147            value: FromZeros::new_zeroed(),
2148        }];
2149        self.get_reg(Vtl::Vtl2, &mut assoc)?;
2150        Ok(assoc[0].value)
2151    }
2152
2153    /// Get the following VP registers on the current VP.
2154    ///
2155    /// # Panics
2156    /// Panics if `names.len() != values.len()`.
2157    pub fn get_vp_registers(
2158        &mut self,
2159        vtl: GuestVtl,
2160        #[cfg(guest_arch = "x86_64")] names: &[HvX64RegisterName],
2161        #[cfg(guest_arch = "aarch64")] names: &[HvArm64RegisterName],
2162        values: &mut [HvRegisterValue],
2163    ) -> Result<(), Error> {
2164        self.get_vp_registers_inner(vtl, names, values)
2165    }
2166
2167    /// Set the following register on the current VP.
2168    ///
2169    /// This will fail for registers that are in the mmapped CPU context, i.e.
2170    /// registers that are shared between VTL0 and VTL2.
2171    pub fn set_vp_register(
2172        &mut self,
2173        vtl: GuestVtl,
2174        #[cfg(guest_arch = "x86_64")] name: HvX64RegisterName,
2175        #[cfg(guest_arch = "aarch64")] name: HvArm64RegisterName,
2176        value: HvRegisterValue,
2177    ) -> Result<(), Error> {
2178        self.set_vp_registers(vtl, [(name, value)])
2179    }
2180
2181    /// Sets a set of VP registers.
2182    pub fn set_vp_registers<I>(&mut self, vtl: GuestVtl, values: I) -> Result<(), Error>
2183    where
2184        I: IntoIterator,
2185        I::Item: Into<HvRegisterAssoc> + Clone,
2186    {
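        // Registers the backing cannot handle directly are batched up and set
        // with a single `set_reg` call; the pending batch is flushed early
        // whenever the backing reports that a register must not be grouped
        // with the ones queued before it.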
2187        let mut assoc = Vec::new();
2188        for HvRegisterAssoc { name, value, .. } in values.into_iter().map(Into::into) {
2189            if !assoc.is_empty() && T::must_flush_regs_on(self, name) {
2190                self.set_reg(vtl, &assoc)?;
2191                assoc.clear();
2192            }
2193            if !T::try_set_reg(self, vtl, name, value)? {
2194                assoc.push(HvRegisterAssoc {
2195                    name,
2196                    pad: Default::default(),
2197                    value,
2198                });
2199            }
2200        }
2201        if !assoc.is_empty() {
2202            self.set_reg(vtl, &assoc)?;
2203        }
2204        Ok(())
2205    }
2206
2207    fn set_vp_registers_hvcall_inner(
2208        &mut self,
2209        vtl: Vtl,
2210        registers: &[HvRegisterAssoc],
2211    ) -> Result<(), HvError> {
2212        let header = hvdef::hypercall::GetSetVpRegisters {
2213            partition_id: HV_PARTITION_ID_SELF,
2214            vp_index: HV_VP_INDEX_SELF,
2215            target_vtl: vtl.into(),
2216            rsvd: [0; 3],
2217        };
2218
2219        tracing::trace!(?registers, "HvCallSetVpRegisters rep");
2220
2221        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2222        //         The hypercall output is validated right after the hypercall is issued.
2223        let status = unsafe {
2224            self.hcl
2225                .mshv_hvcall
2226                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2227                    HypercallCode::HvCallSetVpRegisters,
2228                    &header,
2229                    HvcallRepInput::Elements(registers),
2230                    None,
2231                )
2232                .expect("set_vp_registers hypercall should not fail")
2233        };
2234
2235        // Status must be success
2236        status.result()?;
2237        Ok(())
2238    }
2239
2240    /// Sets the following registers on the current VP and given VTL using a
2241    /// direct hypercall.
2242    ///
2243    /// This should not be used on the fast path. Therefore only a select set of
2244    /// registers are supported, and others will cause a panic.
2245    ///
2246    /// This function can be used with VTL2 as a target.
2247    pub fn set_vp_registers_hvcall<I>(&mut self, vtl: Vtl, values: I) -> Result<(), HvError>
2248    where
2249        I: IntoIterator,
2250        I::Item: Into<HvRegisterAssoc> + Clone,
2251    {
2252        let registers: Vec<HvRegisterAssoc> = values.into_iter().map(Into::into).collect();
2253
2254        assert!(registers.iter().all(
2255            |HvRegisterAssoc {
2256                 name,
2257                 pad: _,
2258                 value: _,
2259             }| matches!(
2260                (*name).into(),
2261                HvX64RegisterName::PendingEvent0
2262                    | HvX64RegisterName::PendingEvent1
2263                    | HvX64RegisterName::Sipp
2264                    | HvX64RegisterName::Sifp
2265                    | HvX64RegisterName::Ghcb
2266                    | HvX64RegisterName::VsmPartitionConfig
2267                    | HvX64RegisterName::VsmVpWaitForTlbLock
2268                    | HvX64RegisterName::VsmVpSecureConfigVtl0
2269                    | HvX64RegisterName::VsmVpSecureConfigVtl1
2270                    | HvX64RegisterName::CrInterceptControl
2271            )
2272        ));
2273        self.set_vp_registers_hvcall_inner(vtl, &registers)
2274    }
2275
2276    /// Sets the VTL that should be returned to when Underhill exits.
2277    pub fn set_exit_vtl(&mut self, vtl: GuestVtl) {
2278        // SAFETY: self.run is mapped, and the target_vtl field can only be
2279        // mutated or accessed by this object and only before the kernel is
2280        // invoked during `run`
2281        unsafe { (*self.run.get()).target_vtl = vtl.into() }
2282    }
2283}
2284
2285impl Hcl {
2286    /// Returns a new HCL instance.
2287    pub fn new(isolation: IsolationType, sidecar: Option<SidecarClient>) -> Result<Hcl, Error> {
2288        static SIGNAL_HANDLER_INIT: Once = Once::new();
2289        // SAFETY: The signal handler does not perform any actions that are forbidden
2290        // for signal handlers to perform, as it does nothing at all.
2291        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
2292            signal_hook::low_level::register(libc::SIGRTMIN(), || {
2293                // Do nothing, the ioctl will now return with EINTR.
2294            })
2295            .unwrap();
2296        });
2297
2298        // Open the mshv device; the VTL and hvcall devices are opened below.
2299        let mshv_fd = Mshv::new()?;
2300
2301        // Validate the hypervisor's advertised isolation type matches the
2302        // requested isolation type. In CVM scenarios, this is not trusted, so
2303        // we still need the isolation type from the caller.
2304        //
2305        // FUTURE: the kernel driver should probably tell us this, especially
2306        // since the kernel ABI is different for different isolation types.
2307        let supported_isolation = if cfg!(guest_arch = "x86_64") {
2308            // xtask-fmt allow-target-arch cpu-intrinsic
2309            #[cfg(target_arch = "x86_64")]
2310            {
2311                let result = safe_intrinsics::cpuid(
2312                    hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION,
2313                    0,
2314                );
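                // The low nibble of EBX from the isolation-configuration CPUID
                // leaf encodes the isolation type advertised by the hypervisor.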
2315                match result.ebx & 0xF {
2316                    0 => IsolationType::None,
2317                    1 => IsolationType::Vbs,
2318                    2 => IsolationType::Snp,
2319                    3 => IsolationType::Tdx,
2320                    ty => panic!("unknown isolation type {ty:#x}"),
2321                }
2322            }
2323            // xtask-fmt allow-target-arch cpu-intrinsic
2324            #[cfg(not(target_arch = "x86_64"))]
2325            {
2326                unreachable!()
2327            }
2328        } else {
2329            IsolationType::None
2330        };
2331
2332        if isolation != supported_isolation {
2333            return Err(Error::MismatchedIsolation {
2334                supported: supported_isolation,
2335                requested: isolation,
2336            });
2337        }
2338
2339        let supports_vtl_ret_action = mshv_fd.check_extension(HCL_CAP_VTL_RETURN_ACTION)?;
2340        let supports_register_page = mshv_fd.check_extension(HCL_CAP_REGISTER_PAGE)?;
2341        let dr6_shared = mshv_fd.check_extension(HCL_CAP_DR6_SHARED)?;
2342        tracing::debug!(
2343            supports_vtl_ret_action,
2344            supports_register_page,
2345            "HCL capabilities",
2346        );
2347
2348        let vtl_fd = mshv_fd.create_vtl()?;
2349
2350        // Open the hypercall pseudo-device
2351        let mshv_hvcall = MshvHvcall::new()?;
2352
2353        // Override certain features for hardware isolated VMs.
2354        // TODO: vtl return actions are inhibited for hardware isolated VMs because they currently
2355        // are a pessimization, since interrupt handling (and synic handling) is all done from
2356        // within VTL2. Future vtl return actions may be different, requiring granular handling.
2357        let supports_vtl_ret_action = supports_vtl_ret_action && !isolation.is_hardware_isolated();
2358        let supports_register_page = supports_register_page && !isolation.is_hardware_isolated();
2359        let snp_register_bitmap = [0u8; 64];
2360
2361        Ok(Hcl {
2362            mshv_hvcall,
2363            mshv_vtl: vtl_fd,
2364            vps: Vec::new(),
2365            supports_vtl_ret_action,
2366            supports_register_page,
2367            dr6_shared,
2368            isolation,
2369            snp_register_bitmap,
2370            sidecar,
2371        })
2372    }
2373
2374    /// Set allowed hypercalls.
2375    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
2376        self.mshv_hvcall.set_allowed_hypercalls(codes)
2377    }
2378
2379    /// Initializes SNP register tweak bitmap
2380    pub fn set_snp_register_bitmap(&mut self, register_bitmap: [u8; 64]) {
2381        self.snp_register_bitmap = register_bitmap;
2382    }
2383
2384    /// Adds `vp_count` VPs.
2385    pub fn add_vps(
2386        &mut self,
2387        vp_count: u32,
2388        private_pool: Option<&Arc<dyn DmaClient>>,
2389    ) -> Result<(), Error> {
2390        self.vps = (0..vp_count)
2391            .map(|vp| {
2392                HclVp::new(
2393                    self,
2394                    vp,
2395                    self.supports_register_page,
2396                    self.isolation,
2397                    private_pool,
2398                )
2399            })
2400            .collect::<Result<_, _>>()?;
2401
2402        Ok(())
2403    }
2404
2405    /// Registers with the hypervisor for an intercept.
2406    pub fn register_intercept(
2407        &self,
2408        intercept_type: HvInterceptType,
2409        access_type_mask: u32,
2410        intercept_parameters: HvInterceptParameters,
2411    ) -> Result<(), HvError> {
2412        let intercept_info = hvdef::hypercall::InstallIntercept {
2413            partition_id: HV_PARTITION_ID_SELF,
2414            access_type_mask,
2415            intercept_type,
2416            intercept_parameters,
2417        };
2418
2419        // SAFETY: calling hypercall with appropriate input and output.
2420        unsafe {
2421            self.mshv_hvcall
2422                .hvcall(
2423                    HypercallCode::HvCallInstallIntercept,
2424                    &intercept_info,
2425                    &mut (),
2426                )
2427                .unwrap()
2428                .result()
2429        }
2430    }
2431
2432    /// Returns the base CPU that manages the given sidecar VP.
2433    pub fn sidecar_base_cpu(&self, vp_index: u32) -> Option<u32> {
2434        Some(self.sidecar.as_ref()?.base_cpu(vp_index))
2435    }
2436
2437    /// Create a VP runner for the given partition.
2438    pub fn runner<'a, T: Backing<'a>>(
2439        &'a self,
2440        vp_index: u32,
2441        use_sidecar: bool,
2442    ) -> Result<ProcessorRunner<'a, T>, NoRunner> {
2443        let vp = &self.vps[vp_index as usize];
2444
2445        let sidecar = if use_sidecar {
2446            Some(
2447                self.sidecar
2448                    .as_ref()
2449                    .ok_or(NoRunner::MissingSidecar)?
2450                    .vp(vp_index),
2451            )
2452        } else {
2453            None
2454        };
2455
2456        let state = T::new(vp, sidecar.as_ref(), self)?;
2457
2458        // Set this thread as the runner.
2459        let VpState::NotRunning =
2460            std::mem::replace(&mut *vp.state.lock(), VpState::Running(Pthread::current()))
2461        else {
2462            panic!("another runner already exists")
2463        };
2464
2465        let actions = if sidecar.is_none() && self.supports_vtl_ret_action {
2466            Some(register_deferred_actions(self))
2467        } else {
2468            None
2469        };
2470
2471        // SAFETY: The run page is guaranteed to be mapped and valid.
2472        // While the exit message might not be filled in yet we're only computing its address.
2473        let intercept_message = unsafe {
2474            &*sidecar.as_ref().map_or(
2475                std::ptr::addr_of!((*vp.run.as_ptr()).exit_message).cast(),
2476                |s| s.intercept_message().cast(),
2477            )
2478        };
2479
2480        Ok(ProcessorRunner {
2481            hcl: self,
2482            vp,
2483            deferred_actions: actions,
2484            run: vp.run.as_ref(),
2485            intercept_message,
2486            state,
2487            sidecar,
2488        })
2489    }
2490
2491    /// Trigger the following interrupt request.
2492    pub fn request_interrupt(
2493        &self,
2494        interrupt_control: hvdef::HvInterruptControl,
2495        destination_address: u64,
2496        requested_vector: u32,
2497        target_vtl: GuestVtl,
2498    ) -> Result<(), Error> {
2499        tracing::trace!(
2500            ?interrupt_control,
2501            destination_address,
2502            requested_vector,
2503            "requesting interrupt"
2504        );
2505
2506        assert!(!self.isolation.is_hardware_isolated());
2507
2508        let request = AssertVirtualInterrupt {
2509            partition_id: HV_PARTITION_ID_SELF,
2510            interrupt_control,
2511            destination_address,
2512            requested_vector,
2513            target_vtl: target_vtl as u8,
2514            rsvd0: 0,
2515            rsvd1: 0,
2516        };
2517
2518        // SAFETY: calling the hypercall with correct input buffer.
2519        let output = unsafe {
2520            self.mshv_hvcall.hvcall(
2521                HypercallCode::HvCallAssertVirtualInterrupt,
2522                &request,
2523                &mut (),
2524            )
2525        }
2526        .unwrap();
2527
2528        output.result().map_err(Error::RequestInterrupt)
2529    }
2530
2531    /// Attempts to signal a given vp/sint/flag combo using HvSignalEventDirect.
2532    ///
2533    /// No result is returned because this request may be deferred until the
2534    /// hypervisor is returning to a lower VTL.
2535    pub fn signal_event_direct(&self, vp: u32, sint: u8, flag: u16) {
2536        tracing::trace!(vp, sint, flag, "signaling event");
2537        push_deferred_action(self, DeferredAction::SignalEvent { vp, sint, flag });
2538    }
2539
2540    fn hvcall_signal_event_direct(&self, vp: u32, sint: u8, flag: u16) -> Result<bool, Error> {
2541        let signal_event_input = hvdef::hypercall::SignalEventDirect {
2542            target_partition: HV_PARTITION_ID_SELF,
2543            target_vp: vp,
2544            target_vtl: Vtl::Vtl0 as u8,
2545            target_sint: sint,
2546            flag_number: flag,
2547        };
2548        let mut signal_event_output = hvdef::hypercall::SignalEventDirectOutput {
2549            newly_signaled: 0,
2550            rsvd: [0; 7],
2551        };
2552
2553        // SAFETY: calling the hypercall with correct input buffer.
2554        let output = unsafe {
2555            self.mshv_hvcall.hvcall(
2556                HypercallCode::HvCallSignalEventDirect,
2557                &signal_event_input,
2558                &mut signal_event_output,
2559            )
2560        }
2561        .unwrap();
2562
2563        output
2564            .result()
2565            .map(|_| signal_event_output.newly_signaled != 0)
2566            .map_err(Error::SignalEvent)
2567    }
2568
2569    /// Attempts to post a given message to a vp/sint combo using HvPostMessageDirect.
2570    pub fn post_message_direct(
2571        &self,
2572        vp: u32,
2573        sint: u8,
2574        message: &HvMessage,
2575    ) -> Result<(), HvError> {
2576        tracing::trace!(vp, sint, "posting message");
2577
2578        let post_message = hvdef::hypercall::PostMessageDirect {
2579            partition_id: HV_PARTITION_ID_SELF,
2580            vp_index: vp,
2581            vtl: Vtl::Vtl0 as u8,
2582            padding0: [0; 3],
2583            sint,
2584            padding1: [0; 3],
2585            message: zerocopy::Unalign::new(*message),
2586            padding2: 0,
2587        };
2588
2589        // SAFETY: calling the hypercall with correct input buffer.
2590        let output = unsafe {
2591            self.mshv_hvcall.hvcall(
2592                HypercallCode::HvCallPostMessageDirect,
2593                &post_message,
2594                &mut (),
2595            )
2596        }
2597        .unwrap();
2598
2599        output.result()
2600    }
2601
2602    /// Sets a file to poll during run. When the file's poll state changes, the
2603    /// run will be automatically cancelled.
2604    pub fn set_poll_file(&self, vp: u32, file: RawFd) -> Result<(), Error> {
2605        // SAFETY: calling the IOCTL as defined. This is safe even if the caller
2606        // does not own `file` since all this does is register the file for
2607        // polling.
2608        unsafe {
2609            hcl_set_poll_file(
2610                self.mshv_vtl.file.as_raw_fd(),
2611                &protocol::hcl_set_poll_file {
2612                    cpu: vp as i32,
2613                    fd: file,
2614                },
2615            )
2616            .map_err(Error::SetPollFile)?;
2617        }
2618        Ok(())
2619    }
2620
2621    /// Gets the current hypervisor reference time.
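    ///
    /// This reads the partition reference counter (`TimeRefCount`), which
    /// counts in 100ns units.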
2622    pub fn reference_time(&self) -> Result<u64, Error> {
2623        Ok(self
2624            .get_vp_register(HvAllArchRegisterName::TimeRefCount, HvInputVtl::CURRENT_VTL)?
2625            .as_u64())
2626    }
2627
2628    /// Get a single VP register for the given VTL via hypercall. Only a select
2629    /// set of registers are supported; others will cause a panic.
2630    #[cfg(guest_arch = "x86_64")]
2631    fn get_vp_register(
2632        &self,
2633        name: impl Into<HvX64RegisterName>,
2634        vtl: HvInputVtl,
2635    ) -> Result<HvRegisterValue, Error> {
2636        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2637    }
2638
2639    /// Get a single VP register for the given VTL via hypercall. Only a select
2640    /// set of registers are supported; others will cause a panic.
2641    #[cfg(guest_arch = "aarch64")]
2642    fn get_vp_register(
2643        &self,
2644        name: impl Into<HvArm64RegisterName>,
2645        vtl: HvInputVtl,
2646    ) -> Result<HvRegisterValue, Error> {
2647        self.mshv_hvcall.get_vp_register_for_vtl(vtl, name.into())
2648    }
2649
2650    /// Set a single VP register via hypercall as VTL2. Only a select set of registers are
2651    /// supported, others will cause a panic.
2652    fn set_vp_register(
2653        &self,
2654        name: HvRegisterName,
2655        value: HvRegisterValue,
2656        vtl: HvInputVtl,
2657    ) -> Result<(), HvError> {
2658        match vtl.target_vtl().unwrap() {
2659            None | Some(Vtl::Vtl2) => {
2660                #[cfg(guest_arch = "x86_64")]
2661                assert!(matches!(
2662                    name.into(),
2663                    HvX64RegisterName::GuestVsmPartitionConfig
2664                        | HvX64RegisterName::VsmPartitionConfig
2665                        | HvX64RegisterName::PmTimerAssist
2666                ));
2667
2668                #[cfg(guest_arch = "aarch64")]
2669                assert!(matches!(
2670                    name.into(),
2671                    HvArm64RegisterName::GuestVsmPartitionConfig
2672                        | HvArm64RegisterName::VsmPartitionConfig
2673                ));
2674            }
2675            Some(Vtl::Vtl1) => {
2676                // TODO: allowed registers for VTL1
2677                todo!();
2678            }
2679            Some(Vtl::Vtl0) => {
2680                // TODO: allowed registers for VTL0
2681                todo!();
2682            }
2683        }
2684
2685        let header = hvdef::hypercall::GetSetVpRegisters {
2686            partition_id: HV_PARTITION_ID_SELF,
2687            vp_index: HV_VP_INDEX_SELF,
2688            target_vtl: HvInputVtl::CURRENT_VTL,
2689            rsvd: [0; 3],
2690        };
2691
2692        let input = HvRegisterAssoc {
2693            name,
2694            pad: Default::default(),
2695            value,
2696        };
2697
2698        tracing::trace!(?name, register = ?value, "HvCallSetVpRegisters");
2699
2700        // SAFETY: The input header and rep slice are the correct types for this hypercall.
2701        //         The hypercall output is validated right after the hypercall is issued.
2702        let output = unsafe {
2703            self.mshv_hvcall
2704                .hvcall_rep::<hvdef::hypercall::GetSetVpRegisters, HvRegisterAssoc, u8>(
2705                    HypercallCode::HvCallSetVpRegisters,
2706                    &header,
2707                    HvcallRepInput::Elements(&[input]),
2708                    None,
2709                )
2710                .expect("set_vp_registers hypercall should not fail")
2711        };
2712
2713        output.result()?;
2714
2715        // hypercall must succeed with 1 rep completed
2716        assert_eq!(output.elements_processed(), 1);
2717        Ok(())
2718    }
2719
2720    /// Translate the following gva to a gpa page.
2721    ///
2722    /// The caller must ensure `control_flags.input_vtl()` is set to a specific
2723    /// VTL.
2724    #[cfg(guest_arch = "aarch64")]
2725    pub fn translate_gva_to_gpa(
2726        &self,
2727        gva: u64,
2728        control_flags: hvdef::hypercall::TranslateGvaControlFlagsArm64,
2729    ) -> Result<Result<TranslateResult, aarch64::TranslateErrorAarch64>, TranslateGvaToGpaError>
2730    {
2731        use hvdef::hypercall;
2732
2733        assert!(!self.isolation.is_hardware_isolated());
2734        assert!(
2735            control_flags.input_vtl().use_target_vtl(),
2736            "did not specify a target VTL"
2737        );
2738
2739        let header = hypercall::TranslateVirtualAddressArm64 {
2740            partition_id: HV_PARTITION_ID_SELF,
2741            vp_index: HV_VP_INDEX_SELF,
2742            reserved: 0,
2743            control_flags,
2744            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
2745        };
2746
2747        let mut output: hypercall::TranslateVirtualAddressExOutputArm64 = FromZeros::new_zeroed();
2748
2749        // SAFETY: The input header and slice are the correct types for this hypercall.
2750        //         The hypercall output is validated right after the hypercall is issued.
2751        let status = unsafe {
2752            self.mshv_hvcall
2753                .hvcall(
2754                    HypercallCode::HvCallTranslateVirtualAddressEx,
2755                    &header,
2756                    &mut output,
2757                )
2758                .expect("translate can never fail")
2759        };
2760
2761        status
2762            .result()
2763            .map_err(|hv_error| TranslateGvaToGpaError::Hypervisor { gva, hv_error })?;
2764
2765        // Note: WHP doesn't currently support TranslateVirtualAddressEx, so overlay_page, cache_type,
2766        // event_info aren't trustworthy values if the results came from WHP.
2767        match output.translation_result.result.result_code() {
2768            c if c == hypercall::TranslateGvaResultCode::SUCCESS.0 => Ok(Ok(TranslateResult {
2769                gpa_page: output.gpa_page,
2770                overlay_page: output.translation_result.result.overlay_page(),
2771            })),
2772            x => Ok(Err(aarch64::TranslateErrorAarch64 { code: x })),
2773        }
2774    }
2775
2776    fn to_hv_gpa_range_array(gpa_memory_ranges: &[MemoryRange]) -> Vec<HvGpaRange> {
2777        const PAGES_PER_ENTRY: u64 = 2048;
2778        const PAGE_SIZE: u64 = HV_PAGE_SIZE;
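        // Each entry emitted below covers at most `PAGES_PER_ENTRY` 4KiB pages
        // (the entry encodes `pages_in_this_range - 1` as its additional-page
        // count). For example, a 10 MiB range is 2560 pages and becomes two
        // entries: one covering 2048 pages and one covering the remaining 512.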
2779
2780        // Estimate the total number of pages across all memory ranges
2781        let estimated_size: usize = gpa_memory_ranges
2782            .iter()
2783            .map(|memory_range| {
2784                let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2785                total_pages.div_ceil(PAGES_PER_ENTRY)
2786            })
2787            .sum::<u64>() as usize;
2788
2789        // Create a vector with the estimated size
2790        let mut hv_gpa_ranges = Vec::with_capacity(estimated_size);
2791
2792        for memory_range in gpa_memory_ranges {
2793            // Calculate the total number of pages in the memory range
2794            let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2795
2796            // Convert start address to page number
2797            let start_page = memory_range.start_4k_gpn();
2798
2799            // Generate the ranges and append them to the vector
2800            hv_gpa_ranges.extend(
2801                (0..total_pages)
2802                    .step_by(PAGES_PER_ENTRY as usize)
2803                    .map(|start| {
2804                        let end = std::cmp::min(total_pages, start + PAGES_PER_ENTRY);
2805                        let pages_in_this_range = end - start;
2806                        let gpa_page_number = start_page + start;
2807
2808                        let extended = HvGpaRangeExtended::new()
2809                            .with_additional_pages(pages_in_this_range - 1)
2810                            .with_large_page(false) // Assuming not a large page
2811                            .with_gpa_page_number(gpa_page_number);
2812
2813                        HvGpaRange(extended.into_bits())
2814                    }),
2815            );
2816        }
2817
2818        hv_gpa_ranges
2819    }
2820
2821    fn pin_unpin_gpa_ranges_internal(
2822        &self,
2823        gpa_ranges: &[HvGpaRange],
2824        action: GpaPinUnpinAction,
2825    ) -> Result<(), PinUnpinError> {
2826        const PIN_REQUEST_HEADER_SIZE: usize =
2827            size_of::<hvdef::hypercall::PinUnpinGpaPageRangesHeader>();
2828        const MAX_INPUT_ELEMENTS: usize =
2829            (HV_PAGE_SIZE as usize - PIN_REQUEST_HEADER_SIZE) / size_of::<u64>();
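        // The hypercall input page holds the fixed header followed by as many
        // 8-byte range entries as fit in the rest of the page, so the ranges
        // are chunked accordingly below. (Assuming a 4 KiB page and an 8-byte
        // header, that works out to 511 entries per call.)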
2830
2831        let header = hvdef::hypercall::PinUnpinGpaPageRangesHeader { reserved: 0 };
2832        let mut ranges_processed = 0;
2833
2834        for chunk in gpa_ranges.chunks(MAX_INPUT_ELEMENTS) {
2835            // SAFETY: This unsafe block is valid because:
2836            // 1. The hypercall code and header match the expected input for the hypercall.
2837            //
2838            // 2. The hypercall result is checked right after the hypercall is issued.
2839            //
2840            let output = unsafe {
2841                self.mshv_hvcall
2842                    .hvcall_rep(
2843                        match action {
2844                            GpaPinUnpinAction::PinGpaRange => HypercallCode::HvCallPinGpaPageRanges,
2845                            GpaPinUnpinAction::UnpinGpaRange => {
2846                                HypercallCode::HvCallUnpinGpaPageRanges
2847                            }
2848                        },
2849                        &header,
2850                        HvcallRepInput::Elements(chunk),
2851                        None::<&mut [u8]>,
2852                    )
2853                    .expect("submitting pin/unpin hypercall should not fail")
2854            };
2855
2856            ranges_processed += output.elements_processed();
2857
2858            output.result().map_err(|e| PinUnpinError {
2859                ranges_processed,
2860                error: e,
2861            })?;
2862        }
2863
2864        // By this point, all of the ranges should have been processed.
2865        if ranges_processed == gpa_ranges.len() {
2866            Ok(())
2867        } else {
2868            Err(PinUnpinError {
2869                ranges_processed,
2870                error: HvError::OperationFailed,
2871            })
2872        }
2873    }
2874
2875    fn perform_pin_unpin_gpa_ranges(
2876        &self,
2877        gpa_ranges: &[MemoryRange],
2878        action: GpaPinUnpinAction,
2879        rollback_action: GpaPinUnpinAction,
2880    ) -> Result<(), HvError> {
2881        let hv_gpa_ranges: Vec<HvGpaRange> = Self::to_hv_gpa_range_array(gpa_ranges);
2882
2883        // Attempt to pin/unpin the ranges
2884        match self.pin_unpin_gpa_ranges_internal(&hv_gpa_ranges, action) {
2885            Ok(_) => Ok(()),
2886            Err(PinUnpinError {
2887                error,
2888                ranges_processed,
2889            }) => {
2890                // Roll back the ranges that were already processed successfully.
2891                let processed_ranges = &hv_gpa_ranges[..ranges_processed];
2892                if let Err(rollback_error) =
2893                    self.pin_unpin_gpa_ranges_internal(processed_ranges, rollback_action)
2894                {
2895                    // Panic if the rollback itself fails.
2896                    panic!(
2897                        "Failed to perform action {:?} on ranges. Error: {:?}. \
2898                        Attempted to roll back {:?} ranges out of {:?}.\nRollback error: {:?}",
2899                        action,
2900                        error,
2901                        ranges_processed,
2902                        gpa_ranges.len(),
2903                        rollback_error
2904                    );
2905                }
2906                // Surface the original error
2907                Err(error)
2908            }
2909        }
2910    }
2911
2912    /// Pins the specified guest physical address ranges in the hypervisor.
2913    /// The memory ranges passed to this function must be VA-backed memory.
2914    /// If a partial failure occurs (i.e., some but not all the ranges were successfully pinned),
2915    /// the function will automatically attempt to unpin any successfully pinned ranges.
2916    /// This "rollback" behavior ensures that no partially pinned state remains, which
2917    /// could otherwise lead to inconsistencies.
2918    ///
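    /// A minimal usage sketch (the `hcl` handle and the range shown are assumptions
    /// made for illustration):
    ///
    /// ```ignore
    /// use memory_range::MemoryRange;
    ///
    /// // Pin a single 2 MiB, VA-backed range starting at 16 MiB.
    /// let ranges = [MemoryRange::new(0x100_0000..0x120_0000)];
    /// hcl.pin_gpa_ranges(&ranges)?;
    /// // ... use the pinned memory ...
    /// hcl.unpin_gpa_ranges(&ranges)?;
    /// ```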
2919    pub fn pin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2920        self.perform_pin_unpin_gpa_ranges(
2921            ranges,
2922            GpaPinUnpinAction::PinGpaRange,
2923            GpaPinUnpinAction::UnpinGpaRange,
2924        )
2925    }
2926
2927    /// Unpins the specified guest physical address ranges in the hypervisor.
2928    /// The memory ranges passed to this function must be VA-backed memory.
2929    /// If a partial failure occurs (i.e., some but not all the ranges were successfully unpinned),
2930    /// the function will automatically attempt to pin any successfully unpinned ranges. This "rollback"
2931    /// behavior ensures that no partially unpinned state remains, which could otherwise lead to inconsistencies.
2932    ///
2933    pub fn unpin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2934        self.perform_pin_unpin_gpa_ranges(
2935            ranges,
2936            GpaPinUnpinAction::UnpinGpaRange,
2937            GpaPinUnpinAction::PinGpaRange,
2938        )
2939    }
2940
2941    /// Read the vsm capabilities register for VTL2.
2942    pub fn get_vsm_capabilities(&self) -> Result<hvdef::HvRegisterVsmCapabilities, Error> {
2943        let caps = hvdef::HvRegisterVsmCapabilities::from(
2944            self.get_vp_register(
2945                HvAllArchRegisterName::VsmCapabilities,
2946                HvInputVtl::CURRENT_VTL,
2947            )?
2948            .as_u64(),
2949        );
2950
2951        let caps = match self.isolation {
2952            IsolationType::None | IsolationType::Vbs => caps,
2953            IsolationType::Snp => hvdef::HvRegisterVsmCapabilities::new()
2954                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2955                .with_intercept_page_available(caps.intercept_page_available()),
2956            IsolationType::Tdx => hvdef::HvRegisterVsmCapabilities::new()
2957                .with_deny_lower_vtl_startup(caps.deny_lower_vtl_startup())
2958                .with_intercept_page_available(caps.intercept_page_available())
2959                .with_dr6_shared(true)
2960                .with_proxy_interrupt_redirect_available(caps.proxy_interrupt_redirect_available()),
2961        };
2962
2963        assert_eq!(caps.dr6_shared(), self.dr6_shared());
2964
2965        Ok(caps)
2966    }
2967
2968    /// Set the [`hvdef::HvRegisterVsmPartitionConfig`] register.
2969    pub fn set_vtl2_vsm_partition_config(
2970        &self,
2971        vsm_config: HvRegisterVsmPartitionConfig,
2972    ) -> Result<(), SetVsmPartitionConfigError> {
2973        self.set_vp_register(
2974            HvAllArchRegisterName::VsmPartitionConfig.into(),
2975            HvRegisterValue::from(u64::from(vsm_config)),
2976            HvInputVtl::CURRENT_VTL,
2977        )
2978        .map_err(|e| SetVsmPartitionConfigError::Hypervisor {
2979            config: vsm_config,
2980            hv_error: e,
2981        })
2982    }
2983
2984    /// Get the [`hvdef::HvRegisterGuestVsmPartitionConfig`] register
2985    pub fn get_guest_vsm_partition_config(
2986        &self,
2987    ) -> Result<hvdef::HvRegisterGuestVsmPartitionConfig, Error> {
2988        Ok(hvdef::HvRegisterGuestVsmPartitionConfig::from(
2989            self.get_vp_register(
2990                HvAllArchRegisterName::GuestVsmPartitionConfig,
2991                HvInputVtl::CURRENT_VTL,
2992            )?
2993            .as_u64(),
2994        ))
2995    }
2996
2997    /// Get the [`hvdef::HvRegisterVsmPartitionStatus`] register
2998    pub fn get_vsm_partition_status(&self) -> Result<hvdef::HvRegisterVsmPartitionStatus, Error> {
2999        Ok(hvdef::HvRegisterVsmPartitionStatus::from(
3000            self.get_vp_register(
3001                HvAllArchRegisterName::VsmPartitionStatus,
3002                HvInputVtl::CURRENT_VTL,
3003            )?
3004            .as_u64(),
3005        ))
3006    }
3007
3008    /// Get the [`hvdef::HvPartitionPrivilege`] info. On x86_64, this uses
3009    /// CPUID. On aarch64, it uses get_vp_register.
3010    pub fn get_privileges_and_features_info(&self) -> Result<hvdef::HvPartitionPrivilege, Error> {
3011        cfg_if! {
3012            if #[cfg(guest_arch = "x86_64")] {
3013                let result = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
3014                let num = result.eax as u64 | ((result.ebx as u64) << 32);
3015                Ok(hvdef::HvPartitionPrivilege::from(num))
3016            } else if #[cfg(guest_arch = "aarch64")] {
3017                Ok(hvdef::HvPartitionPrivilege::from(
3018                    self.get_vp_register(
3019                        HvArm64RegisterName::PrivilegesAndFeaturesInfo,
3020                        HvInputVtl::CURRENT_VTL,
3021                    )?
3022                    .as_u64(),
3023                ))
3024            } else {
3025                compile_error!("unsupported guest_arch configuration");
3026            }
3027        }
3028    }
3029
3030    /// Get the [`hvdef::hypercall::HvGuestOsId`] register for the given VTL.
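    ///
    /// A minimal sketch of checking whether VTL0 has registered a guest OS ID; the
    /// `hcl` handle and the `u64` conversion are assumptions made for illustration
    /// (zero conventionally means no guest OS ID has been registered):
    ///
    /// ```ignore
    /// let os_id = hcl.get_guest_os_id(Vtl::Vtl0)?;
    /// if u64::from(os_id) == 0 {
    ///     // The VTL0 guest has not registered an OS identity yet.
    /// }
    /// ```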
3031    pub fn get_guest_os_id(&self, vtl: Vtl) -> Result<hvdef::hypercall::HvGuestOsId, Error> {
3032        Ok(hvdef::hypercall::HvGuestOsId::from(
3033            self.get_vp_register(HvAllArchRegisterName::GuestOsId, vtl.into())?
3034                .as_u64(),
3035        ))
3036    }
3037
3038    /// Configure guest VSM.
3039    /// The only configuration attribute currently supported is changing the maximum number of
3040    /// guest-visible virtual trust levels for the partition (i.e., whether VTL 1 is enabled).
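    ///
    /// A minimal usage sketch (`hcl` is an assumed handle):
    ///
    /// ```ignore
    /// // Allow the guest to enable and use VTL 1.
    /// hcl.set_guest_vsm_partition_config(true)?;
    /// ```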
3041    pub fn set_guest_vsm_partition_config(
3042        &self,
3043        enable_guest_vsm: bool,
3044    ) -> Result<(), SetGuestVsmConfigError> {
3045        let register_value = hvdef::HvRegisterGuestVsmPartitionConfig::new()
3046            .with_maximum_vtl(if enable_guest_vsm { 1 } else { 0 })
3047            .with_reserved(0);
3048
3049        tracing::trace!(enable_guest_vsm, "set_guest_vsm_partition_config");
3050        if self.isolation.is_hardware_isolated() {
3051            unimplemented!("set_guest_vsm_partition_config");
3052        }
3053
3054        self.set_vp_register(
3055            HvAllArchRegisterName::GuestVsmPartitionConfig.into(),
3056            HvRegisterValue::from(u64::from(register_value)),
3057            HvInputVtl::CURRENT_VTL,
3058        )
3059        .map_err(|e| SetGuestVsmConfigError::Hypervisor {
3060            enable_guest_vsm,
3061            hv_error: e,
3062        })
3063    }
3064
3065    /// Sets the Power Management Timer assist in the hypervisor.
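    ///
    /// A minimal sketch (the I/O port value is an assumption made for illustration):
    ///
    /// ```ignore
    /// // Enable the PM timer assist for a 32-bit wide timer at an assumed port.
    /// hcl.set_pm_timer_assist(Some(0x408))?;
    /// // Disable the assist again.
    /// hcl.set_pm_timer_assist(None)?;
    /// ```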
3066    #[cfg(guest_arch = "x86_64")]
3067    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
3068        tracing::debug!(?port, "set_pm_timer_assist");
3069        if self.isolation.is_hardware_isolated() {
3070            if port.is_some() {
3071                unimplemented!("set_pm_timer_assist");
3072            }
3073        }
3074
3075        let val = HvRegisterValue::from(u64::from(match port {
3076            Some(p) => hvdef::HvPmTimerInfo::new()
3077                .with_port(p)
3078                .with_enabled(true)
3079                .with_width_24(false),
3080            None => 0.into(),
3081        }));
3082
3083        self.set_vp_register(
3084            HvX64RegisterName::PmTimerAssist.into(),
3085            val,
3086            HvInputVtl::CURRENT_VTL,
3087        )
3088    }
3089
3090    /// Sets the Power Management Timer assist in the hypervisor.
3091    #[cfg(guest_arch = "aarch64")]
3092    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
3093        tracing::debug!(?port, "set_pm_timer_assist unimplemented on aarch64");
3094        Err(HvError::UnknownRegisterName)
3095    }
3096
3097    /// Sets the VTL protection mask for the specified memory range.
3098    pub fn modify_vtl_protection_mask(
3099        &self,
3100        range: MemoryRange,
3101        map_flags: HvMapGpaFlags,
3102        target_vtl: HvInputVtl,
3103    ) -> Result<(), ApplyVtlProtectionsError> {
3104        if self.isolation.is_hardware_isolated() {
3105            // TODO SNP TODO TDX - required for vmbus relay monitor page support
3106            todo!();
3107        }
3108
3109        self.mshv_hvcall
3110            .modify_vtl_protection_mask(range, map_flags, target_vtl)
3111    }
3112
3113    /// Checks whether the target VTL has VTL permissions for the given GPA.
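    ///
    /// Returns `None` when the access is allowed, otherwise the intercepting VTL and
    /// the denied flags. A minimal sketch (`hcl`, the GPA, and the flag setters are
    /// assumptions made for illustration):
    ///
    /// ```ignore
    /// let flags = HvMapGpaFlags::new().with_readable(true).with_writable(true);
    /// match hcl.check_vtl_access(0x1000, GuestVtl::Vtl0, flags)? {
    ///     None => { /* the requested access is allowed */ }
    ///     Some(result) => {
    ///         // `result.vtl` is the intercepting VTL; `result.denied_flags` shows
    ///         // which of the requested flags were denied.
    ///     }
    /// }
    /// ```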
3114    pub fn check_vtl_access(
3115        &self,
3116        gpa: u64,
3117        target_vtl: GuestVtl,
3118        flags: HvMapGpaFlags,
3119    ) -> Result<Option<CheckVtlAccessResult>, Error> {
3120        assert!(!self.isolation.is_hardware_isolated());
3121
3122        let header = hvdef::hypercall::CheckSparseGpaPageVtlAccess {
3123            partition_id: HV_PARTITION_ID_SELF,
3124            target_vtl: HvInputVtl::from(target_vtl),
3125            desired_access: u32::from(flags) as u8,
3126            reserved0: 0,
3127            reserved1: 0,
3128        };
3129
3130        let mut output = [hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput::new()];
3131
3132        // SAFETY: The input header and rep slice are the correct types for this hypercall.
3133        //         The hypercall output is validated right after the hypercall is issued.
3134        let status = unsafe {
3135            self.mshv_hvcall.hvcall_rep::<hvdef::hypercall::CheckSparseGpaPageVtlAccess, u64, hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput>(
3136                HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
3137                &header,
3138                HvcallRepInput::Elements(&[gpa >> hvdef::HV_PAGE_SHIFT]),
3139                Some(&mut output),
3140            )
3141            .expect("check_vtl_access hypercall should not fail")
3142        };
3143
3144        status.result().map_err(Error::CheckVtlAccess)?;
3145
3146        let access_result = output[0];
3147
3148        if access_result.result_code() as u32
3149            != hvdef::hypercall::CheckGpaPageVtlAccessResultCode::SUCCESS.0
3150        {
3151            return Ok(Some(CheckVtlAccessResult {
3152                vtl: (access_result.intercepting_vtl() as u8)
3153                    .try_into()
3154                    .expect("checking vtl permissions failure should return valid vtl"),
3155                denied_flags: (access_result.denied_access() as u32).into(),
3156            }));
3157        }
3158
3159        assert_eq!(status.elements_processed(), 1);
3160        Ok(None)
3161    }
3162
3163    /// Enables a VTL for the partition.
3164    pub fn enable_partition_vtl(
3165        &self,
3166        vtl: GuestVtl,
3167        flags: hvdef::hypercall::EnablePartitionVtlFlags,
3168    ) -> Result<(), HvError> {
3169        use hvdef::hypercall;
3170
3171        let header = hypercall::EnablePartitionVtl {
3172            partition_id: HV_PARTITION_ID_SELF,
3173            target_vtl: vtl.into(),
3174            flags,
3175            reserved_z0: 0,
3176            reserved_z1: 0,
3177        };
3178
3179        // SAFETY: The input header and slice are the correct types for this hypercall.
3180        //         The hypercall output is validated right after the hypercall is issued.
3181        let status = unsafe {
3182            self.mshv_hvcall
3183                .hvcall(HypercallCode::HvCallEnablePartitionVtl, &header, &mut ())
3184                .expect("submitting hypercall should not fail")
3185        };
3186
3187        status.result()
3188    }
3189
3190    /// Enables a VTL on a VP.
3191    pub fn enable_vp_vtl(
3192        &self,
3193        vp_index: u32,
3194        vtl: GuestVtl,
3195        hv_vp_context: InitialVpContextX64,
3196    ) -> Result<(), HvError> {
3197        use hvdef::hypercall;
3198
3199        let header = hypercall::EnableVpVtlX64 {
3200            partition_id: HV_PARTITION_ID_SELF,
3201            vp_index,
3202            target_vtl: vtl.into(),
3203            reserved: [0; 3],
3204            vp_vtl_context: hv_vp_context,
3205        };
3206
3207        // SAFETY: The input header and slice are the correct types for this hypercall.
3208        //         The hypercall output is validated right after the hypercall is issued.
3209        let status = unsafe {
3210            self.mshv_hvcall
3211                .hvcall(HypercallCode::HvCallEnableVpVtl, &header, &mut ())
3212                .expect("submitting hypercall should not fail")
3213        };
3214
3215        status.result()
3216    }
3217
3218    /// Gets the PFN for the VTL 1 VMSA
3219    pub fn vtl1_vmsa_pfn(&self, cpu_index: u32) -> u64 {
3220        let mut vp_pfn = cpu_index as u64; // input vp, output pfn
3221
3222        // SAFETY: The ioctl requires no prerequisites other than that the VTL 1
3223        // VMSA is mapped. This ioctl should never fail as long as the VTL 1 VMSA
3224        // was mapped.
3225        unsafe {
3226            hcl_read_guest_vsm_page_pfn(self.mshv_vtl.file.as_raw_fd(), &mut vp_pfn)
3227                .expect("should always succeed");
3228        }
3229
3230        vp_pfn
3231    }
3232
3233    /// Returns the isolation type for the partition.
3234    pub fn isolation(&self) -> IsolationType {
3235        self.isolation
3236    }
3237
3238    /// Reads MSR_IA32_VMX_CR4_FIXED1 in kernel mode.
3239    pub fn read_vmx_cr4_fixed1(&self) -> u64 {
3240        let mut value = 0;
3241
3242        // SAFETY: The ioctl requires no prerequisites other than a location to
3243        // write the read MSR. This ioctl should never fail.
3244        unsafe {
3245            hcl_read_vmx_cr4_fixed1(self.mshv_vtl.file.as_raw_fd(), &mut value)
3246                .expect("should always succeed");
3247        }
3248
3249        value
3250    }
3251
3252    /// Invokes the HvCallRetargetDeviceInterrupt hypercall.
3253    /// `target_processors` must be sorted in ascending order.
3254    pub fn retarget_device_interrupt(
3255        &self,
3256        device_id: u64,
3257        entry: hvdef::hypercall::InterruptEntry,
3258        vector: u32,
3259        multicast: bool,
3260        target_processors: ProcessorSet<'_>,
3261        proxy_redirect: bool,
3262    ) -> Result<(), HvError> {
3263        let header = hvdef::hypercall::RetargetDeviceInterrupt {
3264            partition_id: HV_PARTITION_ID_SELF,
3265            device_id,
3266            entry,
3267            rsvd: 0,
3268            target_header: hvdef::hypercall::InterruptTarget {
3269                vector,
3270                flags: hvdef::hypercall::HvInterruptTargetFlags::default()
3271                    .with_multicast(multicast)
3272                    .with_processor_set(true)
3273                    .with_proxy_redirect(proxy_redirect),
3274                // Always use a generic processor set to simplify construction. This hypercall is
3275                // invoked relatively infrequently, so the overhead should be acceptable.
3276                mask_or_format: hvdef::hypercall::HV_GENERIC_SET_SPARSE_4K,
3277            },
3278        };
3279        let processor_set = Vec::from_iter(target_processors.as_generic_set());
3280
3281        // SAFETY: The input header and slice are the correct types for this hypercall.
3282        //         The hypercall output is validated right after the hypercall is issued.
3283        let status = unsafe {
3284            self.mshv_hvcall
3285                .hvcall_var(
3286                    HypercallCode::HvCallRetargetDeviceInterrupt,
3287                    &header,
3288                    processor_set.as_bytes(),
3289                    &mut (),
3290                )
3291                .expect("submitting hypercall should not fail")
3292        };
3293
3294        status.result()
3295    }
3296
3297    /// Gets the permissions for a VTL.
3298    /// Currently unused, but available for debugging purposes.
3299    #[cfg(debug_assertions)]
3300    pub fn rmp_query(&self, gpa: u64, vtl: GuestVtl) -> x86defs::snp::SevRmpAdjust {
3301        use x86defs::snp::SevRmpAdjust;
3302
3303        let page_count = 1u64;
3304        let flags = [u64::from(SevRmpAdjust::new().with_target_vmpl(match vtl {
3305            GuestVtl::Vtl0 => 2,
3306            GuestVtl::Vtl1 => 1,
3307        }))];
3308        let page_size = [0u64];
3309        let pages_processed = 0;
3310
3311        debug_assert!(flags.len() == page_count as usize);
3312        debug_assert!(page_size.len() == page_count as usize);
3313
3314        let query = mshv_rmpquery {
3315            start_pfn: gpa / HV_PAGE_SIZE,
3316            page_count,
3317            terminate_on_failure: 0,
3318            ram: 0,
3319            padding: Default::default(),
3320            flags: flags.as_ptr().cast_mut(),
3321            page_size: page_size.as_ptr().cast_mut(),
3322            pages_processed: core::ptr::from_ref(&pages_processed).cast_mut(),
3323        };
3324
3325        // SAFETY: the input query is the correct type for this ioctl
3326        unsafe {
3327            hcl_rmpquery_pages(self.mshv_vtl.file.as_raw_fd(), &query)
3328                .expect("should always succeed");
3329        }
3330        debug_assert!(pages_processed <= page_count);
3331
3332        SevRmpAdjust::from(flags[0])
3333    }
3334
3335    /// Issues an INVLPGB instruction.
3336    pub fn invlpgb(&self, rax: u64, edx: u32, ecx: u32) {
3337        let data = mshv_invlpgb {
3338            rax,
3339            edx,
3340            ecx,
3341            _pad0: 0,
3342            _pad1: 0,
3343        };
3344        // SAFETY: ioctl has no prerequisites.
3345        unsafe {
3346            hcl_invlpgb(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3347        }
3348    }
3349
3350    /// Issues a TLBSYNC instruction.
3351    pub fn tlbsync(&self) {
3352        // SAFETY: ioctl has no prerequisites.
3353        unsafe {
3354            hcl_tlbsync(self.mshv_vtl.file.as_raw_fd()).expect("should always succeed");
3355        }
3356    }
3357
3358    /// Causes the specified CPUs to be woken out of a lower VTL.
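    ///
    /// A minimal sketch (`hcl` is an assumed handle): kick VPs 0 and 1, cancelling any
    /// in-progress run without waiting for the other CPUs to acknowledge the kick.
    ///
    /// ```ignore
    /// hcl.kick_cpus([0u32, 1], true, false);
    /// ```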
3359    pub fn kick_cpus(
3360        &self,
3361        cpus: impl IntoIterator<Item = u32>,
3362        cancel_run: bool,
3363        wait_for_other_cpus: bool,
3364    ) {
3365        let mut cpu_bitmap: BitVec<u8> = BitVec::from_vec(vec![0; self.vps.len().div_ceil(8)]);
3366        for cpu in cpus {
3367            cpu_bitmap.set(cpu as usize, true);
3368        }
3369
3370        let data = protocol::hcl_kick_cpus {
3371            len: cpu_bitmap.len() as u64,
3372            cpu_mask: cpu_bitmap.as_bitptr().pointer(),
3373            flags: protocol::hcl_kick_cpus_flags::new()
3374                .with_cancel_run(cancel_run)
3375                .with_wait_for_other_cpus(wait_for_other_cpus),
3376        };
3377
3378        // SAFETY: ioctl has no prerequisites.
3379        unsafe {
3380            hcl_kickcpus(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
3381        }
3382    }
3383
3384    /// Maps or unmaps a guest device interrupt vector in the VTL2 kernel.
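    ///
    /// A minimal sketch (the vector and APIC ID values are assumptions made for
    /// illustration); the returned vector is whatever the driver reports back once
    /// the mapping is established:
    ///
    /// ```ignore
    /// // Map guest vector 0x50 targeting APIC ID 2 into the VTL2 kernel.
    /// let mapped_vector = hcl.map_redirected_device_interrupt(0x50, 2, true)?;
    /// // ... later, tear the mapping down again.
    /// hcl.map_redirected_device_interrupt(0x50, 2, false)?;
    /// ```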
3385    pub fn map_redirected_device_interrupt(
3386        &self,
3387        vector: u32,
3388        apic_id: u32,
3389        create_mapping: bool,
3390    ) -> Result<u32, Error> {
3391        let mut param = mshv_map_device_int {
3392            vector,
3393            apic_id,
3394            create_mapping: create_mapping.into(),
3395            padding: [0; 7],
3396        };
3397
3398        // SAFETY: following the IOCTL definition.
3399        unsafe {
3400            hcl_map_redirected_device_interrupt(self.mshv_vtl.file.as_raw_fd(), &mut param)
3401                .map_err(Error::MapRedirectedDeviceInterrupt)?;
3402        }
3403
3404        Ok(param.vector)
3405    }
3406}