hcl/
ioctl.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Interface to `mshv_vtl` driver.
5
6mod deferred;
7pub mod register;
8
9pub mod aarch64;
10pub mod snp;
11pub mod tdx;
12pub mod x64;
13
14use self::deferred::DeferredActionSlots;
15use self::ioctls::*;
16use crate::GuestVtl;
17use crate::ioctl::deferred::DeferredAction;
18use crate::mapped_page::MappedPage;
19use crate::protocol;
20use crate::protocol::EnterModes;
21use crate::protocol::HCL_REG_PAGE_OFFSET;
22use crate::protocol::HCL_VMSA_GUEST_VSM_PAGE_OFFSET;
23use crate::protocol::HCL_VMSA_PAGE_OFFSET;
24use crate::protocol::MSHV_APIC_PAGE_OFFSET;
25use crate::protocol::hcl_intr_offload_flags;
26use crate::protocol::hcl_run;
27use bitvec::vec::BitVec;
28use cvm_tracing::CVM_ALLOWED;
29use deferred::RegisteredDeferredActions;
30use deferred::push_deferred_action;
31use deferred::register_deferred_actions;
32use hv1_structs::ProcessorSet;
33use hv1_structs::VtlArray;
34use hvdef::HV_PAGE_SIZE;
35use hvdef::HV_PARTITION_ID_SELF;
36use hvdef::HvAarch64RegisterPage;
37use hvdef::HvError;
38use hvdef::HvMapGpaFlags;
39use hvdef::HvMessage;
40use hvdef::HvRegisterVsmPartitionConfig;
41use hvdef::HvStatus;
42use hvdef::HvX64RegisterPage;
43use hvdef::HypercallCode;
44use hvdef::Vtl;
45use hvdef::hypercall::AssertVirtualInterrupt;
46use hvdef::hypercall::HostVisibilityType;
47use hvdef::hypercall::HvGpaRange;
48use hvdef::hypercall::HvGpaRangeExtended;
49use hvdef::hypercall::HvInputVtl;
50use hvdef::hypercall::HvInterceptParameters;
51use hvdef::hypercall::HvInterceptType;
52use hvdef::hypercall::HypercallOutput;
53use hvdef::hypercall::InitialVpContextX64;
54use hvdef::hypercall::ModifyHostVisibility;
55use memory_range::MemoryRange;
56use pal::unix::pthread::*;
57use parking_lot::Mutex;
58use private::BackingPrivate;
59use sidecar_client::SidecarClient;
60use sidecar_client::SidecarRun;
61use sidecar_client::SidecarVp;
62use std::cell::UnsafeCell;
63use std::fmt::Debug;
64use std::fs::File;
65use std::io;
66use std::os::unix::prelude::*;
67use std::sync::Arc;
68use std::sync::Once;
69use std::sync::atomic::AtomicU8;
70use std::sync::atomic::AtomicU32;
71use std::sync::atomic::Ordering;
72use thiserror::Error;
73use user_driver::DmaClient;
74use user_driver::memory::MemoryBlock;
75use x86defs::snp::SevVmsa;
76use x86defs::tdx::TdCallResultCode;
77use x86defs::vmx::ApicPage;
78use zerocopy::FromBytes;
79use zerocopy::FromZeros;
80use zerocopy::Immutable;
81use zerocopy::IntoBytes;
82use zerocopy::KnownLayout;
83
// TODO: Chunk this up into smaller per-interface errors.
/// Error returned by HCL operations.
//
// Variant docs are intentionally suppressed below; the `#[error(...)]`
// strings serve as the user-facing description of each failure.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
    #[error("failed to open mshv device")]
    OpenMshv(#[source] io::Error),
    #[error("failed to open hvcall device")]
    OpenHvcall(#[source] io::Error),
    #[error("failed to open lower VTL memory device")]
    OpenGpa(#[source] io::Error),
    #[error("ReturnToLowerVtl")]
    ReturnToLowerVtl(#[source] nix::Error),
    #[error("AddVtl0Memory")]
    AddVtl0Memory(#[source] nix::Error),
    #[error("hcl_request_interrupt")]
    RequestInterrupt(#[source] HvError),
    #[error("failed to signal event")]
    SignalEvent(#[source] HvError),
    // The optional VTL is included in the message only when it is known.
    #[error("failed to mmap the vp context {:?}", .1.map(|vtl| format!("for VTL {:?}", vtl)).unwrap_or("".to_string()))]
    MmapVp(#[source] io::Error, Option<Vtl>),
    #[error("failed to set the poll file")]
    SetPollFile(#[source] nix::Error),
    #[error("failed to check hcl capabilities")]
    CheckExtensions(#[source] nix::Error),
    #[error("failed to mmap the register page")]
    MmapRegPage(#[source] io::Error),
    #[error("failed to create vtl")]
    CreateVTL(#[source] nix::Error),
    #[error("gpa failed vtl access check")]
    CheckVtlAccess(#[source] HvError),
    #[error("sidecar error")]
    Sidecar(#[source] sidecar_client::SidecarError),
    #[error(
        "mismatch between requested isolation type {requested:?} and supported isolation type {supported:?}"
    )]
    MismatchedIsolation {
        supported: IsolationType,
        requested: IsolationType,
    },
    #[error("private page pool allocator missing, required for requested isolation type")]
    MissingPrivateMemory,
    #[error("failed to allocate pages for vp")]
    AllocVp(#[source] anyhow::Error),
    #[error("failed to map or unmap redirected device interrupt")]
    MapRedirectedDeviceInterrupt(#[source] nix::Error),
    #[error("failed to restore partition time")]
    RestorePartitionTime(#[source] nix::Error),
}
133
/// Error for IOCTL errors specifically.
///
/// Wraps the raw errno-based error reported by `nix` when an `mshv` ioctl
/// fails, so callers get a stable top-level message with the errno as source.
#[derive(Debug, Error)]
#[error("hcl request failed")]
pub struct IoctlError(#[source] pub(crate) nix::Error);
138
/// Error returned when issuing hypercalls.
//
// `Hypervisor` means the ioctl succeeded but the hypervisor returned a
// failing status; `Ioctl` means the kernel submission itself failed.
#[derive(Debug, Error)]
#[expect(missing_docs)]
pub enum HypercallError {
    #[error("hypercall failed with {0:?}")]
    Hypervisor(HvError),
    #[error("ioctl failed")]
    Ioctl(#[source] IoctlError),
}
148
149impl HypercallError {
150    pub(crate) fn check(r: Result<i32, nix::Error>) -> Result<(), Self> {
151        match r {
152            Ok(n) => HvStatus(n.try_into().expect("hypervisor result out of range"))
153                .result()
154                .map_err(Self::Hypervisor),
155            Err(err) => Err(Self::Ioctl(IoctlError(err))),
156        }
157    }
158}
159
/// Errors when issuing hypercalls via the kernel direct interface.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum HvcallError {
    #[error(
        "kernel rejected the hypercall, most likely due to the hypercall code not being allowed via set_allowed_hypercalls"
    )]
    HypercallIoctlFailed(#[source] nix::Error),
    // The direct interface passes input and output through single pages, so
    // either exceeding HV_PAGE_SIZE is a caller error.
    #[error("input parameters are larger than a page")]
    InputParametersTooLarge,
    #[error("output parameters are larger than a page")]
    OutputParametersTooLarge,
    #[error("output and input list lengths do not match")]
    InputOutputRepListMismatch,
}
175
/// Error applying VTL protections.
//
// One variant per isolation mechanism: hypervisor-mediated (VBS), SNP
// (rmpadjust), and TDX (tdcall).
// TODO: move to `underhill_mem`.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum ApplyVtlProtectionsError {
    #[error("hypervisor failed with {output:?} when protecting pages {range} for vtl {vtl:?}")]
    Hypervisor {
        range: MemoryRange,
        output: HypercallOutput,
        #[source]
        hv_error: HvError,
        vtl: HvInputVtl,
    },
    #[error("snp failure to protect pages {range} with {permissions:x?} for vtl {vtl:?}")]
    Snp {
        #[source]
        failed_operation: snp::SnpPageError,
        range: MemoryRange,
        permissions: x86defs::snp::SevRmpAdjust,
        vtl: HvInputVtl,
    },
    #[error(
        "tdcall failed with {error:?} when protecting pages {range} with permissions {permissions:x?} for vtl {vtl:?}"
    )]
    Tdx {
        error: TdCallResultCode,
        range: MemoryRange,
        permissions: x86defs::tdx::TdgMemPageGpaAttr,
        vtl: HvInputVtl,
    },
    #[error("no valid protections for vtl {0:?}")]
    InvalidVtl(Vtl),
}
209
/// Error setting guest VSM configuration.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum SetGuestVsmConfigError {
    // Captures the requested enable state so the message shows what was
    // being configured when the hypervisor rejected it.
    #[error("hypervisor failed to configure guest vsm to {enable_guest_vsm}")]
    Hypervisor {
        enable_guest_vsm: bool,
        #[source]
        hv_error: HvError,
    },
}
221
/// Error getting the VP index from an APIC ID.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum GetVpIndexFromApicIdError {
    #[error("hypervisor failed when querying vp index for {apic_id}")]
    Hypervisor {
        #[source]
        hv_error: HvError,
        apic_id: u32,
    },
}
233
/// Error setting VSM partition configuration.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum SetVsmPartitionConfigError {
    // Includes the full register value that was being written so failures
    // are diagnosable from the message alone.
    #[error("hypervisor failed when configuring vsm partition config {config:?}")]
    Hypervisor {
        config: HvRegisterVsmPartitionConfig,
        #[source]
        hv_error: HvError,
    },
}
245
/// Error translating a GVA to a GPA.
//
// Translation can be performed either by the hypervisor directly or by the
// sidecar kernel on behalf of a remote VP; each path has its own variant.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum TranslateGvaToGpaError {
    #[error("hypervisor failed when translating gva {gva:#x}")]
    Hypervisor {
        gva: u64,
        #[source]
        hv_error: HvError,
    },
    #[error("sidecar kernel failed when translating gva {gva:#x}")]
    Sidecar {
        gva: u64,
        #[source]
        error: sidecar_client::SidecarError,
    },
}
263
/// Result from [`Hcl::check_vtl_access`] if vtl permissions were violated.
#[derive(Debug)]
pub struct CheckVtlAccessResult {
    /// The intercepting VTL.
    pub vtl: Vtl,
    /// The flags that were denied.
    pub denied_flags: HvMapGpaFlags,
}
272
/// Error accepting pages.
//
// Mirrors `ApplyVtlProtectionsError`: one variant per acceptance mechanism
// (hypervisor, SNP pvalidate, TDX accept).
// TODO: move to `underhill_mem`.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum AcceptPagesError {
    #[error("hypervisor failed to accept pages {range} with {output:?}")]
    Hypervisor {
        range: MemoryRange,
        output: HypercallOutput,
        #[source]
        hv_error: HvError,
    },
    #[error("snp failure to protect pages {range}")]
    Snp {
        #[source]
        failed_operation: snp::SnpPageError,
        range: MemoryRange,
    },
    #[error("tdcall failure when accepting pages {range}")]
    Tdx {
        #[source]
        error: tdcall::AcceptPagesError,
        range: MemoryRange,
    },
}
298
// Action translation (to HVCALL) for pin/unpin GPA range.
#[derive(Debug, Copy, Clone)]
enum GpaPinUnpinAction {
    // Pin the GPA range so it cannot be moved/paged by the host.
    PinGpaRange,
    // Release a previously pinned GPA range.
    UnpinGpaRange,
}
305
/// Error pinning or unpinning a GPA range.
///
/// Reports partial progress: `ranges_processed` operations completed before
/// the hypervisor returned `error`.
#[derive(Error, Debug)]
#[error("partial success: {ranges_processed} operations succeeded, but encountered an error")]
struct PinUnpinError {
    ranges_processed: usize,
    #[source]
    error: HvError,
}
314
/// Result of the translate gva hypercall from [`Hcl`].
pub struct TranslateResult {
    /// The GPA that the GVA translated to.
    // NOTE(review): the name suggests this is a page number rather than a
    // byte address — confirm against the hypercall output layout.
    pub gpa_page: u64,
    /// Whether the page was an overlay page.
    pub overlay_page: bool, // Note: hardcoded to false on WHP
}
322
/// Possible types for rep hypercalls.
enum HvcallRepInput<'a, T> {
    /// The actual elements to rep over.
    Elements(&'a [T]),
    /// The elements for the rep are implied and only a count is needed
    /// (e.g. page acceptance, where the header carries the base page).
    Count(u16),
}
330
pub(crate) mod ioctls {
    #![allow(non_camel_case_types)]

    use crate::protocol;
    use hvdef::hypercall::HvRegisterAssoc;
    use nix::ioctl_none;
    use nix::ioctl_read;
    use nix::ioctl_readwrite;
    use nix::ioctl_write_ptr;

    // The unsafe interface to the `mshv` kernel module comprises
    // the following IOCTLs.
    //
    // These numbers must stay in sync with the kernel driver's definitions;
    // they are the `nr` component of the ioctl request with type `MSHV_IOCTL`.
    const MSHV_IOCTL: u8 = 0xb8;
    const MSHV_VTL_RETURN_TO_LOWER_VTL: u16 = 0x27;
    const MSHV_SET_VP_REGISTERS: u16 = 0x6;
    const MSHV_GET_VP_REGISTERS: u16 = 0x5;
    const MSHV_RESTORE_PARTITION_TIME: u16 = 0x13;
    const MSHV_HVCALL_SETUP: u16 = 0x1E;
    const MSHV_HVCALL: u16 = 0x1F;
    const MSHV_VTL_ADD_VTL0_MEMORY: u16 = 0x21;
    const MSHV_VTL_SET_POLL_FILE: u16 = 0x25;
    const MSHV_CREATE_VTL: u16 = 0x1D;
    const MSHV_CHECK_EXTENSION: u16 = 0x00;
    const MSHV_VTL_PVALIDATE: u16 = 0x28;
    const MSHV_VTL_RMPADJUST: u16 = 0x29;
    const MSHV_VTL_TDCALL: u16 = 0x32;
    const MSHV_VTL_READ_VMX_CR4_FIXED1: u16 = 0x33;
    const MSHV_VTL_GUEST_VSM_VMSA_PFN: u16 = 0x34;
    const MSHV_VTL_RMPQUERY: u16 = 0x35;
    const MSHV_INVLPGB: u16 = 0x36;
    const MSHV_TLBSYNC: u16 = 0x37;
    const MSHV_KICKCPUS: u16 = 0x38;
    const MSHV_MAP_REDIRECTED_DEVICE_INTERRUPT: u16 = 0x39;

    // Argument for MSHV_SET_VP_REGISTERS / MSHV_GET_VP_REGISTERS. The kernel
    // reads or writes `count` entries through `regs`; the pointer must stay
    // valid for the duration of the ioctl.
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct mshv_vp_registers {
        pub count: ::std::os::raw::c_int,
        pub regs: *mut HvRegisterAssoc,
    }

    // Argument for MSHV_VTL_PVALIDATE (SNP page validation).
    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_pvalidate {
        /// Execute the pvalidate instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub validate: ::std::os::raw::c_uchar,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 1],
    }

    // Argument for MSHV_VTL_RMPADJUST (SNP RMP permission adjustment).
    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_rmpadjust {
        /// Execute the rmpadjust instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub value: ::std::os::raw::c_ulonglong,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 6],
    }

    // Argument for MSHV_VTL_RMPQUERY. The three output pointers must each
    // reference buffers valid for the duration of the ioctl.
    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_rmpquery {
        /// Execute the rmpquery instruction on the set of memory pages specified
        pub start_pfn: ::std::os::raw::c_ulonglong,
        pub page_count: ::std::os::raw::c_ulonglong,
        pub terminate_on_failure: ::std::os::raw::c_uchar,
        /// Set to 1 if the page is RAM (from the kernel's perspective), 0 if
        /// it's device memory.
        pub ram: u8,
        pub padding: [::std::os::raw::c_uchar; 6],
        /// Output array for the flags, must have at least `page_count` entries.
        pub flags: *mut ::std::os::raw::c_ulonglong,
        /// Output array for the page sizes, must have at least `page_count` entries.
        pub page_size: *mut ::std::os::raw::c_ulonglong,
        /// Output for the amount of pages processed, a scalar.
        pub pages_processed: *mut ::std::os::raw::c_ulonglong,
    }

    // Argument for MSHV_VTL_TDCALL; register file for the tdcall leaf.
    #[repr(C, packed)]
    #[derive(Copy, Clone)]
    pub struct mshv_tdcall {
        pub rax: u64, // Call code and returned status
        pub rcx: u64,
        pub rdx: u64,
        pub r8: u64,
        pub r9: u64,
        pub r10_out: u64, // only supported as output
        pub r11_out: u64, // only supported as output
    }

    // Argument for MSHV_MAP_REDIRECTED_DEVICE_INTERRUPT; `create_mapping`
    // selects map vs. unmap.
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct mshv_map_device_int {
        pub vector: u32,
        pub apic_id: u32,
        pub create_mapping: u8,
        pub padding: [u8; 7],
    }

    // Argument for MSHV_RESTORE_PARTITION_TIME.
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct mshv_restore_partition_time {
        pub tsc_sequence: u32,
        pub reserved: u32,
        pub reference_time_in_100_ns: u64,
        pub tsc: u64,
    }

    ioctl_none!(
        /// Relinquish the processor to VTL0.
        hcl_return_to_lower_vtl,
        MSHV_IOCTL,
        MSHV_VTL_RETURN_TO_LOWER_VTL
    );

    ioctl_write_ptr!(
        /// Set a VTL0 register for the current processor of the current
        /// partition.
        /// It is not allowed to set registers for other processors or
        /// other partitions for the security and coherency reasons.
        hcl_set_vp_register,
        MSHV_IOCTL,
        MSHV_SET_VP_REGISTERS,
        mshv_vp_registers
    );

    ioctl_readwrite!(
        /// Get a VTL0 register for the current processor of the current
        /// partition.
        /// It is not allowed to get registers of other processors or
        /// other partitions for the security and coherency reasons.
        hcl_get_vp_register,
        MSHV_IOCTL,
        MSHV_GET_VP_REGISTERS,
        mshv_vp_registers
    );

    ioctl_write_ptr!(
        /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support
        /// DMA from the guest.
        hcl_add_vtl0_memory,
        MSHV_IOCTL,
        MSHV_VTL_ADD_VTL0_MEMORY,
        protocol::hcl_pfn_range_t
    );

    ioctl_write_ptr!(
        /// Sets the file to be polled while running a VP in VTL0. If the file
        /// becomes readable, then the VP run will be cancelled.
        hcl_set_poll_file,
        MSHV_IOCTL,
        MSHV_VTL_SET_POLL_FILE,
        protocol::hcl_set_poll_file
    );

    ioctl_write_ptr!(
        /// Sets up the hypercall allow map. Allowed once
        /// per fd.
        hcl_hvcall_setup,
        MSHV_IOCTL,
        MSHV_HVCALL_SETUP,
        protocol::hcl_hvcall_setup
    );

    ioctl_readwrite!(
        /// Performs a hypercall from the user mode.
        hcl_hvcall,
        MSHV_IOCTL,
        MSHV_HVCALL,
        protocol::hcl_hvcall
    );

    ioctl_write_ptr!(
        /// Executes the pvalidate instruction on a page range.
        hcl_pvalidate_pages,
        MSHV_IOCTL,
        MSHV_VTL_PVALIDATE,
        mshv_pvalidate
    );

    ioctl_write_ptr!(
        /// Executes the rmpadjust instruction on a page range.
        hcl_rmpadjust_pages,
        MSHV_IOCTL,
        MSHV_VTL_RMPADJUST,
        mshv_rmpadjust
    );

    ioctl_write_ptr!(
        /// Executes the rmpquery instruction on a page range.
        hcl_rmpquery_pages,
        MSHV_IOCTL,
        MSHV_VTL_RMPQUERY,
        mshv_rmpquery
    );

    ioctl_readwrite!(
        /// Executes a tdcall.
        hcl_tdcall,
        MSHV_IOCTL,
        MSHV_VTL_TDCALL,
        mshv_tdcall
    );

    ioctl_read!(
        /// Reads the value of the IA32_VMX_CR4_FIXED1 MSR as seen by the
        /// kernel.
        hcl_read_vmx_cr4_fixed1,
        MSHV_IOCTL,
        MSHV_VTL_READ_VMX_CR4_FIXED1,
        u64
    );

    ioctl_readwrite!(
        /// Returns the PFN of the guest-VSM VMSA page for the VP.
        hcl_read_guest_vsm_page_pfn,
        MSHV_IOCTL,
        MSHV_VTL_GUEST_VSM_VMSA_PFN,
        u64
    );

    // Capability values accepted by `hcl_check_extension`.
    pub const HCL_CAP_REGISTER_PAGE: u32 = 1;
    pub const HCL_CAP_VTL_RETURN_ACTION: u32 = 2;
    pub const HCL_CAP_DR6_SHARED: u32 = 3;
    pub const HCL_CAP_LOWER_VTL_TIMER_VIRT: u32 = 4;

    ioctl_write_ptr!(
        /// Check for the presence of an extension capability.
        hcl_check_extension,
        MSHV_IOCTL,
        MSHV_CHECK_EXTENSION,
        u32
    );

    ioctl_read!(mshv_create_vtl, MSHV_IOCTL, MSHV_CREATE_VTL, u8);

    // Argument for MSHV_INVLPGB; register file for the INVLPGB instruction.
    #[repr(C)]
    pub struct mshv_invlpgb {
        pub rax: u64,
        pub _pad0: u32,
        pub edx: u32,
        pub _pad1: u32,
        pub ecx: u32,
    }

    ioctl_write_ptr!(
        /// Issue an INVLPGB instruction.
        hcl_invlpgb,
        MSHV_IOCTL,
        MSHV_INVLPGB,
        mshv_invlpgb
    );

    ioctl_none!(
        /// Issue a TLBSYNC instruction.
        hcl_tlbsync,
        MSHV_IOCTL,
        MSHV_TLBSYNC
    );

    ioctl_write_ptr!(
        /// Kick CPUs.
        hcl_kickcpus,
        MSHV_IOCTL,
        MSHV_KICKCPUS,
        protocol::hcl_kick_cpus
    );

    ioctl_readwrite!(
        /// Map or unmap VTL0 device interrupt in VTL2.
        hcl_map_redirected_device_interrupt,
        MSHV_IOCTL,
        MSHV_MAP_REDIRECTED_DEVICE_INTERRUPT,
        mshv_map_device_int
    );

    ioctl_write_ptr!(
        /// Restore partition time.
        hcl_restore_partition_time,
        MSHV_IOCTL,
        MSHV_RESTORE_PARTITION_TIME,
        mshv_restore_partition_time
    );
}
622
/// The `/dev/mshv_vtl_low` device for accessing VTL0 memory.
pub struct MshvVtlLow {
    // Open handle to `/dev/mshv_vtl_low`; mmapped by callers via `get()`.
    file: File,
}
627
628impl MshvVtlLow {
629    /// Opens the device.
630    pub fn new() -> Result<Self, Error> {
631        let file = fs_err::OpenOptions::new()
632            .read(true)
633            .write(true)
634            .open("/dev/mshv_vtl_low")
635            .map_err(Error::OpenGpa)?;
636
637        Ok(Self { file: file.into() })
638    }
639
640    /// Gets the device file.
641    pub fn get(&self) -> &File {
642        &self.file
643    }
644
645    /// The flag to set in the file offset to map guest memory as shared instead
646    /// of private.
647    pub const SHARED_MEMORY_FLAG: u64 = 1 << 63;
648}
649
/// An open `/dev/mshv` device file.
pub struct Mshv {
    // Handle used for capability checks and to create per-VTL fds.
    file: File,
}
654
655impl Mshv {
656    /// Opens the mshv device.
657    pub fn new() -> Result<Self, Error> {
658        let file = fs_err::OpenOptions::new()
659            .read(true)
660            .write(true)
661            .open("/dev/mshv")
662            .map_err(Error::OpenMshv)?;
663
664        Ok(Self { file: file.into() })
665    }
666
667    fn check_extension(&self, cap: u32) -> Result<bool, Error> {
668        // SAFETY: calling IOCTL as documented, with no special requirements.
669        let supported = unsafe {
670            hcl_check_extension(self.file.as_raw_fd(), &cap).map_err(Error::CheckExtensions)?
671        };
672        Ok(supported != 0)
673    }
674
675    /// Opens an mshv_vtl device file.
676    pub fn create_vtl(&self) -> Result<MshvVtl, Error> {
677        let cap = &mut 0_u8;
678        // SAFETY: calling IOCTL as documented, with no special requirements.
679        let supported =
680            unsafe { mshv_create_vtl(self.file.as_raw_fd(), cap).map_err(Error::CreateVTL)? };
681        // SAFETY: calling IOCTL as documented, with no special requirements.
682        let vtl_file = unsafe { File::from_raw_fd(supported) };
683        Ok(MshvVtl { file: vtl_file })
684    }
685}
686
/// An open mshv_vtl device file.
#[derive(Debug)]
pub struct MshvVtl {
    // Per-VTL handle created via `Mshv::create_vtl`.
    file: File,
}
692
693impl MshvVtl {
694    /// Adds the VTL0 memory as a ZONE_DEVICE memory (I/O) to support DMA from the guest.
695    pub fn add_vtl0_memory(&self, mem_range: MemoryRange, shared: bool) -> Result<(), Error> {
696        let flags = if shared {
697            MshvVtlLow::SHARED_MEMORY_FLAG / HV_PAGE_SIZE
698        } else {
699            0
700        };
701        let ram_disposition = protocol::hcl_pfn_range_t {
702            start_pfn: mem_range.start_4k_gpn() | flags,
703            last_pfn: mem_range.end_4k_gpn(),
704        };
705
706        // SAFETY: calling IOCTL as documented, with no special requirements.
707        unsafe {
708            hcl_add_vtl0_memory(self.file.as_raw_fd(), &ram_disposition)
709                .map_err(Error::AddVtl0Memory)?;
710        }
711
712        Ok(())
713    }
714}
715
/// The `/dev/mshv_hvcall` device for issuing hypercalls directly to the
/// hypervisor.
//
// Newtype over the open device file; hypercalls must first be allowed via
// `set_allowed_hypercalls`.
#[derive(Debug)]
pub struct MshvHvcall(File);
720
721impl MshvHvcall {
722    /// Opens the device.
723    pub fn new() -> Result<Self, Error> {
724        let file = fs_err::OpenOptions::new()
725            .read(true)
726            .write(true)
727            .open("/dev/mshv_hvcall")
728            .map_err(Error::OpenHvcall)?;
729
730        Ok(Self(file.into()))
731    }
732
733    /// Set allowed hypercalls.
734    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
735        type ItemType = u64;
736        let item_size_bytes = size_of::<ItemType>();
737        let item_size_bits = item_size_bytes * 8;
738
739        let mut allow_bitmap = Vec::<ItemType>::new();
740        for &code in codes {
741            let map_index = (code.0 as usize) / item_size_bits;
742            if map_index >= allow_bitmap.len() {
743                allow_bitmap.resize(map_index + 1, 0);
744            }
745            allow_bitmap[map_index] |= (1 as ItemType) << (code.0 % item_size_bits as u16);
746        }
747
748        let hvcall_setup = protocol::hcl_hvcall_setup {
749            allow_bitmap_size: (allow_bitmap.len() * item_size_bytes) as u64,
750            allow_bitmap_ptr: allow_bitmap.as_ptr(),
751        };
752
753        // SAFETY: following the IOCTL definition.
754        unsafe {
755            hcl_hvcall_setup(self.0.as_raw_fd(), &hvcall_setup)
756                .expect("Hypercall setup IOCTL must be supported");
757        }
758    }
759
    /// Accepts VTL 0 pages with no host visibility.
    ///
    /// [`HypercallCode::HvCallAcceptGpaPages`] must be allowed.
    pub fn accept_gpa_pages(
        &self,
        range: MemoryRange,
        memory_type: hvdef::hypercall::AcceptMemoryType,
    ) -> Result<(), AcceptPagesError> {
        // Maximum rep count per hypercall: what fits in one input page after
        // the fixed-size `AcceptGpaPages` header, in u64-sized slots.
        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
            - size_of::<hvdef::hypercall::AcceptGpaPages>())
            / size_of::<u64>();

        let span = tracing::info_span!("accept_pages", CVM_ALLOWED, ?range);
        let _enter = span.enter();

        let mut current_page = range.start() / HV_PAGE_SIZE;
        let end = range.end() / HV_PAGE_SIZE;

        // Accept the range in chunks of at most MAX_INPUT_ELEMENTS pages,
        // advancing `current_page` by the count each iteration.
        while current_page < end {
            let header = hvdef::hypercall::AcceptGpaPages {
                partition_id: HV_PARTITION_ID_SELF,
                page_attributes: hvdef::hypercall::AcceptPagesAttributes::new()
                    .with_memory_type(memory_type.0)
                    .with_host_visibility(HostVisibilityType::PRIVATE)
                    .with_vtl_set(0), // vtl protections cannot be applied for VTL 0 memory
                vtl_permission_set: hvdef::hypercall::VtlPermissionSet {
                    vtl_permission_from_1: [0; hvdef::hypercall::HV_VTL_PERMISSION_SET_SIZE],
                },
                gpa_page_base: current_page,
            };

            let remaining_pages = end - current_page;
            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);

            // SAFETY: The input header and rep slice are the correct types for
            //         this hypercall. A dummy type of u8 is provided to satisfy
            //         the compiler for input and output rep type. The given
            //         input and slices are valid references while this function
            //         is called.
            //
            //         The hypercall output is validated right after the hypercall is issued.
            let output = unsafe {
                self.hvcall_rep::<hvdef::hypercall::AcceptGpaPages, u8, u8>(
                    HypercallCode::HvCallAcceptGpaPages,
                    &header,
                    HvcallRepInput::Count(count as u16),
                    None,
                )
                .expect("kernel hypercall submission should always succeed")
            };

            output
                .result()
                .map_err(|err| AcceptPagesError::Hypervisor {
                    range: MemoryRange::from_4k_gpn_range(current_page..current_page + count),
                    output,
                    hv_error: err,
                })?;

            current_page += count;

            // On success, the hypervisor must have processed exactly the
            // requested number of pages; anything else indicates a buggy or
            // malicious hypervisor.
            assert_eq!(output.elements_processed() as u64, count);
        }
        Ok(())
    }
825
    /// Modifies the host visibility of the given pages.
    ///
    /// [`HypercallCode::HvCallModifySparseGpaPageHostVisibility`] must be
    /// allowed.
    ///
    /// Returns on error, the hypervisor error and the number of pages
    /// processed.
    ///
    /// VBS FUTURE TODO: For defense in depth it could be useful to prevent usermode from
    /// changing visibility of a VTL2 kernel page in the kernel.
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        mut gpns: &[u64],
    ) -> Result<(), (HvError, usize)> {
        // Maximum number of GPNs per hypercall: what fits in one input page
        // after the fixed-size header.
        const GPNS_PER_CALL: usize = (HV_PAGE_SIZE as usize
            - size_of::<hvdef::hypercall::ModifySparsePageVisibility>())
            / size_of::<u64>();

        while !gpns.is_empty() {
            let n = gpns.len().min(GPNS_PER_CALL);
            // SAFETY: The input header and rep slice are the correct types for this hypercall.
            //         The hypercall output is validated right after the hypercall is issued.
            let result = unsafe {
                self.hvcall_rep(
                    HypercallCode::HvCallModifySparseGpaPageHostVisibility,
                    &hvdef::hypercall::ModifySparsePageVisibility {
                        partition_id: HV_PARTITION_ID_SELF,
                        host_visibility: ModifyHostVisibility::new()
                            .with_host_visibility(host_visibility),
                        reserved: 0,
                    },
                    HvcallRepInput::Elements(&gpns[..n]),
                    None::<&mut [u8]>,
                )
                .unwrap()
            };

            match result.result() {
                Ok(()) => {
                    // NOTE(review): the braces copy the value out before the
                    // comparison — presumably to avoid referencing a packed
                    // field; confirm against `HypercallOutput`'s layout.
                    assert_eq!({ result.elements_processed() }, n);
                }
                // A timeout is not fatal: the slice advance below resumes
                // from wherever the hypervisor left off.
                Err(HvError::Timeout) => {}
                Err(e) => return Err((e, result.elements_processed())),
            }
            gpns = &gpns[result.elements_processed()..];
        }
        Ok(())
    }
875
    /// Given a constructed hcl_hvcall protocol object, issues an IOCTL to invoke a hypercall via
    /// the direct hypercall kernel interface. This function will retry hypercalls if the hypervisor
    /// times out the hypercall.
    ///
    /// Input and output data are referenced as pointers in the call object.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A
    /// caller must check the return value for the result of the hypercall.
    ///
    /// Before invoking hypercalls, a list of hypercalls that are allowed
    /// has to be set with `Hcl::set_allowed_hypercalls`:
    /// ```ignore
    /// set_allowed_hypercalls(&[
    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
    ///     hvdef::HypercallCode::HvCallInstallIntercept,
    /// ]);
    /// ```
    /// # Safety
    /// This function makes no guarantees that the given input header, input and output types are
    /// valid for the given hypercall. It is the caller's responsibility to use the correct types
    /// with the specified hypercall.
    ///
    /// The caller must ensure that the input and output data are valid for the lifetime of this
    /// call.
    ///
    /// A caller must check the returned [HypercallOutput] for success or failure from the
    /// hypervisor.
    ///
    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be
    /// validated by the caller if needed.
    unsafe fn invoke_hvcall_ioctl(
        &self,
        mut call_object: protocol::hcl_hvcall,
    ) -> Result<HypercallOutput, HvcallError> {
        // Keep reissuing the ioctl until the hypervisor reports something
        // other than a timeout; any other status (success or failure) is
        // returned to the caller for inspection.
        loop {
            // SAFETY: following the IOCTL definition. The data referenced in the call
            // lives as long as `self` does thus the lifetime elision doesn't contradict
            // the compiler's invariants.
            //
            // The hypervisor is trusted to fill out the output page with a valid
            // representation of an instance the output type, except in the case of hardware
            // isolated VMs where the caller must validate output as needed.
            unsafe {
                hcl_hvcall(self.0.as_raw_fd(), &mut call_object)
                    .map_err(HvcallError::HypercallIoctlFailed)?;
            }

            if call_object.status.call_status() == Err(HvError::Timeout).into() {
                // Any hypercall can timeout, even one that doesn't have reps. Continue processing
                // from wherever the hypervisor left off.  The rep start index isn't checked for
                // validity, since it is only being used as an input to the untrusted hypervisor.
                // This applies to both simple and rep hypercalls.
                call_object
                    .control
                    .set_rep_start(call_object.status.elements_processed());
            } else {
                if call_object.control.rep_count() == 0 {
                    // For non-rep hypercalls, the elements processed field should be 0.
                    assert_eq!(call_object.status.elements_processed(), 0);
                } else {
                    // Hardware isolated VMs cannot trust output from the hypervisor, but check for
                    // consistency between the number of elements processed and the expected count. A
                    // violation of this assertion indicates a buggy or malicious hypervisor.
                    assert!(
                        (call_object.status.result().is_ok()
                            && call_object.control.rep_count()
                                == call_object.status.elements_processed())
                            || (call_object.status.result().is_err()
                                && call_object.control.rep_count()
                                    > call_object.status.elements_processed())
                    );
                }

                return Ok(call_object.status);
            }
        }
    }
953
954    /// Issues a non-rep hypercall to the hypervisor via the direct hypercall kernel interface.
955    /// This is not intended to be used directly by external callers, rather via write safe hypercall wrappers.
956    /// This call constructs the appropriate hypercall input control from the described parameters.
957    ///
958    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
959    /// return value for the result of the hypercall.
960    ///
961    /// `code` is the hypercall code.
962    /// `input` is the input type required by the hypercall.
963    /// `output` is the output type required by the hypercall.
964    ///
965    /// Before invoking hypercalls, a list of hypercalls that are allowed
966    /// has to be set with `Hcl::set_allowed_hypercalls`:
967    /// ```ignore
968    /// set_allowed_hypercalls(&[
969    ///     hvdef::HypercallCode::HvCallCheckForIoIntercept,
970    ///     hvdef::HypercallCode::HvCallInstallIntercept,
971    /// ]);
972    /// ```
973    /// # Safety
974    /// This function makes no guarantees that the given input header, input and output types are valid for the
975    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
976    ///
977    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
978    ///
979    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
980    /// caller if needed.
981    unsafe fn hvcall<I, O>(
982        &self,
983        code: HypercallCode,
984        input: &I,
985        output: &mut O,
986    ) -> Result<HypercallOutput, HvcallError>
987    where
988        I: IntoBytes + Sized + Immutable + KnownLayout,
989        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
990    {
991        const fn assert_size<I, O>()
992        where
993            I: Sized,
994            O: Sized,
995        {
996            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
997            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
998        }
999        assert_size::<I, O>();
1000
1001        let control = hvdef::hypercall::Control::new().with_code(code.0);
1002
1003        let call_object = protocol::hcl_hvcall {
1004            control,
1005            input_data: input.as_bytes().as_ptr().cast(),
1006            input_size: size_of::<I>(),
1007            status: FromZeros::new_zeroed(),
1008            output_data: output.as_bytes().as_ptr().cast(),
1009            output_size: size_of::<O>(),
1010        };
1011
1012        // SAFETY: The data referenced in the call lives as long as `self` does.
1013        unsafe { self.invoke_hvcall_ioctl(call_object) }
1014    }
1015
    /// Issues a rep hypercall to the hypervisor via the direct hypercall kernel
    /// interface. Like the non-rep version, this is not intended to be used
    /// externally other than to construct safe wrappers. This call constructs
    /// the appropriate hypercall input control from the described parameters.
    ///
    /// `Ok(HypercallOutput)` is returned if the kernel was successful in
    /// issuing the hypercall. A caller must check the return value for the
    /// result of the hypercall.
    ///
    /// `code` is the hypercall code. `input_header` is the hypercall fixed
    /// length input header. Variable length headers are not supported.
    /// `input_rep` is the list of input elements. The length of the slice is
    /// used as the rep count.
    ///
    /// `output_rep` is the optional output rep list. A caller must check the
    /// returned [HypercallOutput] for the number of valid elements in this
    /// list.
    ///
    /// # Safety
    /// This function makes no guarantees that the given input header, input rep
    /// and output rep types are valid for the given hypercall. It is the
    /// caller's responsibility to use the correct types with the specified
    /// hypercall.
    ///
    /// A caller must check the returned [HypercallOutput] for success or
    /// failure from the hypervisor and processed rep count.
    ///
    /// Hardware isolated VMs cannot trust output from the hypervisor. This
    /// routine will ensure that the hypervisor either returns success with all
    /// elements processed, or returns failure with an incomplete number of
    /// elements processed. Actual validation of the output elements is the
    /// responsibility of the caller.
    unsafe fn hvcall_rep<InputHeader, InputRep, O>(
        &self,
        code: HypercallCode,
        input_header: &InputHeader,
        input_rep: HvcallRepInput<'_, InputRep>,
        output_rep: Option<&mut [O]>,
    ) -> Result<HypercallOutput, HvcallError>
    where
        InputHeader: IntoBytes + Sized + Immutable + KnownLayout,
        InputRep: IntoBytes + Sized + Immutable + KnownLayout,
        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
    {
        // Construct input buffer: the fixed header followed by the rep
        // elements (if any); the rep count comes from the element count or
        // the explicit count.
        let (input, count) = match input_rep {
            HvcallRepInput::Elements(e) => {
                ([input_header.as_bytes(), e.as_bytes()].concat(), e.len())
            }
            HvcallRepInput::Count(c) => (input_header.as_bytes().to_vec(), c.into()),
        };

        if input.len() > HV_PAGE_SIZE as usize {
            return Err(HvcallError::InputParametersTooLarge);
        }

        // The output rep list, when present, must also fit in a single page
        // and must have exactly one element per input rep.
        if let Some(output_rep) = &output_rep {
            if output_rep.as_bytes().len() > HV_PAGE_SIZE as usize {
                return Err(HvcallError::OutputParametersTooLarge);
            }

            if count != output_rep.len() {
                return Err(HvcallError::InputOutputRepListMismatch);
            }
        }

        // With no output rep list, pass a null pointer and zero length.
        let (output_data, output_size) = match output_rep {
            Some(output_rep) => (
                output_rep.as_bytes().as_ptr().cast(),
                output_rep.as_bytes().len(),
            ),
            None => (std::ptr::null(), 0),
        };

        let control = hvdef::hypercall::Control::new()
            .with_code(code.0)
            .with_rep_count(count);

        let call_object = protocol::hcl_hvcall {
            control,
            input_data: input.as_ptr().cast(),
            input_size: input.len(),
            status: HypercallOutput::new(),
            output_data,
            output_size,
        };

        // SAFETY: The data referenced in the call lives as long as `self` does.
        unsafe { self.invoke_hvcall_ioctl(call_object) }
    }
1106
1107    /// Issues a non-rep hypercall with variable input to the hypervisor via the direct hypercall kernel interface.
1108    /// This is not intended to be used directly by external callers, rather via write safe hypercall wrappers.
1109    /// This call constructs the appropriate hypercall input control from the described parameters.
1110    ///
1111    /// `Ok(HypercallOutput)` is returned if the kernel was successful in issuing the hypercall. A caller must check the
1112    /// return value for the result of the hypercall.
1113    ///
1114    /// `code` is the hypercall code.
1115    /// `input` is the input type required by the hypercall.
1116    /// `output` is the output type required by the hypercall.
1117    /// `variable_input` is the contents of the variable input to the hypercall. The length must be a multiple of 8 bytes.
1118    ///
1119    /// # Safety
1120    /// This function makes no guarantees that the given input header, input and output types are valid for the
1121    /// given hypercall. It is the caller's responsibility to use the correct types with the specified hypercall.
1122    ///
1123    /// A caller must check the returned [HypercallOutput] for success or failure from the hypervisor.
1124    ///
1125    /// Hardware isolated VMs cannot trust the output from the hypervisor and so it must be validated by the
1126    /// caller if needed.
1127    unsafe fn hvcall_var<I, O>(
1128        &self,
1129        code: HypercallCode,
1130        input: &I,
1131        variable_input: &[u8],
1132        output: &mut O,
1133    ) -> Result<HypercallOutput, HvcallError>
1134    where
1135        I: IntoBytes + Sized + Immutable + KnownLayout,
1136        O: IntoBytes + FromBytes + Sized + Immutable + KnownLayout,
1137    {
1138        const fn assert_size<I, O>()
1139        where
1140            I: Sized,
1141            O: Sized,
1142        {
1143            assert!(size_of::<I>() <= HV_PAGE_SIZE as usize);
1144            assert!(size_of::<O>() <= HV_PAGE_SIZE as usize);
1145        }
1146        assert_size::<I, O>();
1147        assert!(variable_input.len().is_multiple_of(8));
1148
1149        let input = [input.as_bytes(), variable_input].concat();
1150        if input.len() > HV_PAGE_SIZE as usize {
1151            return Err(HvcallError::InputParametersTooLarge);
1152        }
1153
1154        let control = hvdef::hypercall::Control::new()
1155            .with_code(code.0)
1156            .with_variable_header_size(variable_input.len() / 8);
1157
1158        let call_object = protocol::hcl_hvcall {
1159            control,
1160            input_data: input.as_bytes().as_ptr().cast(),
1161            input_size: input.len(),
1162            status: FromZeros::new_zeroed(),
1163            output_data: output.as_bytes().as_ptr().cast(),
1164            output_size: size_of::<O>(),
1165        };
1166
1167        // SAFETY: The data referenced in the call lives as long as `self` does.
1168        unsafe { self.invoke_hvcall_ioctl(call_object) }
1169    }
1170
1171    /// Sets the VTL protection mask for the specified memory range.
1172    ///
1173    /// [`HypercallCode::HvCallModifyVtlProtectionMask`] must be allowed.
1174    pub fn modify_vtl_protection_mask(
1175        &self,
1176        range: MemoryRange,
1177        map_flags: HvMapGpaFlags,
1178        target_vtl: HvInputVtl,
1179    ) -> Result<(), ApplyVtlProtectionsError> {
1180        let header = hvdef::hypercall::ModifyVtlProtectionMask {
1181            partition_id: HV_PARTITION_ID_SELF,
1182            map_flags,
1183            target_vtl,
1184            reserved: [0; 3],
1185        };
1186
1187        const MAX_INPUT_ELEMENTS: usize = (HV_PAGE_SIZE as usize
1188            - size_of::<hvdef::hypercall::ModifyVtlProtectionMask>())
1189            / size_of::<u64>();
1190
1191        let span = tracing::info_span!("modify_vtl_protection_mask", CVM_ALLOWED, ?range);
1192        let _enter = span.enter();
1193
1194        let start = range.start() / HV_PAGE_SIZE;
1195        let end = range.end() / HV_PAGE_SIZE;
1196
1197        // Reuse the same vector for every hypercall.
1198        let mut pages = Vec::new();
1199        for current_page in (start..end).step_by(MAX_INPUT_ELEMENTS) {
1200            let remaining_pages = end - current_page;
1201            let count = remaining_pages.min(MAX_INPUT_ELEMENTS as u64);
1202            pages.clear();
1203            pages.extend(current_page..current_page + count);
1204
1205            // SAFETY: The input header and rep slice are the correct types for this hypercall. A dummy type of u8 is
1206            //         provided to satisfy the compiler for output rep type. The given input and slices are valid
1207            //         references while this function is called.
1208            //
1209            //         The hypercall output is validated right after the hypercall is issued.
1210            let output = unsafe {
1211                self.hvcall_rep::<hvdef::hypercall::ModifyVtlProtectionMask, u64, u8>(
1212                    HypercallCode::HvCallModifyVtlProtectionMask,
1213                    &header,
1214                    HvcallRepInput::Elements(pages.as_slice()),
1215                    None,
1216                )
1217                .expect("kernel hypercall submission should always succeed")
1218            };
1219
1220            output.result().map_err(|err| {
1221                let page_range =
1222                    *pages.first().expect("not empty")..*pages.last().expect("not empty") + 1;
1223                ApplyVtlProtectionsError::Hypervisor {
1224                    range: MemoryRange::from_4k_gpn_range(page_range),
1225                    output,
1226                    hv_error: err,
1227                    vtl: target_vtl,
1228                }
1229            })?;
1230
1231            assert_eq!(output.elements_processed() as u64, count);
1232        }
1233
1234        Ok(())
1235    }
1236
1237    /// Invokes the HvCallMemoryMappedIoRead hypercall
1238    pub fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> Result<(), HvError> {
1239        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1240
1241        let header = hvdef::hypercall::MemoryMappedIoRead {
1242            gpa,
1243            access_width: data.len() as u32,
1244            reserved_z0: 0,
1245        };
1246
1247        let mut output: hvdef::hypercall::MemoryMappedIoReadOutput = FromZeros::new_zeroed();
1248
1249        // SAFETY: The input header and slice are the correct types for this hypercall.
1250        //         The hypercall output is validated right after the hypercall is issued.
1251        let status = unsafe {
1252            self.hvcall(
1253                HypercallCode::HvCallMemoryMappedIoRead,
1254                &header,
1255                &mut output,
1256            )
1257            .expect("submitting hypercall should not fail")
1258        };
1259
1260        // Only copy the data if the hypercall was successful
1261        if status.result().is_ok() {
1262            data.copy_from_slice(&output.data[..data.len()]);
1263        };
1264
1265        status.result()
1266    }
1267
1268    /// Invokes the HvCallMemoryMappedIoWrite hypercall
1269    pub fn mmio_write(&self, gpa: u64, data: &[u8]) -> Result<(), HvError> {
1270        assert!(data.len() <= hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH);
1271
1272        let mut header = hvdef::hypercall::MemoryMappedIoWrite {
1273            gpa,
1274            access_width: data.len() as u32,
1275            reserved_z0: 0,
1276            data: [0; hvdef::hypercall::HV_HYPERCALL_MMIO_MAX_DATA_LENGTH],
1277        };
1278
1279        header.data[..data.len()].copy_from_slice(data);
1280
1281        // SAFETY: The input header and slice are the correct types for this hypercall.
1282        //         The hypercall output is validated right after the hypercall is issued.
1283        let status = unsafe {
1284            self.hvcall(HypercallCode::HvCallMemoryMappedIoWrite, &header, &mut ())
1285                .expect("submitting hypercall should not fail")
1286        };
1287
1288        status.result()
1289    }
1290
1291    /// Request a VBS VM report from the host VSM.
1292    ///
1293    /// # Arguments
1294    /// - `report_data`: The data to include in the report.
1295    ///
1296    /// Returns a result containing the report or an error.
1297    pub fn vbs_vm_call_report(
1298        &self,
1299        report_data: &[u8],
1300    ) -> Result<[u8; hvdef::hypercall::VBS_VM_MAX_REPORT_SIZE], HvError> {
1301        if report_data.len() > hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE {
1302            return Err(HvError::InvalidParameter);
1303        }
1304
1305        let mut header = hvdef::hypercall::VbsVmCallReport {
1306            report_data: [0; hvdef::hypercall::VBS_VM_REPORT_DATA_SIZE],
1307        };
1308
1309        header.report_data[..report_data.len()].copy_from_slice(report_data);
1310
1311        let mut output: hvdef::hypercall::VbsVmCallReportOutput = FromZeros::new_zeroed();
1312
1313        // SAFETY: The input header and slice are the correct types for this hypercall.
1314        //         The hypercall output is validated right after the hypercall is issued.
1315        let status = unsafe {
1316            self.hvcall(HypercallCode::HvCallVbsVmCallReport, &header, &mut output)
1317                .expect("submitting hypercall should not fail")
1318        };
1319
1320        if status.result().is_ok() {
1321            Ok(output.report)
1322        } else {
1323            Err(status.result().unwrap_err())
1324        }
1325    }
1326}
1327
/// The HCL device and collection of fds.
#[derive(Debug)]
pub struct Hcl {
    // Fd used to issue hypercalls via the direct hypercall interface.
    mshv_hvcall: MshvHvcall,
    // Fd for the `mshv_vtl` driver, used for VP run-page mappings and ioctls.
    mshv_vtl: MshvVtl,
    // Per-VP state; one entry per VP, indexed by VP index.
    vps: Vec<HclVp>,
    // Whether the kernel supports posting VTL-return (deferred) actions on
    // the run page; checked before moving deferred actions there.
    supports_vtl_ret_action: bool,
    // Whether the kernel supports mapping the hypervisor register page.
    supports_register_page: bool,
    // Whether DR6 is shared between VTLs on this processor.
    dr6_shared: bool,
    // Whether timer virtualization for lower VTLs is supported.
    supports_lower_vtl_timer_virt: bool,
    // The partition's isolation type.
    isolation: IsolationType,
    // Bitmap related to SNP register state.
    // NOTE(review): exact semantics not visible here — confirm against users.
    snp_register_bitmap: [u8; 64],
    // Client for VPs hosted in the sidecar kernel, if any.
    sidecar: Option<SidecarClient>,
}
1342
/// The isolation type for a partition.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum IsolationType {
    /// No isolation.
    None,
    /// Hyper-V software isolation.
    Vbs,
    /// AMD SNP.
    Snp,
    /// Intel TDX.
    Tdx,
}

impl IsolationType {
    /// Returns true if the isolation type is not `None`.
    pub fn is_isolated(&self) -> bool {
        match self {
            Self::None => false,
            Self::Vbs | Self::Snp | Self::Tdx => true,
        }
    }

    /// Returns whether the isolation type is hardware-backed.
    pub fn is_hardware_isolated(&self) -> bool {
        match self {
            Self::Snp | Self::Tdx => true,
            Self::None | Self::Vbs => false,
        }
    }
}
1367
1368impl Hcl {
1369    /// Returns true if DR6 is a shared register on this processor.
1370    pub fn dr6_shared(&self) -> bool {
1371        self.dr6_shared
1372    }
1373
1374    /// Returns true if timer virtualization for lower VTL is supported.
1375    pub fn supports_lower_vtl_timer_virt(&self) -> bool {
1376        self.supports_lower_vtl_timer_virt
1377    }
1378}
1379
/// Per-VP state owned by the HCL.
#[derive(Debug)]
struct HclVp {
    // Whether the VP is currently running, and if so on which thread; used
    // to enforce a single runner per VP.
    state: Mutex<VpState>,
    // The VP's run page, mapped from the `mshv_vtl` driver.
    run: MappedPage<hcl_run>,
    // Isolation-specific per-VP mappings.
    backing: BackingState,
}
1386
/// Isolation-specific per-VP mappings.
#[derive(Debug)]
enum BackingState {
    /// Non-isolated or VBS-isolated partition on aarch64.
    MshvAarch64 {
        /// The hypervisor register page, mapped when requested.
        reg_page: Option<MappedPage<HvAarch64RegisterPage>>,
    },
    /// Non-isolated or VBS-isolated partition on x86-64.
    MshvX64 {
        /// The hypervisor register page, mapped when requested.
        reg_page: Option<MappedPage<HvX64RegisterPage>>,
    },
    /// AMD SNP partition.
    Snp {
        /// The VMSA pages for VTL0 and VTL1.
        vmsa: VtlArray<MappedPage<SevVmsa>, 2>,
    },
    /// Intel TDX partition.
    Tdx {
        /// The VTL0 APIC page, mapped from the kernel.
        vtl0_apic_page: MappedPage<ApicPage>,
        /// The VTL1 APIC page, allocated from the private DMA client.
        vtl1_apic_page: MemoryBlock,
    },
}
1403
/// Run state of a VP, used to ensure only one runner exists at a time.
#[derive(Debug)]
enum VpState {
    /// The VP is being run by the given thread.
    Running(Pthread),
    /// The VP is not currently being run.
    NotRunning,
}
1409
impl HclVp {
    /// Creates the per-VP state for VP index `vp`, mapping its run page and
    /// the isolation-specific pages from the `mshv_vtl` driver.
    ///
    /// `map_reg_page` requests mapping the hypervisor register page on
    /// non-hardware-isolated partitions. `private_dma_client` is required for
    /// TDX, where the VTL1 APIC page is allocated rather than mapped.
    fn new(
        hcl: &Hcl,
        vp: u32,
        map_reg_page: bool,
        isolation_type: IsolationType,
        private_dma_client: Option<&Arc<dyn DmaClient>>,
    ) -> Result<Self, Error> {
        let fd = &hcl.mshv_vtl.file;
        // Map the run page shared with the kernel for this VP.
        let run: MappedPage<hcl_run> =
            MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
        // Block proxied interrupts on all vectors by default. The mask will be
        // relaxed as the guest runs.
        //
        // This is only used on CVMs. Skip it otherwise, since run page accesses
        // will fault on VPs that are still in the sidecar kernel.
        if isolation_type.is_hardware_isolated() {
            // SAFETY: `proxy_irr_blocked` is not accessed by any other VPs/kernel at this point (`HclVp` creation)
            // so we know we have exclusive access.
            let proxy_irr_blocked = unsafe { &mut (*run.as_ptr()).proxy_irr_blocked };
            proxy_irr_blocked.fill(!0);
        }

        // Map or allocate the isolation-specific per-VP pages.
        let backing = match isolation_type {
            IsolationType::None | IsolationType::Vbs if cfg!(guest_arch = "aarch64") => {
                BackingState::MshvAarch64 {
                    reg_page: if map_reg_page {
                        Some(
                            MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
                                .map_err(Error::MmapRegPage)?,
                        )
                    } else {
                        None
                    },
                }
            }
            IsolationType::None | IsolationType::Vbs => BackingState::MshvX64 {
                reg_page: if map_reg_page {
                    Some(
                        MappedPage::new(fd, HCL_REG_PAGE_OFFSET | vp as i64)
                            .map_err(Error::MmapRegPage)?,
                    )
                } else {
                    None
                },
            },
            IsolationType::Snp => {
                let vmsa_vtl0 = MappedPage::new(fd, HCL_VMSA_PAGE_OFFSET | vp as i64)
                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?;
                let vmsa_vtl1 = MappedPage::new(fd, HCL_VMSA_GUEST_VSM_PAGE_OFFSET | vp as i64)
                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl1)))?;
                BackingState::Snp {
                    vmsa: [vmsa_vtl0, vmsa_vtl1].into(),
                }
            }
            IsolationType::Tdx => BackingState::Tdx {
                vtl0_apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
                    .map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?,
                vtl1_apic_page: private_dma_client
                    .ok_or(Error::MissingPrivateMemory)?
                    .allocate_dma_buffer(HV_PAGE_SIZE as usize)
                    .map_err(Error::AllocVp)?,
            },
        };

        Ok(Self {
            state: Mutex::new(VpState::NotRunning),
            run,
            backing,
        })
    }
}
1482
/// Object used to run and to access state for a specific VP.
pub struct ProcessorRunner<'a, T: Backing<'a>> {
    // The owning HCL instance.
    hcl: &'a Hcl,
    // The per-VP state for this VP.
    vp: &'a HclVp,
    // The sidecar VP handle, if this VP runs via the sidecar kernel.
    sidecar: Option<SidecarVp<'a>>,
    // Deferred actions to post to the run page before entering a lower VTL,
    // when the kernel supports VTL-return actions.
    deferred_actions: Option<RegisteredDeferredActions<'a>>,
    // The VP's run page, shared (and concurrently updated) by the kernel.
    run: &'a UnsafeCell<hcl_run>,
    // The intercept message describing the last exit.
    intercept_message: &'a UnsafeCell<HvMessage>,
    // Isolation-specific runner state.
    state: T,
}
1493
/// An error returned by [`Hcl::runner`] when a [`ProcessorRunner`] cannot be
/// created.
#[derive(Debug, Error)]
pub enum NoRunner {
    /// The partition is for a different isolation type.
    #[error("mismatched isolation type")]
    MismatchedIsolation,
    /// A sidecar VP was requested, but no sidecar was provided.
    #[error("missing sidecar")]
    MissingSidecar,
    /// The sidecar VP could not be contacted.
    #[error("sidecar communication error")]
    Sidecar(#[source] sidecar_client::SidecarError),
}
1507
/// An isolation-type-specific backing for a processor runner.
//
// The public trait bounds on the crate-private `BackingPrivate`, sealing it
// so only this crate's types can implement it; `expect` suppresses the
// resulting private-bounds lint.
#[expect(private_bounds)]
pub trait Backing<'a>: BackingPrivate<'a> {}

// Blanket impl: every private backing is a `Backing`.
impl<'a, T: BackingPrivate<'a>> Backing<'a> for T {}
1513
mod private {
    use super::Hcl;
    use super::HclVp;
    use super::NoRunner;
    use super::ProcessorRunner;
    use crate::GuestVtl;
    use hvdef::HvRegisterName;
    use hvdef::HvRegisterValue;
    use sidecar_client::SidecarVp;

    /// Crate-private interface implemented by each isolation-specific runner
    /// backing; kept private to seal the public `Backing` trait.
    pub(super) trait BackingPrivate<'a>: Sized {
        /// Creates the backing state for `vp`, optionally running via
        /// `sidecar`.
        fn new(vp: &'a HclVp, sidecar: Option<&SidecarVp<'a>>, hcl: &Hcl)
        -> Result<Self, NoRunner>;

        /// Attempts to set a register through backing-specific state,
        /// returning whether it was handled.
        /// NOTE(review): return semantics inferred from the name — confirm
        /// against the implementations.
        fn try_set_reg(
            runner: &mut ProcessorRunner<'a, Self>,
            vtl: GuestVtl,
            name: HvRegisterName,
            value: HvRegisterValue,
        ) -> bool;

        /// Whether pending register state must be flushed before register
        /// `name` is set.
        /// NOTE(review): inferred from the name — confirm against callers.
        fn must_flush_regs_on(runner: &ProcessorRunner<'a, Self>, name: HvRegisterName) -> bool;

        /// Attempts to get a register through backing-specific state,
        /// returning `None` if the backing does not handle it.
        fn try_get_reg(
            runner: &ProcessorRunner<'a, Self>,
            vtl: GuestVtl,
            name: HvRegisterName,
        ) -> Option<HvRegisterValue>;

        /// Flushes backing-specific register page state; called from
        /// `ProcessorRunner::flush_deferred_state`.
        fn flush_register_page(runner: &mut ProcessorRunner<'a, Self>);
    }
}
1546
impl<'a, T: Backing<'a>> Drop for ProcessorRunner<'a, T> {
    fn drop(&mut self) {
        // Push out any pending register/deferred-action state before this
        // runner goes away.
        self.flush_deferred_state();
        // Release the deferred action registration before marking the VP as
        // not running.
        drop(self.deferred_actions.take());
        // This runner must be the one that marked the VP as running, on this
        // same thread; anything else indicates a bug in runner lifecycle.
        let old_state = std::mem::replace(&mut *self.vp.state.lock(), VpState::NotRunning);
        assert!(matches!(old_state, VpState::Running(thread) if thread == Pthread::current()));
    }
}
1555
1556impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
    /// Flushes any deferred state. Must be called if preparing the partition
    /// for save/restore (servicing).
    pub fn flush_deferred_state(&mut self) {
        // Write back any backing-specific register page state first.
        T::flush_register_page(self);
        // Then flush any pending deferred actions.
        if let Some(actions) = &mut self.deferred_actions {
            actions.flush();
        }
    }
1565
    /// Clears the cancel flag so that the VP can be run again.
    ///
    /// Skipped for sidecar VPs: run page accesses can fault while the VP is
    /// still in the sidecar kernel (see `HclVp::new`).
    pub fn clear_cancel(&mut self) {
        if !self.is_sidecar() {
            // SAFETY: self.run is mapped, and the cancel field is atomically
            // accessed by everyone.
            let cancel = unsafe { &*(&raw mut (*self.run.get()).cancel).cast::<AtomicU32>() };
            cancel.store(0, Ordering::SeqCst);
        }
    }
1575
    /// Set the halted state of the VP. If `true`, then `run()` will not
    /// actually run the VP but will just wait for a cancel request or signal.
    pub fn set_halted(&mut self, halted: bool) {
        // SAFETY: the `flags` field of the run page will not be concurrently
        // updated.
        let flags = unsafe { &mut (*self.run.get()).flags };
        // Set or clear the halted bit, leaving other flags untouched.
        if halted {
            *flags |= protocol::MSHV_VTL_RUN_FLAG_HALTED
        } else {
            *flags &= !protocol::MSHV_VTL_RUN_FLAG_HALTED
        }
    }
1588
    /// Gets the proxied interrupt request bitmap for VTL 0 from the hypervisor.
    ///
    /// Returns `None` if the kernel has not flagged any proxied interrupts to
    /// scan since the last call.
    pub fn proxy_irr_vtl0(&mut self) -> Option<[u32; 8]> {
        // SAFETY: the `scan_proxy_irr` and `proxy_irr` fields of the run page
        // are concurrently updated by the kernel on multiple processors. They
        // are accessed atomically everywhere.
        unsafe {
            let scan_proxy_irr = &*((&raw mut (*self.run.get()).scan_proxy_irr).cast::<AtomicU8>());
            let proxy_irr = &*((&raw mut (*self.run.get()).proxy_irr).cast::<[AtomicU32; 8]>());
            // Fast path: nothing to scan.
            if scan_proxy_irr.load(Ordering::Acquire) == 0 {
                return None;
            }

            // Clear the scan flag, then atomically take (swap to zero) each
            // non-empty IRR word into the result.
            scan_proxy_irr.store(0, Ordering::SeqCst);
            let mut r = [0; 8];
            for (irr, r) in proxy_irr.iter().zip(r.iter_mut()) {
                if irr.load(Ordering::Relaxed) != 0 {
                    *r = irr.swap(0, Ordering::Relaxed);
                }
            }
            Some(r)
        }
    }
1611
    /// Update the `proxy_irr_blocked` for VTL 0 in the run page.
    ///
    /// `irr_filter` has bits set for every allowed vector; the stored mask is
    /// its complement (blocked vectors).
    pub fn update_proxy_irr_filter_vtl0(&mut self, irr_filter: &[u32; 8]) {
        // SAFETY: `proxy_irr_blocked` is accessed by current VP only, but could
        // be concurrently accessed by kernel too, hence accessing as Atomic
        let proxy_irr_blocked = unsafe {
            &mut *((&raw mut (*self.run.get()).proxy_irr_blocked).cast::<[AtomicU32; 8]>())
        };

        // `irr_filter` bitmap has bits set for all allowed vectors (i.e. SINT and device interrupts)
        // Replace current `proxy_irr_blocked` with the given `irr_filter` bitmap.
        // By default block all (i.e. set all), and only allow (unset) given vectors from `irr_filter`.
        for (filter, irr) in proxy_irr_blocked.iter_mut().zip(irr_filter.iter()) {
            filter.store(!irr, Ordering::Relaxed);
            tracing::debug!(irr, "update_proxy_irr_filter");
        }
    }
1628
    /// Gets the proxy_irr_exit bitmask for VTL 0. This mask ensures that
    /// the masked interrupts always exit to user-space, and cannot
    /// be injected in the kernel. Interrupts matching this condition
    /// will be left on the proxy_irr field.
    ///
    /// Returns a mutable reference directly into the run page.
    pub fn proxy_irr_exit_mut_vtl0(&mut self) -> &mut [u32; 8] {
        // SAFETY: The `proxy_irr_exit` field of the run page will not be concurrently updated.
        unsafe { &mut (*self.run.get()).proxy_irr_exit }
    }
1637
    /// Gets the current offload_flags from the run page.
    ///
    /// Returns a mutable reference directly into the run page.
    pub fn offload_flags_mut(&mut self) -> &mut hcl_intr_offload_flags {
        // SAFETY: The `offload_flags` field of the run page will not be concurrently updated.
        unsafe { &mut (*self.run.get()).offload_flags }
    }
1643
1644    /// Runs the VP via the sidecar kernel.
1645    pub fn run_sidecar(&mut self) -> Result<SidecarRun<'_, 'a>, Error> {
1646        self.sidecar.as_mut().unwrap().run().map_err(Error::Sidecar)
1647    }
1648
    /// Run the following VP until an exit, error, or interrupt (cancel or
    /// signal) occurs.
    ///
    /// Returns `Ok(true)` if there is an exit to process, `Ok(false)` if there
    /// was a signal or cancel request.
    ///
    /// Must not be used on sidecar VPs; use [`Self::run_sidecar`] instead.
    pub fn run(&mut self) -> Result<bool, Error> {
        assert!(self.sidecar.is_none());
        // Apply any deferred actions to the run page.
        if let Some(actions) = &mut self.deferred_actions {
            debug_assert!(self.hcl.supports_vtl_ret_action);
            // SAFETY: there are no concurrent accesses to the deferred action
            // slots.
            let mut slots = unsafe { DeferredActionSlots::new(self.run) };
            actions.move_to_slots(&mut slots);
        };

        // N.B. cpu_context and exit_context are mutated by this call.
        //
        // SAFETY: no safety requirements for this ioctl.
        let r = unsafe { hcl_return_to_lower_vtl(self.hcl.mshv_vtl.file.as_raw_fd()) };

        // EINTR means a signal or cancel interrupted the run; everything
        // else is a real error.
        let has_intercept = match r {
            Ok(_) => true,
            Err(nix::errno::Errno::EINTR) => false,
            Err(err) => return Err(Error::ReturnToLowerVtl(err)),
        };
        Ok(has_intercept)
    }
1677
1678    /// Gets a reference to enter mode value, used by the kernel to specify the
1679    /// mode used when entering a lower VTL.
1680    pub fn enter_mode(&mut self) -> Option<&mut EnterModes> {
1681        if self.sidecar.is_some() {
1682            None
1683        } else {
1684            // SAFETY: self.run is mapped, and the mode field can only be mutated or accessed by
1685            // this object (or the kernel while `run` is called).
1686            Some(unsafe { &mut (*self.run.get()).mode })
1687        }
1688    }
1689
1690    /// Returns a reference to the exit message from the last exit.
1691    pub fn exit_message(&self) -> &HvMessage {
1692        // SAFETY: the exit message will not be concurrently accessed by the
1693        // kernel while this VP is in VTL2.
1694        unsafe { &*self.intercept_message.get() }
1695    }
1696
    /// Returns whether this is a sidecar VP, i.e. whether this runner was
    /// created with `use_sidecar` and runs via the sidecar kernel rather than
    /// directly via `run`.
    pub fn is_sidecar(&self) -> bool {
        self.sidecar.is_some()
    }
1701
1702    /// Sets the VTL that should be returned to when underhill exits
1703    pub fn set_exit_vtl(&mut self, vtl: GuestVtl) {
1704        // SAFETY: self.run is mapped, and the target_vtl field can only be
1705        // mutated or accessed by this object and only before the kernel is
1706        // invoked during `run`
1707        unsafe { (*self.run.get()).target_vtl = vtl.into() }
1708    }
1709}
1710
1711impl Hcl {
    /// Returns a new HCL instance.
    ///
    /// `isolation` is the caller-requested isolation type; it is validated
    /// against the hypervisor's advertised isolation type and
    /// [`Error::MismatchedIsolation`] is returned on disagreement. `sidecar`,
    /// when provided, is used to run VPs via the sidecar kernel.
    pub fn new(isolation: IsolationType, sidecar: Option<SidecarClient>) -> Result<Hcl, Error> {
        // Register a no-op SIGRTMIN handler once per process so that a pending
        // signal causes the run ioctl to return EINTR (see `ProcessorRunner::run`).
        static SIGNAL_HANDLER_INIT: Once = Once::new();
        // SAFETY: The signal handler does not perform any actions that are forbidden
        // for signal handlers to perform, as it performs nothing.
        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
            signal_hook::low_level::register(libc::SIGRTMIN(), || {
                // Do nothing, the ioctl will now return with EINTR.
            })
            .unwrap();
        });

        // Open both mshv fds
        let mshv_fd = Mshv::new()?;

        // Validate the hypervisor's advertised isolation type matches the
        // requested isolation type. In CVM scenarios, this is not trusted, so
        // we still need the isolation type from the caller.
        //
        // FUTURE: the kernel driver should probably tell us this, especially
        // since the kernel ABI is different for different isolation types.
        let supported_isolation = if cfg!(guest_arch = "x86_64") {
            // xtask-fmt allow-target-arch cpu-intrinsic
            #[cfg(target_arch = "x86_64")]
            {
                // Low nibble of EBX from the isolation-configuration CPUID leaf
                // encodes the isolation type.
                let result = safe_intrinsics::cpuid(
                    hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION,
                    0,
                );
                match result.ebx & 0xF {
                    0 => IsolationType::None,
                    1 => IsolationType::Vbs,
                    2 => IsolationType::Snp,
                    3 => IsolationType::Tdx,
                    ty => panic!("unknown isolation type {ty:#x}"),
                }
            }
            // xtask-fmt allow-target-arch cpu-intrinsic
            #[cfg(not(target_arch = "x86_64"))]
            {
                unreachable!()
            }
        } else {
            IsolationType::None
        };

        if isolation != supported_isolation {
            return Err(Error::MismatchedIsolation {
                supported: supported_isolation,
                requested: isolation,
            });
        }

        // Probe optional kernel capabilities up front.
        let supports_vtl_ret_action = mshv_fd.check_extension(HCL_CAP_VTL_RETURN_ACTION)?;
        let supports_register_page = mshv_fd.check_extension(HCL_CAP_REGISTER_PAGE)?;
        let dr6_shared = mshv_fd.check_extension(HCL_CAP_DR6_SHARED)?;
        let supports_lower_vtl_timer_virt =
            mshv_fd.check_extension(HCL_CAP_LOWER_VTL_TIMER_VIRT)?;
        tracing::debug!(
            supports_vtl_ret_action,
            supports_register_page,
            supports_lower_vtl_timer_virt,
            "HCL capabilities",
        );

        let vtl_fd = mshv_fd.create_vtl()?;

        // Open the hypercall pseudo-device
        let mshv_hvcall = MshvHvcall::new()?;

        // Override certain features for hardware isolated VMs.
        // TODO: vtl return actions are inhibited for hardware isolated VMs because they currently
        // are a pessimization since interrupt handling (and synic handling) are all done from
        // within VTL2. Future vtl return actions may be different, requiring granular handling.
        let supports_vtl_ret_action = supports_vtl_ret_action && !isolation.is_hardware_isolated();
        let supports_register_page = supports_register_page && !isolation.is_hardware_isolated();
        // Zeroed until `set_snp_register_bitmap` is called.
        let snp_register_bitmap = [0u8; 64];

        Ok(Hcl {
            mshv_hvcall,
            mshv_vtl: vtl_fd,
            vps: Vec::new(),
            supports_vtl_ret_action,
            supports_register_page,
            dr6_shared,
            supports_lower_vtl_timer_virt,
            isolation,
            snp_register_bitmap,
            sidecar,
        })
    }
1803
1804    /// Set allowed hypercalls.
1805    pub fn set_allowed_hypercalls(&self, codes: &[HypercallCode]) {
1806        self.mshv_hvcall.set_allowed_hypercalls(codes)
1807    }
1808
1809    /// Initializes SNP register tweak bitmap
1810    pub fn set_snp_register_bitmap(&mut self, register_bitmap: [u8; 64]) {
1811        self.snp_register_bitmap = register_bitmap;
1812    }
1813
1814    /// Adds `vp_count` VPs.
1815    pub fn add_vps(
1816        &mut self,
1817        vp_count: u32,
1818        private_pool: Option<&Arc<dyn DmaClient>>,
1819    ) -> Result<(), Error> {
1820        self.vps = (0..vp_count)
1821            .map(|vp| {
1822                HclVp::new(
1823                    self,
1824                    vp,
1825                    self.supports_register_page,
1826                    self.isolation,
1827                    private_pool,
1828                )
1829            })
1830            .collect::<Result<_, _>>()?;
1831
1832        Ok(())
1833    }
1834
1835    /// Registers with the hypervisor for an intercept.
1836    pub fn register_intercept(
1837        &self,
1838        intercept_type: HvInterceptType,
1839        access_type_mask: u32,
1840        intercept_parameters: HvInterceptParameters,
1841    ) -> Result<(), HvError> {
1842        let intercept_info = hvdef::hypercall::InstallIntercept {
1843            partition_id: HV_PARTITION_ID_SELF,
1844            access_type_mask,
1845            intercept_type,
1846            intercept_parameters,
1847        };
1848
1849        // SAFETY: calling hypercall with appropriate input and output.
1850        unsafe {
1851            self.mshv_hvcall
1852                .hvcall(
1853                    HypercallCode::HvCallInstallIntercept,
1854                    &intercept_info,
1855                    &mut (),
1856                )
1857                .unwrap()
1858                .result()
1859        }
1860    }
1861
1862    /// Returns the base CPU that manages the given sidecar VP.
1863    pub fn sidecar_base_cpu(&self, vp_index: u32) -> Option<u32> {
1864        Some(self.sidecar.as_ref()?.base_cpu(vp_index))
1865    }
1866
    /// Create a VP runner for the given partition.
    ///
    /// Returns [`NoRunner::MissingSidecar`] if `use_sidecar` is set but no
    /// sidecar client was supplied to [`Hcl::new`].
    ///
    /// # Panics
    /// Panics if another runner already exists for this VP.
    pub fn runner<'a, T: Backing<'a>>(
        &'a self,
        vp_index: u32,
        use_sidecar: bool,
    ) -> Result<ProcessorRunner<'a, T>, NoRunner> {
        let vp = &self.vps[vp_index as usize];

        let sidecar = if use_sidecar {
            Some(
                self.sidecar
                    .as_ref()
                    .ok_or(NoRunner::MissingSidecar)?
                    .vp(vp_index),
            )
        } else {
            None
        };

        let state = T::new(vp, sidecar.as_ref(), self)?;

        // Set this thread as the runner.
        let VpState::NotRunning =
            std::mem::replace(&mut *vp.state.lock(), VpState::Running(Pthread::current()))
        else {
            panic!("another runner already exists")
        };

        // Deferred actions are only used when running locally (not via the
        // sidecar) and the kernel supports VTL return actions.
        let actions = if sidecar.is_none() && self.supports_vtl_ret_action {
            Some(register_deferred_actions(self))
        } else {
            None
        };

        // The intercept message comes from the sidecar's shared page when this
        // is a sidecar VP, otherwise from the run page's exit_message field.
        //
        // SAFETY: The run page is guaranteed to be mapped and valid.
        // While the exit message might not be filled in yet we're only computing its address.
        let intercept_message = unsafe {
            &*sidecar.as_ref().map_or(
                std::ptr::addr_of!((*vp.run.as_ptr()).exit_message).cast(),
                |s| s.intercept_message().cast(),
            )
        };

        Ok(ProcessorRunner {
            hcl: self,
            vp,
            deferred_actions: actions,
            run: vp.run.as_ref(),
            intercept_message,
            state,
            sidecar,
        })
    }
1920
1921    /// Trigger the following interrupt request.
1922    pub fn request_interrupt(
1923        &self,
1924        interrupt_control: hvdef::HvInterruptControl,
1925        destination_address: u64,
1926        requested_vector: u32,
1927        target_vtl: GuestVtl,
1928    ) -> Result<(), Error> {
1929        tracing::trace!(
1930            ?interrupt_control,
1931            destination_address,
1932            requested_vector,
1933            "requesting interrupt"
1934        );
1935
1936        assert!(!self.isolation.is_hardware_isolated());
1937
1938        let request = AssertVirtualInterrupt {
1939            partition_id: HV_PARTITION_ID_SELF,
1940            interrupt_control,
1941            destination_address,
1942            requested_vector,
1943            target_vtl: target_vtl as u8,
1944            rsvd0: 0,
1945            rsvd1: 0,
1946        };
1947
1948        // SAFETY: calling the hypercall with correct input buffer.
1949        let output = unsafe {
1950            self.mshv_hvcall.hvcall(
1951                HypercallCode::HvCallAssertVirtualInterrupt,
1952                &request,
1953                &mut (),
1954            )
1955        }
1956        .unwrap();
1957
1958        output.result().map_err(Error::RequestInterrupt)
1959    }
1960
1961    /// Attempts to signal a given vp/sint/flag combo using HvSignalEventDirect.
1962    ///
1963    /// No result is returned because this request may be deferred until the
1964    /// hypervisor is returning to a lower VTL.
1965    pub fn signal_event_direct(&self, vp: u32, sint: u8, flag: u16) {
1966        tracing::trace!(vp, sint, flag, "signaling event");
1967        push_deferred_action(self, DeferredAction::SignalEvent { vp, sint, flag });
1968    }
1969
1970    fn hvcall_signal_event_direct(&self, vp: u32, sint: u8, flag: u16) -> Result<bool, Error> {
1971        let signal_event_input = hvdef::hypercall::SignalEventDirect {
1972            target_partition: HV_PARTITION_ID_SELF,
1973            target_vp: vp,
1974            target_vtl: Vtl::Vtl0 as u8,
1975            target_sint: sint,
1976            flag_number: flag,
1977        };
1978        let mut signal_event_output = hvdef::hypercall::SignalEventDirectOutput {
1979            newly_signaled: 0,
1980            rsvd: [0; 7],
1981        };
1982
1983        // SAFETY: calling the hypercall with correct input buffer.
1984        let output = unsafe {
1985            self.mshv_hvcall.hvcall(
1986                HypercallCode::HvCallSignalEventDirect,
1987                &signal_event_input,
1988                &mut signal_event_output,
1989            )
1990        }
1991        .unwrap();
1992
1993        output
1994            .result()
1995            .map(|_| signal_event_output.newly_signaled != 0)
1996            .map_err(Error::SignalEvent)
1997    }
1998
1999    /// Attempts to post a given message to a vp/sint combo using HvPostMessageDirect.
2000    pub fn post_message_direct(
2001        &self,
2002        vp: u32,
2003        sint: u8,
2004        message: &HvMessage,
2005    ) -> Result<(), HvError> {
2006        tracing::trace!(vp, sint, "posting message");
2007
2008        let post_message = hvdef::hypercall::PostMessageDirect {
2009            partition_id: HV_PARTITION_ID_SELF,
2010            vp_index: vp,
2011            vtl: Vtl::Vtl0 as u8,
2012            padding0: [0; 3],
2013            sint,
2014            padding1: [0; 3],
2015            message: zerocopy::Unalign::new(*message),
2016            padding2: 0,
2017        };
2018
2019        // SAFETY: calling the hypercall with correct input buffer.
2020        let output = unsafe {
2021            self.mshv_hvcall.hvcall(
2022                HypercallCode::HvCallPostMessageDirect,
2023                &post_message,
2024                &mut (),
2025            )
2026        }
2027        .unwrap();
2028
2029        output.result()
2030    }
2031
2032    /// Sets a file to poll during run. When the file's poll state changes, the
2033    /// run will be automatically cancelled.
2034    pub fn set_poll_file(&self, vp: u32, file: RawFd) -> Result<(), Error> {
2035        // SAFETY: calling the IOCTL as defined. This is safe even if the caller
2036        // does not own `file` since all this does is register the file for
2037        // polling.
2038        unsafe {
2039            hcl_set_poll_file(
2040                self.mshv_vtl.file.as_raw_fd(),
2041                &protocol::hcl_set_poll_file {
2042                    cpu: vp as i32,
2043                    fd: file,
2044                },
2045            )
2046            .map_err(Error::SetPollFile)?;
2047        }
2048        Ok(())
2049    }
2050
2051    fn to_hv_gpa_range_array(gpa_memory_ranges: &[MemoryRange]) -> Vec<HvGpaRange> {
2052        const PAGES_PER_ENTRY: u64 = 2048;
2053        const PAGE_SIZE: u64 = HV_PAGE_SIZE;
2054
2055        // Estimate the total number of pages across all memory ranges
2056        let estimated_size: usize = gpa_memory_ranges
2057            .iter()
2058            .map(|memory_range| {
2059                let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2060                total_pages.div_ceil(PAGES_PER_ENTRY)
2061            })
2062            .sum::<u64>() as usize;
2063
2064        // Create a vector with the estimated size
2065        let mut hv_gpa_ranges = Vec::with_capacity(estimated_size);
2066
2067        for memory_range in gpa_memory_ranges {
2068            // Calculate the total number of pages in the memory range
2069            let total_pages = (memory_range.end() - memory_range.start()).div_ceil(PAGE_SIZE);
2070
2071            // Convert start address to page number
2072            let start_page = memory_range.start_4k_gpn();
2073
2074            // Generate the ranges and append them to the vector
2075            hv_gpa_ranges.extend(
2076                (0..total_pages)
2077                    .step_by(PAGES_PER_ENTRY as usize)
2078                    .map(|start| {
2079                        let end = std::cmp::min(total_pages, start + PAGES_PER_ENTRY);
2080                        let pages_in_this_range = end - start;
2081                        let gpa_page_number = start_page + start;
2082
2083                        let extended = HvGpaRangeExtended::new()
2084                            .with_additional_pages(pages_in_this_range - 1)
2085                            .with_large_page(false) // Assuming not a large page
2086                            .with_gpa_page_number(gpa_page_number);
2087
2088                        HvGpaRange(extended.into_bits())
2089                    }),
2090            );
2091        }
2092
2093        hv_gpa_ranges // Return the vector at the end
2094    }
2095
    /// Issues pin or unpin hypercalls for the given pre-encoded ranges,
    /// batching the input into page-sized hypercall chunks.
    ///
    /// On failure, the returned [`PinUnpinError`] carries the number of ranges
    /// successfully processed so far, so the caller can roll them back.
    fn pin_unpin_gpa_ranges_internal(
        &self,
        gpa_ranges: &[HvGpaRange],
        action: GpaPinUnpinAction,
    ) -> Result<(), PinUnpinError> {
        const PIN_REQUEST_HEADER_SIZE: usize =
            size_of::<hvdef::hypercall::PinUnpinGpaPageRangesHeader>();
        // Number of u64 range elements that fit in one hypercall input page
        // after the fixed header.
        const MAX_INPUT_ELEMENTS: usize =
            (HV_PAGE_SIZE as usize - PIN_REQUEST_HEADER_SIZE) / size_of::<u64>();

        let header = hvdef::hypercall::PinUnpinGpaPageRangesHeader { reserved: 0 };
        let mut ranges_processed = 0;

        for chunk in gpa_ranges.chunks(MAX_INPUT_ELEMENTS) {
            // SAFETY: This unsafe block is valid because:
            // 1. The code and header going to match the expected input for the hypercall.
            //
            // 2. Hypercall result is checked right after the hypercall is issued.
            //
            let output = unsafe {
                self.mshv_hvcall
                    .hvcall_rep(
                        match action {
                            GpaPinUnpinAction::PinGpaRange => HypercallCode::HvCallPinGpaPageRanges,
                            GpaPinUnpinAction::UnpinGpaRange => {
                                HypercallCode::HvCallUnpinGpaPageRanges
                            }
                        },
                        &header,
                        HvcallRepInput::Elements(chunk),
                        None::<&mut [u8]>,
                    )
                    .expect("submitting pin/unpin hypercall should not fail")
            };

            // Accumulate progress before checking the result so a failure
            // still reports how many elements were processed (for rollback).
            ranges_processed += output.elements_processed();

            output.result().map_err(|e| PinUnpinError {
                ranges_processed,
                error: e,
            })?;
        }

        // At end all the ranges should be processed
        if ranges_processed == gpa_ranges.len() {
            Ok(())
        } else {
            Err(PinUnpinError {
                ranges_processed,
                error: HvError::OperationFailed,
            })
        }
    }
2149
2150    fn perform_pin_unpin_gpa_ranges(
2151        &self,
2152        gpa_ranges: &[MemoryRange],
2153        action: GpaPinUnpinAction,
2154        rollback_action: GpaPinUnpinAction,
2155    ) -> Result<(), HvError> {
2156        let hv_gpa_ranges: Vec<HvGpaRange> = Self::to_hv_gpa_range_array(gpa_ranges);
2157
2158        // Attempt to pin/unpin the ranges
2159        match self.pin_unpin_gpa_ranges_internal(&hv_gpa_ranges, action) {
2160            Ok(_) => Ok(()),
2161            Err(PinUnpinError {
2162                error,
2163                ranges_processed,
2164            }) => {
2165                // Unpin the ranges that were successfully pinned
2166                let pinned_ranges = &hv_gpa_ranges[..ranges_processed];
2167                if let Err(rollback_error) =
2168                    self.pin_unpin_gpa_ranges_internal(pinned_ranges, rollback_action)
2169                {
2170                    // Panic if rollback is failing
2171                    panic!(
2172                        "Failed to perform action {:?} on ranges. Error : {:?}. \
2173                        Attempted to rollback {:?} ranges out of {:?}.\n rollback error: {:?}",
2174                        action,
2175                        error,
2176                        ranges_processed,
2177                        gpa_ranges.len(),
2178                        rollback_error
2179                    );
2180                }
2181                // Surface the original error
2182                Err(error)
2183            }
2184        }
2185    }
2186
2187    /// Pins the specified guest physical address ranges in the hypervisor.
2188    /// The memory ranges passed to this function must be VA backed memory.
2189    /// If a partial failure occurs (i.e., some but not all the ranges were successfully pinned),
2190    /// the function will automatically attempt to unpin any successfully pinned ranges.
2191    /// This "rollback" behavior ensures that no partially pinned state remains, which
2192    /// could otherwise lead to inconsistencies.
2193    ///
2194    pub fn pin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2195        self.perform_pin_unpin_gpa_ranges(
2196            ranges,
2197            GpaPinUnpinAction::PinGpaRange,
2198            GpaPinUnpinAction::UnpinGpaRange,
2199        )
2200    }
2201
2202    /// Unpins the specified guest physical address ranges in the hypervisor.
2203    /// The memory ranges passed to this function must be VA backed memory.
2204    /// If a partial failure occurs (i.e., some but not all the ranges were successfully unpinned),
2205    /// the function will automatically attempt to pin any successfully unpinned ranges. This "rollback"
2206    /// behavior ensures that no partially unpinned state remains, which could otherwise lead to inconsistencies.
2207    ///
2208    pub fn unpin_gpa_ranges(&self, ranges: &[MemoryRange]) -> Result<(), HvError> {
2209        self.perform_pin_unpin_gpa_ranges(
2210            ranges,
2211            GpaPinUnpinAction::UnpinGpaRange,
2212            GpaPinUnpinAction::PinGpaRange,
2213        )
2214    }
2215
2216    /// Sets the VTL protection mask for the specified memory range.
2217    pub fn modify_vtl_protection_mask(
2218        &self,
2219        range: MemoryRange,
2220        map_flags: HvMapGpaFlags,
2221        target_vtl: HvInputVtl,
2222    ) -> Result<(), ApplyVtlProtectionsError> {
2223        if self.isolation.is_hardware_isolated() {
2224            // TODO SNP TODO TDX - required for vmbus relay monitor page support
2225            todo!();
2226        }
2227
2228        self.mshv_hvcall
2229            .modify_vtl_protection_mask(range, map_flags, target_vtl)
2230    }
2231
    /// Checks whether the target vtl has vtl permissions for the given gpa
    ///
    /// Returns `Ok(None)` when the requested access is permitted, or
    /// `Ok(Some(_))` describing the intercepting VTL and the denied access
    /// flags when it is not.
    pub fn check_vtl_access(
        &self,
        gpa: u64,
        target_vtl: GuestVtl,
        flags: HvMapGpaFlags,
    ) -> Result<Option<CheckVtlAccessResult>, Error> {
        // This hypercall path is not used for hardware isolated partitions.
        assert!(!self.isolation.is_hardware_isolated());

        let header = hvdef::hypercall::CheckSparseGpaPageVtlAccess {
            partition_id: HV_PARTITION_ID_SELF,
            target_vtl: HvInputVtl::from(target_vtl),
            desired_access: u32::from(flags) as u8,
            reserved0: 0,
            reserved1: 0,
        };

        // A single rep element is checked, so a single output element suffices.
        let mut output = [hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput::new()];

        // SAFETY: The input header and rep slice are the correct types for this hypercall.
        //         The hypercall output is validated right after the hypercall is issued.
        let status = unsafe {
            self.mshv_hvcall.hvcall_rep::<hvdef::hypercall::CheckSparseGpaPageVtlAccess, u64, hvdef::hypercall::CheckSparseGpaPageVtlAccessOutput>(
                HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
                &header,
                HvcallRepInput::Elements(&[gpa >> hvdef::HV_PAGE_SHIFT]),
                Some(&mut output),
            )
            .expect("check_vtl_access hypercall should not fail")
        };

        status.result().map_err(Error::CheckVtlAccess)?;

        let access_result = output[0];

        // A non-SUCCESS result code means access was denied; report the
        // intercepting VTL and which access flags were denied.
        if access_result.result_code() as u32
            != hvdef::hypercall::CheckGpaPageVtlAccessResultCode::SUCCESS.0
        {
            return Ok(Some(CheckVtlAccessResult {
                vtl: (access_result.intercepting_vtl() as u8)
                    .try_into()
                    .expect("checking vtl permissions failure should return valid vtl"),
                denied_flags: (access_result.denied_access() as u32).into(),
            }));
        }

        assert_eq!(status.elements_processed(), 1);
        Ok(None)
    }
2281
2282    /// Enables a vtl for the partition
2283    pub fn enable_partition_vtl(
2284        &self,
2285        vtl: GuestVtl,
2286        flags: hvdef::hypercall::EnablePartitionVtlFlags,
2287    ) -> Result<(), HvError> {
2288        use hvdef::hypercall;
2289
2290        let header = hypercall::EnablePartitionVtl {
2291            partition_id: HV_PARTITION_ID_SELF,
2292            target_vtl: vtl.into(),
2293            flags,
2294            reserved_z0: 0,
2295            reserved_z1: 0,
2296        };
2297
2298        // SAFETY: The input header and slice are the correct types for this hypercall.
2299        //         The hypercall output is validated right after the hypercall is issued.
2300        let status = unsafe {
2301            self.mshv_hvcall
2302                .hvcall(HypercallCode::HvCallEnablePartitionVtl, &header, &mut ())
2303                .expect("submitting hypercall should not fail")
2304        };
2305
2306        status.result()
2307    }
2308
2309    /// Enables a vtl on a vp
2310    pub fn enable_vp_vtl(
2311        &self,
2312        vp_index: u32,
2313        vtl: GuestVtl,
2314        hv_vp_context: InitialVpContextX64,
2315    ) -> Result<(), HvError> {
2316        use hvdef::hypercall;
2317
2318        let header = hypercall::EnableVpVtlX64 {
2319            partition_id: HV_PARTITION_ID_SELF,
2320            vp_index,
2321            target_vtl: vtl.into(),
2322            reserved: [0; 3],
2323            vp_vtl_context: hv_vp_context,
2324        };
2325
2326        // SAFETY: The input header and slice are the correct types for this hypercall.
2327        //         The hypercall output is validated right after the hypercall is issued.
2328        let status = unsafe {
2329            self.mshv_hvcall
2330                .hvcall(HypercallCode::HvCallEnableVpVtl, &header, &mut ())
2331                .expect("submitting hypercall should not fail")
2332        };
2333
2334        status.result()
2335    }
2336
2337    /// Gets the PFN for the VTL 1 VMSA
2338    pub fn vtl1_vmsa_pfn(&self, cpu_index: u32) -> u64 {
2339        let mut vp_pfn = cpu_index as u64; // input vp, output pfn
2340
2341        // SAFETY: The ioctl requires no prerequisites other than the VTL 1 VMSA
2342        // should be mapped. This ioctl should never fail as long as the vtl 1
2343        // VMSA was mapped.
2344        unsafe {
2345            hcl_read_guest_vsm_page_pfn(self.mshv_vtl.file.as_raw_fd(), &mut vp_pfn)
2346                .expect("should always succeed");
2347        }
2348
2349        vp_pfn
2350    }
2351
    /// Returns the isolation type for the partition.
    ///
    /// This is the value passed to [`Hcl::new`], which was validated there
    /// against the hypervisor's advertised isolation type.
    pub fn isolation(&self) -> IsolationType {
        self.isolation
    }
2356
2357    /// Reads MSR_IA32_VMX_CR4_FIXED1 in kernel mode.
2358    pub fn read_vmx_cr4_fixed1(&self) -> u64 {
2359        let mut value = 0;
2360
2361        // SAFETY: The ioctl requires no prerequisites other than a location to
2362        // write the read MSR. This ioctl should never fail.
2363        unsafe {
2364            hcl_read_vmx_cr4_fixed1(self.mshv_vtl.file.as_raw_fd(), &mut value)
2365                .expect("should always succeed");
2366        }
2367
2368        value
2369    }
2370
2371    /// Invokes the HvCallRetargetDeviceInterrupt hypercall.
2372    /// `target_processors` must be sorted in ascending order.
2373    pub fn retarget_device_interrupt(
2374        &self,
2375        device_id: u64,
2376        entry: hvdef::hypercall::InterruptEntry,
2377        vector: u32,
2378        multicast: bool,
2379        target_processors: ProcessorSet<'_>,
2380        proxy_redirect: bool,
2381    ) -> Result<(), HvError> {
2382        let header = hvdef::hypercall::RetargetDeviceInterrupt {
2383            partition_id: HV_PARTITION_ID_SELF,
2384            device_id,
2385            entry,
2386            rsvd: 0,
2387            target_header: hvdef::hypercall::InterruptTarget {
2388                vector,
2389                flags: hvdef::hypercall::HvInterruptTargetFlags::default()
2390                    .with_multicast(multicast)
2391                    .with_processor_set(true)
2392                    .with_proxy_redirect(proxy_redirect),
2393                // Always use a generic processor set to simplify construction. This hypercall is
2394                // invoked relatively infrequently, the overhead should be acceptable.
2395                mask_or_format: hvdef::hypercall::HV_GENERIC_SET_SPARSE_4K,
2396            },
2397        };
2398        let processor_set = Vec::from_iter(target_processors.as_generic_set());
2399
2400        // SAFETY: The input header and slice are the correct types for this hypercall.
2401        //         The hypercall output is validated right after the hypercall is issued.
2402        let status = unsafe {
2403            self.mshv_hvcall
2404                .hvcall_var(
2405                    HypercallCode::HvCallRetargetDeviceInterrupt,
2406                    &header,
2407                    processor_set.as_bytes(),
2408                    &mut (),
2409                )
2410                .expect("submitting hypercall should not fail")
2411        };
2412
2413        status.result()
2414    }
2415
2416    /// Gets the permissions for a vtl.
2417    /// Currently unused, but available for debugging purposes
2418    #[cfg(debug_assertions)]
2419    pub fn rmp_query(&self, gpa: u64, vtl: GuestVtl) -> x86defs::snp::SevRmpAdjust {
2420        use x86defs::snp::SevRmpAdjust;
2421
2422        let page_count = 1u64;
2423        let flags = [u64::from(SevRmpAdjust::new().with_target_vmpl(match vtl {
2424            GuestVtl::Vtl0 => 2,
2425            GuestVtl::Vtl1 => 1,
2426        }))];
2427        let page_size = [0u64];
2428        let pages_processed = 0;
2429
2430        debug_assert!(flags.len() == page_count as usize);
2431        debug_assert!(page_size.len() == page_count as usize);
2432
2433        let query = mshv_rmpquery {
2434            start_pfn: gpa / HV_PAGE_SIZE,
2435            page_count,
2436            terminate_on_failure: 0,
2437            ram: 0,
2438            padding: Default::default(),
2439            flags: flags.as_ptr().cast_mut(),
2440            page_size: page_size.as_ptr().cast_mut(),
2441            pages_processed: core::ptr::from_ref(&pages_processed).cast_mut(),
2442        };
2443
2444        // SAFETY: the input query is the correct type for this ioctl
2445        unsafe {
2446            hcl_rmpquery_pages(self.mshv_vtl.file.as_raw_fd(), &query)
2447                .expect("should always succeed");
2448        }
2449        debug_assert!(pages_processed <= page_count);
2450
2451        SevRmpAdjust::from(flags[0])
2452    }
2453
2454    /// Issues an INVLPGB instruction.
2455    pub fn invlpgb(&self, rax: u64, edx: u32, ecx: u32) {
2456        let data = mshv_invlpgb {
2457            rax,
2458            edx,
2459            ecx,
2460            _pad0: 0,
2461            _pad1: 0,
2462        };
2463        // SAFETY: ioctl has no prerequisites.
2464        unsafe {
2465            hcl_invlpgb(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
2466        }
2467    }
2468
2469    /// Issues a TLBSYNC instruction.
2470    pub fn tlbsync(&self) {
2471        // SAFETY: ioctl has no prerequisites.
2472        unsafe {
2473            hcl_tlbsync(self.mshv_vtl.file.as_raw_fd()).expect("should always succeed");
2474        }
2475    }
2476
2477    /// Causes the specified CPUs to be woken out of a lower VTL.
2478    pub fn kick_cpus(
2479        &self,
2480        cpus: impl IntoIterator<Item = u32>,
2481        cancel_run: bool,
2482        wait_for_other_cpus: bool,
2483    ) {
2484        let mut cpu_bitmap: BitVec<u8> = BitVec::from_vec(vec![0; self.vps.len().div_ceil(8)]);
2485        for cpu in cpus {
2486            cpu_bitmap.set(cpu as usize, true);
2487        }
2488
2489        let data = protocol::hcl_kick_cpus {
2490            len: cpu_bitmap.len() as u64,
2491            cpu_mask: cpu_bitmap.as_bitptr().pointer(),
2492            flags: protocol::hcl_kick_cpus_flags::new()
2493                .with_cancel_run(cancel_run)
2494                .with_wait_for_other_cpus(wait_for_other_cpus),
2495        };
2496
2497        // SAFETY: ioctl has no prerequisites.
2498        unsafe {
2499            hcl_kickcpus(self.mshv_vtl.file.as_raw_fd(), &data).expect("should always succeed");
2500        }
2501    }
2502
2503    /// Map or unmap guest device interrupt vector in VTL2 kernel
2504    pub fn map_redirected_device_interrupt(
2505        &self,
2506        vector: u32,
2507        apic_id: u32,
2508        create_mapping: bool,
2509    ) -> Result<u32, Error> {
2510        let mut param = mshv_map_device_int {
2511            vector,
2512            apic_id,
2513            create_mapping: create_mapping.into(),
2514            padding: [0; 7],
2515        };
2516
2517        // SAFETY: following the IOCTL definition.
2518        unsafe {
2519            hcl_map_redirected_device_interrupt(self.mshv_vtl.file.as_raw_fd(), &mut param)
2520                .map_err(Error::MapRedirectedDeviceInterrupt)?;
2521        }
2522
2523        Ok(param.vector)
2524    }
2525
2526    /// Restore partition time. This is typically called after resume from
2527    /// hibernate to synchronize the TSC with the value at hibernate time.
2528    pub fn restore_partition_time(
2529        &self,
2530        tsc_sequence: u32,
2531        reference_time_in_100_ns: u64,
2532        tsc: u64,
2533    ) -> Result<(), Error> {
2534        let partition_time = mshv_restore_partition_time {
2535            tsc_sequence,
2536            reserved: 0,
2537            reference_time_in_100_ns,
2538            tsc,
2539        };
2540
2541        // SAFETY: ioctl has no prerequisites.
2542        unsafe {
2543            hcl_restore_partition_time(self.mshv_vtl.file.as_raw_fd(), &partition_time)
2544                .map_err(Error::RestorePartitionTime)?;
2545        }
2546
2547        Ok(())
2548    }
2549}