guestmem/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Interfaces to read and write guest memory.
5
6// UNSAFETY: This crate's whole purpose is manual memory mapping and management.
7#![expect(unsafe_code)]
8#![expect(missing_docs)]
9
10pub mod ranges;
11
12use self::ranges::PagedRange;
13use inspect::Inspect;
14use pal_event::Event;
15use sparse_mmap::AsMappableRef;
16use std::any::Any;
17use std::fmt::Debug;
18use std::future::Future;
19use std::io;
20use std::ops::Deref;
21use std::ops::DerefMut;
22use std::ops::Range;
23use std::ptr::NonNull;
24use std::sync::Arc;
25use std::sync::atomic::AtomicU8;
26use thiserror::Error;
27use zerocopy::FromBytes;
28use zerocopy::FromZeros;
29use zerocopy::Immutable;
30use zerocopy::IntoBytes;
31use zerocopy::KnownLayout;
32
/// Effective page size for page-related operations in this crate.
pub const PAGE_SIZE: usize = 4096;
/// [`PAGE_SIZE`] as a `u64`, defined in terms of the canonical constant so the
/// two can never drift apart.
const PAGE_SIZE64: u64 = PAGE_SIZE as u64;
36
/// A memory access error returned by one of the [`GuestMemory`] methods.
///
/// The payload is boxed so that the error (and any `Result` carrying it) stays
/// pointer-sized on hot paths.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct GuestMemoryError(Box<GuestMemoryErrorInner>);
41
42impl GuestMemoryError {
43    fn new(
44        debug_name: &Arc<str>,
45        range: Option<Range<u64>>,
46        op: GuestMemoryOperation,
47        err: GuestMemoryBackingError,
48    ) -> Self {
49        GuestMemoryError(Box::new(GuestMemoryErrorInner {
50            op,
51            debug_name: debug_name.clone(),
52            range,
53            gpa: (err.gpa != INVALID_ERROR_GPA).then_some(err.gpa),
54            kind: err.kind,
55            err: err.err,
56        }))
57    }
58
59    /// Returns the kind of the error.
60    pub fn kind(&self) -> GuestMemoryErrorKind {
61        self.0.kind
62    }
63}
64
// The high-level operation that was in progress when a guest memory access
// failed. Used only to build the error message via `Display` below.
#[derive(Debug, Copy, Clone)]
enum GuestMemoryOperation {
    Read,
    Write,
    Fill,
    CompareExchange,
    Lock,
    Subrange,
    Probe,
}
75
76impl std::fmt::Display for GuestMemoryOperation {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        f.pad(match self {
79            GuestMemoryOperation::Read => "read",
80            GuestMemoryOperation::Write => "write",
81            GuestMemoryOperation::Fill => "fill",
82            GuestMemoryOperation::CompareExchange => "compare exchange",
83            GuestMemoryOperation::Lock => "lock",
84            GuestMemoryOperation::Subrange => "subrange",
85            GuestMemoryOperation::Probe => "probe",
86        })
87    }
88}
89
// Boxed payload of `GuestMemoryError`, carrying the full context of a failed
// access for the `Display` implementation below.
#[derive(Debug, Error)]
struct GuestMemoryErrorInner {
    // The operation that was being performed when the failure occurred.
    op: GuestMemoryOperation,
    // The debug name of the `GuestMemory`, included in the error message.
    debug_name: Arc<str>,
    // The full range the caller attempted to access, if known.
    range: Option<Range<u64>>,
    // The precise failing address, if the backing reported one.
    gpa: Option<u64>,
    kind: GuestMemoryErrorKind,
    #[source]
    err: Box<dyn std::error::Error + Send + Sync>,
}
100
101impl std::fmt::Display for GuestMemoryErrorInner {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        write!(
104            f,
105            "guest memory '{debug_name}': {op} error: failed to access ",
106            debug_name = self.debug_name,
107            op = self.op
108        )?;
109        if let Some(range) = &self.range {
110            write!(f, "{:#x}-{:#x}", range.start, range.end)?;
111        } else {
112            f.write_str("memory")?;
113        }
114        // Include the precise GPA if provided and different from the start of
115        // the range.
116        if let Some(gpa) = self.gpa {
117            if self.range.as_ref().is_none_or(|range| range.start != gpa) {
118                write!(f, " at {:#x}", gpa)?;
119            }
120        }
121        Ok(())
122    }
123}
124
/// A memory access error returned by a [`GuestMemoryAccess`] trait method.
#[derive(Debug)]
pub struct GuestMemoryBackingError {
    // The failing guest physical address, or `INVALID_ERROR_GPA` when no
    // precise address is available.
    gpa: u64,
    kind: GuestMemoryErrorKind,
    err: Box<dyn std::error::Error + Send + Sync>,
}

/// The kind of memory access error.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum GuestMemoryErrorKind {
    /// An error that does not fit any other category.
    Other,
    /// The address is outside the valid range of the memory.
    OutOfRange,
    /// The memory has been protected by a higher virtual trust level.
    VtlProtected,
    /// The memory is shared but was accessed via a private address.
    NotPrivate,
    /// The memory is private but was accessed via a shared address.
    NotShared,
}

/// An error returned by a page fault handler in [`GuestMemoryAccess::page_fault`].
pub struct PageFaultError {
    // The classification surfaced to callers via `GuestMemoryError::kind`.
    kind: GuestMemoryErrorKind,
    err: Box<dyn std::error::Error + Send + Sync>,
}
154
155impl PageFaultError {
156    /// Returns a new page fault error.
157    pub fn new(
158        kind: GuestMemoryErrorKind,
159        err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
160    ) -> Self {
161        Self {
162            kind,
163            err: err.into(),
164        }
165    }
166
167    /// Returns a page fault error without an explicit kind.
168    pub fn other(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Self {
169        Self::new(GuestMemoryErrorKind::Other, err)
170    }
171}
172
/// Used to avoid needing an `Option` for [`GuestMemoryBackingError::gpa`], to
/// save size in hot paths.
///
/// `u64::MAX` (spelled explicitly rather than `!0` for clarity) can never be a
/// valid guest physical address here.
const INVALID_ERROR_GPA: u64 = u64::MAX;
176
177impl GuestMemoryBackingError {
178    /// Returns a new error for a memory access failure at address `gpa`.
179    pub fn new(
180        kind: GuestMemoryErrorKind,
181        gpa: u64,
182        err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
183    ) -> Self {
184        // `gpa` might incorrectly be INVALID_ERROR_GPA; this is harmless (just
185        // affecting the error message), so don't assert on it in case this is
186        // an untrusted value in some path.
187        Self {
188            kind,
189            gpa,
190            err: err.into(),
191        }
192    }
193
194    /// Returns a new error without an explicit kind.
195    pub fn other(gpa: u64, err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Self {
196        Self::new(GuestMemoryErrorKind::Other, gpa, err)
197    }
198
199    fn gpn(err: InvalidGpn) -> Self {
200        Self {
201            kind: GuestMemoryErrorKind::OutOfRange,
202            gpa: INVALID_ERROR_GPA,
203            err: err.into(),
204        }
205    }
206}
207
// Zero-sized error payloads used as the `source` of backing errors.

#[derive(Debug, Error)]
#[error("no memory at address")]
struct OutOfRange;

#[derive(Debug, Error)]
#[error("memory not lockable")]
struct NotLockable;

#[derive(Debug, Error)]
#[error("no fallback for this operation")]
struct NoFallback;

#[derive(Debug, Error)]
#[error("the specified page is not mapped")]
struct NotMapped;

#[derive(Debug, Error)]
#[error("page inaccessible in bitmap")]
struct BitmapFailure;
227
/// A trait for a guest memory backing that is fully available via a virtual
/// address mapping, as opposed to the fallback functions such as
/// [`GuestMemoryAccess::read_fallback`].
///
/// By implementing this trait, a type guarantees that its
/// [`GuestMemoryAccess::mapping`] will return `Some(_)` and that all of its
/// memory can be accessed through that mapping, without needing to call the
/// fallback functions.
///
/// This is a marker trait; it adds no methods of its own.
pub trait LinearGuestMemory: GuestMemoryAccess {}
237
238// SAFETY: the allocation will stay valid for the lifetime of the object.
239unsafe impl GuestMemoryAccess for sparse_mmap::alloc::SharedMem {
240    fn mapping(&self) -> Option<NonNull<u8>> {
241        NonNull::new(self.as_ptr().cast_mut().cast())
242    }
243
244    fn max_address(&self) -> u64 {
245        self.len() as u64
246    }
247}
248
249impl LinearGuestMemory for sparse_mmap::alloc::SharedMem {}
250
/// A page-aligned heap allocation for use with [`GuestMemory`].
///
/// Backed by whole 4 KiB pages of `AtomicU8`, so the contents can be read and
/// written concurrently through shared references.
pub struct AlignedHeapMemory {
    pages: Box<[AlignedPage]>,
}
255
256impl Debug for AlignedHeapMemory {
257    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258        f.debug_struct("AlignedHeapMemory")
259            .field("len", &self.len())
260            .finish()
261    }
262}
263
// One 4 KiB page of atomics; the 4096-byte alignment makes the whole
// `AlignedHeapMemory` allocation page-aligned.
#[repr(C, align(4096))]
struct AlignedPage([AtomicU8; PAGE_SIZE]);
266
267impl AlignedHeapMemory {
268    /// Allocates a new memory of `size` bytes, rounded up to a page size.
269    pub fn new(size: usize) -> Self {
270        #[expect(clippy::declare_interior_mutable_const)] // <https://github.com/rust-lang/rust-clippy/issues/7665>
271        const ZERO: AtomicU8 = AtomicU8::new(0);
272        #[expect(clippy::declare_interior_mutable_const)]
273        const ZERO_PAGE: AlignedPage = AlignedPage([ZERO; PAGE_SIZE]);
274        let mut pages = Vec::new();
275        pages.resize_with(size.div_ceil(PAGE_SIZE), || ZERO_PAGE);
276        Self {
277            pages: pages.into(),
278        }
279    }
280
281    /// Returns the length of the memory in bytes.
282    pub fn len(&self) -> usize {
283        self.pages.len() * PAGE_SIZE
284    }
285
286    /// Returns an immutable slice of bytes.
287    ///
288    /// This must take `&mut self` since the buffer is mutable via interior
289    /// mutability with just `&self`.
290    pub fn as_bytes(&mut self) -> &[u8] {
291        self.as_mut()
292    }
293
294    /// Returns a mutable slice of bytes.
295    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
296        self.as_mut()
297    }
298}
299
// Dereferences to a flat `[AtomicU8]` view of all pages, usable concurrently
// through `&self`.
impl Deref for AlignedHeapMemory {
    type Target = [AtomicU8];

    fn deref(&self) -> &Self::Target {
        // SAFETY: the buffer has the correct size and validity.
        unsafe { std::slice::from_raw_parts(self.pages.as_ptr().cast(), self.len()) }
    }
}

impl DerefMut for AlignedHeapMemory {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: the buffer is unaliased and valid.
        unsafe { std::slice::from_raw_parts_mut(self.pages.as_mut_ptr().cast(), self.len()) }
    }
}

// Goes through `Deref` above.
impl AsRef<[AtomicU8]> for AlignedHeapMemory {
    fn as_ref(&self) -> &[AtomicU8] {
        self
    }
}

// Goes through `DerefMut` above.
impl AsMut<[AtomicU8]> for AlignedHeapMemory {
    fn as_mut(&mut self) -> &mut [AtomicU8] {
        self
    }
}

// Plain-byte view; sound because `&mut self` guarantees exclusive access.
impl AsMut<[u8]> for AlignedHeapMemory {
    fn as_mut(&mut self) -> &mut [u8] {
        // FUTURE: use AtomicU8::get_mut_slice once stabilized.
        // SAFETY: the buffer is unaliased, so it is fine to cast away the atomicness of the
        // slice.
        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), self.len()) }
    }
}
336
337// SAFETY: the allocation remains alive and valid for the lifetime of the
338// object.
339unsafe impl GuestMemoryAccess for AlignedHeapMemory {
340    fn mapping(&self) -> Option<NonNull<u8>> {
341        NonNull::new(self.pages.as_ptr().cast_mut().cast())
342    }
343
344    fn max_address(&self) -> u64 {
345        (self.pages.len() * PAGE_SIZE) as u64
346    }
347}
348
349impl LinearGuestMemory for AlignedHeapMemory {}
350
/// A shareable region of guest memory backed by a file (Unix) or
/// section handle (Windows).
///
/// The backing file must already contain committed data for the region —
/// the consumer will map it directly, without any guestmem-managed lazy
/// commitment or fault handling. All bytes in the range must be accessible
/// without triggering SIGSEGV or SIGBUS due to missing backing. Normal OS
/// demand paging and minor faults on first access are still expected; this
/// requirement is specifically incompatible with bitmap-gated access or
/// lazy fault-in schemes.
pub struct ShareableRegion {
    /// Guest physical address of this region.
    pub guest_address: u64,
    /// Size in bytes.
    pub size: u64,
    /// Backing file/handle, shared via `Arc` to avoid OS-level `dup()`.
    pub file: Arc<sparse_mmap::Mappable>,
    /// Offset into `file` where this region starts.
    pub file_offset: u64,
}

/// Error type for [`ProvideShareableRegions::get_regions`].
pub type ShareableRegionError = Box<dyn std::error::Error + Send + Sync>;

/// Opaque control object for accessing the shareable backing of guest
/// memory. Not all `GuestMemory` instances support this — those backed
/// by private memory or heap allocations return `None`.
///
/// # Contract
///
/// * The regions returned by [`get_regions`](Self::get_regions) must have
///   fully committed backing — the consumer will map them directly,
///   without guestmem-managed fault handling.
/// * The set of regions is currently static for the lifetime of the VM.
///   Hotplug and hot-remove of shareable regions are not yet supported;
///   once they are, additional methods will be added here to notify
///   consumers of changes.
pub struct GuestMemorySharing {
    // Boxed so the concrete provider type is erased behind the
    // dyn-compatible wrapper trait.
    inner: Box<dyn DynProvideShareableRegions>,
}
391
392impl GuestMemorySharing {
393    /// Construct from a trait implementation. Called by `GuestMemoryAccess`
394    /// implementations (e.g., `VaMapper` in membacking).
395    pub fn new(inner: impl ProvideShareableRegions + 'static) -> Self {
396        Self {
397            inner: Box::new(inner),
398        }
399    }
400
401    /// Return the current set of shareable backing regions.
402    pub async fn get_regions(&self) -> Result<Vec<ShareableRegion>, ShareableRegionError> {
403        self.inner.get_regions().await
404    }
405}
406
/// Trait for providing shareable region information.
///
/// Implementors must return regions whose backing files have fully
/// committed data — consumers will map them directly without
/// guestmem-managed fault handling. The region set is currently static;
/// dynamic updates (hotplug / hot-remove) are not yet supported.
///
/// This trait must be public so that crates like `membacking` can
/// implement it, but callers should interact with
/// [`GuestMemorySharing`]'s methods rather than this trait directly.
pub trait ProvideShareableRegions: Send + Sync {
    /// Return the current set of shareable backing regions.
    fn get_regions(
        &self,
    ) -> impl Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_;
}

/// Dyn-compatible version of [`ProvideShareableRegions`].
///
/// Needed because the public trait returns `impl Future`, which is not
/// object-safe; this mirror boxes the future instead.
trait DynProvideShareableRegions: Send + Sync {
    fn get_regions(
        &self,
    ) -> std::pin::Pin<
        Box<dyn Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_>,
    >;
}

// Blanket bridge: every `ProvideShareableRegions` automatically provides the
// boxed, dyn-compatible form.
impl<T: ProvideShareableRegions> DynProvideShareableRegions for T {
    fn get_regions(
        &self,
    ) -> std::pin::Pin<
        Box<dyn Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_>,
    > {
        Box::pin(ProvideShareableRegions::get_regions(self))
    }
}
442
/// A trait for a guest memory backing.
///
/// Guest memory may be backed by a virtual memory mapping, in which case this
/// trait can provide the VA and length of that mapping. Alternatively, it may
/// be backed by some other means, in which case this trait can provide fallback
/// methods for reading and writing memory.
///
/// Memory access should first be attempted via the virtual address mapping. If
/// this fails or is not present, the caller should fall back to `read_fallback`
/// or `write_fallback`. This allows an implementation to have a fast path using
/// the mapping, and a slow path using the fallback functions.
///
/// # Safety
///
/// The implementor must follow the contract for each method.
pub unsafe trait GuestMemoryAccess: 'static + Send + Sync {
    /// Returns a stable VA mapping for guest memory.
    ///
    /// The size of the mapping is the same as `max_address`.
    ///
    /// The VA is guaranteed to remain reserved, but individual ranges may be
    /// uncommitted.
    fn mapping(&self) -> Option<NonNull<u8>>;

    /// The maximum address that can be passed to the `*_fallback` methods, as
    /// well as the maximum offset into the VA range described by `mapping`.
    fn max_address(&self) -> u64;

    /// The bitmaps to check for validity, one bit per page. If a bit is set,
    /// then the page is valid to access via the mapping; if it is clear, then
    /// the page will not be accessed.
    ///
    /// The bitmaps must be at least `ceil(bitmap_start + max_address() /
    /// PAGE_SIZE)` bits long, and they must be valid for atomic read access for
    /// the lifetime of this object from any thread.
    ///
    /// The bitmaps are only checked if there is a mapping. If the bitmap check
    /// fails, then the associated `*_fallback` routine is called to handle the
    /// error.
    ///
    /// Bitmap checks are performed under the [`rcu()`] RCU domain, with relaxed
    /// accesses. After a thread updates the bitmap to be more restrictive, it
    /// must call [minircu::RcuDomain::synchronize()] on [`minircu::global()`]
    /// to ensure that all threads see the update before taking any action that
    /// depends on the bitmap update being visible.
    #[cfg(feature = "bitmap")]
    fn access_bitmap(&self) -> Option<BitmapInfo> {
        None
    }

    /// Returns an accessor for a subrange, or `None` to use the default
    /// implementation.
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        let _ = (offset, len, allow_preemptive_locking);
        Ok(None)
    }

    /// Called when access to memory via the mapped range fails, either due to a
    /// bitmap failure or due to a failure when accessing the virtual address.
    ///
    /// `address` is the address where the access failed. `len` is the remainder
    /// of the access; it is not necessarily the case that all `len` bytes are
    /// inaccessible in the bitmap or mapping.
    ///
    /// Returns whether the faulting operation should be retried, failed, or that
    /// one of the fallback operations (e.g. `read_fallback`) should be called.
    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        let _ = (address, len, write);
        let err = if bitmap_failure {
            PageFaultError::other(BitmapFailure)
        } else {
            PageFaultError::other(NotMapped)
        };
        PageFaultAction::Fail(err)
    }

    /// Fallback called if a read fails via direct access to `mapped_range`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// Implementors must ensure that `dest[..len]` is fully initialized on
    /// successful return.
    ///
    /// # Safety
    /// The caller must ensure that `dest[..len]` is valid for write. Note,
    /// however, that `dest` might be aliased by other threads, the guest, or
    /// the kernel.
    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let _ = (dest, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a write fails via direct access to `mapped_range`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// # Safety
    /// The caller must ensure that `src[..len]` is valid for read. Note,
    /// however, that `src` might be aliased by other threads, the guest, or
    /// the kernel.
    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let _ = (src, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a fill fails via direct access to `mapped_range`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
        let _ = (val, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a compare exchange fails via direct access to `mapped_range`.
    ///
    /// On compare failure, returns `Ok(false)` and updates `current`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        let _ = (current, new);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Prepares a guest page for having its virtual address exposed as part of
    /// a lock call.
    ///
    /// This is useful to ensure that the address is mapped in a way that it can
    /// be passed to the kernel for DMA.
    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        let _ = (address, len);
        Ok(())
    }

    /// Returns the base IO virtual address for the mapping.
    ///
    /// This is the base address that should be used for DMA from a user-mode
    /// device driver whose device is not otherwise configured to go through an
    /// IOMMU.
    fn base_iova(&self) -> Option<u64> {
        None
    }

    /// Locks the specified guest physical pages (GPNs), preventing any mapping
    /// or permission changes until they are unlocked.
    ///
    /// Returns a boolean indicating whether unlocking is required.
    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
        let _ = gpns;
        Ok(false)
    }

    /// Unlocks the specified guest physical pages (GPNs) after exclusive access.
    ///
    /// Panics if asked to unlock a page that was not previously locked. The
    /// caller must ensure that the given slice has the same ordering as the
    /// one passed to `lock_gpns`.
    fn unlock_gpns(&self, gpns: &[u64]) {
        let _ = gpns;
    }

    /// Return a sharing control object if this memory backing supports
    /// file-based sharing (e.g., memfd on Linux, section on Windows).
    ///
    /// Returns `None` for private memory, heap-backed test memory, or
    /// other non-shareable backings.
    fn sharing(&self) -> Option<GuestMemorySharing> {
        None
    }
}
642
// Dyn-compatible mirror of the fallible/fallback portion of
// `GuestMemoryAccess`, so that backings can be stored as a trait object.
// NOTE(review): the infallible accessors (`mapping`, `max_address`, etc.) are
// not mirrored here — presumably they are captured separately when the
// backing is registered; confirm against the `GuestMemory` constructor.
trait DynGuestMemoryAccess: 'static + Send + Sync + Any {
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError>;

    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction;

    /// # Safety
    /// See [`GuestMemoryAccess::read_fallback`].
    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError>;

    /// # Safety
    /// See [`GuestMemoryAccess::write_fallback`].
    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError>;

    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError>;

    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError>;

    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError>;

    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError>;

    fn unlock_gpns(&self, gpns: &[u64]);

    fn sharing(&self) -> Option<GuestMemorySharing>;
}
694
// Blanket bridge: every `GuestMemoryAccess` implementation automatically
// provides the dyn-compatible form. Each method is a direct pass-through.
impl<T: GuestMemoryAccess> DynGuestMemoryAccess for T {
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        self.subrange(offset, len, allow_preemptive_locking)
    }

    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        self.page_fault(address, len, write, bitmap_failure)
    }

    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        // SAFETY: guaranteed by caller.
        unsafe { self.read_fallback(addr, dest, len) }
    }

    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        // SAFETY: guaranteed by caller.
        unsafe { self.write_fallback(addr, src, len) }
    }

    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
        self.fill_fallback(addr, val, len)
    }

    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        self.compare_exchange_fallback(addr, current, new)
    }

    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        self.expose_va(address, len)
    }

    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
        self.lock_gpns(gpns)
    }

    fn unlock_gpns(&self, gpns: &[u64]) {
        self.unlock_gpns(gpns)
    }

    fn sharing(&self) -> Option<GuestMemorySharing> {
        self.sharing()
    }
}
764
/// The action to take after [`GuestMemoryAccess::page_fault`] returns to
/// continue the operation.
pub enum PageFaultAction {
    /// Fail the operation.
    Fail(PageFaultError),
    /// Retry the operation.
    Retry,
    /// Use the fallback method to access the memory.
    Fallback,
}

/// Returned by [`GuestMemoryAccess::access_bitmap`].
#[cfg(feature = "bitmap")]
pub struct BitmapInfo {
    /// A pointer to the bitmap for read access.
    pub read_bitmap: NonNull<u8>,
    /// A pointer to the bitmap for write access.
    pub write_bitmap: NonNull<u8>,
    /// The bit offset of the beginning of the bitmap.
    ///
    /// Typically this is zero, but it is needed to support subranges that are
    /// not 8-page multiples.
    pub bit_offset: u8,
}
789
790// SAFETY: passing through guarantees from `T`.
791unsafe impl<T: GuestMemoryAccess> GuestMemoryAccess for Arc<T> {
792    fn mapping(&self) -> Option<NonNull<u8>> {
793        self.as_ref().mapping()
794    }
795
796    fn max_address(&self) -> u64 {
797        self.as_ref().max_address()
798    }
799
800    #[cfg(feature = "bitmap")]
801    fn access_bitmap(&self) -> Option<BitmapInfo> {
802        self.as_ref().access_bitmap()
803    }
804
805    fn subrange(
806        &self,
807        offset: u64,
808        len: u64,
809        allow_preemptive_locking: bool,
810    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
811        self.as_ref()
812            .subrange(offset, len, allow_preemptive_locking)
813    }
814
815    fn page_fault(
816        &self,
817        addr: u64,
818        len: usize,
819        write: bool,
820        bitmap_failure: bool,
821    ) -> PageFaultAction {
822        self.as_ref().page_fault(addr, len, write, bitmap_failure)
823    }
824
825    unsafe fn read_fallback(
826        &self,
827        addr: u64,
828        dest: *mut u8,
829        len: usize,
830    ) -> Result<(), GuestMemoryBackingError> {
831        // SAFETY: passing through guarantees from caller.
832        unsafe { self.as_ref().read_fallback(addr, dest, len) }
833    }
834
835    unsafe fn write_fallback(
836        &self,
837        addr: u64,
838        src: *const u8,
839        len: usize,
840    ) -> Result<(), GuestMemoryBackingError> {
841        // SAFETY: passing through guarantees from caller.
842        unsafe { self.as_ref().write_fallback(addr, src, len) }
843    }
844
845    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
846        self.as_ref().fill_fallback(addr, val, len)
847    }
848
849    fn compare_exchange_fallback(
850        &self,
851        addr: u64,
852        current: &mut [u8],
853        new: &[u8],
854    ) -> Result<bool, GuestMemoryBackingError> {
855        self.as_ref().compare_exchange_fallback(addr, current, new)
856    }
857
858    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
859        self.as_ref().expose_va(address, len)
860    }
861
862    fn base_iova(&self) -> Option<u64> {
863        self.as_ref().base_iova()
864    }
865
866    fn sharing(&self) -> Option<GuestMemorySharing> {
867        self.as_ref().sharing()
868    }
869}
870
// SAFETY: the allocation will stay valid for the lifetime of the object.
unsafe impl GuestMemoryAccess for sparse_mmap::SparseMapping {
    fn mapping(&self) -> Option<NonNull<u8>> {
        // The reserved VA range is stable; individual pages may be
        // uncommitted, which is permitted by the `mapping` contract.
        NonNull::new(self.as_ptr().cast())
    }

    fn max_address(&self) -> u64 {
        self.len() as u64
    }
}
881
/// Default guest memory range type, enforcing access boundaries.
struct GuestMemoryAccessRange {
    // The full memory object this range was carved from.
    base: Arc<GuestMemoryInner>,
    // Start offset of this range within `base`, in bytes.
    offset: u64,
    // Length of this range, in bytes.
    len: u64,
    // Index into `base.regions` of the region containing this range.
    region: usize,
}
889
890impl GuestMemoryAccessRange {
891    fn adjust_range(&self, address: u64, len: u64) -> Result<u64, GuestMemoryBackingError> {
892        if address <= self.len && len <= self.len - address {
893            Ok(self.offset + address)
894        } else {
895            Err(GuestMemoryBackingError::new(
896                GuestMemoryErrorKind::OutOfRange,
897                address,
898                OutOfRange,
899            ))
900        }
901    }
902}
903
// SAFETY: `mapping()` is guaranteed to be valid for the lifetime of the object.
unsafe impl GuestMemoryAccess for GuestMemoryAccessRange {
    fn mapping(&self) -> Option<NonNull<u8>> {
        let region = &self.base.regions[self.region];
        region.mapping.and_then(|mapping| {
            // Offset of this range within its region.
            let offset = self.offset & self.base.region_def.region_mask;
            // This is guaranteed by construction.
            assert!(region.len >= offset + self.len);
            // SAFETY: this mapping is guaranteed to be within range by
            // construction (and validated again via the assertion above).
            NonNull::new(unsafe { mapping.0.as_ptr().add(offset as usize) })
        })
    }

    fn max_address(&self) -> u64 {
        // Accesses through this range are bounded by its own length.
        self.len
    }

    #[cfg(feature = "bitmap")]
    fn access_bitmap(&self) -> Option<BitmapInfo> {
        let region = &self.base.regions[self.region];
        region.bitmaps.map(|bitmaps| {
            let offset = self.offset & self.base.region_def.region_mask;
            // Bit position of this range's first page within the region's bitmap.
            let bit_offset = region.bitmap_start as u64 + offset / PAGE_SIZE64;
            let [read_bitmap, write_bitmap] = bitmaps.map(|SendPtrU8(ptr)| {
                // SAFETY: the bitmap is guaranteed to be big enough for the region
                // by construction.
                NonNull::new(unsafe { ptr.as_ptr().add((bit_offset / 8) as usize) }).unwrap()
            });
            // The sub-byte part of the bit position becomes the new start bit.
            let bitmap_start = (bit_offset % 8) as u8;
            BitmapInfo {
                read_bitmap,
                write_bitmap,
                bit_offset: bitmap_start,
            }
        })
    }

    fn subrange(
        &self,
        offset: u64,
        len: u64,
        _allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        // Validate against this range's bounds, then build a narrower range
        // over the same base memory and region.
        let address = self.adjust_range(offset, len)?;
        Ok(Some(GuestMemory::new(
            self.base.debug_name.clone(),
            GuestMemoryAccessRange {
                base: self.base.clone(),
                offset: address,
                len,
                region: self.region,
            },
        )))
    }

    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        let address = self
            .adjust_range(address, len as u64)
            .expect("the caller should have validated the range was in the mapping");

        // Delegate fault handling to the base implementation with the
        // translated absolute address.
        self.base
            .imp
            .page_fault(address, len, write, bitmap_failure)
    }

    unsafe fn write_fallback(
        &self,
        address: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { self.base.imp.write_fallback(address, src, len) }
    }

    fn fill_fallback(
        &self,
        address: u64,
        val: u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        // Translate then delegate to the base implementation.
        let address = self.adjust_range(address, len as u64)?;
        self.base.imp.fill_fallback(address, val, len)
    }

    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        // The access length is the size of the value being exchanged.
        let address = self.adjust_range(addr, new.len() as u64)?;
        self.base
            .imp
            .compare_exchange_fallback(address, current, new)
    }

    unsafe fn read_fallback(
        &self,
        address: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { self.base.imp.read_fallback(address, dest, len) }
    }

    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        // Translate then delegate to the base implementation.
        let address = self.adjust_range(address, len)?;
        self.base.imp.expose_va(address, len)
    }

    fn base_iova(&self) -> Option<u64> {
        // The range's IOVA is the region's base IOVA plus the range's offset
        // within the region.
        let region = &self.base.regions[self.region];
        Some(region.base_iova? + (self.offset & self.base.region_def.region_mask))
    }
}
1030
1031/// Create a default guest memory subrange that verifies range limits and calls
1032/// back into the base implementation.
1033fn create_memory_subrange(
1034    base: Arc<GuestMemoryInner>,
1035    offset: u64,
1036    len: u64,
1037    _allow_preemptive_locking: bool,
1038) -> Result<GuestMemory, GuestMemoryBackingError> {
1039    let (_, _, region) = base.region(offset, len)?;
1040    Ok(GuestMemory::new(
1041        base.debug_name.clone(),
1042        GuestMemoryAccessRange {
1043            base,
1044            offset,
1045            len,
1046            region,
1047        },
1048    ))
1049}
1050
/// Multi-region backing composed of per-region `GuestMemoryAccess`
/// implementations, where `None` marks a hole in the address space.
struct MultiRegionGuestMemoryAccess<T> {
    // One entry per region; padded with `None` to a power-of-two count.
    imps: Vec<Option<T>>,
    // Describes how addresses split into region index and in-region offset.
    region_def: RegionDefinition,
}
1055
1056impl<T> MultiRegionGuestMemoryAccess<T> {
1057    fn region(&self, gpa: u64, len: u64) -> Result<(&T, u64), GuestMemoryBackingError> {
1058        let (i, offset) = self.region_def.region(gpa, len)?;
1059        let imp = self.imps[i].as_ref().ok_or(GuestMemoryBackingError::new(
1060            GuestMemoryErrorKind::OutOfRange,
1061            gpa,
1062            OutOfRange,
1063        ))?;
1064        Ok((imp, offset))
1065    }
1066}
1067
// SAFETY: `mapping()` is unreachable and panics if called.
impl<T: GuestMemoryAccess> DynGuestMemoryAccess for MultiRegionGuestMemoryAccess<T> {
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        // Resolve the owning region, then delegate with a region-relative offset.
        let (region, offset_in_region) = self.region(offset, len)?;
        region.subrange(offset_in_region, len, allow_preemptive_locking)
    }

    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let (region, offset_in_region) = self.region(addr, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { region.read_fallback(offset_in_region, dest, len) }
    }

    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let (region, offset_in_region) = self.region(addr, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { region.write_fallback(offset_in_region, src, len) }
    }

    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
        let (region, offset_in_region) = self.region(addr, len as u64)?;
        region.fill_fallback(offset_in_region, val, len)
    }

    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        // The access length is the size of the value being exchanged.
        let (region, offset_in_region) = self.region(addr, new.len() as u64)?;
        region.compare_exchange_fallback(offset_in_region, current, new)
    }

    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        let (region, offset_in_region) = self.region(address, len)?;
        region.expose_va(offset_in_region, len)
    }

    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        // A fault outside any backed region cannot be resolved; report it as
        // a failure rather than panicking.
        match self.region(address, len as u64) {
            Ok((region, offset_in_region)) => {
                region.page_fault(offset_in_region, len, write, bitmap_failure)
            }
            Err(err) => PageFaultAction::Fail(PageFaultError {
                kind: err.kind,
                err: err.err,
            }),
        }
    }

    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
        // Pages may be scattered across regions, so lock them one at a time.
        let mut ret = false;
        for gpn in gpns {
            let (region, offset_in_region) = self.region(gpn * PAGE_SIZE64, PAGE_SIZE64)?;
            ret |= region.lock_gpns(&[offset_in_region / PAGE_SIZE64])?;
        }
        Ok(ret)
    }

    fn unlock_gpns(&self, gpns: &[u64]) {
        for gpn in gpns {
            // NOTE(review): the unwrap presumably relies on callers only
            // unlocking gpns that were previously locked (so the region
            // lookup cannot fail) — confirm against callers.
            let (region, offset_in_region) = self.region(gpn * PAGE_SIZE64, PAGE_SIZE64).unwrap();
            region.unlock_gpns(&[offset_in_region / PAGE_SIZE64]);
        }
    }

    fn sharing(&self) -> Option<GuestMemorySharing> {
        // FUTURE: multi-region setups could aggregate shareable regions from
        // their sub-regions. For now, sharing is only supported for
        // single-region guest memory (the common case). If a VM uses
        // MultiRegionGuestMemoryAccess with vhost-user, this will return
        // None and the vhost-user backend will fail to initialize.
        None
    }
}
1165
/// A wrapper around a `GuestMemoryAccess` that provides methods for safely
/// reading and writing guest memory.
// NOTE: this type uses `inspect(skip)`, as it ends up being a dependency of
// _many_ objects, and littering the inspect graph with references to the same
// node would be silly.
#[derive(Debug, Clone, Inspect)]
#[inspect(skip)]
pub struct GuestMemory {
    // Shared state (region layout + backing implementation); `Clone` on
    // `GuestMemory` is therefore a cheap refcount bump.
    inner: Arc<GuestMemoryInner>,
}
1176
/// Shared state backing a [`GuestMemory`]: the region layout, the cached
/// per-region descriptors, and the backing implementation itself.
struct GuestMemoryInner<T: ?Sized = dyn DynGuestMemoryAccess> {
    // How addresses split into region index and in-region offset.
    region_def: RegionDefinition,
    // Cached descriptors, one per region.
    regions: Vec<MemoryRegion>,
    // Name used to identify this memory in error messages.
    debug_name: Arc<str>,
    // True when the backing is an `AlignedHeapMemory` created by
    // `GuestMemory::allocate` (enables the `inner_buf*` downcasts).
    allocated: bool,
    // The backing implementation; must be last since `T` may be unsized.
    imp: T,
}
1184
1185impl<T: ?Sized> Debug for GuestMemoryInner<T> {
1186    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1187        f.debug_struct("GuestMemoryInner")
1188            .field("region_def", &self.region_def)
1189            .field("regions", &self.regions)
1190            .finish()
1191    }
1192}
1193
/// Plain-data description of one memory region: its mapping pointer, optional
/// access bitmaps, length, and base IOVA.
#[derive(Debug, Copy, Clone, Default)]
struct MemoryRegion {
    // Base VA of the region's mapping, if mapped.
    mapping: Option<SendPtrU8>,
    // Read and write access bitmaps, indexed by `AccessType`.
    #[cfg(feature = "bitmap")]
    bitmaps: Option<[SendPtrU8; 2]>,
    // Bit offset of the region's first page within the first bitmap byte.
    #[cfg(feature = "bitmap")]
    bitmap_start: u8,
    // Length of the region's backing in bytes.
    len: u64,
    // IOVA of the start of the region, if available.
    base_iova: Option<u64>,
}
1204
/// The access type. The values correspond to bitmap indexes.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum AccessType {
    /// Read access, checked against bitmap index 0.
    Read = 0,
    /// Write access, checked against bitmap index 1.
    Write = 1,
}
1211
/// `NonNull<u8>` that implements `Send+Sync`.
///
/// Rust makes pointers `!Send+!Sync` by default to force you to think about the
/// ownership model and thread safety of types using pointers--there is nothing
/// safety-related about `Send`/`Sync` on pointers by themselves since all such
/// accesses to pointers require `unsafe` blocks anyway.
///
/// However, in practice, this leads to spurious manual `Send+Sync` impls on
/// types containing pointers, especially those containing generics. Define a
/// wrapping pointer type that implements `Send+Sync` so that the normal auto
/// trait rules apply to types containing these pointers.
// Used by `MemoryRegion` for both the mapping base and the bitmap pointers.
#[derive(Debug, Copy, Clone)]
struct SendPtrU8(NonNull<u8>);

// SAFETY: see type description.
unsafe impl Send for SendPtrU8 {}
// SAFETY: see type description.
unsafe impl Sync for SendPtrU8 {}
1230
impl MemoryRegion {
    /// Captures the raw mapping, bitmap, length, and IOVA information from a
    /// backing implementation into a plain-data region descriptor.
    fn new(imp: &impl GuestMemoryAccess) -> Self {
        #[cfg(feature = "bitmap")]
        let (bitmaps, bitmap_start) = {
            let bitmap_info = imp.access_bitmap();
            let bitmaps = bitmap_info
                .as_ref()
                .map(|bm| [SendPtrU8(bm.read_bitmap), SendPtrU8(bm.write_bitmap)]);
            let bitmap_start = bitmap_info.map_or(0, |bi| bi.bit_offset);
            (bitmaps, bitmap_start)
        };
        Self {
            mapping: imp.mapping().map(SendPtrU8),
            #[cfg(feature = "bitmap")]
            bitmaps,
            #[cfg(feature = "bitmap")]
            bitmap_start,
            len: imp.max_address(),
            base_iova: imp.base_iova(),
        }
    }

    /// Checks the access bitmap (when the `bitmap` feature is enabled) for
    /// each page of `offset..offset + len`, returning `Err(n)` where `n` is
    /// the byte offset (relative to `offset`) of the first inaccessible page.
    /// Without the feature, always succeeds.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `offset + len` fits in this region, and that
    /// the object bitmap is currently valid for atomic read access from this
    /// thread.
    unsafe fn check_access(
        &self,
        access_type: AccessType,
        offset: u64,
        len: u64,
    ) -> Result<(), u64> {
        debug_assert!(self.len >= offset + len);
        #[cfg(not(feature = "bitmap"))]
        let _ = access_type;

        #[cfg(feature = "bitmap")]
        if let Some(bitmaps) = &self.bitmaps {
            // One bit per page; `AccessType` selects the read or write bitmap.
            let SendPtrU8(bitmap) = bitmaps[access_type as usize];
            let start = offset / PAGE_SIZE64;
            let end = (offset + len - 1) / PAGE_SIZE64;
            // FUTURE: consider optimizing this separately for multi-page and
            // single-page accesses.
            for gpn in start..=end {
                let bit_offset = self.bitmap_start as u64 + gpn;
                // SAFETY: the caller ensures that the bitmap is big enough and
                // valid for atomic read access from this thread.
                let bit = unsafe {
                    (*bitmap
                        .as_ptr()
                        .cast_const()
                        .cast::<AtomicU8>()
                        .add(bit_offset as usize / 8))
                    .load(std::sync::atomic::Ordering::Relaxed)
                        & (1 << (bit_offset % 8))
                };
                if bit == 0 {
                    // Saturate for the first page, whose page-aligned start
                    // may precede `offset`.
                    return Err((gpn * PAGE_SIZE64).saturating_sub(offset));
                }
            }
        }
        Ok(())
    }
}
1296
1297/// The default implementation is [`GuestMemory::empty`].
1298impl Default for GuestMemory {
1299    fn default() -> Self {
1300        Self::empty()
1301    }
1302}
1303
// Backing for `GuestMemory::empty`: no mapping and a zero maximum address, so
// every non-empty access is rejected as out of range.
struct Empty;

// SAFETY: the mapping is empty, so all requirements are trivially satisfied.
unsafe impl GuestMemoryAccess for Empty {
    fn mapping(&self) -> Option<NonNull<u8>> {
        None
    }

    fn max_address(&self) -> u64 {
        0
    }
}
1316
/// Errors returned by [`GuestMemory::new_multi_region`].
#[derive(Debug, Error)]
pub enum MultiRegionError {
    /// The region size is not a power of two.
    #[error("region size {0:#x} is not a power of 2")]
    NotPowerOfTwo(u64),
    /// The region size is smaller than a page.
    #[error("region size {0:#x} is smaller than a page")]
    RegionSizeTooSmall(u64),
    /// More regions were requested than fit in the usable 63-bit address space
    /// at the given region size.
    #[error(
        "too many regions ({region_count}) for region size {region_size:#x}; max is {max_region_count}"
    )]
    TooManyRegions {
        region_count: usize,
        max_region_count: usize,
        region_size: u64,
    },
    /// A region's backing is larger than the region itself.
    #[error("backing size {backing_size:#x} is too large for region size {region_size:#x}")]
    BackingTooLarge { backing_size: u64, region_size: u64 },
}
1334
/// The RCU domain memory accesses occur under. Updates to any memory access
/// bitmaps must be synchronized under this domain.
///
/// See [`GuestMemoryAccess::access_bitmap`] for more details.
///
/// This is currently the global domain, but this is reexported here to make
/// calling code clearer.
#[cfg(feature = "bitmap")]
pub fn rcu() -> minircu::RcuDomain {
    // Use the global domain unless we find a reason to do something else.
    minircu::global()
}
1347
1348impl GuestMemory {
1349    /// Returns a new instance using `imp` as the backing.
1350    ///
1351    /// `debug_name` is used to specify which guest memory is being accessed in
1352    /// error messages.
1353    pub fn new(debug_name: impl Into<Arc<str>>, imp: impl GuestMemoryAccess) -> Self {
1354        // Install signal handlers on unix if a mapping is present.
1355        //
1356        // Skip this on miri even when there is a mapping, since the mapping may
1357        // never be accessed by the code under test.
1358        if imp.mapping().is_some() && !cfg!(miri) {
1359            trycopy::initialize_try_copy();
1360        }
1361        Self::new_inner(debug_name.into(), imp, false)
1362    }
1363
    /// Builds the shared inner state for a single-region memory.
    ///
    /// `allocated` records whether `imp` is an `AlignedHeapMemory` (see
    /// [`GuestMemory::allocate`]), enabling the `inner_buf*` downcasts.
    fn new_inner(debug_name: Arc<str>, imp: impl GuestMemoryAccess, allocated: bool) -> Self {
        let regions = vec![MemoryRegion::new(&imp)];
        Self {
            inner: Arc::new(GuestMemoryInner {
                imp,
                debug_name,
                region_def: RegionDefinition {
                    // Single region covering the low 63 bits of address
                    // space; any address with bit 63 set is invalid.
                    invalid_mask: 1 << 63,
                    region_mask: !0 >> 1,
                    region_bits: 63, // right shift of 64 isn't valid, so restrict the space
                },
                regions,
                allocated,
            }),
        }
    }
1380
    /// Creates a new multi-region guest memory, made up of multiple mappings.
    /// This allows you to create a very large sparse layout (up to the limits
    /// of the VM's physical address space) without having to allocate an
    /// enormous amount of virtual address space.
    ///
    /// Each region will be `region_size` bytes and will start immediately after
    /// the last one. This must be a power of two, be at least a page in size,
    /// and cannot fill the full 64-bit address space.
    ///
    /// `imps` must be a list of [`GuestMemoryAccess`] implementations, one for
    /// each region. Use `None` if the corresponding region is empty.
    ///
    /// A region's mapping cannot fully fill the region. This is necessary to
    /// avoid callers expecting to be able to access a memory range that spans
    /// two regions.
    pub fn new_multi_region(
        debug_name: impl Into<Arc<str>>,
        region_size: u64,
        mut imps: Vec<Option<impl GuestMemoryAccess>>,
    ) -> Result<Self, MultiRegionError> {
        // Install signal handlers.
        trycopy::initialize_try_copy();

        if !region_size.is_power_of_two() {
            return Err(MultiRegionError::NotPowerOfTwo(region_size));
        }
        if region_size < PAGE_SIZE64 {
            return Err(MultiRegionError::RegionSizeTooSmall(region_size));
        }
        let region_bits = region_size.trailing_zeros();

        // Only 63 bits of address space are usable (see `new_inner`), which
        // bounds the region count for a given region size.
        let max_region_count = 1 << (63 - region_bits);

        // Round the region count up to a power of two; the invalid-address
        // mask computed below relies on this.
        let region_count = imps.len().next_power_of_two();
        if region_count > max_region_count {
            return Err(MultiRegionError::TooManyRegions {
                region_count,
                max_region_count,
                region_size,
            });
        }

        // Any address with a bit above the addressable span set is invalid.
        let valid_bits = region_bits + region_count.trailing_zeros();
        assert!(valid_bits < 64);
        let invalid_mask = !0 << valid_bits;

        let mut regions = vec![MemoryRegion::default(); region_count];
        for (imp, region) in imps.iter().zip(&mut regions) {
            let Some(imp) = imp else { continue };
            let backing_size = imp.max_address();
            if backing_size > region_size {
                return Err(MultiRegionError::BackingTooLarge {
                    backing_size,
                    region_size,
                });
            }
            *region = MemoryRegion::new(imp);
        }

        let region_def = RegionDefinition {
            invalid_mask,
            region_mask: region_size - 1,
            region_bits,
        };

        // Pad the implementation list with holes out to the rounded count.
        imps.resize_with(region_count, || None);
        let imp = MultiRegionGuestMemoryAccess { imps, region_def };

        let inner = GuestMemoryInner {
            debug_name: debug_name.into(),
            region_def,
            regions,
            imp,
            allocated: false,
        };

        Ok(Self {
            inner: Arc::new(inner),
        })
    }
1461
1462    /// Allocates a guest memory object on the heap with the given size in
1463    /// bytes.
1464    ///
1465    /// `size` will be rounded up to the page size. The backing buffer will be
1466    /// page aligned.
1467    ///
1468    /// The debug name in errors will be "heap". If you want to provide a
1469    /// different debug name, manually use `GuestMemory::new` with
1470    /// [`AlignedHeapMemory`].
1471    pub fn allocate(size: usize) -> Self {
1472        Self::new_inner("heap".into(), AlignedHeapMemory::new(size), true)
1473    }
1474
    /// If this memory is unaliased and was created via
    /// [`GuestMemory::allocate`], returns the backing buffer.
    ///
    /// Returns `Err(self)` if there are other references to this memory (via
    /// `clone()`).
    pub fn into_inner_buf(self) -> Result<AlignedHeapMemory, Self> {
        // `allocated` is only set by `new_inner` when the backing is an
        // `AlignedHeapMemory` (see `allocate`).
        if !self.inner.allocated {
            return Err(self);
        }
        // FUTURE: consider using `Any` and `Arc::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let inner = unsafe {
            Arc::<GuestMemoryInner<AlignedHeapMemory>>::from_raw(Arc::into_raw(self.inner).cast())
        };
        // Unwrap only if this is the sole reference; otherwise reconstruct
        // `self` and hand it back to the caller.
        let inner = Arc::try_unwrap(inner).map_err(|inner| Self { inner })?;
        Ok(inner.imp)
    }
1492
    /// If this memory was created via [`GuestMemory::allocate`], returns a slice to
    /// the allocated buffer.
    pub fn inner_buf(&self) -> Option<&[AtomicU8]> {
        // `allocated` is only set when the backing is an `AlignedHeapMemory`.
        if !self.inner.allocated {
            return None;
        }
        // FUTURE: consider using `<dyn Any>::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let inner = unsafe { &*core::ptr::from_ref(&self.inner.imp).cast::<AlignedHeapMemory>() };
        // The buffer is exposed as `AtomicU8` since other clones of this
        // memory may access it concurrently.
        Some(inner)
    }
1504
    /// If this memory was created via [`GuestMemory::allocate`] and there are
    /// no other references to it, returns a mutable slice to the backing
    /// buffer.
    pub fn inner_buf_mut(&mut self) -> Option<&mut [u8]> {
        if !self.inner.allocated {
            return None;
        }
        // Exclusive access to the Arc proves no other clone can observe the
        // buffer, making a plain `&mut [u8]` sound.
        let inner = Arc::get_mut(&mut self.inner)?;
        // FUTURE: consider using `<dyn Any>::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let imp = unsafe { &mut *core::ptr::from_mut(&mut inner.imp).cast::<AlignedHeapMemory>() };
        Some(imp.as_mut())
    }
1518
1519    /// Returns an empty guest memory, which fails every operation.
1520    pub fn empty() -> Self {
1521        GuestMemory::new("empty", Empty)
1522    }
1523
1524    fn wrap_err(
1525        &self,
1526        gpa_len: Option<(u64, u64)>,
1527        op: GuestMemoryOperation,
1528        err: GuestMemoryBackingError,
1529    ) -> GuestMemoryError {
1530        let range = gpa_len.map(|(gpa, len)| gpa..gpa.wrapping_add(len));
1531        GuestMemoryError::new(&self.inner.debug_name, range, op, err)
1532    }
1533
1534    fn with_op<T>(
1535        &self,
1536        gpa_len: Option<(u64, u64)>,
1537        op: GuestMemoryOperation,
1538        f: impl FnOnce() -> Result<T, GuestMemoryBackingError>,
1539    ) -> Result<T, GuestMemoryError> {
1540        f().map_err(|err| self.wrap_err(gpa_len, op, err))
1541    }
1542
1543    /// Creates a smaller view into guest memory, constraining accesses within the new boundaries. For smaller ranges,
1544    /// some memory implementations (e.g. HDV) may choose to lock the pages into memory for faster access. Locking
1545    /// random guest memory may cause issues, so only opt in to this behavior when the range can be considered "owned"
1546    /// by the caller.
1547    pub fn subrange(
1548        &self,
1549        offset: u64,
1550        len: u64,
1551        allow_preemptive_locking: bool,
1552    ) -> Result<GuestMemory, GuestMemoryError> {
1553        self.with_op(Some((offset, len)), GuestMemoryOperation::Subrange, || {
1554            if let Some(guest_memory) =
1555                self.inner
1556                    .imp
1557                    .subrange(offset, len, allow_preemptive_locking)?
1558            {
1559                Ok(guest_memory)
1560            } else {
1561                create_memory_subrange(self.inner.clone(), offset, len, allow_preemptive_locking)
1562            }
1563        })
1564    }
1565
    /// Returns a subrange where pages from the subrange can be locked.
    pub fn lockable_subrange(
        &self,
        offset: u64,
        len: u64,
    ) -> Result<GuestMemory, GuestMemoryError> {
        // TODO: Enforce subrange is actually lockable.
        // Opts in to preemptive locking (see `subrange`).
        self.subrange(offset, len, true)
    }
1575
1576    /// Returns the mapping for all of guest memory.
1577    ///
1578    /// Returns `None` if there is more than one region or if the memory is not
1579    /// mapped.
1580    pub fn full_mapping(&self) -> Option<(*mut u8, usize)> {
1581        if let [region] = self.inner.regions.as_slice() {
1582            #[cfg(feature = "bitmap")]
1583            if region.bitmaps.is_some() {
1584                return None;
1585            }
1586            region
1587                .mapping
1588                .map(|SendPtrU8(ptr)| (ptr.as_ptr(), region.len as usize))
1589        } else {
1590            None
1591        }
1592    }
1593
1594    /// Gets the IO address for DMAing to `gpa` from a user-mode driver not
1595    /// going through an IOMMU.
1596    pub fn iova(&self, gpa: u64) -> Option<u64> {
1597        let (region, offset, _) = self.inner.region(gpa, 1).ok()?;
1598        Some(region.base_iova? + offset)
1599    }
1600
    /// Returns a sharing object if this memory supports
    /// file-based sharing. See [`GuestMemorySharing`].
    pub fn sharing(&self) -> Option<GuestMemorySharing> {
        // Delegates directly to the backing implementation.
        self.inner.imp.sharing()
    }
1606
    /// Gets a pointer to the VA range for `gpa..gpa+len`.
    ///
    /// Returns `Ok(None)` if there is no mapping. Returns `Err(_)` if the
    /// memory is out of range.
    fn mapping_range(
        &self,
        access_type: AccessType,
        gpa: u64,
        len: usize,
    ) -> Result<Option<*mut u8>, GuestMemoryBackingError> {
        let (region, offset, _) = self.inner.region(gpa, len as u64)?;
        if let Some(SendPtrU8(ptr)) = region.mapping {
            // Retry until the bitmap check passes or the fault handler gives up.
            loop {
                // SAFETY: offset + len is checked by `region()` to be inside the VA range.
                let fault_offset = unsafe {
                    match region.check_access(access_type, offset, len as u64) {
                        Ok(()) => return Ok(Some(ptr.as_ptr().add(offset as usize))),
                        Err(n) => n,
                    }
                };

                // Resolve the fault and try again. `bitmap_failure` is true
                // here since the failure came from the access bitmap.
                match self.inner.imp.page_fault(
                    gpa + fault_offset,
                    len - fault_offset as usize,
                    access_type == AccessType::Write,
                    true,
                ) {
                    PageFaultAction::Fail(err) => {
                        return Err(GuestMemoryBackingError::new(
                            err.kind,
                            gpa + fault_offset,
                            err.err,
                        ));
                    }
                    PageFaultAction::Retry => {}
                    PageFaultAction::Fallback => break,
                }
            }
        }
        Ok(None)
    }
1649
    /// Runs `f` with a pointer to the mapped memory. If `f` fails, tries to
    /// resolve the fault (failing on error), then loops.
    ///
    /// If there is no mapping for the memory, or if the fault handler requests
    /// it, call `fallback` instead. `fallback` will not be called unless `gpa`
    /// and `len` are in range.
    ///
    /// `param` is threaded through to both `f` and `fallback` so the closures
    /// can share mutable state.
    fn run_on_mapping<T, P>(
        &self,
        access_type: AccessType,
        gpa: u64,
        len: usize,
        mut param: P,
        mut f: impl FnMut(&mut P, *mut u8) -> Result<T, trycopy::MemoryError>,
        fallback: impl FnOnce(&mut P) -> Result<T, GuestMemoryBackingError>,
    ) -> Result<T, GuestMemoryBackingError> {
        let op = || {
            let Some(mapping) = self.mapping_range(access_type, gpa, len)? else {
                return fallback(&mut param);
            };

            // Try until the fault fails to resolve.
            loop {
                match f(&mut param, mapping) {
                    Ok(t) => return Ok(t),
                    Err(fault) => {
                        // `fault.offset()` is the byte offset within the
                        // access where the failure occurred.
                        match self.inner.imp.page_fault(
                            gpa + fault.offset() as u64,
                            len - fault.offset(),
                            access_type == AccessType::Write,
                            false,
                        ) {
                            PageFaultAction::Fail(err) => {
                                return Err(GuestMemoryBackingError::new(
                                    err.kind,
                                    gpa + fault.offset() as u64,
                                    err.err,
                                ));
                            }
                            PageFaultAction::Retry => {}
                            PageFaultAction::Fallback => return fallback(&mut param),
                        }
                    }
                }
            }
        };
        // If the `bitmap` feature is enabled, run the function in an RCU
        // critical section. This will allow callers to flush concurrent
        // accesses after bitmap updates.
        #[cfg(feature = "bitmap")]
        return rcu().run(op);
        #[cfg(not(feature = "bitmap"))]
        op()
    }
1703
    /// Writes `len` bytes from `src` to guest memory at `gpa`, preferring the
    /// direct mapping and falling back to the backing's `write_fallback`.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `src`..`src + len` is a valid buffer for reads.
    unsafe fn write_ptr(
        &self,
        gpa: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        // Zero-length writes succeed without validating `gpa`.
        if len == 0 {
            return Ok(());
        }
        self.run_on_mapping(
            AccessType::Write,
            gpa,
            len,
            (),
            |(), dest| {
                // SAFETY: dest..dest+len is guaranteed to point to a reserved VA
                // range, and src..src+len is guaranteed by the caller to be a valid
                // buffer for reads.
                unsafe { trycopy::try_copy(src, dest, len) }
            },
            |()| {
                // SAFETY: src..src+len is guaranteed by the caller to point to a valid
                // buffer for reads.
                unsafe { self.inner.imp.write_fallback(gpa, src, len) }
            },
        )
    }
1734
1735    /// Writes `src` into guest memory at address `gpa`.
1736    pub fn write_at(&self, gpa: u64, src: &[u8]) -> Result<(), GuestMemoryError> {
1737        self.with_op(
1738            Some((gpa, src.len() as u64)),
1739            GuestMemoryOperation::Write,
1740            || self.write_at_inner(gpa, src),
1741        )
1742    }
1743
1744    fn write_at_inner(&self, gpa: u64, src: &[u8]) -> Result<(), GuestMemoryBackingError> {
1745        // SAFETY: `src` is a valid buffer for reads.
1746        unsafe { self.write_ptr(gpa, src.as_ptr(), src.len()) }
1747    }
1748
1749    /// Writes `src` into guest memory at address `gpa`.
1750    pub fn write_from_atomic(&self, gpa: u64, src: &[AtomicU8]) -> Result<(), GuestMemoryError> {
1751        self.with_op(
1752            Some((gpa, src.len() as u64)),
1753            GuestMemoryOperation::Write,
1754            || {
1755                // SAFETY: `src` is a valid buffer for reads.
1756                unsafe { self.write_ptr(gpa, src.as_ptr().cast(), src.len()) }
1757            },
1758        )
1759    }
1760
1761    /// Writes `len` bytes of `val` into guest memory at address `gpa`.
1762    pub fn fill_at(&self, gpa: u64, val: u8, len: usize) -> Result<(), GuestMemoryError> {
1763        self.with_op(Some((gpa, len as u64)), GuestMemoryOperation::Fill, || {
1764            self.fill_at_inner(gpa, val, len)
1765        })
1766    }
1767
    /// Fills `len` bytes of guest memory at `gpa` with `val`, returning the
    /// raw backing error on failure.
    fn fill_at_inner(&self, gpa: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
        // A zero-length fill is a no-op; skip range validation entirely.
        if len == 0 {
            return Ok(());
        }
        self.run_on_mapping(
            AccessType::Write,
            gpa,
            len,
            (),
            |(), dest| {
                // SAFETY: dest..dest+len is guaranteed to point to a reserved VA range.
                unsafe { trycopy::try_write_bytes(dest, val, len) }
            },
            // No usable mapping: let the backing implementation perform the fill.
            |()| self.inner.imp.fill_fallback(gpa, val, len),
        )
    }
1784
    /// Reads from guest memory into `dest..dest+len`.
    ///
    /// Uses the direct mapping when one is available, otherwise defers to the
    /// backing's `read_fallback`.
    ///
    /// # Safety
    /// The caller must ensure dest..dest+len is a valid buffer for writes.
    unsafe fn read_ptr(
        &self,
        gpa: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        // A zero-length read touches no memory; skip range validation.
        if len == 0 {
            return Ok(());
        }
        self.run_on_mapping(
            AccessType::Read,
            gpa,
            len,
            (),
            |(), src| {
                // SAFETY: src..src+len is guaranteed to point to a reserved VA
                // range, and dest..dest+len is guaranteed by the caller to be a
                // valid buffer for writes.
                unsafe { trycopy::try_copy(src, dest, len) }
            },
            |()| {
                // No usable mapping: defer to the backing implementation.
                // SAFETY: dest..dest+len is guaranteed by the caller to point to a
                // valid buffer for writes.
                unsafe { self.inner.imp.read_fallback(gpa, dest, len) }
            },
        )
    }
1816
1817    fn read_at_inner(&self, gpa: u64, dest: &mut [u8]) -> Result<(), GuestMemoryBackingError> {
1818        // SAFETY: `dest` is a valid buffer for writes.
1819        unsafe { self.read_ptr(gpa, dest.as_mut_ptr(), dest.len()) }
1820    }
1821
1822    /// Reads from guest memory address `gpa` into `dest`.
1823    pub fn read_at(&self, gpa: u64, dest: &mut [u8]) -> Result<(), GuestMemoryError> {
1824        self.with_op(
1825            Some((gpa, dest.len() as u64)),
1826            GuestMemoryOperation::Read,
1827            || self.read_at_inner(gpa, dest),
1828        )
1829    }
1830
1831    /// Reads from guest memory address `gpa` into `dest`.
1832    pub fn read_to_atomic(&self, gpa: u64, dest: &[AtomicU8]) -> Result<(), GuestMemoryError> {
1833        self.with_op(
1834            Some((gpa, dest.len() as u64)),
1835            GuestMemoryOperation::Read,
1836            // SAFETY: `dest` is a valid buffer for writes.
1837            || unsafe { self.read_ptr(gpa, dest.as_ptr() as *mut u8, dest.len()) },
1838        )
1839    }
1840
    /// Probes whether a write to guest memory at address `gpa` would succeed.
    ///
    /// The mapped path performs a volatile read of one byte followed by a
    /// volatile write of the same value, so it does not change guest memory
    /// contents in the absence of concurrent writers.
    ///
    /// NOTE(review): the read/write pair is not atomic, so a concurrent guest
    /// write landing between the two operations could be overwritten with the
    /// stale value — confirm callers only probe where this race is acceptable.
    fn probe_write_inner(&self, gpa: u64) -> Result<(), GuestMemoryBackingError> {
        self.run_on_mapping(
            AccessType::Write,
            gpa,
            1,
            (),
            |(), dest| {
                // SAFETY: dest is guaranteed to point to a reserved VA range.
                // We perform a volatile read followed by write of the same value
                // to check write accessibility without modifying the actual data.
                unsafe {
                    let value = trycopy::try_read_volatile(dest)?;
                    trycopy::try_write_volatile(dest, &value)
                }
            },
            |()| {
                // Fallback: use compare_exchange_fallback to probe write access.
                // A 0 -> 0 exchange requires write access but cannot change the
                // guest-visible contents.
                let mut current = 0u8;
                self.inner.imp.compare_exchange_fallback(
                    gpa,
                    std::slice::from_mut(&mut current),
                    &[0u8],
                )?;
                Ok(())
            },
        )
    }
1869
    /// Writes an object to guest memory at address `gpa`.
    ///
    /// If the object is 1, 2, 4, or 8 bytes and the address is naturally
    /// aligned, then the write will be performed atomically. Here, this means
    /// that concurrent readers (via `read_plain`) cannot observe a torn write
    /// but will observe either the old or new value.
    ///
    /// The memory ordering of the write is unspecified.
    ///
    /// FUTURE: once we are on Rust 1.79, add a method specifically for atomic
    /// accesses that const asserts that the size is appropriate.
    pub fn write_plain<T: IntoBytes + Immutable + KnownLayout>(
        &self,
        gpa: u64,
        b: &T,
    ) -> Result<(), GuestMemoryError> {
        // `len` is a compile-time constant for any given `T`, so the compiler
        // can specialize the size-dependent paths below.
        let len = size_of::<T>();
        self.with_op(Some((gpa, len as u64)), GuestMemoryOperation::Write, || {
            self.run_on_mapping(
                AccessType::Write,
                gpa,
                len,
                (),
                |(), dest| {
                    // SAFETY: dest..dest+len is guaranteed to point to
                    // a reserved VA range.
                    unsafe { trycopy::try_write_volatile(dest.cast(), b) }
                },
                |()| {
                    // No usable mapping: write the object's raw bytes via the
                    // backing implementation.
                    // SAFETY: b is a valid buffer for reads.
                    unsafe {
                        self.inner
                            .imp
                            .write_fallback(gpa, b.as_bytes().as_ptr(), len)
                    }
                },
            )
        })
    }
1910
    /// Attempts a sequentially-consistent compare exchange of the value at `gpa`.
    ///
    /// The inner result is `Ok(new)` when the exchange succeeded, and
    /// `Err(observed)` with the value found in memory when it did not.
    pub fn compare_exchange<T: IntoBytes + FromBytes + Immutable + KnownLayout + Copy>(
        &self,
        gpa: u64,
        current: T,
        new: T,
    ) -> Result<Result<T, T>, GuestMemoryError> {
        // Only power-of-two sizes up to 8 bytes can be exchanged atomically,
        // and `T` must be naturally aligned so that an aligned `gpa` implies
        // an aligned access.
        const {
            assert!(matches!(size_of::<T>(), 1 | 2 | 4 | 8));
            assert!(align_of::<T>() >= size_of::<T>());
        };
        let len = size_of_val(&new);
        self.with_op(
            Some((gpa, len as u64)),
            GuestMemoryOperation::CompareExchange,
            || {
                // Assume that if write is allowed, then read is allowed.
                self.run_on_mapping(
                    AccessType::Write,
                    gpa,
                    len,
                    (),
                    |(), dest| {
                        // SAFETY: dest..dest+len is guaranteed to point to a
                        // reserved VA range.
                        unsafe { trycopy::try_compare_exchange(dest.cast(), current, new) }
                    },
                    |()| {
                        // No usable mapping: the backing updates `current` in
                        // place to the observed value and reports success.
                        let mut current = current;
                        let success = self.inner.imp.compare_exchange_fallback(
                            gpa,
                            current.as_mut_bytes(),
                            new.as_bytes(),
                        )?;

                        Ok(if success { Ok(new) } else { Err(current) })
                    },
                )
            },
        )
    }
1952
1953    /// Reads an object from guest memory at address `gpa`.
1954    ///
1955    /// If the object is 1, 2, 4, or 8 bytes and the address is naturally
1956    /// aligned, then the read will be performed atomically. Here, this means
1957    /// that when there is a concurrent writer, callers will observe either the
1958    /// old or new value, but not a torn read.
1959    ///
1960    /// The memory ordering of the read is unspecified.
1961    ///
1962    /// FUTURE: once we are on Rust 1.79, add a method specifically for atomic
1963    /// accesses that const asserts that the size is appropriate.
1964    pub fn read_plain<T: FromBytes + Immutable + KnownLayout>(
1965        &self,
1966        gpa: u64,
1967    ) -> Result<T, GuestMemoryError> {
1968        self.with_op(
1969            Some((gpa, size_of::<T>() as u64)),
1970            GuestMemoryOperation::Read,
1971            || self.read_plain_inner(gpa),
1972        )
1973    }
1974
    /// Backing-error form of [`Self::read_plain`]: reads a `T` from `gpa`
    /// without wrapping the error.
    fn read_plain_inner<T: FromBytes + Immutable + KnownLayout>(
        &self,
        gpa: u64,
    ) -> Result<T, GuestMemoryBackingError> {
        let len = size_of::<T>();
        self.run_on_mapping(
            AccessType::Read,
            gpa,
            len,
            (),
            |(), src| {
                // SAFETY: src..src+len is guaranteed to point to a reserved VA
                // range.
                unsafe { trycopy::try_read_volatile(src.cast::<T>()) }
            },
            |()| {
                let mut obj = std::mem::MaybeUninit::<T>::zeroed();
                // SAFETY: `obj` provides storage for exactly `len` bytes, so
                // it is a valid destination buffer for the fallback read.
                unsafe {
                    self.inner
                        .imp
                        .read_fallback(gpa, obj.as_mut_ptr().cast(), len)?;
                }
                // SAFETY: `obj` was zero-initialized, so every byte is
                // initialized even if the fallback wrote fewer, and
                // `T: FromBytes` makes any byte pattern a valid `T`.
                Ok(unsafe { obj.assume_init() })
            },
        )
    }
2004
    /// Validates that the page containing `gpa` is directly mapped and
    /// readable, returning a pointer to the mapped bytes for `gpa`.
    fn probe_page_for_lock(
        &self,
        with_kernel_access: bool,
        gpa: u64,
    ) -> Result<*const AtomicU8, GuestMemoryBackingError> {
        let (region, offset, _) = self.inner.region(gpa, 1)?;
        // Pages can only be locked when a direct mapping exists.
        let Some(SendPtrU8(ptr)) = region.mapping else {
            return Err(GuestMemoryBackingError::other(gpa, NotLockable));
        };
        // Ensure the virtual address can be exposed.
        if with_kernel_access {
            self.inner.imp.expose_va(gpa, 1)?;
        }
        // FUTURE: check the correct bitmap for the access type, which needs to
        // be passed in.
        self.read_plain_inner::<u8>(gpa)?;
        // SAFETY: the read_plain_inner call above includes a check that
        // ensures that `gpa` is in the VA range.
        let page = unsafe { ptr.as_ptr().add(offset as usize) };
        Ok(page.cast())
    }
2026
2027    pub fn lock_gpns(
2028        &self,
2029        with_kernel_access: bool,
2030        gpns: &[u64],
2031    ) -> Result<LockedPages, GuestMemoryError> {
2032        self.with_op(None, GuestMemoryOperation::Lock, || {
2033            let mut pages = Vec::with_capacity(gpns.len());
2034            for &gpn in gpns {
2035                let gpa = gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?;
2036                let page = self.probe_page_for_lock(with_kernel_access, gpa)?;
2037                pages.push(PagePtr(page));
2038            }
2039            let store_gpns = self.inner.imp.lock_gpns(gpns)?;
2040            Ok(LockedPages {
2041                pages: pages.into_boxed_slice(),
2042                gpns: store_gpns.then(|| gpns.to_vec().into_boxed_slice()),
2043                mem: self.inner.clone(),
2044            })
2045        })
2046    }
2047
2048    pub fn probe_gpns(&self, gpns: &[u64]) -> Result<(), GuestMemoryError> {
2049        self.with_op(None, GuestMemoryOperation::Probe, || {
2050            for &gpn in gpns {
2051                self.read_plain_inner::<u8>(
2052                    gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?,
2053                )?;
2054            }
2055            Ok(())
2056        })
2057    }
2058
2059    /// Check if a given PagedRange is readable or not.
2060    pub fn probe_gpn_readable_range(&self, range: &PagedRange<'_>) -> Result<(), GuestMemoryError> {
2061        self.op_range(GuestMemoryOperation::Probe, range, move |addr, _r| {
2062            self.read_plain_inner(addr)
2063        })
2064    }
2065
2066    /// Check if a given PagedRange is writable or not.
2067    pub fn probe_gpn_writable_range(&self, range: &PagedRange<'_>) -> Result<(), GuestMemoryError> {
2068        self.op_range(GuestMemoryOperation::Probe, range, move |addr, _r| {
2069            self.probe_write_inner(addr)
2070        })
2071    }
2072
2073    /// Check if a given GPA is readable or not.
2074    pub fn probe_gpa_readable(&self, gpa: u64) -> Result<(), GuestMemoryErrorKind> {
2075        let mut b = [0];
2076        self.read_at_inner(gpa, &mut b).map_err(|err| err.kind)
2077    }
2078
2079    /// Check if a given GPA is writeable or not.
2080    pub fn probe_gpa_writable(&self, gpa: u64) -> Result<(), GuestMemoryErrorKind> {
2081        let _ = self
2082            .compare_exchange(gpa, 0u8, 0)
2083            .map_err(|err| err.kind())?;
2084        Ok(())
2085    }
2086
    /// Gets a slice of guest memory assuming the memory was already locked via
    /// [`GuestMemory::lock_gpns`].
    ///
    /// This is dangerous--if the pages have not been locked, then it could
    /// cause an access violation or guest memory corruption.
    ///
    /// Note that this is not `unsafe` since this cannot cause memory corruption
    /// in this process. Even if there is an access violation, the underlying VA
    /// space is known to be reserved.
    ///
    /// Panics if the requested buffer is out of range.
    fn dangerous_access_pre_locked_memory(&self, gpa: u64, len: usize) -> &[AtomicU8] {
        // `mapping_range` validates the GPA range; both unwraps panic (by
        // design, see above) if the range is invalid or unmapped.
        let addr = self
            .mapping_range(AccessType::Write, gpa, len)
            .unwrap()
            .unwrap();
        // SAFETY: addr..addr+len is checked above to be a valid VA range. It's
        // possible some of the pages aren't mapped and will cause AVs at
        // runtime when accessed, but, as discussed above, at a language level
        // this cannot cause any safety issues.
        unsafe { std::slice::from_raw_parts(addr.cast(), len) }
    }
2109
    /// Runs `f` once per page-bounded piece of `range`, passing the GPA of
    /// the piece and the corresponding byte sub-range of the overall
    /// operation.
    fn op_range<F: FnMut(u64, Range<usize>) -> Result<(), GuestMemoryBackingError>>(
        &self,
        op: GuestMemoryOperation,
        range: &PagedRange<'_>,
        mut f: F,
    ) -> Result<(), GuestMemoryError> {
        self.with_op(None, op, || {
            let gpns = range.gpns();
            let offset = range.offset();

            // Perform the operation in three phases: the first page (if it is not a
            // full page), the full pages, and the last page (if it is not a full
            // page).
            let mut byte_index = 0;
            let mut len = range.len();
            let mut page = 0;
            if !offset.is_multiple_of(PAGE_SIZE) {
                // Head: from `offset` to the end of the first page (or less).
                let head_len = std::cmp::min(len, PAGE_SIZE - (offset % PAGE_SIZE));
                let addr = gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?
                    + offset as u64 % PAGE_SIZE64;
                f(addr, byte_index..byte_index + head_len)?;
                byte_index += head_len;
                len -= head_len;
                page += 1;
            }
            // Middle: whole pages.
            while len >= PAGE_SIZE {
                f(
                    gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?,
                    byte_index..byte_index + PAGE_SIZE,
                )?;
                byte_index += PAGE_SIZE;
                len -= PAGE_SIZE;
                page += 1;
            }
            // Tail: the remaining partial page, if any.
            if len > 0 {
                f(
                    gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?,
                    byte_index..byte_index + len,
                )?;
            }

            Ok(())
        })
    }
2154
2155    pub fn write_range(&self, range: &PagedRange<'_>, data: &[u8]) -> Result<(), GuestMemoryError> {
2156        assert!(data.len() == range.len());
2157        self.op_range(GuestMemoryOperation::Write, range, move |addr, r| {
2158            self.write_at_inner(addr, &data[r])
2159        })
2160    }
2161
2162    pub fn fill_range(&self, range: &PagedRange<'_>, val: u8) -> Result<(), GuestMemoryError> {
2163        self.op_range(GuestMemoryOperation::Fill, range, move |addr, r| {
2164            self.fill_at_inner(addr, val, r.len())
2165        })
2166    }
2167
2168    pub fn zero_range(&self, range: &PagedRange<'_>) -> Result<(), GuestMemoryError> {
2169        self.op_range(GuestMemoryOperation::Fill, range, move |addr, r| {
2170            self.fill_at_inner(addr, 0, r.len())
2171        })
2172    }
2173
2174    pub fn read_range(
2175        &self,
2176        range: &PagedRange<'_>,
2177        data: &mut [u8],
2178    ) -> Result<(), GuestMemoryError> {
2179        assert!(data.len() == range.len());
2180        self.op_range(GuestMemoryOperation::Read, range, move |addr, r| {
2181            self.read_at_inner(addr, &mut data[r])
2182        })
2183    }
2184
    /// Writes `data` into the guest memory described by `range`.
    ///
    /// Panics if `data` is not exactly `range.len()` bytes.
    pub fn write_range_from_atomic(
        &self,
        range: &PagedRange<'_>,
        data: &[AtomicU8],
    ) -> Result<(), GuestMemoryError> {
        assert!(data.len() == range.len());
        self.op_range(GuestMemoryOperation::Write, range, move |addr, r| {
            let src = &data[r];
            // SAFETY: `src` is a valid buffer for reads.
            unsafe { self.write_ptr(addr, src.as_ptr().cast(), src.len()) }
        })
    }
2197
    /// Reads the guest memory described by `range` into `data`.
    ///
    /// Panics if `data` is not exactly `range.len()` bytes.
    pub fn read_range_to_atomic(
        &self,
        range: &PagedRange<'_>,
        data: &[AtomicU8],
    ) -> Result<(), GuestMemoryError> {
        assert!(data.len() == range.len());
        self.op_range(GuestMemoryOperation::Read, range, move |addr, r| {
            let dest = &data[r];
            // SAFETY: `dest` is a valid buffer for writes.
            unsafe { self.read_ptr(addr, dest.as_ptr().cast_mut().cast(), dest.len()) }
        })
    }
2210
    /// Locks the guest pages spanned by the specified `PagedRange`.
    ///
    /// # Arguments
    /// * 'paged_range' - The guest memory range to lock.
    /// * 'locked_range' - Receives a list of VA ranges to which each contiguous physical sub-range in `paged_range`
    ///   has been mapped. Must be initially empty.
    pub fn lock_range<'a, T: LockedRange<'a>>(
        &'a self,
        paged_range: PagedRange<'_>,
        mut locked_range: T,
    ) -> Result<LockedRangeImpl<'a, T>, GuestMemoryError> {
        self.with_op(None, GuestMemoryOperation::Lock, || {
            let gpns = paged_range.gpns();
            // First verify that every page is mapped and accessible; this
            // check is what justifies the "pre-locked" access below.
            for &gpn in gpns {
                let gpa = gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?;
                self.probe_page_for_lock(true, gpa)?;
            }
            // Hand out a VA slice for each contiguous physical sub-range, in
            // the order they appear in `paged_range`.
            for range in paged_range.ranges() {
                let range = range.map_err(GuestMemoryBackingError::gpn)?;
                locked_range.push_sub_range(
                    self.dangerous_access_pre_locked_memory(range.start, range.len() as usize),
                );
            }
            // The backing reports whether it needs the GPN list at unlock time.
            let store_gpns = self.inner.imp.lock_gpns(paged_range.gpns())?;
            Ok(LockedRangeImpl {
                mem: &self.inner,
                gpns: store_gpns.then(|| paged_range.gpns().to_vec().into_boxed_slice()),
                inner: locked_range,
            })
        })
    }
2242}
2243
/// Error returned when a guest page number is too large to convert to a
/// guest physical address without overflow.
#[derive(Debug, Error)]
#[error("invalid guest page number {0:#x}")]
pub struct InvalidGpn(u64);
2247
2248fn gpn_to_gpa(gpn: u64) -> Result<u64, InvalidGpn> {
2249    gpn.checked_mul(PAGE_SIZE64).ok_or(InvalidGpn(gpn))
2250}
2251
/// Precomputed bit masks describing how a guest physical address is split
/// into a region index and an offset within the region.
#[derive(Debug, Copy, Clone, Default)]
struct RegionDefinition {
    // Bits that may never be set in a valid GPA or access length.
    invalid_mask: u64,
    // Mask selecting the offset-within-region bits of a GPA.
    region_mask: u64,
    // Number of offset bits; the region index is `gpa >> region_bits`.
    region_bits: u32,
}
2258
impl RegionDefinition {
    /// Splits `gpa` into a region index and an offset within that region,
    /// validating that `gpa..gpa + len` stays inside a single region.
    fn region(&self, gpa: u64, len: u64) -> Result<(usize, u64), GuestMemoryBackingError> {
        // Reject addresses or lengths with bits outside the valid range.
        if (gpa | len) & self.invalid_mask != 0 {
            return Err(GuestMemoryBackingError::new(
                GuestMemoryErrorKind::OutOfRange,
                gpa,
                OutOfRange,
            ));
        }
        let offset = gpa & self.region_mask;
        // Reject accesses that would run past the end of the region.
        if offset.wrapping_add(len) & !self.region_mask != 0 {
            return Err(GuestMemoryBackingError::new(
                GuestMemoryErrorKind::OutOfRange,
                gpa,
                OutOfRange,
            ));
        }
        let index = (gpa >> self.region_bits) as usize;
        Ok((index, offset))
    }
}
2280
2281impl GuestMemoryInner {
2282    fn region(
2283        &self,
2284        gpa: u64,
2285        len: u64,
2286    ) -> Result<(&MemoryRegion, u64, usize), GuestMemoryBackingError> {
2287        let (index, offset) = self.region_def.region(gpa, len)?;
2288        let region = &self.regions[index];
2289        if offset + len > region.len {
2290            return Err(GuestMemoryBackingError::new(
2291                GuestMemoryErrorKind::OutOfRange,
2292                gpa,
2293                OutOfRange,
2294            ));
2295        }
2296        Ok((&self.regions[index], offset, index))
2297    }
2298}
2299
/// A set of guest pages locked via [`GuestMemory::lock_gpns`], accessible as
/// page references until dropped.
///
/// NOTE(review): this type derives `Clone` while `Drop` calls `unlock_gpns`
/// when `gpns` is `Some`; cloning such a value would unlock the same GPNs
/// once per clone — confirm the backing tolerates this (e.g. via lock
/// counting).
#[derive(Clone)]
pub struct LockedPages {
    // Raw pointers to each locked page's mapping.
    pages: Box<[PagePtr]>,
    // The locked GPNs, retained only when the backing asked for them to be
    // passed back to `unlock_gpns` on drop.
    gpns: Option<Box<[u64]>>,
    // maintain a reference to the backing memory
    mem: Arc<GuestMemoryInner>,
}
2307
2308impl Drop for LockedPages {
2309    fn drop(&mut self) {
2310        if let Some(gpns) = &self.gpns {
2311            self.mem.imp.unlock_gpns(gpns);
2312        }
2313    }
2314}
2315
2316impl Debug for LockedPages {
2317    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2318        f.debug_struct("LockedPages")
2319            .field("page_count", &self.pages.len())
2320            .finish()
2321    }
2322}
2323
/// A raw pointer to one locked guest page.
#[derive(Copy, Clone, Debug)]
// Field is read via slice transmute and pointer casts, not actually dead.
struct PagePtr(#[expect(dead_code)] *const AtomicU8);

// SAFETY: PagePtr is just a pointer with no methods and has no inherent safety
// constraints.
unsafe impl Send for PagePtr {}
// SAFETY: see above comment
unsafe impl Sync for PagePtr {}

/// One guest page, viewed as an array of atomic bytes.
pub type Page = [AtomicU8; PAGE_SIZE];
2335
impl LockedPages {
    /// Returns the locked pages as references to `PAGE_SIZE`-byte arrays of
    /// atomic bytes.
    #[inline]
    pub fn pages(&self) -> &[&Page] {
        // SAFETY: PagePtr is just a pointer to a Page. The pages are kept alive by
        // the reference in `mem`, and the lifetimes here ensure the LockedPages
        // outlives the slice.
        unsafe { std::slice::from_raw_parts(self.pages.as_ptr().cast::<&Page>(), self.pages.len()) }
    }
}
2345
/// Allows a borrowed `LockedPages` to be used directly as a page slice.
impl<'a> AsRef<[&'a Page]> for &'a LockedPages {
    fn as_ref(&self) -> &[&'a Page] {
        self.pages()
    }
}
2351
/// Represents a range of locked guest pages as an ordered list of the VA sub-ranges
/// to which the guest pages are mapped.
/// The range may only partially span the first and last page and must fully span all
/// intermediate pages.
pub trait LockedRange<'a> {
    /// Adds a sub-range to this range.
    ///
    /// Sub-ranges are pushed in the order they appear in the locked
    /// `PagedRange`.
    fn push_sub_range(&mut self, sub_range: &'a [AtomicU8]);
}
2360
/// A locked guest memory range produced by [`GuestMemory::lock_range`].
///
/// Unlocks the pages on drop when the backing tracks locks.
pub struct LockedRangeImpl<'a, T: LockedRange<'a>> {
    mem: &'a GuestMemoryInner,
    // The locked GPNs, retained only when the backing asked for them to be
    // passed back to `unlock_gpns` on drop.
    gpns: Option<Box<[u64]>>,
    inner: T,
}
2366
2367impl<'a, T: LockedRange<'a>> LockedRangeImpl<'a, T> {
2368    pub fn get(&self) -> &T {
2369        &self.inner
2370    }
2371
2372    pub fn get_mut(&mut self) -> &mut T {
2373        &mut self.inner
2374    }
2375}
2376
2377impl<'a, T: LockedRange<'a>> Drop for LockedRangeImpl<'a, T> {
2378    fn drop(&mut self) {
2379        if let Some(gpns) = &self.gpns {
2380            self.mem.imp.unlock_gpns(gpns);
2381        }
2382    }
2383}
2384
/// Errors raised by the [`MemoryRead`] and [`MemoryWrite`] accessors.
#[derive(Debug, Error)]
pub enum AccessError {
    /// The underlying guest memory access failed.
    #[error("memory access error")]
    Memory(#[from] GuestMemoryError),
    /// The access needed more bytes than remain: (available, requested).
    #[error("out of range: {0:#x} < {1:#x}")]
    OutOfRange(usize, usize),
    /// A write was attempted through a read-only accessor.
    #[error("write attempted to read-only memory")]
    ReadOnly,
}
2394
2395pub trait MemoryRead {
2396    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError>;
2397    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError>;
2398    fn len(&self) -> usize;
2399
2400    fn read_plain<T: IntoBytes + FromBytes + Immutable + KnownLayout>(
2401        &mut self,
2402    ) -> Result<T, AccessError> {
2403        let mut value: T = FromZeros::new_zeroed();
2404        self.read(value.as_mut_bytes())?;
2405        Ok(value)
2406    }
2407
2408    fn read_n<T: IntoBytes + FromBytes + Immutable + KnownLayout + Copy>(
2409        &mut self,
2410        len: usize,
2411    ) -> Result<Vec<T>, AccessError> {
2412        let mut value = vec![FromZeros::new_zeroed(); len];
2413        self.read(value.as_mut_bytes())?;
2414        Ok(value)
2415    }
2416
2417    fn read_all(&mut self) -> Result<Vec<u8>, AccessError> {
2418        let mut value = vec![0; self.len()];
2419        self.read(&mut value)?;
2420        Ok(value)
2421    }
2422
2423    fn limit(self, len: usize) -> Limit<Self>
2424    where
2425        Self: Sized,
2426    {
2427        let len = len.min(self.len());
2428        Limit { inner: self, len }
2429    }
2430}
2431
2432/// A trait for sequentially updating a region of memory.
2433pub trait MemoryWrite {
2434    fn write(&mut self, data: &[u8]) -> Result<(), AccessError>;
2435    fn zero(&mut self, len: usize) -> Result<(), AccessError> {
2436        self.fill(0, len)
2437    }
2438    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError>;
2439
2440    /// The space remaining in the memory region.
2441    fn len(&self) -> usize;
2442
2443    fn limit(self, len: usize) -> Limit<Self>
2444    where
2445        Self: Sized,
2446    {
2447        let len = len.min(self.len());
2448        Limit { inner: self, len }
2449    }
2450}
2451
2452impl MemoryRead for &'_ [u8] {
2453    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError> {
2454        if self.len() < data.len() {
2455            return Err(AccessError::OutOfRange(self.len(), data.len()));
2456        }
2457        let (source, rest) = self.split_at(data.len());
2458        data.copy_from_slice(source);
2459        *self = rest;
2460        Ok(self)
2461    }
2462
2463    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError> {
2464        if self.len() < len {
2465            return Err(AccessError::OutOfRange(self.len(), len));
2466        }
2467        *self = &self[len..];
2468        Ok(self)
2469    }
2470
2471    fn len(&self) -> usize {
2472        <[u8]>::len(self)
2473    }
2474}
2475
2476impl MemoryWrite for &mut [u8] {
2477    fn write(&mut self, data: &[u8]) -> Result<(), AccessError> {
2478        if self.len() < data.len() {
2479            return Err(AccessError::OutOfRange(self.len(), data.len()));
2480        }
2481        let (dest, rest) = std::mem::take(self).split_at_mut(data.len());
2482        dest.copy_from_slice(data);
2483        *self = rest;
2484        Ok(())
2485    }
2486
2487    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError> {
2488        if self.len() < len {
2489            return Err(AccessError::OutOfRange(self.len(), len));
2490        }
2491        let (dest, rest) = std::mem::take(self).split_at_mut(len);
2492        dest.fill(val);
2493        *self = rest;
2494        Ok(())
2495    }
2496
2497    fn len(&self) -> usize {
2498        <[u8]>::len(self)
2499    }
2500}
2501
/// Wraps a [`MemoryRead`] or [`MemoryWrite`] accessor, capping the number of
/// bytes that may be accessed through it.
#[derive(Debug, Clone)]
pub struct Limit<T> {
    inner: T,
    // Remaining byte budget; decremented as the inner accessor advances.
    len: usize,
}
2507
2508impl<T: MemoryRead> MemoryRead for Limit<T> {
2509    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError> {
2510        let len = data.len();
2511        if len > self.len {
2512            return Err(AccessError::OutOfRange(self.len, len));
2513        }
2514        self.inner.read(data)?;
2515        self.len -= len;
2516        Ok(self)
2517    }
2518
2519    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError> {
2520        if len > self.len {
2521            return Err(AccessError::OutOfRange(self.len, len));
2522        }
2523        self.inner.skip(len)?;
2524        self.len -= len;
2525        Ok(self)
2526    }
2527
2528    fn len(&self) -> usize {
2529        self.len
2530    }
2531}
2532
2533impl<T: MemoryWrite> MemoryWrite for Limit<T> {
2534    fn write(&mut self, data: &[u8]) -> Result<(), AccessError> {
2535        let len = data.len();
2536        if len > self.len {
2537            return Err(AccessError::OutOfRange(self.len, len));
2538        }
2539        self.inner.write(data)?;
2540        self.len -= len;
2541        Ok(())
2542    }
2543
2544    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError> {
2545        if len > self.len {
2546            return Err(AccessError::OutOfRange(self.len, len));
2547        }
2548        self.inner.fill(val, len)?;
2549        self.len -= len;
2550        Ok(())
2551    }
2552
2553    fn len(&self) -> usize {
2554        self.len
2555    }
2556}
2557
/// Trait implemented to allow mapping and unmapping a region of memory at
/// a particular guest address.
pub trait MappableGuestMemory: Send + Sync {
    /// Maps the memory into the guest.
    ///
    /// `writable` specifies whether the guest can write to the memory region.
    /// If a guest tries to write to a non-writable region, the virtual
    /// processor will exit for MMIO handling.
    fn map_to_guest(&mut self, gpa: u64, writable: bool) -> io::Result<()>;

    /// Removes the mapping established by [`Self::map_to_guest`].
    fn unmap_from_guest(&mut self);
}
2570
/// Trait implemented for a region of memory that can have memory mapped into
/// it.
pub trait MappedMemoryRegion: Send + Sync {
    /// Maps an object at `offset` in the region.
    ///
    /// `section` is mapped starting at `file_offset` for `len` bytes, with
    /// writes permitted when `writable` is set.
    ///
    /// Behaves like mmap--overwrites and splits existing mappings.
    fn map(
        &self,
        offset: usize,
        section: &dyn AsMappableRef,
        file_offset: u64,
        len: usize,
        writable: bool,
    ) -> io::Result<()>;

    /// Unmaps any mappings in the specified range within the region.
    fn unmap(&self, offset: usize, len: usize) -> io::Result<()>;
}
2589
/// Trait implemented to allow the creation of memory regions.
pub trait MemoryMapper: Send + Sync {
    /// Creates a new memory region that can later be mapped into the guest.
    ///
    /// `len` is the size of the region in bytes; `debug_name` names the
    /// region for diagnostic purposes.
    ///
    /// Returns both an interface for mapping/unmapping the region and for
    /// adding internal mappings.
    fn new_region(
        &self,
        len: usize,
        debug_name: String,
    ) -> io::Result<(Box<dyn MappableGuestMemory>, Arc<dyn MappedMemoryRegion>)>;
}
2602
/// Doorbell provides a mechanism to register for notifications on writes to specific addresses in guest memory.
pub trait DoorbellRegistration: Send + Sync {
    /// Register a doorbell event.
    ///
    /// Signals `event` on guest writes to `guest_address`.
    ///
    /// NOTE(review): `value` and `length`, when provided, presumably filter
    /// which writes trigger the event (matching written data and access
    /// size) -- confirm against the implementation in use. The returned
    /// object appears to represent the registration; dropping it is
    /// expected to unregister the doorbell.
    fn register_doorbell(
        &self,
        guest_address: u64,
        value: Option<u64>,
        length: Option<u32>,
        event: &Event,
    ) -> io::Result<Box<dyn Send + Sync>>;
}
2614
/// Trait to map a ROM at one or more locations in guest memory.
pub trait MapRom: Send + Sync {
    /// Maps the specified portion of the ROM into guest memory at `gpa`.
    ///
    /// `offset` and `len` are in bytes and select the portion of the ROM to
    /// map.
    ///
    /// The returned object will implicitly unmap the ROM when dropped.
    fn map_rom(&self, gpa: u64, offset: u64, len: u64) -> io::Result<Box<dyn UnmapRom>>;

    /// Returns the length of the ROM in bytes.
    fn len(&self) -> u64;
}
2625
/// Trait to unmap a ROM from guest memory.
pub trait UnmapRom: Send + Sync {
    /// Unmaps the ROM from guest memory, consuming the mapping object.
    fn unmap_rom(self);
}
2631
#[cfg(test)]
#[expect(clippy::undocumented_unsafe_blocks)]
mod tests {
    use crate::GuestMemory;
    use crate::PAGE_SIZE64;
    use crate::PageFaultAction;
    use crate::PageFaultError;
    use crate::ranges::PagedRange;
    use sparse_mmap::SparseMapping;
    use std::ptr::NonNull;
    use std::sync::Arc;
    use thiserror::Error;

    /// An implementation of a GuestMemoryAccess trait that expects all of
    /// guest memory to be mapped at a given base, with mmap or the Windows
    /// equivalent. Pages that are not backed by RAM will return failure
    /// when attempting to access them.
    pub struct GuestMemoryMapping {
        mapping: SparseMapping,
        // Optional access bitmap; exercised by `test_bitmap` below.
        #[cfg(feature = "bitmap")]
        bitmap: Option<Vec<u8>>,
    }

    unsafe impl crate::GuestMemoryAccess for GuestMemoryMapping {
        fn mapping(&self) -> Option<NonNull<u8>> {
            NonNull::new(self.mapping.as_ptr().cast())
        }

        fn max_address(&self) -> u64 {
            self.mapping.len() as u64
        }

        #[cfg(feature = "bitmap")]
        fn access_bitmap(&self) -> Option<crate::BitmapInfo> {
            // The same bits gate both reads and writes: the single bitmap is
            // returned as both the read and the write bitmap.
            self.bitmap.as_ref().map(|bm| crate::BitmapInfo {
                read_bitmap: NonNull::new(bm.as_ptr().cast_mut()).unwrap(),
                write_bitmap: NonNull::new(bm.as_ptr().cast_mut()).unwrap(),
                bit_offset: 0,
            })
        }
    }

    const PAGE_SIZE: usize = 4096;
    const SIZE_1MB: usize = 1048576;

    /// Create a test guest layout:
    /// 0           -> 1MB          RAM
    /// 1MB         -> 2MB          empty
    /// 2MB         -> 3MB          RAM
    /// 3MB         -> 3MB + 4K     empty
    /// 3MB + 4K    -> 4MB          RAM
    fn create_test_mapping() -> GuestMemoryMapping {
        let mapping = SparseMapping::new(SIZE_1MB * 4).unwrap();
        mapping.alloc(0, SIZE_1MB).unwrap();
        mapping.alloc(2 * SIZE_1MB, SIZE_1MB).unwrap();
        mapping
            .alloc(3 * SIZE_1MB + PAGE_SIZE, SIZE_1MB - PAGE_SIZE)
            .unwrap();

        GuestMemoryMapping {
            mapping,
            #[cfg(feature = "bitmap")]
            bitmap: None,
        }
    }

    /// Reads and writes at various offsets in the layout built by
    /// `create_test_mapping`, including a read into the 1MB hole.
    #[test]
    fn test_basic_read_write() {
        let mapping = create_test_mapping();
        let gm = GuestMemory::new("test", mapping);

        // Test reading at 0. Freshly allocated pages read as zero.
        let addr = 0;
        let result = gm.read_plain::<u8>(addr);
        assert_eq!(result.unwrap(), 0);

        // Test read/write to first page
        let write_buffer = [1, 2, 3, 4, 5];
        let mut read_buffer = [0; 5];
        gm.write_at(0, &write_buffer).unwrap();
        gm.read_at(0, &mut read_buffer).unwrap();
        assert_eq!(write_buffer, read_buffer);
        assert_eq!(gm.read_plain::<u8>(0).unwrap(), 1);
        assert_eq!(gm.read_plain::<u8>(1).unwrap(), 2);
        assert_eq!(gm.read_plain::<u8>(2).unwrap(), 3);
        assert_eq!(gm.read_plain::<u8>(3).unwrap(), 4);
        assert_eq!(gm.read_plain::<u8>(4).unwrap(), 5);

        // Test read/write to page at 2MB
        let addr = 2 * SIZE_1MB as u64;
        let write_buffer: Vec<u8> = (0..PAGE_SIZE).map(|x| x as u8).collect();
        let mut read_buffer: Vec<u8> = (0..PAGE_SIZE).map(|_| 0).collect();
        gm.write_at(addr, write_buffer.as_slice()).unwrap();
        gm.read_at(addr, read_buffer.as_mut_slice()).unwrap();
        assert_eq!(write_buffer, read_buffer);

        // Test read/write to first 1MB
        let write_buffer: Vec<u8> = (0..SIZE_1MB).map(|x| x as u8).collect();
        let mut read_buffer: Vec<u8> = (0..SIZE_1MB).map(|_| 0).collect();
        gm.write_at(addr, write_buffer.as_slice()).unwrap();
        gm.read_at(addr, read_buffer.as_mut_slice()).unwrap();
        assert_eq!(write_buffer, read_buffer);

        // Test bad read at 1MB (the unbacked hole in the layout).
        let addr = SIZE_1MB as u64;
        let result = gm.read_plain::<u8>(addr);
        assert!(result.is_err());
    }

    /// Exercises `new_multi_region`: the same fully-populated 4MB mapping is
    /// used for regions 0 and 2, with region 1 left empty.
    #[test]
    fn test_multi() {
        let len = SIZE_1MB * 4;
        let mapping = SparseMapping::new(len).unwrap();
        mapping.alloc(0, len).unwrap();
        let mapping = Arc::new(GuestMemoryMapping {
            mapping,
            #[cfg(feature = "bitmap")]
            bitmap: None,
        });
        let region_len = 1 << 30;
        let gm = GuestMemory::new_multi_region(
            "test",
            region_len,
            vec![Some(mapping.clone()), None, Some(mapping.clone())],
        )
        .unwrap();

        let mut b = [0];
        let len = len as u64;
        // Region 0: backed up to `len`, then unbacked.
        gm.read_at(0, &mut b).unwrap();
        gm.read_at(len, &mut b).unwrap_err();
        // Region 1: no mapping at all.
        gm.read_at(region_len, &mut b).unwrap_err();
        // Region 2: backed up to `len`, then unbacked.
        gm.read_at(2 * region_len, &mut b).unwrap();
        gm.read_at(2 * region_len + len, &mut b).unwrap_err();
        // Past the last region.
        gm.read_at(3 * region_len, &mut b).unwrap_err();
    }

    /// Verifies that the access bitmap gates reads per page: bitmap 0b0101
    /// marks pages 0 and 2 accessible and pages 1 and 3 inaccessible.
    #[cfg(feature = "bitmap")]
    #[test]
    fn test_bitmap() {
        let len = PAGE_SIZE * 4;
        let mapping = SparseMapping::new(len).unwrap();
        mapping.alloc(0, len).unwrap();
        let bitmap = vec![0b0101];
        let mapping = Arc::new(GuestMemoryMapping {
            mapping,
            bitmap: Some(bitmap),
        });
        let gm = GuestMemory::new("test", mapping);

        gm.read_plain::<[u8; 1]>(0).unwrap();
        gm.read_plain::<[u8; 1]>(PAGE_SIZE64 - 1).unwrap();
        // Crossing from page 0 into inaccessible page 1 fails.
        gm.read_plain::<[u8; 2]>(PAGE_SIZE64 - 1).unwrap_err();
        gm.read_plain::<[u8; 1]>(PAGE_SIZE64).unwrap_err();
        gm.read_plain::<[u8; 1]>(PAGE_SIZE64 * 2).unwrap();
        gm.read_plain::<[u8; PAGE_SIZE * 2]>(0).unwrap_err();
    }

    /// A `GuestMemoryAccess` implementation that starts with nothing mapped
    /// and demand-populates pages from `page_fault`, to exercise the fault
    /// retry path.
    struct FaultingMapping {
        mapping: SparseMapping,
    }

    #[derive(Debug, Error)]
    #[error("fault")]
    struct Fault;

    unsafe impl crate::GuestMemoryAccess for FaultingMapping {
        fn mapping(&self) -> Option<NonNull<u8>> {
            NonNull::new(self.mapping.as_ptr().cast())
        }

        fn max_address(&self) -> u64 {
            self.mapping.len() as u64
        }

        fn page_fault(
            &self,
            address: u64,
            _len: usize,
            write: bool,
            bitmap_failure: bool,
        ) -> PageFaultAction {
            assert!(!bitmap_failure);
            // A quarter of the mapping in bytes; with the 4-page mapping used
            // by `test_fault` this is exactly one page. The first and last
            // quarters always fail, the second quarter is allocated writable
            // on demand, and the third quarter is mapped as a read-only zero
            // page (writes to it fail).
            let qlen = self.mapping.len() as u64 / 4;
            if address < qlen || address >= 3 * qlen {
                return PageFaultAction::Fail(PageFaultError::other(Fault));
            }
            let page_address = (address as usize) & !(PAGE_SIZE - 1);
            if address >= 2 * qlen {
                if write {
                    return PageFaultAction::Fail(PageFaultError::other(Fault));
                }
                self.mapping.map_zero(page_address, PAGE_SIZE).unwrap();
            } else {
                self.mapping.alloc(page_address, PAGE_SIZE).unwrap();
            }
            // The page is now populated; tell the accessor to try again.
            PageFaultAction::Retry
        }
    }

    impl FaultingMapping {
        fn new(len: usize) -> Self {
            let mapping = SparseMapping::new(len).unwrap();
            FaultingMapping { mapping }
        }
    }

    #[test]
    fn test_fault() {
        let len = PAGE_SIZE * 4;
        let mapping = FaultingMapping::new(len);
        let gm = GuestMemory::new("test", mapping);

        gm.write_plain::<u8>(0, &0).unwrap_err();
        gm.read_plain::<u8>(PAGE_SIZE64 - 1).unwrap_err();
        gm.read_plain::<u8>(PAGE_SIZE64).unwrap();
        gm.write_plain::<u8>(PAGE_SIZE64, &0).unwrap();
        gm.write_plain::<u16>(PAGE_SIZE64 * 3 - 1, &0).unwrap_err();
        gm.read_plain::<u16>(PAGE_SIZE64 * 3 - 1).unwrap_err();
        gm.read_plain::<u8>(PAGE_SIZE64 * 3 - 1).unwrap();
        gm.write_plain::<u8>(PAGE_SIZE64 * 3 - 1, &0).unwrap_err();

        // Test probe_gpn_writable_range with FaultingMapping
        // FaultingMapping layout (per `page_fault` above; the mapping is 4
        // pages):
        // - Page 0 (address 0 to PAGE_SIZE): faults on any access
        // - Page 1 (address PAGE_SIZE to 2*PAGE_SIZE): writable (allocated on
        //   first fault)
        // - Page 2 (address 2*PAGE_SIZE to 3*PAGE_SIZE): read-only (mapped
        //   zero on read fault; writes fail)
        // - Page 3 and beyond: faults on any access

        // Test 1: Probe faulting page - should fail
        let gpns = vec![0];
        let range = PagedRange::new(0, PAGE_SIZE, &gpns).unwrap();
        assert!(gm.probe_gpn_writable_range(&range).is_err());

        // Test 2: Probe writable page - should succeed
        let gpns = vec![1];
        let range = PagedRange::new(0, PAGE_SIZE, &gpns).unwrap();
        gm.probe_gpn_writable_range(&range).unwrap();

        // Test 3: Probe page 2 (read-only) and page 3 (faulting) - should fail
        let gpns = vec![2, 3];
        let range = PagedRange::new(0, PAGE_SIZE * 2, &gpns).unwrap();
        assert!(gm.probe_gpn_writable_range(&range).is_err());

        // Test 4: Probe mixed access (writable + read-only) - should fail
        let gpns = vec![1, 2];
        let range = PagedRange::new(0, PAGE_SIZE * 2, &gpns).unwrap();
        assert!(gm.probe_gpn_writable_range(&range).is_err());

        // Test 5: Compare readable vs writable on read-only pages
        let gpns = vec![2];
        let range = PagedRange::new(0, PAGE_SIZE, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap(); // Should succeed
        assert!(gm.probe_gpn_writable_range(&range).is_err()); // Should fail

        // Test 6: Partial page range
        let gpns = vec![1];
        let range = PagedRange::new(100, 500, &gpns).unwrap();
        gm.probe_gpn_writable_range(&range).unwrap();

        // Test 7: Empty range - should succeed
        let range = PagedRange::empty();
        gm.probe_gpn_writable_range(&range).unwrap();

        // Test probe_gpn_readable_range with FaultingMapping

        // Test 8: Probe page past the mapping for read - should fail
        let gpns = vec![5];
        let range = PagedRange::new(0, PAGE_SIZE, &gpns).unwrap();
        assert!(gm.probe_gpn_readable_range(&range).is_err());

        // Test 9: Probe writable page for read - should succeed
        let gpns = vec![1];
        let range = PagedRange::new(0, PAGE_SIZE, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap();

        // Test 10: Probe mixed access (writable + read-only) for read - should succeed
        let gpns = vec![1, 2];
        let range = PagedRange::new(0, PAGE_SIZE * 2, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap(); // Both pages are readable

        // Test 11: Probe mixed access (faulting + writable) for read - should fail
        let gpns = vec![5, 1];
        let range = PagedRange::new(0, PAGE_SIZE * 2, &gpns).unwrap();
        assert!(gm.probe_gpn_readable_range(&range).is_err()); // Page 5 faults

        // Test 12: Probe mixed access (faulting + read-only) for read - should fail
        let gpns = vec![5, 2];
        let range = PagedRange::new(0, PAGE_SIZE * 2, &gpns).unwrap();
        assert!(gm.probe_gpn_readable_range(&range).is_err()); // Page 5 faults

        // Test 13: Partial page range for read on read-only pages
        let gpns = vec![2];
        let range = PagedRange::new(100, 500, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap();

        // Test 14: Partial page range for read on writable pages
        let gpns = vec![1];
        let range = PagedRange::new(200, 1000, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap();

        // Test 15: Empty range for read - should succeed
        let range = PagedRange::empty();
        gm.probe_gpn_readable_range(&range).unwrap();

        // Test 16: Single byte read on read-only page
        let gpns = vec![2];
        let range = PagedRange::new(0, 1, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap();

        // Test 17: Single byte read on faulting page
        let gpns = vec![5];
        let range = PagedRange::new(0, 1, &gpns).unwrap();
        assert!(gm.probe_gpn_readable_range(&range).is_err());

        // Test 18: Cross-boundary range on writable + read-only
        // NOTE(review): this range partially covers gpn 3, which `page_fault`
        // above fails for reads, yet the readable probe below is expected to
        // succeed -- presumably the probe does not fault the trailing partial
        // page; confirm against probe_gpn_readable_range's implementation.
        let gpns = vec![1, 2, 3];
        let range = PagedRange::new(PAGE_SIZE / 2, PAGE_SIZE * 2, &gpns).unwrap();
        gm.probe_gpn_readable_range(&range).unwrap(); // All readable
        assert!(gm.probe_gpn_writable_range(&range).is_err()); // Pages 2-3 not writable
    }

    /// Exercises `GuestMemory::allocate` and the `inner_buf` accessors:
    /// while a clone exists the buffer is shared, so exclusive access
    /// (`inner_buf_mut`, `into_inner_buf`) is unavailable until the clone is
    /// dropped.
    #[test]
    fn test_allocated() {
        let mut gm = GuestMemory::allocate(0x10000);
        let pattern = [0x42; 0x10000];
        gm.write_at(0, &pattern).unwrap();
        assert_eq!(gm.inner_buf_mut().unwrap(), &pattern);
        gm.inner_buf().unwrap();
        // A live clone shares the buffer: mutable/owned access now fails,
        // shared access still works.
        let gm2 = gm.clone();
        assert!(gm.inner_buf_mut().is_none());
        gm.inner_buf().unwrap();
        let mut gm = gm.into_inner_buf().unwrap_err();
        // Dropping the clone restores exclusive access.
        drop(gm2);
        assert_eq!(gm.inner_buf_mut().unwrap(), &pattern);
        gm.into_inner_buf().unwrap();
    }
}
2969}