Skip to main content

guestmem/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Interfaces to read and write guest memory.
5
6// UNSAFETY: This crate's whole purpose is manual memory mapping and management.
7#![expect(unsafe_code)]
8#![expect(missing_docs)]
9
10pub mod ranges;
11
12use self::ranges::PagedRange;
13use inspect::Inspect;
14use pal_event::Event;
15use sparse_mmap::AsMappableRef;
16use std::any::Any;
17use std::fmt::Debug;
18use std::future::Future;
19use std::io;
20use std::ops::Deref;
21use std::ops::DerefMut;
22use std::ops::Range;
23use std::ptr::NonNull;
24use std::sync::Arc;
25use std::sync::atomic::AtomicU8;
26use thiserror::Error;
27use zerocopy::FromBytes;
28use zerocopy::FromZeros;
29use zerocopy::Immutable;
30use zerocopy::IntoBytes;
31use zerocopy::KnownLayout;
32
/// Effective page size, in bytes, for page-related operations in this crate.
pub const PAGE_SIZE: usize = 4096;
/// [`PAGE_SIZE`] as a `u64`, so arithmetic on `u64` guest addresses needs no
/// casts.
const PAGE_SIZE64: u64 = 4096;
36
/// A memory access error returned by one of the [`GuestMemory`] methods.
///
/// The details live in a boxed [`GuestMemoryErrorInner`], so the error itself
/// is a single pointer wide. Because of `#[error(transparent)]`, the
/// `Display` output and error source are those of the inner value.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct GuestMemoryError(Box<GuestMemoryErrorInner>);
41
42impl GuestMemoryError {
43    fn new(
44        debug_name: &Arc<str>,
45        range: Option<Range<u64>>,
46        op: GuestMemoryOperation,
47        err: GuestMemoryBackingError,
48    ) -> Self {
49        GuestMemoryError(Box::new(GuestMemoryErrorInner {
50            op,
51            debug_name: debug_name.clone(),
52            range,
53            gpa: (err.gpa != INVALID_ERROR_GPA).then_some(err.gpa),
54            kind: err.kind,
55            err: err.err,
56        }))
57    }
58
59    /// Returns the kind of the error.
60    pub fn kind(&self) -> GuestMemoryErrorKind {
61        self.0.kind
62    }
63}
64
/// The guest memory operation that was in progress when an error occurred.
///
/// Stored in [`GuestMemoryErrorInner`] and rendered into the error message via
/// its `Display` implementation.
#[derive(Debug, Copy, Clone)]
enum GuestMemoryOperation {
    /// Displayed as `read`.
    Read,
    /// Displayed as `write`.
    Write,
    /// Displayed as `fill`.
    Fill,
    /// Displayed as `compare exchange`.
    CompareExchange,
    /// Displayed as `lock`.
    Lock,
    /// Displayed as `subrange`.
    Subrange,
    /// Displayed as `probe`.
    Probe,
}
75
76impl std::fmt::Display for GuestMemoryOperation {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        f.pad(match self {
79            GuestMemoryOperation::Read => "read",
80            GuestMemoryOperation::Write => "write",
81            GuestMemoryOperation::Fill => "fill",
82            GuestMemoryOperation::CompareExchange => "compare exchange",
83            GuestMemoryOperation::Lock => "lock",
84            GuestMemoryOperation::Subrange => "subrange",
85            GuestMemoryOperation::Probe => "probe",
86        })
87    }
88}
89
/// The boxed payload of a [`GuestMemoryError`].
///
/// `Display` is implemented by hand for this type; the derive only supplies
/// the `Error` implementation, with `err` as the source.
#[derive(Debug, Error)]
struct GuestMemoryErrorInner {
    /// The operation that failed.
    op: GuestMemoryOperation,
    /// Name of the guest memory object, included in the message.
    debug_name: Arc<str>,
    /// The address range of the overall access, if known.
    range: Option<Range<u64>>,
    /// The precise failing address, if the backing reported one.
    gpa: Option<u64>,
    /// Classification of the failure.
    kind: GuestMemoryErrorKind,
    /// The underlying error reported by the backing.
    #[source]
    err: Box<dyn std::error::Error + Send + Sync>,
}
100
101impl std::fmt::Display for GuestMemoryErrorInner {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        write!(
104            f,
105            "guest memory '{debug_name}': {op} error: failed to access ",
106            debug_name = self.debug_name,
107            op = self.op
108        )?;
109        if let Some(range) = &self.range {
110            write!(f, "{:#x}-{:#x}", range.start, range.end)?;
111        } else {
112            f.write_str("memory")?;
113        }
114        // Include the precise GPA if provided and different from the start of
115        // the range.
116        if let Some(gpa) = self.gpa {
117            if self.range.as_ref().is_none_or(|range| range.start != gpa) {
118                write!(f, " at {:#x}", gpa)?;
119            }
120        }
121        Ok(())
122    }
123}
124
/// A memory access error returned by a [`GuestMemoryAccess`] trait method.
#[derive(Debug)]
pub struct GuestMemoryBackingError {
    /// The failing guest physical address, or [`INVALID_ERROR_GPA`] when no
    /// address is available.
    gpa: u64,
    /// Classification of the failure.
    kind: GuestMemoryErrorKind,
    /// The underlying cause.
    err: Box<dyn std::error::Error + Send + Sync>,
}
132
/// The kind of memory access error.
///
/// This enum is `#[non_exhaustive]`: matches outside this crate need a
/// wildcard arm, since more kinds may be added.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum GuestMemoryErrorKind {
    /// An error that does not fit any other category.
    Other,
    /// The address is outside the valid range of the memory.
    OutOfRange,
    /// The memory has been protected by a higher virtual trust level.
    VtlProtected,
    /// The memory is shared but was accessed via a private address.
    NotPrivate,
    /// The memory is private but was accessed via a shared address.
    NotShared,
}
148
149/// An error returned by a page fault handler in [`GuestMemoryAccess::page_fault`].
150pub struct PageFaultError {
151    kind: GuestMemoryErrorKind,
152    err: Box<dyn std::error::Error + Send + Sync>,
153}
154
155impl PageFaultError {
156    /// Returns a new page fault error.
157    pub fn new(
158        kind: GuestMemoryErrorKind,
159        err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
160    ) -> Self {
161        Self {
162            kind,
163            err: err.into(),
164        }
165    }
166
167    /// Returns a page fault error without an explicit kind.
168    pub fn other(err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Self {
169        Self::new(GuestMemoryErrorKind::Other, err)
170    }
171}
172
/// Used to avoid needing an `Option` for [`GuestMemoryBackingError::gpa`], to
/// save size in hot paths.
///
/// The value is all bits set (`u64::MAX`). A backing error carrying this
/// address is reported as having no precise address.
const INVALID_ERROR_GPA: u64 = !0;
176
177impl GuestMemoryBackingError {
178    /// Returns a new error for a memory access failure at address `gpa`.
179    pub fn new(
180        kind: GuestMemoryErrorKind,
181        gpa: u64,
182        err: impl Into<Box<dyn std::error::Error + Send + Sync>>,
183    ) -> Self {
184        // `gpa` might incorrectly be INVALID_ERROR_GPA; this is harmless (just
185        // affecting the error message), so don't assert on it in case this is
186        // an untrusted value in some path.
187        Self {
188            kind,
189            gpa,
190            err: err.into(),
191        }
192    }
193
194    /// Returns a new error without an explicit kind.
195    pub fn other(gpa: u64, err: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Self {
196        Self::new(GuestMemoryErrorKind::Other, gpa, err)
197    }
198
199    fn gpn(err: InvalidGpn) -> Self {
200        Self {
201            kind: GuestMemoryErrorKind::OutOfRange,
202            gpa: INVALID_ERROR_GPA,
203            err: err.into(),
204        }
205    }
206}
207
/// Error payload used when an access falls outside the bounds of the memory
/// (see `GuestMemoryAccessRange::adjust_range`).
#[derive(Debug, Error)]
#[error("no memory at address")]
struct OutOfRange;
211
/// Error payload reporting that memory cannot be locked.
// NOTE(review): no use of this type is visible in this part of the file;
// confirm where it is raised.
#[derive(Debug, Error)]
#[error("memory not lockable")]
struct NotLockable;
215
/// Error payload returned by the default `*_fallback` implementations of
/// [`GuestMemoryAccess`], which do not support the operation.
#[derive(Debug, Error)]
#[error("no fallback for this operation")]
struct NoFallback;
219
/// Error payload used by the default [`GuestMemoryAccess::page_fault`] when
/// the fault was not caused by a bitmap check.
#[derive(Debug, Error)]
#[error("the specified page is not mapped")]
struct NotMapped;
223
/// Error payload used by the default [`GuestMemoryAccess::page_fault`] when
/// the fault was caused by a bitmap check.
#[derive(Debug, Error)]
#[error("page inaccessible in bitmap")]
struct BitmapFailure;
227
/// A trait for a guest memory backing that is fully available via a virtual
/// address mapping, as opposed to the fallback functions such as
/// [`GuestMemoryAccess::read_fallback`].
///
/// By implementing this trait, a type guarantees that its
/// [`GuestMemoryAccess::mapping`] will return `Some(_)` and that all of its
/// memory can be accessed through that mapping, without needing to call the
/// fallback functions.
///
/// This is a marker trait: it declares no methods of its own.
pub trait LinearGuestMemory: GuestMemoryAccess {}
237
238// SAFETY: the allocation will stay valid for the lifetime of the object.
239unsafe impl GuestMemoryAccess for sparse_mmap::alloc::SharedMem {
240    fn mapping(&self) -> Option<NonNull<u8>> {
241        NonNull::new(self.as_ptr().cast_mut().cast())
242    }
243
244    fn max_address(&self) -> u64 {
245        self.len() as u64
246    }
247}
248
// `SharedMem` provides only a mapping and keeps the failing default fallback
// methods, so all access goes through that mapping.
impl LinearGuestMemory for sparse_mmap::alloc::SharedMem {}
250
/// A page-aligned heap allocation for use with [`GuestMemory`].
///
/// The memory is a whole number of pages and is made of atomic bytes, so it
/// can be modified through a shared reference.
pub struct AlignedHeapMemory {
    /// The backing pages, stored contiguously.
    pages: Box<[AlignedPage]>,
}
255
256impl Debug for AlignedHeapMemory {
257    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258        f.debug_struct("AlignedHeapMemory")
259            .field("len", &self.len())
260            .finish()
261    }
262}
263
/// One page of atomic bytes, aligned to 4096 bytes.
///
/// The alignment literal matches [`PAGE_SIZE`], so a slice of these pages is
/// one contiguous, page-aligned run of `AtomicU8`.
#[repr(C, align(4096))]
struct AlignedPage([AtomicU8; PAGE_SIZE]);
266
267impl AlignedHeapMemory {
268    /// Allocates a new memory of `size` bytes, rounded up to a page size.
269    pub fn new(size: usize) -> Self {
270        #[expect(clippy::declare_interior_mutable_const)] // <https://github.com/rust-lang/rust-clippy/issues/7665>
271        const ZERO: AtomicU8 = AtomicU8::new(0);
272        #[expect(clippy::declare_interior_mutable_const)]
273        const ZERO_PAGE: AlignedPage = AlignedPage([ZERO; PAGE_SIZE]);
274        let mut pages = Vec::new();
275        pages.resize_with(size.div_ceil(PAGE_SIZE), || ZERO_PAGE);
276        Self {
277            pages: pages.into(),
278        }
279    }
280
281    /// Returns the length of the memory in bytes.
282    pub fn len(&self) -> usize {
283        self.pages.len() * PAGE_SIZE
284    }
285
286    /// Returns an immutable slice of bytes.
287    ///
288    /// This must take `&mut self` since the buffer is mutable via interior
289    /// mutability with just `&self`.
290    pub fn as_bytes(&mut self) -> &[u8] {
291        self.as_mut()
292    }
293
294    /// Returns a mutable slice of bytes.
295    pub fn as_mut_bytes(&mut self) -> &mut [u8] {
296        self.as_mut()
297    }
298}
299
300impl Deref for AlignedHeapMemory {
301    type Target = [AtomicU8];
302
303    fn deref(&self) -> &Self::Target {
304        // SAFETY: the buffer has the correct size and validity.
305        unsafe { std::slice::from_raw_parts(self.pages.as_ptr().cast(), self.len()) }
306    }
307}
308
309impl DerefMut for AlignedHeapMemory {
310    fn deref_mut(&mut self) -> &mut Self::Target {
311        // SAFETY: the buffer is unaliased and valid.
312        unsafe { std::slice::from_raw_parts_mut(self.pages.as_mut_ptr().cast(), self.len()) }
313    }
314}
315
316impl AsRef<[AtomicU8]> for AlignedHeapMemory {
317    fn as_ref(&self) -> &[AtomicU8] {
318        self
319    }
320}
321
322impl AsMut<[AtomicU8]> for AlignedHeapMemory {
323    fn as_mut(&mut self) -> &mut [AtomicU8] {
324        self
325    }
326}
327
328impl AsMut<[u8]> for AlignedHeapMemory {
329    fn as_mut(&mut self) -> &mut [u8] {
330        // FUTURE: use AtomicU8::get_mut_slice once stabilized.
331        // SAFETY: the buffer is unaliased, so it is fine to cast away the atomicness of the
332        // slice.
333        unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr().cast(), self.len()) }
334    }
335}
336
337// SAFETY: the allocation remains alive and valid for the lifetime of the
338// object.
339unsafe impl GuestMemoryAccess for AlignedHeapMemory {
340    fn mapping(&self) -> Option<NonNull<u8>> {
341        NonNull::new(self.pages.as_ptr().cast_mut().cast())
342    }
343
344    fn max_address(&self) -> u64 {
345        (self.pages.len() * PAGE_SIZE) as u64
346    }
347}
348
// `AlignedHeapMemory` provides only a mapping and keeps the failing default
// fallback methods, so all access goes through that mapping.
impl LinearGuestMemory for AlignedHeapMemory {}
350
/// A shareable region of guest memory backed by a file (Unix) or
/// section handle (Windows).
///
/// The backing file must already contain committed data for the region —
/// the consumer will map it directly, without any guestmem-managed lazy
/// commitment or fault handling. All bytes in the range must be accessible
/// without triggering SIGSEGV or SIGBUS due to missing backing. Normal OS
/// demand paging and minor faults on first access are still expected; this
/// requirement is specifically incompatible with bitmap-gated access or
/// lazy fault-in schemes.
pub struct ShareableRegion {
    /// Guest physical address at which this region starts.
    pub guest_address: u64,
    /// Size of the region in bytes.
    pub size: u64,
    /// Backing file/handle, shared via `Arc` to avoid OS-level `dup()`.
    pub file: Arc<sparse_mmap::Mappable>,
    /// Byte offset into `file` where this region starts.
    pub file_offset: u64,
}
371
/// Error type for [`ProvideShareableRegions::get_regions`].
///
/// A boxed, thread-safe error, so implementations can return any error type.
pub type ShareableRegionError = Box<dyn std::error::Error + Send + Sync>;
374
/// Opaque control object for accessing the shareable backing of guest
/// memory. Not all `GuestMemory` instances support this — those backed
/// by private memory or heap allocations return `None`.
///
/// # Contract
///
/// * The regions returned by [`get_regions`](Self::get_regions) must have
///   fully committed backing — the consumer will map them directly,
///   without guestmem-managed fault handling.
/// * The set of regions is currently static for the lifetime of the VM.
///   Hotplug and hot-remove of shareable regions are not yet supported;
///   once they are, additional methods will be added here to notify
///   consumers of changes.
pub struct GuestMemorySharing {
    /// The type-erased provider that answers region queries.
    inner: Box<dyn DynProvideShareableRegions>,
}
391
392impl GuestMemorySharing {
393    /// Construct from a trait implementation. Called by `GuestMemoryAccess`
394    /// implementations (e.g., `VaMapper` in membacking).
395    pub fn new(inner: impl ProvideShareableRegions + 'static) -> Self {
396        Self {
397            inner: Box::new(inner),
398        }
399    }
400
401    /// Return the current set of shareable backing regions.
402    pub async fn get_regions(&self) -> Result<Vec<ShareableRegion>, ShareableRegionError> {
403        self.inner.get_regions().await
404    }
405}
406
/// Trait for providing shareable region information.
///
/// Implementors must return regions whose backing files have fully
/// committed data — consumers will map them directly without
/// guestmem-managed fault handling. The region set is currently static;
/// dynamic updates (hotplug / hot-remove) are not yet supported.
///
/// This trait must be public so that crates like `membacking` can
/// implement it, but callers should interact with
/// [`GuestMemorySharing`]'s methods rather than this trait directly.
pub trait ProvideShareableRegions: Send + Sync {
    /// Return the current set of shareable backing regions.
    ///
    /// The returned future must be `Send` and may borrow from `self`.
    fn get_regions(
        &self,
    ) -> impl Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_;
}
423
/// Dyn-compatible version of [`ProvideShareableRegions`].
///
/// Returns a boxed, pinned future instead of `impl Future`, so the trait can
/// be used as `dyn DynProvideShareableRegions`.
trait DynProvideShareableRegions: Send + Sync {
    /// Boxed counterpart of [`ProvideShareableRegions::get_regions`].
    fn get_regions(
        &self,
    ) -> std::pin::Pin<
        Box<dyn Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_>,
    >;
}
432
433impl<T: ProvideShareableRegions> DynProvideShareableRegions for T {
434    fn get_regions(
435        &self,
436    ) -> std::pin::Pin<
437        Box<dyn Future<Output = Result<Vec<ShareableRegion>, ShareableRegionError>> + Send + '_>,
438    > {
439        Box::pin(ProvideShareableRegions::get_regions(self))
440    }
441}
442
/// A trait for a guest memory backing.
///
/// Guest memory may be backed by a virtual memory mapping, in which case this
/// trait can provide the VA and length of that mapping. Alternatively, it may
/// be backed by some other means, in which case this trait can provide fallback
/// methods for reading and writing memory.
///
/// Memory access should first be attempted via the virtual address mapping. If
/// this fails or is not present, the caller should fall back to `read_fallback`
/// or `write_fallback`. This allows an implementation to have a fast path using
/// the mapping, and a slow path using the fallback functions.
///
/// Only [`mapping`](Self::mapping) and [`max_address`](Self::max_address) are
/// required; every other method has a default implementation.
///
/// # Safety
///
/// The implementor must follow the contract for each method.
pub unsafe trait GuestMemoryAccess: 'static + Send + Sync {
    /// Returns a stable VA mapping for guest memory.
    ///
    /// The size of the mapping is the same as `max_address`.
    ///
    /// The VA is guaranteed to remain reserved, but individual ranges may be
    /// uncommitted.
    fn mapping(&self) -> Option<NonNull<u8>>;

    /// The maximum address that can be passed to the `*_fallback` methods, as
    /// well as the maximum offset into the VA range described by `mapping`.
    fn max_address(&self) -> u64;

    /// The bitmaps to check for validity, one bit per page. If a bit is set,
    /// then the page is valid to access via the mapping; if it is clear, then
    /// the page will not be accessed.
    ///
    /// The bitmaps must be at least `ceil(bitmap_start + max_address() /
    /// PAGE_SIZE)` bits long, and they must be valid for atomic read access for
    /// the lifetime of this object from any thread. (`bitmap_start` here
    /// presumably refers to [`BitmapInfo::bit_offset`] — TODO confirm.)
    ///
    /// The bitmaps are only checked if there is a mapping. If the bitmap check
    /// fails, then the associated `*_fallback` routine is called to handle the
    /// error.
    ///
    /// Bitmap checks are performed under the [`rcu()`] RCU domain, with relaxed
    /// accesses. After a thread updates the bitmap to be more restrictive, it
    /// must call [minircu::RcuDomain::synchronize()] on [`minircu::global()`]
    /// to ensure that all threads see the update before taking any action that
    /// depends on the bitmap update being visible.
    ///
    /// The default implementation returns `None` (no bitmaps).
    #[cfg(feature = "bitmap")]
    fn access_bitmap(&self) -> Option<BitmapInfo> {
        None
    }

    /// Returns an accessor for a subrange, or `None` to use the default
    /// implementation.
    ///
    /// `offset` and `len` describe the subrange. The default implementation
    /// ignores all arguments and returns `Ok(None)`.
    ///
    /// NOTE(review): the meaning of `allow_preemptive_locking` is not visible
    /// in this part of the file — document it once confirmed.
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        let _ = (offset, len, allow_preemptive_locking);
        Ok(None)
    }

    /// Called when access to memory via the mapped range fails, either due to a
    /// bitmap failure or due to a failure when accessing the virtual address.
    ///
    /// `address` is the address where the access failed. `len` is the remainder
    /// of the access; it is not necessarily the case that all `len` bytes are
    /// inaccessible in the bitmap or mapping. `write` indicates whether the
    /// failed access was a write, and `bitmap_failure` whether it was caused
    /// by a bitmap check rather than by the mapping itself.
    ///
    /// Returns whether the faulting operation should be retried, failed, or that
    /// one of the fallback operations (e.g. `read_fallback`) should be called.
    ///
    /// The default implementation always fails the operation, reporting a
    /// bitmap failure or an unmapped page according to `bitmap_failure`.
    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        let _ = (address, len, write);
        let err = if bitmap_failure {
            PageFaultError::other(BitmapFailure)
        } else {
            PageFaultError::other(NotMapped)
        };
        PageFaultAction::Fail(err)
    }

    /// Fallback called if a read fails via direct access to the range
    /// returned by `mapping()`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// Implementors must ensure that `dest[..len]` is fully initialized on
    /// successful return.
    ///
    /// The default implementation fails with a "no fallback" error at `addr`.
    ///
    /// # Safety
    /// The caller must ensure that `dest[..len]` is valid for write. Note,
    /// however, that `dest` might be aliased by other threads, the guest, or
    /// the kernel.
    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let _ = (dest, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a write fails via direct access to the range
    /// returned by `mapping()`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// The default implementation fails with a "no fallback" error at `addr`.
    ///
    /// # Safety
    /// The caller must ensure that `src[..len]` is valid for read. Note,
    /// however, that `src` might be aliased by other threads, the guest, or
    /// the kernel.
    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let _ = (src, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a fill fails via direct access to the range
    /// returned by `mapping()`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// The default implementation fails with a "no fallback" error at `addr`.
    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
        let _ = (val, len);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Fallback called if a compare exchange fails via direct access to the
    /// range returned by `mapping()`.
    ///
    /// On compare failure, returns `Ok(false)` and updates `current`.
    ///
    /// This is only called if `mapping()` returns `None` or if `page_fault()`
    /// returns `PageFaultAction::Fallback`.
    ///
    /// The default implementation fails with a "no fallback" error at `addr`.
    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        let _ = (current, new);
        Err(GuestMemoryBackingError::other(addr, NoFallback))
    }

    /// Prepares a guest page for having its virtual address exposed as part of
    /// a lock call.
    ///
    /// This is useful to ensure that the address is mapped in a way that it can
    /// be passed to the kernel for DMA.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        let _ = (address, len);
        Ok(())
    }

    /// Returns the base IO virtual address for the mapping.
    ///
    /// This is the base address that should be used for DMA from a user-mode
    /// device driver whose device is not otherwise configured to go through an
    /// IOMMU.
    ///
    /// The default implementation returns `None`.
    fn base_iova(&self) -> Option<u64> {
        None
    }

    /// Locks the specified guest physical pages (GPNs), preventing any mapping
    /// or permission changes until they are unlocked.
    ///
    /// Returns a boolean indicating whether unlocking is required.
    ///
    /// The default implementation locks nothing and returns `Ok(false)`.
    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
        let _ = gpns;
        Ok(false)
    }

    /// Unlocks the specified guest physical pages (GPNs) after exclusive access.
    ///
    /// Panics if asked to unlock a page that was not previously locked. The
    /// caller must ensure that the given slice has the same ordering as the
    /// one passed to `lock_gpns`.
    ///
    /// The default implementation does nothing (and never panics).
    fn unlock_gpns(&self, gpns: &[u64]) {
        let _ = gpns;
    }

    /// Return a sharing control object if this memory backing supports
    /// file-based sharing (e.g., memfd on Linux, section on Windows).
    ///
    /// Returns `None` for private memory, heap-backed test memory, or
    /// other non-shareable backings. The default implementation returns
    /// `None`.
    fn sharing(&self) -> Option<GuestMemorySharing> {
        None
    }
}
642
/// Crate-private mirror of the dynamically dispatched part of
/// [`GuestMemoryAccess`].
///
/// Blanket-implemented for every `T: GuestMemoryAccess` by forwarding each
/// call. It omits `mapping`, `max_address`, `access_bitmap` and `base_iova`.
// NOTE(review): presumably those values are captured once when the
// `GuestMemory` is built rather than queried dynamically — confirm.
trait DynGuestMemoryAccess: 'static + Send + Sync + Any {
    /// See [`GuestMemoryAccess::subrange`].
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::page_fault`].
    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction;

    /// # Safety
    /// See [`GuestMemoryAccess::read_fallback`].
    unsafe fn read_fallback(
        &self,
        addr: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError>;

    /// # Safety
    /// See [`GuestMemoryAccess::write_fallback`].
    unsafe fn write_fallback(
        &self,
        addr: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::fill_fallback`].
    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::compare_exchange_fallback`].
    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::expose_va`].
    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::lock_gpns`].
    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError>;

    /// See [`GuestMemoryAccess::unlock_gpns`].
    fn unlock_gpns(&self, gpns: &[u64]);

    /// See [`GuestMemoryAccess::sharing`].
    fn sharing(&self) -> Option<GuestMemorySharing>;
}
694
695impl<T: GuestMemoryAccess> DynGuestMemoryAccess for T {
696    fn subrange(
697        &self,
698        offset: u64,
699        len: u64,
700        allow_preemptive_locking: bool,
701    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
702        self.subrange(offset, len, allow_preemptive_locking)
703    }
704
705    fn page_fault(
706        &self,
707        address: u64,
708        len: usize,
709        write: bool,
710        bitmap_failure: bool,
711    ) -> PageFaultAction {
712        self.page_fault(address, len, write, bitmap_failure)
713    }
714
715    unsafe fn read_fallback(
716        &self,
717        addr: u64,
718        dest: *mut u8,
719        len: usize,
720    ) -> Result<(), GuestMemoryBackingError> {
721        // SAFETY: guaranteed by caller.
722        unsafe { self.read_fallback(addr, dest, len) }
723    }
724
725    unsafe fn write_fallback(
726        &self,
727        addr: u64,
728        src: *const u8,
729        len: usize,
730    ) -> Result<(), GuestMemoryBackingError> {
731        // SAFETY: guaranteed by caller.
732        unsafe { self.write_fallback(addr, src, len) }
733    }
734
735    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
736        self.fill_fallback(addr, val, len)
737    }
738
739    fn compare_exchange_fallback(
740        &self,
741        addr: u64,
742        current: &mut [u8],
743        new: &[u8],
744    ) -> Result<bool, GuestMemoryBackingError> {
745        self.compare_exchange_fallback(addr, current, new)
746    }
747
748    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
749        self.expose_va(address, len)
750    }
751
752    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
753        self.lock_gpns(gpns)
754    }
755
756    fn unlock_gpns(&self, gpns: &[u64]) {
757        self.unlock_gpns(gpns)
758    }
759
760    fn sharing(&self) -> Option<GuestMemorySharing> {
761        self.sharing()
762    }
763}
764
/// The action to take after [`GuestMemoryAccess::page_fault`] returns to
/// continue the operation.
pub enum PageFaultAction {
    /// Fail the operation with the given error.
    Fail(PageFaultError),
    /// Retry the operation.
    Retry,
    /// Use the fallback method (e.g. `read_fallback`) to access the memory.
    Fallback,
}
775
/// Returned by [`GuestMemoryAccess::access_bitmap`].
///
/// Describes the read and write access bitmaps, which hold one bit per page.
#[cfg(feature = "bitmap")]
pub struct BitmapInfo {
    /// A pointer to the bitmap for read access.
    pub read_bitmap: NonNull<u8>,
    /// A pointer to the bitmap for write access.
    pub write_bitmap: NonNull<u8>,
    /// The bit offset of the beginning of the bitmap.
    ///
    /// Typically this is zero, but it is needed to support subranges that are
    /// not 8-page multiples.
    pub bit_offset: u8,
}
789
790// SAFETY: passing through guarantees from `T`.
791unsafe impl<T: GuestMemoryAccess> GuestMemoryAccess for Arc<T> {
792    fn mapping(&self) -> Option<NonNull<u8>> {
793        self.as_ref().mapping()
794    }
795
796    fn max_address(&self) -> u64 {
797        self.as_ref().max_address()
798    }
799
800    #[cfg(feature = "bitmap")]
801    fn access_bitmap(&self) -> Option<BitmapInfo> {
802        self.as_ref().access_bitmap()
803    }
804
805    fn subrange(
806        &self,
807        offset: u64,
808        len: u64,
809        allow_preemptive_locking: bool,
810    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
811        self.as_ref()
812            .subrange(offset, len, allow_preemptive_locking)
813    }
814
815    fn page_fault(
816        &self,
817        addr: u64,
818        len: usize,
819        write: bool,
820        bitmap_failure: bool,
821    ) -> PageFaultAction {
822        self.as_ref().page_fault(addr, len, write, bitmap_failure)
823    }
824
825    unsafe fn read_fallback(
826        &self,
827        addr: u64,
828        dest: *mut u8,
829        len: usize,
830    ) -> Result<(), GuestMemoryBackingError> {
831        // SAFETY: passing through guarantees from caller.
832        unsafe { self.as_ref().read_fallback(addr, dest, len) }
833    }
834
835    unsafe fn write_fallback(
836        &self,
837        addr: u64,
838        src: *const u8,
839        len: usize,
840    ) -> Result<(), GuestMemoryBackingError> {
841        // SAFETY: passing through guarantees from caller.
842        unsafe { self.as_ref().write_fallback(addr, src, len) }
843    }
844
845    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
846        self.as_ref().fill_fallback(addr, val, len)
847    }
848
849    fn compare_exchange_fallback(
850        &self,
851        addr: u64,
852        current: &mut [u8],
853        new: &[u8],
854    ) -> Result<bool, GuestMemoryBackingError> {
855        self.as_ref().compare_exchange_fallback(addr, current, new)
856    }
857
858    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
859        self.as_ref().expose_va(address, len)
860    }
861
862    fn base_iova(&self) -> Option<u64> {
863        self.as_ref().base_iova()
864    }
865
866    fn sharing(&self) -> Option<GuestMemorySharing> {
867        self.as_ref().sharing()
868    }
869}
870
871// SAFETY: the allocation will stay valid for the lifetime of the object.
872unsafe impl GuestMemoryAccess for sparse_mmap::SparseMapping {
873    fn mapping(&self) -> Option<NonNull<u8>> {
874        NonNull::new(self.as_ptr().cast())
875    }
876
877    fn max_address(&self) -> u64 {
878        self.len() as u64
879    }
880}
881
/// Default guest memory range type, enforcing access boundaries.
///
/// Describes a window of `len` bytes starting at `offset` within one region
/// of a shared [`GuestMemoryInner`].
struct GuestMemoryAccessRange {
    /// The guest memory this range is a window into.
    base: Arc<GuestMemoryInner>,
    /// Start of the window, as an address in `base`.
    offset: u64,
    /// Length of the window in bytes.
    len: u64,
    /// Index into `base.regions` of the region containing the window.
    region: usize,
}
889
890impl GuestMemoryAccessRange {
891    fn adjust_range(&self, address: u64, len: u64) -> Result<u64, GuestMemoryBackingError> {
892        if address <= self.len && len <= self.len - address {
893            Ok(self.offset + address)
894        } else {
895            Err(GuestMemoryBackingError::new(
896                GuestMemoryErrorKind::OutOfRange,
897                address,
898                OutOfRange,
899            ))
900        }
901    }
902}
903
// SAFETY: `mapping()` is guaranteed to be valid for the lifetime of the object.
unsafe impl GuestMemoryAccess for GuestMemoryAccessRange {
    /// Returns a pointer to the start of this window inside the containing
    /// region's mapping, or `None` if that region has no mapping.
    fn mapping(&self) -> Option<NonNull<u8>> {
        let region = &self.base.regions[self.region];
        region.mapping.and_then(|mapping| {
            // Reduce the window start to an offset within its region.
            let offset = self.offset & self.base.region_def.region_mask;
            // This is guaranteed by construction.
            assert!(region.len >= offset + self.len);
            // SAFETY: this mapping is guaranteed to be within range by
            // construction (and validated again via the assertion above).
            NonNull::new(unsafe { mapping.0.as_ptr().add(offset as usize) })
        })
    }

    /// The window length; valid window-relative addresses are below this.
    fn max_address(&self) -> u64 {
        self.len
    }

    /// Returns the containing region's access bitmaps, advanced so that the
    /// first bit corresponds to the first page of this window. Returns `None`
    /// if the region has no bitmaps.
    #[cfg(feature = "bitmap")]
    fn access_bitmap(&self) -> Option<BitmapInfo> {
        let region = &self.base.regions[self.region];
        region.bitmaps.map(|bitmaps| {
            let offset = self.offset & self.base.region_def.region_mask;
            // Bit index of the window's first page, counted from the first
            // byte of the region's bitmap.
            let bit_offset = region.bitmap_start as u64 + offset / PAGE_SIZE64;
            let [read_bitmap, write_bitmap] = bitmaps.map(|SendPtrU8(ptr)| {
                // SAFETY: the bitmap is guaranteed to be big enough for the region
                // by construction.
                NonNull::new(unsafe { ptr.as_ptr().add((bit_offset / 8) as usize) }).unwrap()
            });
            // Remaining bit offset within the first byte pointed to above.
            let bitmap_start = (bit_offset % 8) as u8;
            BitmapInfo {
                read_bitmap,
                write_bitmap,
                bit_offset: bitmap_start,
            }
        })
    }

    /// Creates a nested window `offset..offset + len` (relative to this
    /// window) over the same base memory. Fails with `OutOfRange` if the
    /// requested range does not fit inside this window.
    ///
    /// The locking hint is ignored by this implementation.
    fn subrange(
        &self,
        offset: u64,
        len: u64,
        _allow_preemptive_locking: bool,
    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
        let address = self.adjust_range(offset, len)?;
        Ok(Some(GuestMemory::new(
            self.base.debug_name.clone(),
            GuestMemoryAccessRange {
                base: self.base.clone(),
                offset: address,
                len,
                region: self.region,
            },
        )))
    }

    /// Forwards a page fault to the base implementation after translating the
    /// address.
    ///
    /// # Panics
    ///
    /// Panics if `address..address + len` is not inside this window.
    fn page_fault(
        &self,
        address: u64,
        len: usize,
        write: bool,
        bitmap_failure: bool,
    ) -> PageFaultAction {
        let address = self
            .adjust_range(address, len as u64)
            .expect("the caller should have validated the range was in the mapping");

        self.base
            .imp
            .page_fault(address, len, write, bitmap_failure)
    }

    /// Bounds-checks the range against this window, then forwards the write
    /// to the base implementation at the translated address.
    unsafe fn write_fallback(
        &self,
        address: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { self.base.imp.write_fallback(address, src, len) }
    }

    /// Bounds-checks the range against this window, then forwards the fill to
    /// the base implementation at the translated address.
    fn fill_fallback(
        &self,
        address: u64,
        val: u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len as u64)?;
        self.base.imp.fill_fallback(address, val, len)
    }

    /// Bounds-checks `addr..addr + new.len()` against this window, then
    /// forwards the compare-exchange to the base implementation.
    fn compare_exchange_fallback(
        &self,
        addr: u64,
        current: &mut [u8],
        new: &[u8],
    ) -> Result<bool, GuestMemoryBackingError> {
        let address = self.adjust_range(addr, new.len() as u64)?;
        self.base
            .imp
            .compare_exchange_fallback(address, current, new)
    }

    /// Bounds-checks the range against this window, then forwards the read to
    /// the base implementation at the translated address.
    unsafe fn read_fallback(
        &self,
        address: u64,
        dest: *mut u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len as u64)?;
        // SAFETY: guaranteed by caller.
        unsafe { self.base.imp.read_fallback(address, dest, len) }
    }

    /// Bounds-checks the range against this window, then forwards the request
    /// to the base implementation at the translated address.
    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
        let address = self.adjust_range(address, len)?;
        self.base.imp.expose_va(address, len)
    }

    /// Returns the IOVA of the start of this window: the containing region's
    /// base IOVA plus the window's offset within that region. Returns `None`
    /// if the region has no base IOVA.
    fn base_iova(&self) -> Option<u64> {
        let region = &self.base.regions[self.region];
        Some(region.base_iova? + (self.offset & self.base.region_def.region_mask))
    }
}
1030
1031/// Create a default guest memory subrange that verifies range limits and calls
1032/// back into the base implementation.
1033fn create_memory_subrange(
1034    base: Arc<GuestMemoryInner>,
1035    offset: u64,
1036    len: u64,
1037    _allow_preemptive_locking: bool,
1038) -> Result<GuestMemory, GuestMemoryBackingError> {
1039    let (_, _, region) = base.region(offset, len)?;
1040    Ok(GuestMemory::new(
1041        base.debug_name.clone(),
1042        GuestMemoryAccessRange {
1043            base,
1044            offset,
1045            len,
1046            region,
1047        },
1048    ))
1049}
1050
/// Backing implementation for guest memory made up of several equally sized
/// regions, each with its own optional implementation.
struct MultiRegionGuestMemoryAccess<T> {
    /// One entry per region, indexed by region number; `None` marks a region
    /// with no backing.
    imps: Vec<Option<T>>,
    /// Describes how an address splits into a region index and an offset
    /// within that region.
    region_def: RegionDefinition,
}
1055
1056impl<T> MultiRegionGuestMemoryAccess<T> {
1057    fn region(&self, gpa: u64, len: u64) -> Result<(&T, u64), GuestMemoryBackingError> {
1058        let (i, offset) = self.region_def.region(gpa, len)?;
1059        let imp = self.imps[i].as_ref().ok_or(GuestMemoryBackingError::new(
1060            GuestMemoryErrorKind::OutOfRange,
1061            gpa,
1062            OutOfRange,
1063        ))?;
1064        Ok((imp, offset))
1065    }
1066}
1067
1068// SAFETY: `mapping()` is unreachable and panics if called.
1069impl<T: GuestMemoryAccess> DynGuestMemoryAccess for MultiRegionGuestMemoryAccess<T> {
1070    fn subrange(
1071        &self,
1072        offset: u64,
1073        len: u64,
1074        allow_preemptive_locking: bool,
1075    ) -> Result<Option<GuestMemory>, GuestMemoryBackingError> {
1076        let (region, offset_in_region) = self.region(offset, len)?;
1077        region.subrange(offset_in_region, len, allow_preemptive_locking)
1078    }
1079
1080    unsafe fn read_fallback(
1081        &self,
1082        addr: u64,
1083        dest: *mut u8,
1084        len: usize,
1085    ) -> Result<(), GuestMemoryBackingError> {
1086        let (region, offset_in_region) = self.region(addr, len as u64)?;
1087        // SAFETY: guaranteed by caller.
1088        unsafe { region.read_fallback(offset_in_region, dest, len) }
1089    }
1090
1091    unsafe fn write_fallback(
1092        &self,
1093        addr: u64,
1094        src: *const u8,
1095        len: usize,
1096    ) -> Result<(), GuestMemoryBackingError> {
1097        let (region, offset_in_region) = self.region(addr, len as u64)?;
1098        // SAFETY: guaranteed by caller.
1099        unsafe { region.write_fallback(offset_in_region, src, len) }
1100    }
1101
1102    fn fill_fallback(&self, addr: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
1103        let (region, offset_in_region) = self.region(addr, len as u64)?;
1104        region.fill_fallback(offset_in_region, val, len)
1105    }
1106
1107    fn compare_exchange_fallback(
1108        &self,
1109        addr: u64,
1110        current: &mut [u8],
1111        new: &[u8],
1112    ) -> Result<bool, GuestMemoryBackingError> {
1113        let (region, offset_in_region) = self.region(addr, new.len() as u64)?;
1114        region.compare_exchange_fallback(offset_in_region, current, new)
1115    }
1116
1117    fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
1118        let (region, offset_in_region) = self.region(address, len)?;
1119        region.expose_va(offset_in_region, len)
1120    }
1121
1122    fn page_fault(
1123        &self,
1124        address: u64,
1125        len: usize,
1126        write: bool,
1127        bitmap_failure: bool,
1128    ) -> PageFaultAction {
1129        match self.region(address, len as u64) {
1130            Ok((region, offset_in_region)) => {
1131                region.page_fault(offset_in_region, len, write, bitmap_failure)
1132            }
1133            Err(err) => PageFaultAction::Fail(PageFaultError {
1134                kind: err.kind,
1135                err: err.err,
1136            }),
1137        }
1138    }
1139
1140    fn lock_gpns(&self, gpns: &[u64]) -> Result<bool, GuestMemoryBackingError> {
1141        let mut ret = false;
1142        for gpn in gpns {
1143            let (region, offset_in_region) = self.region(gpn * PAGE_SIZE64, PAGE_SIZE64)?;
1144            ret |= region.lock_gpns(&[offset_in_region / PAGE_SIZE64])?;
1145        }
1146        Ok(ret)
1147    }
1148
1149    fn unlock_gpns(&self, gpns: &[u64]) {
1150        for gpn in gpns {
1151            let (region, offset_in_region) = self.region(gpn * PAGE_SIZE64, PAGE_SIZE64).unwrap();
1152            region.unlock_gpns(&[offset_in_region / PAGE_SIZE64]);
1153        }
1154    }
1155
1156    fn sharing(&self) -> Option<GuestMemorySharing> {
1157        // FUTURE: multi-region setups could aggregate shareable regions from
1158        // their sub-regions. For now, sharing is only supported for
1159        // single-region guest memory (the common case). If a VM uses
1160        // MultiRegionGuestMemoryAccess with vhost-user, this will return
1161        // None and the vhost-user backend will fail to initialize.
1162        None
1163    }
1164}
1165
/// A wrapper around a `GuestMemoryAccess` that provides methods for safely
/// reading and writing guest memory.
///
/// Cloning is cheap: clones share the same reference-counted inner state.
// NOTE: this type uses `inspect(skip)`, as it ends up being a dependency of
// _many_ objects, and littering the inspect graph with references to the same
// node would be silly.
#[derive(Debug, Clone, Inspect)]
#[inspect(skip)]
pub struct GuestMemory {
    /// Shared state: region layout, cached per-region data, and the backing
    /// implementation.
    inner: Arc<GuestMemoryInner>,
}
1176
/// Shared state behind a [`GuestMemory`].
///
/// `T` defaults to the type-erased backing; `imp` is the last field so that a
/// concrete `GuestMemoryInner<T>` can be unsized to the `dyn` form.
struct GuestMemoryInner<T: ?Sized = dyn DynGuestMemoryAccess> {
    /// How an address splits into a region index and an offset.
    region_def: RegionDefinition,
    /// Cached per-region data (mapping pointer, bitmaps, length, IOVA).
    regions: Vec<MemoryRegion>,
    /// Name used to identify this memory in error messages.
    debug_name: Arc<str>,
    /// True only when created by `GuestMemory::allocate`, in which case `imp`
    /// is an `AlignedHeapMemory`.
    allocated: bool,
    /// The backing implementation.
    imp: T,
}
1184
1185impl<T: ?Sized> Debug for GuestMemoryInner<T> {
1186    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1187        f.debug_struct("GuestMemoryInner")
1188            .field("region_def", &self.region_def)
1189            .field("regions", &self.regions)
1190            .finish()
1191    }
1192}
1193
/// Per-region data captured from a backing implementation when the guest
/// memory object is built. The default value describes an empty region: no
/// mapping and zero length.
#[derive(Debug, Copy, Clone, Default)]
struct MemoryRegion {
    /// Base of the region's mapping, if it has one.
    mapping: Option<SendPtrU8>,
    /// Access bitmaps, indexed by [`AccessType`] (read, then write).
    #[cfg(feature = "bitmap")]
    bitmaps: Option<[SendPtrU8; 2]>,
    /// Bit offset of the region's first page within the bitmaps.
    #[cfg(feature = "bitmap")]
    bitmap_start: u8,
    /// Length of the region's backing, as reported by `max_address()`.
    len: u64,
    /// IOVA of the start of the region, if the backing reports one.
    base_iova: Option<u64>,
}
1204
/// The access type. The values correspond to bitmap indexes.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum AccessType {
    /// A read access; selects bitmap 0 (the read bitmap).
    Read = 0,
    /// A write access; selects bitmap 1 (the write bitmap).
    Write = 1,
}
1211
/// `NonNull<u8>` that implements `Send+Sync`.
///
/// Rust makes pointers `!Send+!Sync` by default to force you to think about the
/// ownership model and thread safety of types using pointers--there is nothing
/// safety-related about `Send`/`Sync` on pointers by themselves since all such
/// accesses to pointers require `unsafe` blocks anyway.
///
/// However, in practice, this leads to spurious manual `Send+Sync` impls on
/// types containing pointers, especially those containing generics. Define a
/// wrapping pointer type that implements `Send+Sync` so that the normal auto
/// trait rules apply to types containing these pointers.
///
/// This wrapper carries no ownership or lifetime information; code that
/// dereferences the pointer remains responsible for its validity.
#[derive(Debug, Copy, Clone)]
struct SendPtrU8(NonNull<u8>);

// SAFETY: see type description.
unsafe impl Send for SendPtrU8 {}
// SAFETY: see type description.
unsafe impl Sync for SendPtrU8 {}
1230
impl MemoryRegion {
    /// Captures the mapping pointer, access bitmaps (with the `bitmap`
    /// feature), length and base IOVA reported by `imp`.
    fn new(imp: &impl GuestMemoryAccess) -> Self {
        #[cfg(feature = "bitmap")]
        let (bitmaps, bitmap_start) = {
            let bitmap_info = imp.access_bitmap();
            // Store the pointers in read/write order so that they can be
            // indexed by `AccessType`.
            let bitmaps = bitmap_info
                .as_ref()
                .map(|bm| [SendPtrU8(bm.read_bitmap), SendPtrU8(bm.write_bitmap)]);
            // With no bitmap the start offset is unused; default it to 0.
            let bitmap_start = bitmap_info.map_or(0, |bi| bi.bit_offset);
            (bitmaps, bitmap_start)
        };
        Self {
            mapping: imp.mapping().map(SendPtrU8),
            #[cfg(feature = "bitmap")]
            bitmaps,
            #[cfg(feature = "bitmap")]
            bitmap_start,
            len: imp.max_address(),
            base_iova: imp.base_iova(),
        }
    }

    /// Checks whether every page touched by `offset..offset + len` has its
    /// bit set in the bitmap for `access_type`.
    ///
    /// Returns `Ok(())` if access is allowed. This is always the case when
    /// `len` is zero, when the region has no bitmaps, or when the `bitmap`
    /// feature is disabled. Otherwise returns `Err(n)`, where `n` is the byte
    /// offset from `offset` to the first disallowed page (zero if that page
    /// begins at or before `offset`).
    ///
    /// # Safety
    ///
    /// The caller must ensure that `offset + len` fits in this region, and that
    /// the object bitmap is currently valid for atomic read access from this
    /// thread.
    unsafe fn check_access(
        &self,
        access_type: AccessType,
        offset: u64,
        len: u64,
    ) -> Result<(), u64> {
        debug_assert!(self.len >= offset + len);
        // Without the bitmap feature the access type is unused.
        #[cfg(not(feature = "bitmap"))]
        let _ = access_type;

        #[cfg(feature = "bitmap")]
        if len == 0 {
            return Ok(());
        } else if let Some(bitmaps) = &self.bitmaps {
            let SendPtrU8(bitmap) = bitmaps[access_type as usize];
            // First and last page numbers (inclusive) touched by the access.
            let start = offset / PAGE_SIZE64;
            let end = (offset + len - 1) / PAGE_SIZE64;
            // FUTURE: consider optimizing this separately for multi-page and
            // single-page accesses.
            for gpn in start..=end {
                let bit_offset = self.bitmap_start as u64 + gpn;
                // SAFETY: the caller ensures that the bitmap is big enough and
                // valid for atomic read access from this thread.
                let bit = unsafe {
                    (*bitmap
                        .as_ptr()
                        .cast_const()
                        .cast::<AtomicU8>()
                        .add(bit_offset as usize / 8))
                    .load(std::sync::atomic::Ordering::Relaxed)
                        & (1 << (bit_offset % 8))
                };
                if bit == 0 {
                    // Saturate so that a failure on the first, partially
                    // covered page reports offset 0 rather than underflowing.
                    return Err((gpn * PAGE_SIZE64).saturating_sub(offset));
                }
            }
        }

        Ok(())
    }
}
1299
1300/// The default implementation is [`GuestMemory::empty`].
1301impl Default for GuestMemory {
1302    fn default() -> Self {
1303        Self::empty()
1304    }
1305}
1306
/// Backing with no mapping and a maximum address of zero. Used by
/// `GuestMemory::empty`.
struct Empty;

// SAFETY: the mapping is empty, so all requirements are trivially satisfied.
unsafe impl GuestMemoryAccess for Empty {
    /// There is never a mapping.
    fn mapping(&self) -> Option<NonNull<u8>> {
        None
    }

    /// The address space is empty.
    fn max_address(&self) -> u64 {
        0
    }
}
1319
/// Errors returned by [`GuestMemory::new_multi_region`].
#[derive(Debug, Error)]
pub enum MultiRegionError {
    /// The requested region size (the payload) is not a power of two.
    #[error("region size {0:#x} is not a power of 2")]
    NotPowerOfTwo(u64),
    /// The requested region size (the payload) is smaller than a page.
    #[error("region size {0:#x} is smaller than a page")]
    RegionSizeTooSmall(u64),
    /// More regions were requested than the address space can hold at the
    /// given region size.
    #[error(
        "too many regions ({region_count}) for region size {region_size:#x}; max is {max_region_count}"
    )]
    TooManyRegions {
        /// Number of regions requested, rounded up to a power of two.
        region_count: usize,
        /// Largest region count allowed for `region_size`.
        max_region_count: usize,
        /// The requested region size in bytes.
        region_size: u64,
    },
    /// A region's backing reports a maximum address larger than the region
    /// size.
    #[error("backing size {backing_size:#x} is too large for region size {region_size:#x}")]
    BackingTooLarge { backing_size: u64, region_size: u64 },
}
1337
/// Returns the RCU domain memory accesses occur under. Updates to any memory
/// access bitmaps must be synchronized under this domain.
///
/// See [`GuestMemoryAccess::access_bitmap`] for more details.
///
/// This is currently the global domain, but this is reexported here to make
/// calling code clearer.
#[cfg(feature = "bitmap")]
pub fn rcu() -> minircu::RcuDomain {
    // Use the global domain unless we find a reason to do something else.
    minircu::global()
}
1350
1351impl GuestMemory {
1352    /// Returns a new instance using `imp` as the backing.
1353    ///
1354    /// `debug_name` is used to specify which guest memory is being accessed in
1355    /// error messages.
1356    pub fn new(debug_name: impl Into<Arc<str>>, imp: impl GuestMemoryAccess) -> Self {
1357        // Install signal handlers on unix if a mapping is present.
1358        //
1359        // Skip this on miri even when there is a mapping, since the mapping may
1360        // never be accessed by the code under test.
1361        if imp.mapping().is_some() && !cfg!(miri) {
1362            trycopy::initialize_try_copy();
1363        }
1364        Self::new_inner(debug_name.into(), imp, false)
1365    }
1366
1367    fn new_inner(debug_name: Arc<str>, imp: impl GuestMemoryAccess, allocated: bool) -> Self {
1368        let regions = vec![MemoryRegion::new(&imp)];
1369        Self {
1370            inner: Arc::new(GuestMemoryInner {
1371                imp,
1372                debug_name,
1373                region_def: RegionDefinition {
1374                    invalid_mask: 1 << 63,
1375                    region_mask: !0 >> 1,
1376                    region_bits: 63, // right shift of 64 isn't valid, so restrict the space
1377                },
1378                regions,
1379                allocated,
1380            }),
1381        }
1382    }
1383
    /// Creates a new multi-region guest memory, made up of multiple mappings.
    /// This allows you to create a very large sparse layout (up to the limits
    /// of the VM's physical address space) without having to allocate an
    /// enormous amount of virtual address space.
    ///
    /// Each region will be `region_size` bytes and will start immediately after
    /// the last one. This must be a power of two, be at least a page in size,
    /// and cannot fill the full 64-bit address space.
    ///
    /// `imps` must be a list of [`GuestMemoryAccess`] implementations, one for
    /// each region. Use `None` if the corresponding region is empty.
    ///
    /// A region's mapping cannot fully fill the region. This is necessary to
    /// avoid callers expecting to be able to access a memory range that spans
    /// two regions.
    ///
    /// # Errors
    ///
    /// Returns a [`MultiRegionError`] if `region_size` is not a power of two
    /// or is smaller than a page, if there are too many regions for the
    /// region size, or if any backing is larger than `region_size`.
    pub fn new_multi_region(
        debug_name: impl Into<Arc<str>>,
        region_size: u64,
        mut imps: Vec<Option<impl GuestMemoryAccess>>,
    ) -> Result<Self, MultiRegionError> {
        // Install signal handlers.
        trycopy::initialize_try_copy();

        if !region_size.is_power_of_two() {
            return Err(MultiRegionError::NotPowerOfTwo(region_size));
        }
        if region_size < PAGE_SIZE64 {
            return Err(MultiRegionError::RegionSizeTooSmall(region_size));
        }
        // Number of address bits that select a byte within a region.
        let region_bits = region_size.trailing_zeros();

        // Regions may use at most the low 63 bits of the address space.
        let max_region_count = 1 << (63 - region_bits);

        // Round the region count up to a power of two so that it corresponds
        // to a whole number of address bits in `valid_bits` below.
        let region_count = imps.len().next_power_of_two();
        if region_count > max_region_count {
            return Err(MultiRegionError::TooManyRegions {
                region_count,
                max_region_count,
                region_size,
            });
        }

        // Address bits above `valid_bits` are never part of a valid address.
        let valid_bits = region_bits + region_count.trailing_zeros();
        assert!(valid_bits < 64);
        let invalid_mask = !0 << valid_bits;

        // Regions without an implementation (including the padding added by
        // rounding up) keep the default, empty `MemoryRegion`.
        let mut regions = vec![MemoryRegion::default(); region_count];
        for (imp, region) in imps.iter().zip(&mut regions) {
            let Some(imp) = imp else { continue };
            let backing_size = imp.max_address();
            // NOTE(review): the doc comment says a mapping cannot fully fill
            // its region, but this check admits `backing_size == region_size`
            // -- confirm which is intended.
            if backing_size > region_size {
                return Err(MultiRegionError::BackingTooLarge {
                    backing_size,
                    region_size,
                });
            }
            *region = MemoryRegion::new(imp);
        }

        let region_def = RegionDefinition {
            invalid_mask,
            region_mask: region_size - 1,
            region_bits,
        };

        // Pad the implementation list to match the rounded-up region count.
        imps.resize_with(region_count, || None);
        let imp = MultiRegionGuestMemoryAccess { imps, region_def };

        let inner = GuestMemoryInner {
            debug_name: debug_name.into(),
            region_def,
            regions,
            imp,
            allocated: false,
        };

        Ok(Self {
            inner: Arc::new(inner),
        })
    }
1464
1465    /// Allocates a guest memory object on the heap with the given size in
1466    /// bytes.
1467    ///
1468    /// `size` will be rounded up to the page size. The backing buffer will be
1469    /// page aligned.
1470    ///
1471    /// The debug name in errors will be "heap". If you want to provide a
1472    /// different debug name, manually use `GuestMemory::new` with
1473    /// [`AlignedHeapMemory`].
1474    pub fn allocate(size: usize) -> Self {
1475        Self::new_inner("heap".into(), AlignedHeapMemory::new(size), true)
1476    }
1477
    /// If this memory is unaliased and was created via
    /// [`GuestMemory::allocate`], returns the backing buffer.
    ///
    /// Returns `Err(self)` if this memory was not created via
    /// [`GuestMemory::allocate`], or if there are other references to this
    /// memory (via `clone()`).
    pub fn into_inner_buf(self) -> Result<AlignedHeapMemory, Self> {
        if !self.inner.allocated {
            return Err(self);
        }
        // FUTURE: consider using `Any` and `Arc::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let inner = unsafe {
            Arc::<GuestMemoryInner<AlignedHeapMemory>>::from_raw(Arc::into_raw(self.inner).cast())
        };
        // On failure the concretely typed `Arc` is handed back and coerced to
        // the type-erased form again.
        let inner = Arc::try_unwrap(inner).map_err(|inner| Self { inner })?;
        Ok(inner.imp)
    }
1495
    /// If this memory was created via [`GuestMemory::allocate`], returns a slice to
    /// the allocated buffer.
    ///
    /// Returns `None` for memory created any other way. The slice is exposed
    /// as atomics, and other clones of this memory may exist while it is
    /// borrowed.
    pub fn inner_buf(&self) -> Option<&[AtomicU8]> {
        if !self.inner.allocated {
            return None;
        }
        // FUTURE: consider using `<dyn Any>::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let inner = unsafe { &*core::ptr::from_ref(&self.inner.imp).cast::<AlignedHeapMemory>() };
        Some(inner)
    }
1507
    /// If this memory was created via [`GuestMemory::allocate`] and there are
    /// no other references to it, returns a mutable slice to the backing
    /// buffer.
    ///
    /// Returns `None` if either condition does not hold.
    pub fn inner_buf_mut(&mut self) -> Option<&mut [u8]> {
        if !self.inner.allocated {
            return None;
        }
        // `Arc::get_mut` only succeeds when this is the sole reference.
        let inner = Arc::get_mut(&mut self.inner)?;
        // FUTURE: consider using `<dyn Any>::downcast` once trait upcasting is stable.
        // SAFETY: the inner implementation is guaranteed to be a `AlignedHeapMemory`.
        let imp = unsafe { &mut *core::ptr::from_mut(&mut inner.imp).cast::<AlignedHeapMemory>() };
        Some(imp.as_mut())
    }
1521
1522    /// Returns an empty guest memory, which fails every operation.
1523    pub fn empty() -> Self {
1524        GuestMemory::new("empty", Empty)
1525    }
1526
1527    fn wrap_err(
1528        &self,
1529        gpa_len: Option<(u64, u64)>,
1530        op: GuestMemoryOperation,
1531        err: GuestMemoryBackingError,
1532    ) -> GuestMemoryError {
1533        let range = gpa_len.map(|(gpa, len)| gpa..gpa.wrapping_add(len));
1534        GuestMemoryError::new(&self.inner.debug_name, range, op, err)
1535    }
1536
1537    fn with_op<T>(
1538        &self,
1539        gpa_len: Option<(u64, u64)>,
1540        op: GuestMemoryOperation,
1541        f: impl FnOnce() -> Result<T, GuestMemoryBackingError>,
1542    ) -> Result<T, GuestMemoryError> {
1543        f().map_err(|err| self.wrap_err(gpa_len, op, err))
1544    }
1545
1546    /// Creates a smaller view into guest memory, constraining accesses within the new boundaries. For smaller ranges,
1547    /// some memory implementations (e.g. HDV) may choose to lock the pages into memory for faster access. Locking
1548    /// random guest memory may cause issues, so only opt in to this behavior when the range can be considered "owned"
1549    /// by the caller.
1550    pub fn subrange(
1551        &self,
1552        offset: u64,
1553        len: u64,
1554        allow_preemptive_locking: bool,
1555    ) -> Result<GuestMemory, GuestMemoryError> {
1556        self.with_op(Some((offset, len)), GuestMemoryOperation::Subrange, || {
1557            if let Some(guest_memory) =
1558                self.inner
1559                    .imp
1560                    .subrange(offset, len, allow_preemptive_locking)?
1561            {
1562                Ok(guest_memory)
1563            } else {
1564                create_memory_subrange(self.inner.clone(), offset, len, allow_preemptive_locking)
1565            }
1566        })
1567    }
1568
1569    /// Returns a subrange where pages from the subrange can be locked.
1570    pub fn lockable_subrange(
1571        &self,
1572        offset: u64,
1573        len: u64,
1574    ) -> Result<GuestMemory, GuestMemoryError> {
1575        // TODO: Enforce subrange is actually lockable.
1576        self.subrange(offset, len, true)
1577    }
1578
1579    /// Returns the mapping for all of guest memory.
1580    ///
1581    /// Returns `None` if there is more than one region or if the memory is not
1582    /// mapped.
1583    pub fn full_mapping(&self) -> Option<(*mut u8, usize)> {
1584        if let [region] = self.inner.regions.as_slice() {
1585            #[cfg(feature = "bitmap")]
1586            if region.bitmaps.is_some() {
1587                return None;
1588            }
1589            region
1590                .mapping
1591                .map(|SendPtrU8(ptr)| (ptr.as_ptr(), region.len as usize))
1592        } else {
1593            None
1594        }
1595    }
1596
1597    /// Gets the IO address for DMAing to `gpa` from a user-mode driver not
1598    /// going through an IOMMU.
1599    pub fn iova(&self, gpa: u64) -> Option<u64> {
1600        let (region, offset, _) = self.inner.region(gpa, 1).ok()?;
1601        Some(region.base_iova? + offset)
1602    }
1603
    /// Returns a sharing object if this memory supports
    /// file-based sharing. See [`GuestMemorySharing`].
    ///
    /// The answer comes from the backing implementation; `None` means it does
    /// not provide a sharing object.
    pub fn sharing(&self) -> Option<GuestMemorySharing> {
        self.inner.imp.sharing()
    }
1609
    /// Gets a pointer to the VA range for `gpa..gpa+len`.
    ///
    /// Returns `Ok(None)` if there is no mapping, or if the fault handler asks
    /// for the fallback path. Returns `Err(_)` if the memory is out of range
    /// or if an access-check failure cannot be resolved.
    ///
    /// When the access check for `access_type` fails, the backing
    /// implementation's `page_fault` is invoked (with `bitmap_failure` set)
    /// for the remainder of the range, and the check is retried for as long
    /// as the handler asks for a retry.
    fn mapping_range(
        &self,
        access_type: AccessType,
        gpa: u64,
        len: usize,
    ) -> Result<Option<*mut u8>, GuestMemoryBackingError> {
        let (region, offset, _) = self.inner.region(gpa, len as u64)?;
        if let Some(SendPtrU8(ptr)) = region.mapping {
            loop {
                // SAFETY: offset + len is checked by `region()` to be inside the VA range.
                let fault_offset = unsafe {
                    match region.check_access(access_type, offset, len as u64) {
                        Ok(()) => return Ok(Some(ptr.as_ptr().add(offset as usize))),
                        // Byte offset from `gpa` of the first disallowed page.
                        Err(n) => n,
                    }
                };

                // Resolve the fault and try again.
                match self.inner.imp.page_fault(
                    gpa + fault_offset,
                    len - fault_offset as usize,
                    access_type == AccessType::Write,
                    true,
                ) {
                    PageFaultAction::Fail(err) => {
                        return Err(GuestMemoryBackingError::new(
                            err.kind,
                            gpa + fault_offset,
                            err.err,
                        ));
                    }
                    PageFaultAction::Retry => {}
                    // Leave the loop and report "no mapping" so the caller
                    // uses its fallback.
                    PageFaultAction::Fallback => break,
                }
            }
        }
        Ok(None)
    }
1652
    /// Runs `f` with a pointer to the mapped memory. If `f` fails, tries to
    /// resolve the fault (failing on error), then loops.
    ///
    /// If there is no mapping for the memory, or if the fault handler requests
    /// it, call `fallback` instead. `fallback` will not be called unless `gpa`
    /// and `len` are in range.
    ///
    /// `param` is caller state handed by mutable reference to both `f` and
    /// `fallback`. `f` may run several times (once per retry); `fallback`
    /// runs at most once.
    fn run_on_mapping<T, P>(
        &self,
        access_type: AccessType,
        gpa: u64,
        len: usize,
        mut param: P,
        mut f: impl FnMut(&mut P, *mut u8) -> Result<T, trycopy::MemoryError>,
        fallback: impl FnOnce(&mut P) -> Result<T, GuestMemoryBackingError>,
    ) -> Result<T, GuestMemoryBackingError> {
        let op = || {
            // Range and access-bitmap checks happen here; out-of-range
            // accesses fail before `f` or `fallback` run.
            let Some(mapping) = self.mapping_range(access_type, gpa, len)? else {
                return fallback(&mut param);
            };

            // Try until the fault fails to resolve.
            loop {
                match f(&mut param, mapping) {
                    Ok(t) => return Ok(t),
                    Err(fault) => {
                        // Report the fault for the remainder of the range,
                        // starting at the failing offset. This is a real
                        // access fault, not a bitmap failure.
                        match self.inner.imp.page_fault(
                            gpa + fault.offset() as u64,
                            len - fault.offset(),
                            access_type == AccessType::Write,
                            false,
                        ) {
                            PageFaultAction::Fail(err) => {
                                return Err(GuestMemoryBackingError::new(
                                    err.kind,
                                    gpa + fault.offset() as u64,
                                    err.err,
                                ));
                            }
                            PageFaultAction::Retry => {}
                            PageFaultAction::Fallback => return fallback(&mut param),
                        }
                    }
                }
            }
        };
        // If the `bitmap` feature is enabled, run the function in an RCU
        // critical section. This will allow callers to flush concurrent
        // accesses after bitmap updates.
        #[cfg(feature = "bitmap")]
        return rcu().run(op);
        #[cfg(not(feature = "bitmap"))]
        op()
    }
1706
    /// Copies `len` bytes from `src` into guest memory at `gpa`, using the
    /// mapping when possible and the backing implementation's
    /// `write_fallback` otherwise.
    ///
    /// A zero-length write succeeds immediately without validating `gpa`.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `src`..`src + len` is a valid buffer for reads.
    unsafe fn write_ptr(
        &self,
        gpa: u64,
        src: *const u8,
        len: usize,
    ) -> Result<(), GuestMemoryBackingError> {
        if len == 0 {
            return Ok(());
        }
        self.run_on_mapping(
            AccessType::Write,
            gpa,
            len,
            (),
            |(), dest| {
                // SAFETY: dest..dest+len is guaranteed to point to a reserved VA
                // range, and src..src+len is guaranteed by the caller to be a valid
                // buffer for reads.
                unsafe { trycopy::try_copy(src, dest, len) }
            },
            |()| {
                // SAFETY: src..src+len is guaranteed by the caller to point to a valid
                // buffer for reads.
                unsafe { self.inner.imp.write_fallback(gpa, src, len) }
            },
        )
    }
1737
1738    /// Writes `src` into guest memory at address `gpa`.
1739    pub fn write_at(&self, gpa: u64, src: &[u8]) -> Result<(), GuestMemoryError> {
1740        self.with_op(
1741            Some((gpa, src.len() as u64)),
1742            GuestMemoryOperation::Write,
1743            || self.write_at_inner(gpa, src),
1744        )
1745    }
1746
1747    fn write_at_inner(&self, gpa: u64, src: &[u8]) -> Result<(), GuestMemoryBackingError> {
1748        // SAFETY: `src` is a valid buffer for reads.
1749        unsafe { self.write_ptr(gpa, src.as_ptr(), src.len()) }
1750    }
1751
1752    /// Writes `src` into guest memory at address `gpa`.
1753    pub fn write_from_atomic(&self, gpa: u64, src: &[AtomicU8]) -> Result<(), GuestMemoryError> {
1754        self.with_op(
1755            Some((gpa, src.len() as u64)),
1756            GuestMemoryOperation::Write,
1757            || {
1758                // SAFETY: `src` is a valid buffer for reads.
1759                unsafe { self.write_ptr(gpa, src.as_ptr().cast(), src.len()) }
1760            },
1761        )
1762    }
1763
1764    /// Writes `len` bytes of `val` into guest memory at address `gpa`.
1765    pub fn fill_at(&self, gpa: u64, val: u8, len: usize) -> Result<(), GuestMemoryError> {
1766        self.with_op(Some((gpa, len as u64)), GuestMemoryOperation::Fill, || {
1767            self.fill_at_inner(gpa, val, len)
1768        })
1769    }
1770
1771    fn fill_at_inner(&self, gpa: u64, val: u8, len: usize) -> Result<(), GuestMemoryBackingError> {
1772        if len == 0 {
1773            return Ok(());
1774        }
1775        self.run_on_mapping(
1776            AccessType::Write,
1777            gpa,
1778            len,
1779            (),
1780            |(), dest| {
1781                // SAFETY: dest..dest+len is guaranteed to point to a reserved VA range.
1782                unsafe { trycopy::try_write_bytes(dest, val, len) }
1783            },
1784            |()| self.inner.imp.fill_fallback(gpa, val, len),
1785        )
1786    }
1787
1788    /// Reads from guest memory into `dest..dest+len`.
1789    ///
1790    /// # Safety
1791    /// The caller must ensure dest..dest+len is a valid buffer for writes.
1792    unsafe fn read_ptr(
1793        &self,
1794        gpa: u64,
1795        dest: *mut u8,
1796        len: usize,
1797    ) -> Result<(), GuestMemoryBackingError> {
1798        if len == 0 {
1799            return Ok(());
1800        }
1801        self.run_on_mapping(
1802            AccessType::Read,
1803            gpa,
1804            len,
1805            (),
1806            |(), src| {
1807                // SAFETY: src..src+len is guaranteed to point to a reserved VA
1808                // range, and dest..dest+len is guaranteed by the caller to be a
1809                // valid buffer for writes.
1810                unsafe { trycopy::try_copy(src, dest, len) }
1811            },
1812            |()| {
1813                // SAFETY: dest..dest+len is guaranteed by the caller to point to a
1814                // valid buffer for writes.
1815                unsafe { self.inner.imp.read_fallback(gpa, dest, len) }
1816            },
1817        )
1818    }
1819
1820    fn read_at_inner(&self, gpa: u64, dest: &mut [u8]) -> Result<(), GuestMemoryBackingError> {
1821        // SAFETY: `dest` is a valid buffer for writes.
1822        unsafe { self.read_ptr(gpa, dest.as_mut_ptr(), dest.len()) }
1823    }
1824
1825    /// Reads from guest memory address `gpa` into `dest`.
1826    pub fn read_at(&self, gpa: u64, dest: &mut [u8]) -> Result<(), GuestMemoryError> {
1827        self.with_op(
1828            Some((gpa, dest.len() as u64)),
1829            GuestMemoryOperation::Read,
1830            || self.read_at_inner(gpa, dest),
1831        )
1832    }
1833
1834    /// Reads from guest memory address `gpa` into `dest`.
1835    pub fn read_to_atomic(&self, gpa: u64, dest: &[AtomicU8]) -> Result<(), GuestMemoryError> {
1836        self.with_op(
1837            Some((gpa, dest.len() as u64)),
1838            GuestMemoryOperation::Read,
1839            // SAFETY: `dest` is a valid buffer for writes.
1840            || unsafe { self.read_ptr(gpa, dest.as_ptr() as *mut u8, dest.len()) },
1841        )
1842    }
1843
1844    /// Writes an object to guest memory at address `gpa`.
1845    ///
1846    /// If the object is 1, 2, 4, or 8 bytes and the address is naturally
1847    /// aligned, then the write will be performed atomically. Here, this means
1848    /// that concurrent readers (via `read_plain`) cannot observe a torn write
1849    /// but will observe either the old or new value.
1850    ///
1851    /// The memory ordering of the write is unspecified.
1852    ///
1853    /// FUTURE: once we are on Rust 1.79, add a method specifically for atomic
1854    /// accesses that const asserts that the size is appropriate.
1855    pub fn write_plain<T: IntoBytes + Immutable + KnownLayout>(
1856        &self,
1857        gpa: u64,
1858        b: &T,
1859    ) -> Result<(), GuestMemoryError> {
1860        // Note that this is const, so the match below will compile out.
1861        let len = size_of::<T>();
1862        self.with_op(Some((gpa, len as u64)), GuestMemoryOperation::Write, || {
1863            self.run_on_mapping(
1864                AccessType::Write,
1865                gpa,
1866                len,
1867                (),
1868                |(), dest| {
1869                    // SAFETY: dest..dest+len is guaranteed to point to
1870                    // a reserved VA range.
1871                    unsafe { trycopy::try_write_volatile(dest.cast(), b) }
1872                },
1873                |()| {
1874                    // SAFETY: b is a valid buffer for reads.
1875                    unsafe {
1876                        self.inner
1877                            .imp
1878                            .write_fallback(gpa, b.as_bytes().as_ptr(), len)
1879                    }
1880                },
1881            )
1882        })
1883    }
1884
1885    /// Attempts a sequentially-consistent compare exchange of the value at `gpa`.
1886    pub fn compare_exchange<T: IntoBytes + FromBytes + Immutable + KnownLayout + Copy>(
1887        &self,
1888        gpa: u64,
1889        current: T,
1890        new: T,
1891    ) -> Result<Result<T, T>, GuestMemoryError> {
1892        const {
1893            assert!(matches!(size_of::<T>(), 1 | 2 | 4 | 8));
1894            assert!(align_of::<T>() >= size_of::<T>());
1895        };
1896        let len = size_of_val(&new);
1897        self.with_op(
1898            Some((gpa, len as u64)),
1899            GuestMemoryOperation::CompareExchange,
1900            || {
1901                // Assume that if write is allowed, then read is allowed.
1902                self.run_on_mapping(
1903                    AccessType::Write,
1904                    gpa,
1905                    len,
1906                    (),
1907                    |(), dest| {
1908                        // SAFETY: dest..dest+len is guaranteed by the caller to be a valid
1909                        // buffer for writes.
1910                        unsafe { trycopy::try_compare_exchange(dest.cast(), current, new) }
1911                    },
1912                    |()| {
1913                        let mut current = current;
1914                        let success = self.inner.imp.compare_exchange_fallback(
1915                            gpa,
1916                            current.as_mut_bytes(),
1917                            new.as_bytes(),
1918                        )?;
1919
1920                        Ok(if success { Ok(new) } else { Err(current) })
1921                    },
1922                )
1923            },
1924        )
1925    }
1926
1927    /// Reads an object from guest memory at address `gpa`.
1928    ///
1929    /// If the object is 1, 2, 4, or 8 bytes and the address is naturally
1930    /// aligned, then the read will be performed atomically. Here, this means
1931    /// that when there is a concurrent writer, callers will observe either the
1932    /// old or new value, but not a torn read.
1933    ///
1934    /// The memory ordering of the read is unspecified.
1935    ///
1936    /// FUTURE: once we are on Rust 1.79, add a method specifically for atomic
1937    /// accesses that const asserts that the size is appropriate.
1938    pub fn read_plain<T: FromBytes + Immutable + KnownLayout>(
1939        &self,
1940        gpa: u64,
1941    ) -> Result<T, GuestMemoryError> {
1942        self.with_op(
1943            Some((gpa, size_of::<T>() as u64)),
1944            GuestMemoryOperation::Read,
1945            || self.read_plain_inner(gpa),
1946        )
1947    }
1948
1949    fn read_plain_inner<T: FromBytes + Immutable + KnownLayout>(
1950        &self,
1951        gpa: u64,
1952    ) -> Result<T, GuestMemoryBackingError> {
1953        let len = size_of::<T>();
1954        self.run_on_mapping(
1955            AccessType::Read,
1956            gpa,
1957            len,
1958            (),
1959            |(), src| {
1960                // SAFETY: src..src+len is guaranteed to point to a reserved VA
1961                // range.
1962                unsafe { trycopy::try_read_volatile(src.cast::<T>()) }
1963            },
1964            |()| {
1965                let mut obj = std::mem::MaybeUninit::<T>::zeroed();
1966                // SAFETY: dest..dest+len is guaranteed by the caller to point to a
1967                // valid buffer for writes.
1968                unsafe {
1969                    self.inner
1970                        .imp
1971                        .read_fallback(gpa, obj.as_mut_ptr().cast(), len)?;
1972                }
1973                // SAFETY: `obj` was fully initialized by `read_fallback`.
1974                Ok(unsafe { obj.assume_init() })
1975            },
1976        )
1977    }
1978
1979    fn probe_page_for_lock(
1980        &self,
1981        with_kernel_access: bool,
1982        gpa: u64,
1983    ) -> Result<*const AtomicU8, GuestMemoryBackingError> {
1984        let (region, offset, _) = self.inner.region(gpa, 1)?;
1985        let Some(SendPtrU8(ptr)) = region.mapping else {
1986            return Err(GuestMemoryBackingError::other(gpa, NotLockable));
1987        };
1988        // Ensure the virtual address can be exposed.
1989        if with_kernel_access {
1990            self.inner.imp.expose_va(gpa, 1)?;
1991        }
1992        // FUTURE: check the correct bitmap for the access type, which needs to
1993        // be passed in.
1994        self.read_plain_inner::<u8>(gpa)?;
1995        // SAFETY: the read_at call includes a check that ensures that
1996        // `gpa` is in the VA range.
1997        let page = unsafe { ptr.as_ptr().add(offset as usize) };
1998        Ok(page.cast())
1999    }
2000
2001    pub fn lock_gpns(
2002        &self,
2003        with_kernel_access: bool,
2004        gpns: &[u64],
2005    ) -> Result<LockedPages, GuestMemoryError> {
2006        self.with_op(None, GuestMemoryOperation::Lock, || {
2007            let mut pages = Vec::with_capacity(gpns.len());
2008            for &gpn in gpns {
2009                let gpa = gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?;
2010                let page = self.probe_page_for_lock(with_kernel_access, gpa)?;
2011                pages.push(PagePtr(page));
2012            }
2013            let store_gpns = self.inner.imp.lock_gpns(gpns)?;
2014            Ok(LockedPages {
2015                pages: pages.into_boxed_slice(),
2016                gpns: store_gpns.then(|| gpns.to_vec().into_boxed_slice()),
2017                mem: self.inner.clone(),
2018            })
2019        })
2020    }
2021
2022    pub fn probe_gpns(&self, gpns: &[u64]) -> Result<(), GuestMemoryError> {
2023        self.with_op(None, GuestMemoryOperation::Probe, || {
2024            for &gpn in gpns {
2025                self.read_plain_inner::<u8>(
2026                    gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?,
2027                )?;
2028            }
2029            Ok(())
2030        })
2031    }
2032
2033    /// Check if a given GPA is readable or not.
2034    pub fn probe_gpa_readable(&self, gpa: u64) -> Result<(), GuestMemoryErrorKind> {
2035        let mut b = [0];
2036        self.read_at_inner(gpa, &mut b).map_err(|err| err.kind)
2037    }
2038
2039    /// Check if a given GPA is writeable or not.
2040    pub fn probe_gpa_writable(&self, gpa: u64) -> Result<(), GuestMemoryErrorKind> {
2041        let _ = self
2042            .compare_exchange(gpa, 0u8, 0)
2043            .map_err(|err| err.kind())?;
2044        Ok(())
2045    }
2046
2047    /// Gets a slice of guest memory assuming the memory was already locked via
2048    /// [`GuestMemory::lock_gpns`].
2049    ///
2050    /// This is dangerous--if the pages have not been locked, then it could
2051    /// cause an access violation or guest memory corruption.
2052    ///
2053    /// Note that this is not `unsafe` since this cannot cause memory corruption
2054    /// in this process. Even if there is an access violation, the underlying VA
2055    /// space is known to be reserved.
2056    ///
2057    /// Panics if the requested buffer is out of range.
2058    fn dangerous_access_pre_locked_memory(&self, gpa: u64, len: usize) -> &[AtomicU8] {
2059        let addr = self
2060            .mapping_range(AccessType::Write, gpa, len)
2061            .unwrap()
2062            .unwrap();
2063        // SAFETY: addr..addr+len is checked above to be a valid VA range. It's
2064        // possible some of the pages aren't mapped and will cause AVs at
2065        // runtime when accessed, but, as discussed above, at a language level
2066        // this cannot cause any safety issues.
2067        unsafe { std::slice::from_raw_parts(addr.cast(), len) }
2068    }
2069
2070    fn op_range<F: FnMut(u64, Range<usize>) -> Result<(), GuestMemoryBackingError>>(
2071        &self,
2072        op: GuestMemoryOperation,
2073        range: &PagedRange<'_>,
2074        mut f: F,
2075    ) -> Result<(), GuestMemoryError> {
2076        self.with_op(None, op, || {
2077            let gpns = range.gpns();
2078            let offset = range.offset();
2079
2080            // Perform the operation in three phases: the first page (if it is not a
2081            // full page), the full pages, and the last page (if it is not a full
2082            // page).
2083            let mut byte_index = 0;
2084            let mut len = range.len();
2085            let mut page = 0;
2086            if !offset.is_multiple_of(PAGE_SIZE) {
2087                let head_len = std::cmp::min(len, PAGE_SIZE - (offset % PAGE_SIZE));
2088                let addr = gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?
2089                    + offset as u64 % PAGE_SIZE64;
2090                f(addr, byte_index..byte_index + head_len)?;
2091                byte_index += head_len;
2092                len -= head_len;
2093                page += 1;
2094            }
2095            while len >= PAGE_SIZE {
2096                f(
2097                    gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?,
2098                    byte_index..byte_index + PAGE_SIZE,
2099                )?;
2100                byte_index += PAGE_SIZE;
2101                len -= PAGE_SIZE;
2102                page += 1;
2103            }
2104            if len > 0 {
2105                f(
2106                    gpn_to_gpa(gpns[page]).map_err(GuestMemoryBackingError::gpn)?,
2107                    byte_index..byte_index + len,
2108                )?;
2109            }
2110
2111            Ok(())
2112        })
2113    }
2114
2115    pub fn write_range(&self, range: &PagedRange<'_>, data: &[u8]) -> Result<(), GuestMemoryError> {
2116        assert!(data.len() == range.len());
2117        self.op_range(GuestMemoryOperation::Write, range, move |addr, r| {
2118            self.write_at_inner(addr, &data[r])
2119        })
2120    }
2121
2122    pub fn fill_range(&self, range: &PagedRange<'_>, val: u8) -> Result<(), GuestMemoryError> {
2123        self.op_range(GuestMemoryOperation::Fill, range, move |addr, r| {
2124            self.fill_at_inner(addr, val, r.len())
2125        })
2126    }
2127
2128    pub fn zero_range(&self, range: &PagedRange<'_>) -> Result<(), GuestMemoryError> {
2129        self.op_range(GuestMemoryOperation::Fill, range, move |addr, r| {
2130            self.fill_at_inner(addr, 0, r.len())
2131        })
2132    }
2133
2134    pub fn read_range(
2135        &self,
2136        range: &PagedRange<'_>,
2137        data: &mut [u8],
2138    ) -> Result<(), GuestMemoryError> {
2139        assert!(data.len() == range.len());
2140        self.op_range(GuestMemoryOperation::Read, range, move |addr, r| {
2141            self.read_at_inner(addr, &mut data[r])
2142        })
2143    }
2144
2145    pub fn write_range_from_atomic(
2146        &self,
2147        range: &PagedRange<'_>,
2148        data: &[AtomicU8],
2149    ) -> Result<(), GuestMemoryError> {
2150        assert!(data.len() == range.len());
2151        self.op_range(GuestMemoryOperation::Write, range, move |addr, r| {
2152            let src = &data[r];
2153            // SAFETY: `src` is a valid buffer for reads.
2154            unsafe { self.write_ptr(addr, src.as_ptr().cast(), src.len()) }
2155        })
2156    }
2157
2158    pub fn read_range_to_atomic(
2159        &self,
2160        range: &PagedRange<'_>,
2161        data: &[AtomicU8],
2162    ) -> Result<(), GuestMemoryError> {
2163        assert!(data.len() == range.len());
2164        self.op_range(GuestMemoryOperation::Read, range, move |addr, r| {
2165            let dest = &data[r];
2166            // SAFETY: `dest` is a valid buffer for writes.
2167            unsafe { self.read_ptr(addr, dest.as_ptr().cast_mut().cast(), dest.len()) }
2168        })
2169    }
2170
2171    /// Locks the guest pages spanned by the specified `PagedRange`.
2172    ///
2173    /// # Arguments
2174    /// * 'paged_range' - The guest memory range to lock.
2175    /// * 'locked_range' - Receives a list of VA ranges to which each contiguous physical sub-range in `paged_range`
2176    ///   has been mapped. Must be initially empty.
2177    pub fn lock_range<'a, T: LockedRange<'a>>(
2178        &'a self,
2179        paged_range: PagedRange<'_>,
2180        mut locked_range: T,
2181    ) -> Result<LockedRangeImpl<'a, T>, GuestMemoryError> {
2182        self.with_op(None, GuestMemoryOperation::Lock, || {
2183            let gpns = paged_range.gpns();
2184            for &gpn in gpns {
2185                let gpa = gpn_to_gpa(gpn).map_err(GuestMemoryBackingError::gpn)?;
2186                self.probe_page_for_lock(true, gpa)?;
2187            }
2188            for range in paged_range.ranges() {
2189                let range = range.map_err(GuestMemoryBackingError::gpn)?;
2190                locked_range.push_sub_range(
2191                    self.dangerous_access_pre_locked_memory(range.start, range.len() as usize),
2192                );
2193            }
2194            let store_gpns = self.inner.imp.lock_gpns(paged_range.gpns())?;
2195            Ok(LockedRangeImpl {
2196                mem: &self.inner,
2197                gpns: store_gpns.then(|| paged_range.gpns().to_vec().into_boxed_slice()),
2198                inner: locked_range,
2199            })
2200        })
2201    }
2202}
2203
2204#[derive(Debug, Error)]
2205#[error("invalid guest page number {0:#x}")]
2206pub struct InvalidGpn(u64);
2207
2208fn gpn_to_gpa(gpn: u64) -> Result<u64, InvalidGpn> {
2209    gpn.checked_mul(PAGE_SIZE64).ok_or(InvalidGpn(gpn))
2210}
2211
2212#[derive(Debug, Copy, Clone, Default)]
2213struct RegionDefinition {
2214    invalid_mask: u64,
2215    region_mask: u64,
2216    region_bits: u32,
2217}
2218
2219impl RegionDefinition {
2220    fn region(&self, gpa: u64, len: u64) -> Result<(usize, u64), GuestMemoryBackingError> {
2221        if (gpa | len) & self.invalid_mask != 0 {
2222            return Err(GuestMemoryBackingError::new(
2223                GuestMemoryErrorKind::OutOfRange,
2224                gpa,
2225                OutOfRange,
2226            ));
2227        }
2228        let offset = gpa & self.region_mask;
2229        if offset.wrapping_add(len) & !self.region_mask != 0 {
2230            return Err(GuestMemoryBackingError::new(
2231                GuestMemoryErrorKind::OutOfRange,
2232                gpa,
2233                OutOfRange,
2234            ));
2235        }
2236        let index = (gpa >> self.region_bits) as usize;
2237        Ok((index, offset))
2238    }
2239}
2240
2241impl GuestMemoryInner {
2242    fn region(
2243        &self,
2244        gpa: u64,
2245        len: u64,
2246    ) -> Result<(&MemoryRegion, u64, usize), GuestMemoryBackingError> {
2247        let (index, offset) = self.region_def.region(gpa, len)?;
2248        let region = &self.regions[index];
2249        if offset + len > region.len {
2250            return Err(GuestMemoryBackingError::new(
2251                GuestMemoryErrorKind::OutOfRange,
2252                gpa,
2253                OutOfRange,
2254            ));
2255        }
2256        Ok((&self.regions[index], offset, index))
2257    }
2258}
2259
2260pub struct LockedPages {
2261    pages: Box<[PagePtr]>,
2262    gpns: Option<Box<[u64]>>,
2263    // maintain a reference to the backing memory
2264    mem: Arc<GuestMemoryInner>,
2265}
2266
2267impl Drop for LockedPages {
2268    fn drop(&mut self) {
2269        if let Some(gpns) = &self.gpns {
2270            self.mem.imp.unlock_gpns(gpns);
2271        }
2272    }
2273}
2274
2275impl Debug for LockedPages {
2276    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2277        f.debug_struct("LockedPages")
2278            .field("page_count", &self.pages.len())
2279            .finish()
2280    }
2281}
2282
/// Raw pointer to a locked guest page.
///
/// Slices of `PagePtr` are reinterpreted as slices of `&Page` (see
/// `LockedPages::pages`), so the type is `#[repr(transparent)]` to guarantee
/// that it has exactly the layout of the raw pointer it wraps. The default
/// Rust representation gives no such guarantee.
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
// Field is read via slice transmute and pointer casts, not actually dead.
struct PagePtr(#[expect(dead_code)] *const AtomicU8);

// SAFETY: PagePtr is just a pointer with no methods and has no inherent safety
// constraints.
unsafe impl Send for PagePtr {}
// SAFETY: see above comment
unsafe impl Sync for PagePtr {}
2292
2293pub type Page = [AtomicU8; PAGE_SIZE];
2294
2295impl LockedPages {
2296    #[inline]
2297    pub fn pages(&self) -> &[&Page] {
2298        // SAFETY: PagePtr is just a pointer to a Page. The pages are kept alive by
2299        // the reference in _mem, and the lifetimes here ensure the LockedPages outlives
2300        // the slice.
2301        unsafe { std::slice::from_raw_parts(self.pages.as_ptr().cast::<&Page>(), self.pages.len()) }
2302    }
2303}
2304
2305impl<'a> AsRef<[&'a Page]> for &'a LockedPages {
2306    fn as_ref(&self) -> &[&'a Page] {
2307        self.pages()
2308    }
2309}
2310
/// A range of locked guest pages, represented as an ordered list of the VA
/// sub-ranges to which the guest pages are mapped.
///
/// The range may only partially span the first and last page and must fully
/// span all intermediate pages.
pub trait LockedRange<'a> {
    /// Appends `sub_range` to this range.
    fn push_sub_range(&mut self, sub_range: &'a [AtomicU8]);
}
2319
2320pub struct LockedRangeImpl<'a, T: LockedRange<'a>> {
2321    mem: &'a GuestMemoryInner,
2322    gpns: Option<Box<[u64]>>,
2323    inner: T,
2324}
2325
2326impl<'a, T: LockedRange<'a>> LockedRangeImpl<'a, T> {
2327    pub fn get(&self) -> &T {
2328        &self.inner
2329    }
2330
2331    pub fn get_mut(&mut self) -> &mut T {
2332        &mut self.inner
2333    }
2334}
2335
2336impl<'a, T: LockedRange<'a>> Drop for LockedRangeImpl<'a, T> {
2337    fn drop(&mut self) {
2338        if let Some(gpns) = &self.gpns {
2339            self.mem.imp.unlock_gpns(gpns);
2340        }
2341    }
2342}
2343
2344#[derive(Debug, Error)]
2345pub enum AccessError {
2346    #[error("memory access error")]
2347    Memory(#[from] GuestMemoryError),
2348    #[error("out of range: {0:#x} < {1:#x}")]
2349    OutOfRange(usize, usize),
2350    #[error("write attempted to read-only memory")]
2351    ReadOnly,
2352}
2353
2354pub trait MemoryRead {
2355    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError>;
2356    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError>;
2357    fn len(&self) -> usize;
2358
2359    fn read_plain<T: IntoBytes + FromBytes + Immutable + KnownLayout>(
2360        &mut self,
2361    ) -> Result<T, AccessError> {
2362        let mut value: T = FromZeros::new_zeroed();
2363        self.read(value.as_mut_bytes())?;
2364        Ok(value)
2365    }
2366
2367    fn read_n<T: IntoBytes + FromBytes + Immutable + KnownLayout + Copy>(
2368        &mut self,
2369        len: usize,
2370    ) -> Result<Vec<T>, AccessError> {
2371        let mut value = vec![FromZeros::new_zeroed(); len];
2372        self.read(value.as_mut_bytes())?;
2373        Ok(value)
2374    }
2375
2376    fn read_all(&mut self) -> Result<Vec<u8>, AccessError> {
2377        let mut value = vec![0; self.len()];
2378        self.read(&mut value)?;
2379        Ok(value)
2380    }
2381
2382    fn limit(self, len: usize) -> Limit<Self>
2383    where
2384        Self: Sized,
2385    {
2386        let len = len.min(self.len());
2387        Limit { inner: self, len }
2388    }
2389}
2390
2391/// A trait for sequentially updating a region of memory.
2392pub trait MemoryWrite {
2393    fn write(&mut self, data: &[u8]) -> Result<(), AccessError>;
2394    fn zero(&mut self, len: usize) -> Result<(), AccessError> {
2395        self.fill(0, len)
2396    }
2397    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError>;
2398
2399    /// The space remaining in the memory region.
2400    fn len(&self) -> usize;
2401
2402    fn limit(self, len: usize) -> Limit<Self>
2403    where
2404        Self: Sized,
2405    {
2406        let len = len.min(self.len());
2407        Limit { inner: self, len }
2408    }
2409}
2410
2411impl MemoryRead for &'_ [u8] {
2412    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError> {
2413        if self.len() < data.len() {
2414            return Err(AccessError::OutOfRange(self.len(), data.len()));
2415        }
2416        let (source, rest) = self.split_at(data.len());
2417        data.copy_from_slice(source);
2418        *self = rest;
2419        Ok(self)
2420    }
2421
2422    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError> {
2423        if self.len() < len {
2424            return Err(AccessError::OutOfRange(self.len(), len));
2425        }
2426        *self = &self[len..];
2427        Ok(self)
2428    }
2429
2430    fn len(&self) -> usize {
2431        <[u8]>::len(self)
2432    }
2433}
2434
2435impl MemoryWrite for &mut [u8] {
2436    fn write(&mut self, data: &[u8]) -> Result<(), AccessError> {
2437        if self.len() < data.len() {
2438            return Err(AccessError::OutOfRange(self.len(), data.len()));
2439        }
2440        let (dest, rest) = std::mem::take(self).split_at_mut(data.len());
2441        dest.copy_from_slice(data);
2442        *self = rest;
2443        Ok(())
2444    }
2445
2446    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError> {
2447        if self.len() < len {
2448            return Err(AccessError::OutOfRange(self.len(), len));
2449        }
2450        let (dest, rest) = std::mem::take(self).split_at_mut(len);
2451        dest.fill(val);
2452        *self = rest;
2453        Ok(())
2454    }
2455
2456    fn len(&self) -> usize {
2457        <[u8]>::len(self)
2458    }
2459}
2460
/// Wrapper that caps how many more bytes may pass through `inner`.
#[derive(Debug, Clone)]
pub struct Limit<T> {
    /// The wrapped reader or writer.
    inner: T,
    /// Bytes still allowed through this wrapper.
    len: usize,
}
2466
2467impl<T: MemoryRead> MemoryRead for Limit<T> {
2468    fn read(&mut self, data: &mut [u8]) -> Result<&mut Self, AccessError> {
2469        let len = data.len();
2470        if len > self.len {
2471            return Err(AccessError::OutOfRange(self.len, len));
2472        }
2473        self.inner.read(data)?;
2474        self.len -= len;
2475        Ok(self)
2476    }
2477
2478    fn skip(&mut self, len: usize) -> Result<&mut Self, AccessError> {
2479        if len > self.len {
2480            return Err(AccessError::OutOfRange(self.len, len));
2481        }
2482        self.inner.skip(len)?;
2483        self.len -= len;
2484        Ok(self)
2485    }
2486
2487    fn len(&self) -> usize {
2488        self.len
2489    }
2490}
2491
2492impl<T: MemoryWrite> MemoryWrite for Limit<T> {
2493    fn write(&mut self, data: &[u8]) -> Result<(), AccessError> {
2494        let len = data.len();
2495        if len > self.len {
2496            return Err(AccessError::OutOfRange(self.len, len));
2497        }
2498        self.inner.write(data)?;
2499        self.len -= len;
2500        Ok(())
2501    }
2502
2503    fn fill(&mut self, val: u8, len: usize) -> Result<(), AccessError> {
2504        if len > self.len {
2505            return Err(AccessError::OutOfRange(self.len, len));
2506        }
2507        self.inner.fill(val, len)?;
2508        self.len -= len;
2509        Ok(())
2510    }
2511
2512    fn len(&self) -> usize {
2513        self.len
2514    }
2515}
2516
/// Trait implemented to allow mapping and unmapping a region of memory at
/// a particular guest address.
pub trait MappableGuestMemory: Send + Sync {
    /// Maps the memory into the guest at guest physical address `gpa`.
    ///
    /// `writable` specifies whether the guest can write to the memory region.
    /// If a guest tries to write to a non-writable region, the virtual
    /// processor will exit for MMIO handling.
    fn map_to_guest(&mut self, gpa: u64, writable: bool) -> io::Result<()>;

    /// Removes the memory from the guest.
    fn unmap_from_guest(&mut self);
}
2529
2530/// Trait implemented for a region of memory that can have memory mapped into
2531/// it.
2532pub trait MappedMemoryRegion: Send + Sync {
2533    /// Maps an object at `offset` in the region.
2534    ///
2535    /// Behaves like mmap--overwrites and splits existing mappings.
2536    fn map(
2537        &self,
2538        offset: usize,
2539        section: &dyn AsMappableRef,
2540        file_offset: u64,
2541        len: usize,
2542        writable: bool,
2543    ) -> io::Result<()>;
2544
2545    /// Unmaps any mappings in the specified range within the region.
2546    fn unmap(&self, offset: usize, len: usize) -> io::Result<()>;
2547}
2548
2549/// Trait implemented to allow the creation of memory regions.
2550pub trait MemoryMapper: Send + Sync {
2551    /// Creates a new memory region that can later be mapped into the guest.
2552    ///
2553    /// Returns both an interface for mapping/unmapping the region and for
2554    /// adding internal mappings.
2555    fn new_region(
2556        &self,
2557        len: usize,
2558        debug_name: String,
2559    ) -> io::Result<(Box<dyn MappableGuestMemory>, Arc<dyn MappedMemoryRegion>)>;
2560}
2561
2562/// Doorbell provides a mechanism to register for notifications on writes to specific addresses in guest memory.
2563pub trait DoorbellRegistration: Send + Sync {
2564    /// Register a doorbell event.
2565    fn register_doorbell(
2566        &self,
2567        guest_address: u64,
2568        value: Option<u64>,
2569        length: Option<u32>,
2570        event: &Event,
2571    ) -> io::Result<Box<dyn Send + Sync>>;
2572}
2573
2574/// Trait to map a ROM at one or more locations in guest memory.
2575pub trait MapRom: Send + Sync {
2576    /// Maps the specified portion of the ROM into guest memory at `gpa`.
2577    ///
2578    /// The returned object will implicitly unmap the ROM when dropped.
2579    fn map_rom(&self, gpa: u64, offset: u64, len: u64) -> io::Result<Box<dyn UnmapRom>>;
2580
2581    /// Returns the length of the ROM in bytes.
2582    fn len(&self) -> u64;
2583}
2584
/// Trait to unmap a ROM from guest memory.
pub trait UnmapRom: Send + Sync {
    /// Unmaps the ROM from guest memory, consuming the mapping object.
    fn unmap_rom(self);
}
2590
2591#[cfg(test)]
2592#[expect(clippy::undocumented_unsafe_blocks)]
2593mod tests {
2594    use crate::GuestMemory;
2595    use crate::PAGE_SIZE64;
2596    use crate::PageFaultAction;
2597    use crate::PageFaultError;
2598
2599    use sparse_mmap::SparseMapping;
2600    use std::ptr::NonNull;
2601    use std::sync::Arc;
2602    use thiserror::Error;
2603
2604    /// An implementation of a GuestMemoryAccess trait that expects all of
2605    /// guest memory to be mapped at a given base, with mmap or the Windows
2606    /// equivalent. Pages that are not backed by RAM will return failure
2607    /// when attempting to access them.
2608    pub struct GuestMemoryMapping {
2609        mapping: SparseMapping,
2610        #[cfg(feature = "bitmap")]
2611        bitmap: Option<Vec<u8>>,
2612    }
2613
2614    unsafe impl crate::GuestMemoryAccess for GuestMemoryMapping {
2615        fn mapping(&self) -> Option<NonNull<u8>> {
2616            NonNull::new(self.mapping.as_ptr().cast())
2617        }
2618
2619        fn max_address(&self) -> u64 {
2620            self.mapping.len() as u64
2621        }
2622
2623        #[cfg(feature = "bitmap")]
2624        fn access_bitmap(&self) -> Option<crate::BitmapInfo> {
2625            self.bitmap.as_ref().map(|bm| crate::BitmapInfo {
2626                read_bitmap: NonNull::new(bm.as_ptr().cast_mut()).unwrap(),
2627                write_bitmap: NonNull::new(bm.as_ptr().cast_mut()).unwrap(),
2628                bit_offset: 0,
2629            })
2630        }
2631    }
2632
2633    const PAGE_SIZE: usize = 4096;
2634    const SIZE_1MB: usize = 1048576;
2635
2636    /// Create a test guest layout:
2637    /// 0           -> 1MB          RAM
2638    /// 1MB         -> 2MB          empty
2639    /// 2MB         -> 3MB          RAM
2640    /// 3MB         -> 3MB + 4K     empty
2641    /// 3MB + 4K    -> 4MB          RAM
2642    fn create_test_mapping() -> GuestMemoryMapping {
2643        let mapping = SparseMapping::new(SIZE_1MB * 4).unwrap();
2644        mapping.alloc(0, SIZE_1MB).unwrap();
2645        mapping.alloc(2 * SIZE_1MB, SIZE_1MB).unwrap();
2646        mapping
2647            .alloc(3 * SIZE_1MB + PAGE_SIZE, SIZE_1MB - PAGE_SIZE)
2648            .unwrap();
2649
2650        GuestMemoryMapping {
2651            mapping,
2652            #[cfg(feature = "bitmap")]
2653            bitmap: None,
2654        }
2655    }
2656
2657    #[test]
2658    fn test_basic_read_write() {
2659        let mapping = create_test_mapping();
2660        let gm = GuestMemory::new("test", mapping);
2661
2662        // Test reading at 0.
2663        let addr = 0;
2664        let result = gm.read_plain::<u8>(addr);
2665        assert_eq!(result.unwrap(), 0);
2666
2667        // Test read/write to first page
2668        let write_buffer = [1, 2, 3, 4, 5];
2669        let mut read_buffer = [0; 5];
2670        gm.write_at(0, &write_buffer).unwrap();
2671        gm.read_at(0, &mut read_buffer).unwrap();
2672        assert_eq!(write_buffer, read_buffer);
2673        assert_eq!(gm.read_plain::<u8>(0).unwrap(), 1);
2674        assert_eq!(gm.read_plain::<u8>(1).unwrap(), 2);
2675        assert_eq!(gm.read_plain::<u8>(2).unwrap(), 3);
2676        assert_eq!(gm.read_plain::<u8>(3).unwrap(), 4);
2677        assert_eq!(gm.read_plain::<u8>(4).unwrap(), 5);
2678
2679        // Test read/write to page at 2MB
2680        let addr = 2 * SIZE_1MB as u64;
2681        let write_buffer: Vec<u8> = (0..PAGE_SIZE).map(|x| x as u8).collect();
2682        let mut read_buffer: Vec<u8> = (0..PAGE_SIZE).map(|_| 0).collect();
2683        gm.write_at(addr, write_buffer.as_slice()).unwrap();
2684        gm.read_at(addr, read_buffer.as_mut_slice()).unwrap();
2685        assert_eq!(write_buffer, read_buffer);
2686
2687        // Test read/write to first 1MB
2688        let write_buffer: Vec<u8> = (0..SIZE_1MB).map(|x| x as u8).collect();
2689        let mut read_buffer: Vec<u8> = (0..SIZE_1MB).map(|_| 0).collect();
2690        gm.write_at(addr, write_buffer.as_slice()).unwrap();
2691        gm.read_at(addr, read_buffer.as_mut_slice()).unwrap();
2692        assert_eq!(write_buffer, read_buffer);
2693
2694        // Test bad read at 1MB
2695        let addr = SIZE_1MB as u64;
2696        let result = gm.read_plain::<u8>(addr);
2697        assert!(result.is_err());
2698    }
2699
2700    #[test]
2701    fn test_multi() {
2702        let len = SIZE_1MB * 4;
2703        let mapping = SparseMapping::new(len).unwrap();
2704        mapping.alloc(0, len).unwrap();
2705        let mapping = Arc::new(GuestMemoryMapping {
2706            mapping,
2707            #[cfg(feature = "bitmap")]
2708            bitmap: None,
2709        });
2710        let region_len = 1 << 30;
2711        let gm = GuestMemory::new_multi_region(
2712            "test",
2713            region_len,
2714            vec![Some(mapping.clone()), None, Some(mapping.clone())],
2715        )
2716        .unwrap();
2717
2718        let mut b = [0];
2719        let len = len as u64;
2720        gm.read_at(0, &mut b).unwrap();
2721        gm.read_at(len, &mut b).unwrap_err();
2722        gm.read_at(region_len, &mut b).unwrap_err();
2723        gm.read_at(2 * region_len, &mut b).unwrap();
2724        gm.read_at(2 * region_len + len, &mut b).unwrap_err();
2725        gm.read_at(3 * region_len, &mut b).unwrap_err();
2726    }
2727
2728    #[cfg(feature = "bitmap")]
2729    #[test]
2730    fn test_bitmap() {
2731        let len = PAGE_SIZE * 4;
2732        let mapping = SparseMapping::new(len).unwrap();
2733        mapping.alloc(0, len).unwrap();
2734        let bitmap = vec![0b0101];
2735        let mapping = Arc::new(GuestMemoryMapping {
2736            mapping,
2737            bitmap: Some(bitmap),
2738        });
2739        let gm = GuestMemory::new("test", mapping);
2740
2741        gm.read_plain::<[u8; 1]>(0).unwrap();
2742        gm.read_plain::<[u8; 1]>(PAGE_SIZE64 - 1).unwrap();
2743        gm.read_plain::<[u8; 2]>(PAGE_SIZE64 - 1).unwrap_err();
2744        gm.read_plain::<[u8; 1]>(PAGE_SIZE64).unwrap_err();
2745        gm.read_plain::<[u8; 1]>(PAGE_SIZE64 * 2).unwrap();
2746        gm.read_plain::<[u8; PAGE_SIZE * 2]>(0).unwrap_err();
2747    }
2748
2749    struct FaultingMapping {
2750        mapping: SparseMapping,
2751    }
2752
2753    #[derive(Debug, Error)]
2754    #[error("fault")]
2755    struct Fault;
2756
2757    unsafe impl crate::GuestMemoryAccess for FaultingMapping {
2758        fn mapping(&self) -> Option<NonNull<u8>> {
2759            NonNull::new(self.mapping.as_ptr().cast())
2760        }
2761
2762        fn max_address(&self) -> u64 {
2763            self.mapping.len() as u64
2764        }
2765
2766        fn page_fault(
2767            &self,
2768            address: u64,
2769            _len: usize,
2770            write: bool,
2771            bitmap_failure: bool,
2772        ) -> PageFaultAction {
2773            assert!(!bitmap_failure);
2774            let qlen = self.mapping.len() as u64 / 4;
2775            if address < qlen || address >= 3 * qlen {
2776                return PageFaultAction::Fail(PageFaultError::other(Fault));
2777            }
2778            let page_address = (address as usize) & !(PAGE_SIZE - 1);
2779            if address >= 2 * qlen {
2780                if write {
2781                    return PageFaultAction::Fail(PageFaultError::other(Fault));
2782                }
2783                self.mapping.map_zero(page_address, PAGE_SIZE).unwrap();
2784            } else {
2785                self.mapping.alloc(page_address, PAGE_SIZE).unwrap();
2786            }
2787            PageFaultAction::Retry
2788        }
2789    }
2790
2791    impl FaultingMapping {
2792        fn new(len: usize) -> Self {
2793            let mapping = SparseMapping::new(len).unwrap();
2794            FaultingMapping { mapping }
2795        }
2796    }
2797
2798    #[test]
2799    fn test_fault() {
2800        let len = PAGE_SIZE * 4;
2801        let mapping = FaultingMapping::new(len);
2802        let gm = GuestMemory::new("test", mapping);
2803
2804        gm.write_plain::<u8>(0, &0).unwrap_err();
2805        gm.read_plain::<u8>(PAGE_SIZE64 - 1).unwrap_err();
2806        gm.read_plain::<u8>(PAGE_SIZE64).unwrap();
2807        gm.write_plain::<u8>(PAGE_SIZE64, &0).unwrap();
2808        gm.write_plain::<u16>(PAGE_SIZE64 * 3 - 1, &0).unwrap_err();
2809        gm.read_plain::<u16>(PAGE_SIZE64 * 3 - 1).unwrap_err();
2810        gm.read_plain::<u8>(PAGE_SIZE64 * 3 - 1).unwrap();
2811        gm.write_plain::<u8>(PAGE_SIZE64 * 3 - 1, &0).unwrap_err();
2812    }
2813
2814    #[cfg(feature = "bitmap")]
2815    #[test]
2816    fn test_zero_length_access_at_offset_zero() {
2817        // Regression test for a fuzzing-reported subtract-with-overflow panic
2818        // in `check_access`: a zero-length access at offset 0 underflowed while
2819        // computing the index of the last accessed page (`offset + len - 1`).
2820        // A zero-length access touches no pages and so must succeed without
2821        // consulting the bitmap, even when the page is marked inaccessible.
2822        let len = PAGE_SIZE * 4;
2823        let mapping = SparseMapping::new(len).unwrap();
2824        mapping.alloc(0, len).unwrap();
2825        let bitmap = vec![0b0000]; // every page marked inaccessible
2826        let mapping = Arc::new(GuestMemoryMapping {
2827            mapping,
2828            bitmap: Some(bitmap),
2829        });
2830        let gm = GuestMemory::new("test", mapping);
2831
2832        // Zero-sized plain accesses reach `check_access` with offset 0 and
2833        // len 0; these previously panicked with "attempt to subtract with
2834        // overflow".
2835        gm.read_plain::<()>(0).unwrap();
2836        gm.write_plain::<()>(0, &()).unwrap();
2837    }
2838
2839    #[test]
2840    fn test_allocated() {
2841        let mut gm = GuestMemory::allocate(0x10000);
2842        let pattern = [0x42; 0x10000];
2843        gm.write_at(0, &pattern).unwrap();
2844        assert_eq!(gm.inner_buf_mut().unwrap(), &pattern);
2845        gm.inner_buf().unwrap();
2846        let gm2 = gm.clone();
2847        assert!(gm.inner_buf_mut().is_none());
2848        gm.inner_buf().unwrap();
2849        let mut gm = gm.into_inner_buf().unwrap_err();
2850        drop(gm2);
2851        assert_eq!(gm.inner_buf_mut().unwrap(), &pattern);
2852        gm.into_inner_buf().unwrap();
2853    }
2854}