page_pool_alloc/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! This module implements a page memory allocator for allocating pages from a
//! given portion of the guest address space.
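//!
//! A minimal usage sketch (illustrative only; the [`TestMapper`]-backed pool
//! and the device/tag names are arbitrary):
//!
//! ```ignore
//! use memory_range::MemoryRange;
//!
//! // Build a pool over one 32-page range, backed by the in-crate TestMapper.
//! let pool = PagePool::new(
//!     &[MemoryRange::from_4k_gpn_range(0..32)],
//!     TestMapper::new(32)?,
//! )?;
//!
//! // Each device gets its own allocator, identified by a unique name.
//! let allocator = pool.allocator("example-device".into())?;
//!
//! // Allocate four contiguous pages; dropping the handle frees them.
//! let handle = allocator.alloc(4.try_into().unwrap(), "example-buffer".into())?;
//! assert_eq!(handle.size_pages(), 4);
//! ```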
6
mod device_dma;

pub use device_dma::PagePoolDmaBuffer;

use anyhow::Context;
use inspect::Inspect;
use inspect::Response;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use safeatomic::AtomicSliceOps;
use sparse_mmap::Mappable;
use sparse_mmap::MappableRef;
use sparse_mmap::SparseMapping;
use sparse_mmap::alloc_shared_memory;
use std::fmt::Debug;
use std::num::NonZeroU64;
use std::sync::Arc;
use std::sync::atomic::AtomicU8;
use thiserror::Error;

const PAGE_SIZE: u64 = 4096;

/// Save restore support for [`PagePool`].
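///
/// A sketch of the intended save/restore flow (illustrative; it assumes the
/// restored pool was created over the same memory ranges and has no
/// allocators yet, and the device name is arbitrary):
///
/// ```ignore
/// use vmcore::save_restore::SaveRestore;
///
/// // Save the pool while allocations are still outstanding.
/// let saved = pool.save()?;
///
/// // On the new pool, restore the slot state, let each device re-attach its
/// // allocations, then validate that nothing was left behind.
/// new_pool.restore(saved)?;
/// let allocator = new_pool.allocator("example-device".into())?;
/// let _handles = allocator.restore_pending_allocs();
/// new_pool.validate_restore(false)?;
/// ```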
pub mod save_restore {
    use super::PAGE_SIZE;
    use super::PagePool;
    use super::Slot;
    use super::SlotState;
    use crate::ResolvedSlotState;
    use memory_range::MemoryRange;
    use mesh::payload::Protobuf;
    use vmcore::save_restore::SaveRestore;
    use vmcore::save_restore::SavedStateRoot;

    #[derive(Protobuf)]
    #[mesh(package = "openvmm.pagepool")]
    enum InnerSlotState {
        #[mesh(1)]
        Free,
        #[mesh(2)]
        Allocated {
            #[mesh(1)]
            device_id: String,
            #[mesh(2)]
            tag: String,
        },
        #[mesh(3)]
        Leaked {
            #[mesh(1)]
            device_id: String,
            #[mesh(2)]
            tag: String,
        },
    }

    #[derive(Protobuf)]
    #[mesh(package = "openvmm.pagepool")]
    struct SlotSavedState {
        #[mesh(1)]
        base_pfn: u64,
        #[mesh(2)]
        size_pages: u64,
        #[mesh(3)]
        state: InnerSlotState,
    }

    /// The saved state for [`PagePool`].
    #[derive(Protobuf, SavedStateRoot)]
    #[mesh(package = "openvmm.pagepool")]
    pub struct PagePoolState {
        #[mesh(1)]
        state: Vec<SlotSavedState>,
        #[mesh(2)]
        ranges: Vec<MemoryRange>,
    }

    impl SaveRestore for PagePool {
        type SavedState = PagePoolState;

        fn save(&mut self) -> Result<Self::SavedState, vmcore::save_restore::SaveError> {
            let state = self.inner.state.lock();
            Ok(PagePoolState {
                state: state
                    .slots
                    .iter()
                    .map(|slot| {
                        let slot = slot.resolve(&state.device_ids);
                        let inner_state = match slot.state {
                            ResolvedSlotState::Free => InnerSlotState::Free,
                            ResolvedSlotState::Allocated { device_id, tag } => {
                                InnerSlotState::Allocated {
                                    device_id: device_id.to_string(),
                                    tag: tag.to_string(),
                                }
                            }
                            ResolvedSlotState::Leaked { device_id, tag } => {
                                InnerSlotState::Leaked {
                                    device_id: device_id.to_string(),
                                    tag: tag.to_string(),
                                }
                            }
                            ResolvedSlotState::AllocatedPendingRestore { .. } => {
                                panic!("should not save allocated pending restore")
                            }
                        };

                        SlotSavedState {
                            base_pfn: slot.base_pfn,
                            size_pages: slot.size_pages,
                            state: inner_state,
                        }
                    })
                    .collect(),
                ranges: self.ranges.clone(),
            })
        }

        fn restore(
            &mut self,
            mut state: Self::SavedState,
        ) -> Result<(), vmcore::save_restore::RestoreError> {
            // Verify that the pool describes the same regions of memory as the
            // saved state.
            for (current, saved) in self.ranges.iter().zip(state.ranges.iter()) {
                if current != saved {
                    // TODO: return unmatched range or vecs?
                    return Err(vmcore::save_restore::RestoreError::InvalidSavedState(
                        anyhow::anyhow!("pool ranges do not match"),
                    ));
                }
            }

            let mut inner = self.inner.state.lock();

            // Verify there are no existing allocators present, as we rely on
            // the pool being completely free since we will overwrite the state
            // of the pool with the stored slot info.
            //
            // Note that this also means that the pool does not have any pending
            // allocations, as it's impossible to allocate without creating an
            // allocator.
            if !inner.device_ids.is_empty() {
                return Err(vmcore::save_restore::RestoreError::InvalidSavedState(
                    anyhow::anyhow!("existing allocators present, pool must be empty to restore"),
                ));
            }

            state.state.sort_by_key(|slot| slot.base_pfn);

            let mut mapping_offset = 0;
            inner.slots = state
                .state
                .into_iter()
                .map(|slot| {
                    let inner = match slot.state {
                        InnerSlotState::Free => SlotState::Free,
                        InnerSlotState::Allocated { device_id, tag } => {
                            SlotState::AllocatedPendingRestore { device_id, tag }
                        }
                        InnerSlotState::Leaked { device_id, tag } => {
                            SlotState::Leaked { device_id, tag }
                        }
                    };

                    let slot = Slot {
                        base_pfn: slot.base_pfn,
                        mapping_offset: mapping_offset as usize,
                        size_pages: slot.size_pages,
                        state: inner,
                    };
                    mapping_offset += slot.size_pages * PAGE_SIZE;
                    slot
                })
                .collect();

            if mapping_offset != self.inner.mapping.len() as u64 {
                return Err(vmcore::save_restore::RestoreError::InvalidSavedState(
                    anyhow::anyhow!("missing slots in saved state"),
                ));
            }

            Ok(())
        }
    }
}

/// Errors returned on allocation methods.
#[derive(Debug, Error)]
pub enum Error {
    /// Unable to allocate memory due to not enough free pages.
    #[error("unable to allocate page pool size {size} with tag {tag}")]
    PagePoolOutOfMemory {
        /// The size in pages of the allocation.
        size: u64,
        /// The tag of the allocation.
        tag: String,
    },
    /// Unable to create mapping requested for the allocation.
    #[error("failed to create mapping for allocation")]
    Mapping(#[source] anyhow::Error),
    /// No matching allocation found for restore.
    #[error("no matching allocation found for restore")]
    NoMatchingAllocation,
}

/// Error returned when unrestored allocations are found.
#[derive(Debug, Error)]
#[error("unrestored allocations found")]
pub struct UnrestoredAllocations;

#[derive(Debug, PartialEq, Eq)]
struct Slot {
    base_pfn: u64,
    mapping_offset: usize,
    size_pages: u64,
    state: SlotState,
}

#[derive(Clone, Debug, PartialEq, Eq)]
enum SlotState {
    Free,
    Allocated {
        /// This is an index into the outer [`PagePoolInner`]'s device_ids
        /// vector.
        device_id: usize,
        tag: String,
    },
    /// This allocation was restored, and is waiting for a
    /// [`PagePoolAllocator::restore_alloc`] to restore it.
    AllocatedPendingRestore {
        device_id: String,
        tag: String,
    },
    /// This allocation was leaked, and is no longer able to be allocated from.
    Leaked {
        device_id: String,
        tag: String,
    },
}

impl Slot {
    fn resolve<'a>(&'a self, device_ids: &'a [DeviceId]) -> ResolvedSlot<'a> {
        ResolvedSlot {
            base_pfn: self.base_pfn,
            mapping_offset: self.mapping_offset,
            size_pages: self.size_pages,
            state: match self.state {
                SlotState::Free => ResolvedSlotState::Free,
                SlotState::Allocated { device_id, ref tag } => ResolvedSlotState::Allocated {
                    device_id: device_ids[device_id].name(),
                    tag,
                },
                SlotState::AllocatedPendingRestore {
                    ref device_id,
                    ref tag,
                } => ResolvedSlotState::AllocatedPendingRestore { device_id, tag },
                SlotState::Leaked {
                    ref device_id,
                    ref tag,
                } => ResolvedSlotState::Leaked { device_id, tag },
            },
        }
    }
}

impl SlotState {
    fn restore_allocated(&mut self, device_id: usize) {
        if !matches!(self, SlotState::AllocatedPendingRestore { .. }) {
            panic!("invalid state");
        }

        // Temporarily swap with free so we can move the string tag to the
        // restored state without allocating.
        let prev = std::mem::replace(self, SlotState::Free);
        *self = match prev {
            SlotState::AllocatedPendingRestore { device_id: _, tag } => {
                SlotState::Allocated { device_id, tag }
            }
            _ => unreachable!(),
        };
    }
}

#[derive(Inspect)]
struct ResolvedSlot<'a> {
    base_pfn: u64,
    mapping_offset: usize,
    size_pages: u64,
    state: ResolvedSlotState<'a>,
}

#[derive(Inspect)]
#[inspect(external_tag)]
enum ResolvedSlotState<'a> {
    Free,
    Allocated { device_id: &'a str, tag: &'a str },
    AllocatedPendingRestore { device_id: &'a str, tag: &'a str },
    Leaked { device_id: &'a str, tag: &'a str },
}

#[derive(Inspect, Debug, Clone, PartialEq, Eq)]
#[inspect(tag = "state")]
enum DeviceId {
    /// A device id that is in use by an allocator.
    Used(#[inspect(rename = "name")] String),
    /// A device id that was dropped and can be reused if an allocator with the
    /// same name is created.
    Unassigned(#[inspect(rename = "name")] String),
}

impl DeviceId {
    fn name(&self) -> &str {
        match self {
            DeviceId::Used(name) => name,
            DeviceId::Unassigned(name) => name,
        }
    }
}

#[derive(Inspect)]
struct PagePoolInner {
    #[inspect(flatten)]
    state: Mutex<PagePoolState>,
    /// The pfn_bias for the pool.
    pfn_bias: u64,
    /// The mapper used to create mappings for allocations.
    source: Box<dyn PoolSource>,
    #[inspect(skip)]
    mapping: SparseMapping,
}

impl Debug for PagePoolInner {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PagePoolInner")
            .field("state", &self.state)
            .field("pfn_bias", &self.pfn_bias)
            .field("mapping", &self.mapping)
            .finish()
    }
}

#[derive(Debug)]
struct PagePoolState {
    /// The internal slots for the pool, representing page state.
    slots: Vec<Slot>,
    /// The list of device ids for outstanding allocators. Each name must be
    /// unique.
    device_ids: Vec<DeviceId>,
}

impl Inspect for PagePoolState {
    fn inspect(&self, req: inspect::Request<'_>) {
        let Self { slots, device_ids } = self;
        req.respond().field(
            "slots",
            inspect::iter_by_index(slots).map_value(|s| s.resolve(device_ids)),
        );
    }
}

/// A handle for a page pool allocation. When dropped, the allocation is
/// freed.
#[derive(Debug)]
pub struct PagePoolHandle {
    inner: Arc<PagePoolInner>,
    base_pfn: u64,
    size_pages: u64,
    mapping_offset: usize,
}

impl PagePoolHandle {
    /// The base pfn (with bias) for this allocation.
    pub fn base_pfn(&self) -> u64 {
        self.base_pfn + self.inner.pfn_bias
    }

    /// The base pfn without bias for this allocation.
    pub fn base_pfn_without_bias(&self) -> u64 {
        self.base_pfn
    }

    /// The number of 4K pages for this allocation.
    pub fn size_pages(&self) -> u64 {
        self.size_pages
    }

    /// The mapping associated with this allocation.
    pub fn mapping(&self) -> &[AtomicU8] {
        self.inner
            .mapping
            .atomic_slice(self.mapping_offset, (self.size_pages * PAGE_SIZE) as usize)
    }

    /// Create a memory block from this allocation.
    fn into_memory_block(self) -> anyhow::Result<user_driver::memory::MemoryBlock> {
        let pfns: Vec<_> = (self.base_pfn()..self.base_pfn() + self.size_pages).collect();
        Ok(user_driver::memory::MemoryBlock::new(PagePoolDmaBuffer {
            alloc: self,
            pfns,
        }))
    }
}

impl Drop for PagePoolHandle {
    fn drop(&mut self) {
        let mut inner = self.inner.state.lock();

        let slot = inner
            .slots
            .iter_mut()
            .find(|slot| {
                if matches!(slot.state, SlotState::Allocated { .. }) {
                    slot.base_pfn == self.base_pfn && slot.size_pages == self.size_pages
                } else {
                    false
                }
            })
            .expect("must find allocation");

        assert_eq!(slot.mapping_offset, self.mapping_offset);
        slot.state = SlotState::Free;
    }
}

/// A source for pool allocations.
pub trait PoolSource: Inspect + Send + Sync {
    /// The bias to apply to the physical address of each allocation.
    fn address_bias(&self) -> u64;
    /// Translates a physical address into the file offset to use when mapping
    /// the page.
    fn file_offset(&self, address: u64) -> u64;
    /// Returns the OS object to map pages from.
    fn mappable(&self) -> MappableRef<'_>;
}

/// A mapper that uses an internal buffer to map pages. This is meant to be used
/// for tests that use [`PagePool`].
#[derive(Inspect)]
#[inspect(extra = "TestMapper::inspect_extra")]
pub struct TestMapper {
    #[inspect(skip)]
    mem: Mappable,
    len: usize,
}

impl TestMapper {
    /// Create a new test mapper that holds an internal buffer of `size_pages`.
    pub fn new(size_pages: u64) -> anyhow::Result<Self> {
        let len = (size_pages * PAGE_SIZE) as usize;
        let fd = alloc_shared_memory(len).context("creating shared mem")?;

        Ok(Self { mem: fd, len })
    }

    /// Returns a [`SparseMapping`] that maps starting at page 0.
    pub fn sparse_mapping(&self) -> SparseMapping {
        let mappable = self.mappable();
        let mapping = SparseMapping::new(self.len).unwrap();
        mapping.map_file(0, self.len, mappable, 0, true).unwrap();
        mapping
    }

    fn inspect_extra(&self, resp: &mut Response<'_>) {
        resp.field("type", "test");
    }
}

impl PoolSource for TestMapper {
    fn address_bias(&self) -> u64 {
        0
    }

    fn file_offset(&self, address: u64) -> u64 {
        address
    }

    fn mappable(&self) -> MappableRef<'_> {
        #[cfg(windows)]
        return std::os::windows::io::AsHandle::as_handle(&self.mem);
        #[cfg(not(windows))]
        std::os::unix::io::AsFd::as_fd(&self.mem)
    }
}

/// A page allocator for memory.
///
/// This memory may be private memory, or shared-visibility memory on isolated
/// VMs, depending on the memory ranges passed into the corresponding `new`
/// methods.
///
/// Pages are allocated via [`PagePoolAllocator`] from [`Self::allocator`] or
/// [`PagePoolAllocatorSpawner::allocator`].
///
/// This struct is considered the "owner" of the pool, allowing for save/restore.
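///
/// A brief construction sketch (illustrative; the range, source, and device
/// names are arbitrary):
///
/// ```ignore
/// let pool = PagePool::new(&[MemoryRange::from_4k_gpn_range(0..64)], source)?;
///
/// // Allocate directly...
/// let allocator = pool.allocator("example-device".into())?;
///
/// // ...or hand out a spawner so other components can create their own
/// // allocators without owning the pool.
/// let spawner = pool.allocator_spawner();
/// let other = spawner.allocator("other-device".into())?;
/// ```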
#[derive(Inspect)]
pub struct PagePool {
    #[inspect(flatten)]
    inner: Arc<PagePoolInner>,
    #[inspect(iter_by_index)]
    ranges: Vec<MemoryRange>,
}

impl PagePool {
    /// Returns a new page pool managing the address ranges in `ranges`,
    /// using `source` to access the memory.
    pub fn new<T: PoolSource + 'static>(ranges: &[MemoryRange], source: T) -> anyhow::Result<Self> {
        Self::new_internal(ranges, Box::new(source))
    }

    fn new_internal(memory: &[MemoryRange], source: Box<dyn PoolSource>) -> anyhow::Result<Self> {
        let mut mapping_offset = 0;
        let pages = memory
            .iter()
            .map(|range| {
                let slot = Slot {
                    base_pfn: range.start() / PAGE_SIZE,
                    size_pages: range.len() / PAGE_SIZE,
                    mapping_offset,
                    state: SlotState::Free,
                };
                mapping_offset += range.len() as usize;
                slot
            })
            .collect();

        let total_len = mapping_offset;

        // Create a contiguous mapping of the memory ranges.
        let mapping = SparseMapping::new(total_len).context("failed to reserve VA")?;
        let mappable = source.mappable();
        let mut mapping_offset = 0;
        for range in memory {
            let file_offset = source.file_offset(range.start());
            let len = range.len() as usize;
            mapping
                .map_file(mapping_offset, len, mappable, file_offset, true)
                .context("failed to map range")?;
            mapping_offset += len;
        }

        assert_eq!(mapping_offset, total_len);

        Ok(Self {
            inner: Arc::new(PagePoolInner {
                state: Mutex::new(PagePoolState {
                    slots: pages,
                    device_ids: Vec::new(),
                }),
                pfn_bias: source.address_bias() / PAGE_SIZE,
                source,
                mapping,
            }),
            ranges: memory.to_vec(),
        })
    }

    /// Create an allocator instance that can be used to allocate pages. The
    /// specified `device_name` must be unique.
    ///
    /// Users should create a new allocator for each device, as the device name
    /// is used to track allocations in the pool.
    pub fn allocator(&self, device_name: String) -> anyhow::Result<PagePoolAllocator> {
        PagePoolAllocator::new(&self.inner, device_name)
    }

    /// Create a spawner that allows creating multiple allocators.
    pub fn allocator_spawner(&self) -> PagePoolAllocatorSpawner {
        PagePoolAllocatorSpawner {
            inner: self.inner.clone(),
        }
    }

    /// Validate that all allocations have been restored. This should be called
    /// after all devices have been restored.
    ///
    /// `leak_unrestored` controls what to do if a matching allocation was not
    /// restored. If true, the allocation is marked as leaked and the function
    /// returns Ok. If false, the function returns an error if any are
    /// unmatched.
    ///
    /// Unmatched allocations are always logged via a `tracing::warn!` log.
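    ///
    /// An illustrative call pattern (assuming restore has already run):
    ///
    /// ```ignore
    /// // Strict: fail if any allocation was not re-attached by its device.
    /// pool.validate_restore(false)?;
    /// // Lenient: mark anything unrestored as leaked and continue.
    /// pool.validate_restore(true)?;
    /// ```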
    pub fn validate_restore(&self, leak_unrestored: bool) -> Result<(), UnrestoredAllocations> {
        let mut inner = self.inner.state.lock();
        let mut unrestored_allocation = false;

        // Mark unrestored allocations as leaked.
        for slot in inner.slots.iter_mut() {
            match &slot.state {
                SlotState::Free | SlotState::Allocated { .. } | SlotState::Leaked { .. } => {}
                SlotState::AllocatedPendingRestore { device_id, tag } => {
                    tracing::warn!(
                        base_pfn = slot.base_pfn,
                        pfn_bias = self.inner.pfn_bias,
                        size_pages = slot.size_pages,
                        device_id = device_id,
                        tag = tag.as_str(),
                        "unrestored allocation"
                    );

                    if leak_unrestored {
                        slot.state = SlotState::Leaked {
                            device_id: device_id.clone(),
                            tag: tag.clone(),
                        };
                    }

                    unrestored_allocation = true;
                }
            }
        }

        if unrestored_allocation && !leak_unrestored {
            Err(UnrestoredAllocations)
        } else {
            Ok(())
        }
    }
}

/// A spawner for [`PagePoolAllocator`] instances.
///
/// Useful when you need to create multiple allocators, without having ownership
/// of the actual [`PagePool`].
pub struct PagePoolAllocatorSpawner {
    inner: Arc<PagePoolInner>,
}

impl PagePoolAllocatorSpawner {
    /// Create an allocator instance that can be used to allocate pages. The
    /// specified `device_name` must be unique.
    ///
    /// Users should create a new allocator for each device, as the device name
    /// is used to track allocations in the pool.
    pub fn allocator(&self, device_name: String) -> anyhow::Result<PagePoolAllocator> {
        PagePoolAllocator::new(&self.inner, device_name)
    }
}

/// A page allocator for memory.
///
/// Pages are allocated via the [`Self::alloc`] method and freed by dropping
/// the returned handle.
///
/// When an allocator is dropped, outstanding allocations for that device
/// are left as-is in the pool. A new allocator can then be created with the
/// same name; existing allocations with that same device_name will be
/// linked to the new allocator.
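///
/// An illustrative sketch of those drop semantics (device and tag names are
/// arbitrary):
///
/// ```ignore
/// let allocator = pool.allocator("example-nic".into())?;
/// let handle = allocator.alloc(8.try_into().unwrap(), "rx-ring".into())?;
///
/// // Dropping the allocator leaves the allocation in place...
/// drop(allocator);
///
/// // ...and a new allocator with the same name takes over the device id.
/// let allocator = pool.allocator("example-nic".into())?;
///
/// // Dropping the handle is what actually frees the pages.
/// drop(handle);
/// ```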
#[derive(Inspect)]
pub struct PagePoolAllocator {
    #[inspect(skip)]
    inner: Arc<PagePoolInner>,
    #[inspect(skip)]
    device_id: usize,
}

impl PagePoolAllocator {
    fn new(inner: &Arc<PagePoolInner>, device_name: String) -> anyhow::Result<Self> {
        let device_id;
        {
            let mut inner = inner.state.lock();

            let index = inner
                .device_ids
                .iter()
                .position(|id| id.name() == device_name);

            // Device ID must be unique, or be unassigned or pending a restore.
            match index {
                Some(index) => {
                    let entry = &mut inner.device_ids[index];

                    match entry {
                        DeviceId::Unassigned(_) => {
                            *entry = DeviceId::Used(device_name);
                            device_id = index;
                        }
                        DeviceId::Used(_) => {
                            anyhow::bail!("device name {device_name} already in use");
                        }
                    }
                }
                None => {
                    inner.device_ids.push(DeviceId::Used(device_name));
                    device_id = inner.device_ids.len() - 1;
                }
            }
        }

        Ok(Self {
            inner: inner.clone(),
            device_id,
        })
    }

    fn alloc_inner(&self, size_pages: NonZeroU64, tag: String) -> Result<PagePoolHandle, Error> {
        let mut inner = self.inner.state.lock();
        let size_pages = size_pages.get();

        let index = inner
            .slots
            .iter()
            .position(|slot| match slot.state {
                SlotState::Free => slot.size_pages >= size_pages,
                SlotState::Allocated { .. }
                | SlotState::AllocatedPendingRestore { .. }
                | SlotState::Leaked { .. } => false,
            })
            .ok_or(Error::PagePoolOutOfMemory {
                size: size_pages,
                tag: tag.clone(),
            })?;

        // Split the found free slot into an allocated slot and, if the slot is
        // larger than the request, a remaining free slot. Both are committed
        // back to the pool below.
        let (allocation_slot, free_slot) = {
            let slot = inner.slots.swap_remove(index);
            assert!(matches!(slot.state, SlotState::Free));

            let allocation_slot = Slot {
                base_pfn: slot.base_pfn,
                mapping_offset: slot.mapping_offset,
                size_pages,
                state: SlotState::Allocated {
                    device_id: self.device_id,
                    tag: tag.clone(),
                },
            };

            let free_slot = if slot.size_pages > size_pages {
                Some(Slot {
                    base_pfn: slot.base_pfn + size_pages,
                    mapping_offset: slot.mapping_offset + (size_pages * PAGE_SIZE) as usize,
                    size_pages: slot.size_pages - size_pages,
                    state: SlotState::Free,
                })
            } else {
                None
            };

            (allocation_slot, free_slot)
        };

        let base_pfn = allocation_slot.base_pfn;
        let mapping_offset = allocation_slot.mapping_offset;
        assert_eq!(mapping_offset % PAGE_SIZE as usize, 0);

        // Commit state to the pool.
        inner.slots.push(allocation_slot);
        if let Some(free_slot) = free_slot {
            inner.slots.push(free_slot);
        }

        Ok(PagePoolHandle {
            inner: self.inner.clone(),
            base_pfn,
            size_pages,
            mapping_offset,
        })
    }

    /// Allocate contiguous pages from the page pool with the given tag. If a
    /// contiguous region of free pages is not available, then an error is
    /// returned.
    pub fn alloc(&self, size_pages: NonZeroU64, tag: String) -> Result<PagePoolHandle, Error> {
        self.alloc_inner(size_pages, tag)
    }

    /// Restore an allocation that was previously allocated in the pool. The
    /// base_pfn, size_pages, and device must match.
    ///
    /// The returned handle exposes the restored pages via
    /// [`PagePoolHandle::mapping`].
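    ///
    /// A restore-side sketch (illustrative; `saved_pfn` and `saved_size` are
    /// assumed to come from the device's own saved state):
    ///
    /// ```ignore
    /// let handle = allocator.restore_alloc(saved_pfn, saved_size)?;
    /// assert_eq!(handle.base_pfn_without_bias(), saved_pfn);
    /// ```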
    pub fn restore_alloc(
        &self,
        base_pfn: u64,
        size_pages: NonZeroU64,
    ) -> Result<PagePoolHandle, Error> {
        let size_pages = size_pages.get();
        let mut inner = self.inner.state.lock();
        let inner = &mut *inner;
        let slot = inner
            .slots
            .iter_mut()
            .find(|slot| {
                if let SlotState::AllocatedPendingRestore { device_id, tag: _ } = &slot.state {
                    device_id == inner.device_ids[self.device_id].name()
                        && slot.base_pfn == base_pfn
                        && slot.size_pages == size_pages
                } else {
                    false
                }
            })
            .ok_or(Error::NoMatchingAllocation)?;

        slot.state.restore_allocated(self.device_id);
        assert_eq!(slot.mapping_offset % PAGE_SIZE as usize, 0);

        Ok(PagePoolHandle {
            inner: self.inner.clone(),
            base_pfn,
            size_pages,
            mapping_offset: slot.mapping_offset,
        })
    }

    /// Restore all allocations pending restore that belong to this allocator.
    pub fn restore_pending_allocs(&self) -> Vec<PagePoolHandle> {
        let mut inner = self.inner.state.lock();
        let inner = &mut *inner;
        let mut slots: Vec<&mut Slot> = inner
            .slots
            .iter_mut()
            .filter(|slot| {
                if let SlotState::AllocatedPendingRestore {
                    device_id: slot_device_id,
                    tag: _,
                } = &slot.state
                {
                    return inner.device_ids[self.device_id].name() == *slot_device_id;
                }
                false
            })
            .collect();

        slots
            .iter_mut()
            .map(|slot| {
                slot.state.restore_allocated(self.device_id);
                PagePoolHandle {
                    inner: self.inner.clone(),
                    base_pfn: slot.base_pfn,
                    size_pages: slot.size_pages,
                    mapping_offset: slot.mapping_offset,
                }
            })
            .collect()
    }
}

impl Drop for PagePoolAllocator {
    fn drop(&mut self) {
        let mut inner = self.inner.state.lock();
        let device_name = inner.device_ids[self.device_id].name().to_string();
        let prev = std::mem::replace(
            &mut inner.device_ids[self.device_id],
            DeviceId::Unassigned(device_name),
        );
        assert!(matches!(prev, DeviceId::Used(_)));
    }
}

impl user_driver::DmaClient for PagePoolAllocator {
    fn allocate_dma_buffer(&self, len: usize) -> anyhow::Result<user_driver::memory::MemoryBlock> {
        if len as u64 % PAGE_SIZE != 0 {
            anyhow::bail!("not a page-size multiple");
        }

        let size_pages = NonZeroU64::new(len as u64 / PAGE_SIZE)
            .context("allocation of size 0 not supported")?;

        let alloc = self
            .alloc(size_pages, "vfio dma".into())
            .context("failed to allocate shared mem")?;

        // The VfioDmaBuffer trait requires that newly allocated buffers are
        // zeroed.
        alloc.mapping().atomic_fill(0);
        alloc.into_memory_block()
    }

    fn attach_pending_buffers(&self) -> anyhow::Result<Vec<user_driver::memory::MemoryBlock>> {
        let allocs = self.restore_pending_allocs();

        allocs
            .into_iter()
            .map(|alloc| alloc.into_memory_block())
            .collect()
    }
}

#[cfg(test)]
mod test {
    use crate::PAGE_SIZE;
    use crate::PagePool;
    use crate::PoolSource;
    use crate::TestMapper;
    use inspect::Inspect;
    use memory_range::MemoryRange;
    use safeatomic::AtomicSliceOps;
    use sparse_mmap::MappableRef;
    use vmcore::save_restore::SaveRestore;

    #[derive(Inspect)]
    #[inspect(bound = "T: Inspect")]
    struct BiasedMapper<T> {
        mapper: T,
        bias: u64,
    }

    impl<T: PoolSource> BiasedMapper<T> {
        fn new(mapper: T, bias: u64) -> Self {
            Self { mapper, bias }
        }
    }

    impl<T: PoolSource> PoolSource for BiasedMapper<T> {
        fn address_bias(&self) -> u64 {
            self.bias.wrapping_add(self.mapper.address_bias())
        }

        fn file_offset(&self, address: u64) -> u64 {
            self.mapper.file_offset(address)
        }

        fn mappable(&self) -> MappableRef<'_> {
            self.mapper.mappable()
        }
    }

    fn big_test_mapper() -> TestMapper {
        TestMapper::new(1024 * 1024).unwrap()
    }

    #[test]
    fn test_basic_alloc() {
        let pfn_bias = 15;
        let pool = PagePool::new(
            &[MemoryRange::from_4k_gpn_range(10..30)],
            BiasedMapper::new(big_test_mapper(), pfn_bias * PAGE_SIZE),
        )
        .unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        assert_eq!(a1.base_pfn, 10);
        assert_eq!(a1.base_pfn(), a1.base_pfn + pfn_bias);
        assert_eq!(a1.base_pfn_without_bias(), a1.base_pfn);
        assert_eq!(a1.size_pages, 5);

        let a2 = alloc
            .alloc(15.try_into().unwrap(), "alloc2".into())
            .unwrap();
        assert_eq!(a2.base_pfn, 15);
        assert_eq!(a2.base_pfn(), a2.base_pfn + pfn_bias);
        assert_eq!(a2.base_pfn_without_bias(), a2.base_pfn);
        assert_eq!(a2.size_pages, 15);

        assert!(alloc.alloc(1.try_into().unwrap(), "failed".into()).is_err());

        drop(a1);
        drop(a2);

        let inner = alloc.inner.state.lock();
        assert_eq!(inner.slots.len(), 2);
    }

    #[test]
    fn test_duplicate_device_name() {
        let pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        let _alloc = pool.allocator("test".into()).unwrap();

        assert!(pool.allocator("test".into()).is_err());
    }

    #[test]
    fn test_dropping_allocator() {
        let pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..40)], big_test_mapper()).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();
        let _alloc2 = pool.allocator("test2".into()).unwrap();

        let _a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        let _a2 = alloc
            .alloc(15.try_into().unwrap(), "alloc2".into())
            .unwrap();

        drop(alloc);

        let alloc = pool.allocator("test".into()).unwrap();
        let _a3 = alloc.alloc(5.try_into().unwrap(), "alloc3".into()).unwrap();
    }

    #[test]
    fn test_save_restore() {
        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        let a1_pfn = a1.base_pfn();
        let a1_size = a1.size_pages;

        let a2 = alloc
            .alloc(15.try_into().unwrap(), "alloc2".into())
            .unwrap();
        let a2_pfn = a2.base_pfn();
        let a2_size = a2.size_pages;

        let state = pool.save().unwrap();

        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        pool.restore(state).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let restored_a1 = alloc
            .restore_alloc(a1_pfn, a1_size.try_into().unwrap())
            .unwrap();
        let restored_a2 = alloc
            .restore_alloc(a2_pfn, a2_size.try_into().unwrap())
            .unwrap();

        assert_eq!(restored_a1.base_pfn(), a1_pfn);
        assert_eq!(restored_a1.size_pages, a1_size);

        assert_eq!(restored_a2.base_pfn(), a2_pfn);
        assert_eq!(restored_a2.size_pages, a2_size);

        pool.validate_restore(false).unwrap();
    }

    #[test]
    fn test_save_restore_all_pending() {
        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        let a1_pfn = a1.base_pfn();
        let a1_size = a1.size_pages;

        let a2 = alloc
            .alloc(15.try_into().unwrap(), "alloc2".into())
            .unwrap();
        let a2_pfn = a2.base_pfn();
        let a2_size = a2.size_pages;

        let state = pool.save().unwrap();

        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        pool.restore(state).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let restored = alloc.restore_pending_allocs();
        assert!(
            restored
                .iter()
                .any(|a| a.base_pfn == a1_pfn && a.size_pages == a1_size),
            "matching allocation not found for a1"
        );

        assert!(
            restored
                .iter()
                .any(|a| a.base_pfn == a2_pfn && a.size_pages == a2_size),
            "matching allocation not found for a2"
        );

        pool.validate_restore(false).unwrap();
    }

    #[test]
    fn test_save_restore_all_pending_other_allocator() {
        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        let a1_pfn = a1.base_pfn();
        let a1_size = a1.size_pages;

        let a2 = alloc
            .alloc(15.try_into().unwrap(), "alloc2".into())
            .unwrap();
        let a2_pfn = a2.base_pfn();
        let a2_size = a2.size_pages;

        let state = pool.save().unwrap();

        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();
        pool.restore(state).unwrap();
        let alloc = pool.allocator("test2".into()).unwrap();

        let restored = alloc.restore_pending_allocs();
        assert_eq!(
            restored
                .iter()
                .any(|a| a.base_pfn == a1_pfn && a.size_pages == a1_size),
            false,
            "matching allocation found for a1 for wrong allocator"
        );

        assert_eq!(
            restored
                .iter()
                .any(|a| a.base_pfn == a2_pfn && a.size_pages == a2_size),
            false,
            "matching allocation found for a2 for wrong allocator"
        );

        assert!(pool.validate_restore(false).is_err());
    }

    #[test]
    fn test_save_restore_unmatched_allocations() {
        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();

        let alloc = pool.allocator("test".into()).unwrap();
        let _a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();

        let state = pool.save().unwrap();

        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();

        pool.restore(state).unwrap();

        assert!(pool.validate_restore(false).is_err());
    }

    #[test]
    fn test_restore_other_allocator() {
        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();

        let alloc = pool.allocator("test".into()).unwrap();
        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();

        let state = pool.save().unwrap();

        let mut pool =
            PagePool::new(&[MemoryRange::from_4k_gpn_range(10..30)], big_test_mapper()).unwrap();

        pool.restore(state).unwrap();

        let alloc = pool.allocator("test2".into()).unwrap();
        assert!(
            alloc
                .restore_alloc(a1.base_pfn, a1.size_pages.try_into().unwrap())
                .is_err()
        );
    }

    #[test]
    fn test_mapping() {
        let pool = PagePool::new(
            &[MemoryRange::from_4k_gpn_range(0..30)],
            TestMapper::new(30).unwrap(),
        )
        .unwrap();
        let alloc = pool.allocator("test".into()).unwrap();

        let a1 = alloc.alloc(5.try_into().unwrap(), "alloc1".into()).unwrap();
        let a1_mapping = a1.mapping();
        assert_eq!(a1_mapping.len(), 5 * PAGE_SIZE as usize);
        a1_mapping[123..][..4].atomic_write(&[1, 2, 3, 4]);
        let mut data = [0; 4];
        a1_mapping[123..][..4].atomic_read(&mut data);
        assert_eq!(data, [1, 2, 3, 4]);
        let mut data = [0; 2];
        a1_mapping[125..][..2].atomic_read(&mut data);
        assert_eq!(data, [3, 4]);
    }
}