underhill_mem/registrar.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Code to register lower VTL memory with the kernel as needed.
//!
//! For many kernel operations that operate on memory, such as passing a buffer
//! to a device for DMA, the kernel requires that it has allocated a
//! `struct page` object for each page being accessed. Thanks to some
//! optimizations for large memory allocations, the space overhead of this for
//! guest memory is not too large, but the initialization time overhead can be
//! significant for large VMs.
//!
//! To avoid this overhead, we only register memory with the kernel as needed,
//! when a VA might leak out of a `GuestMemory` object and possibly be passed to
//! a kernel routine.
//!
//! This is done by registering memory in 2GB chunks, which is large enough to
//! get large pages in the kernel, but small enough to keep the overhead of the
//! initial registration for a chunk small. We track whether a given chunk has
//! been registered via a small bitmap.
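//!
//! A minimal usage sketch (the `layout` value and the `register_with_kernel`
//! callback below are hypothetical placeholders, not part of this module):
//!
//! ```ignore
//! let registrar = MemoryRegistrar::new(&layout, 0, |range: MemoryRange| {
//!     // Hypothetical hook that asks the kernel to register `range`.
//!     register_with_kernel(range)
//! });
//! // Ensure the 2GB chunk(s) covering a guest range are registered before a
//! // VA from it can be handed to a kernel routine.
//! registrar.register(0x10000, 0x1000).unwrap();
//! ```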

use cvm_tracing::CVM_ALLOWED;
use inspect::Inspect;
use memory_range::MemoryRange;
use memory_range::overlapping_ranges;
use parking_lot::Mutex;
use std::ops::Range;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering::Acquire;
use std::sync::atomic::Ordering::Release;
use vm_topology::memory::MemoryLayout;

const PAGE_SIZE: u64 = guestmem::PAGE_SIZE as u64;

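/// Registers chunks of lower VTL memory with the kernel on demand, tracking
/// which chunks have already been registered.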
#[derive(Debug)]
pub struct MemoryRegistrar<T> {
    registered: Bitmap,
    chunk_count: u64,
    state: Mutex<RegistrarState>,
    register: T,
    ram: Vec<MemoryRange>,
    registration_offset: u64,
}

impl<T> Inspect for MemoryRegistrar<T> {
    fn inspect(&self, req: inspect::Request<'_>) {
        req.respond()
            .field_with("chunks_registered", || {
                (0..self.chunk_count)
                    .filter(|&chunk| self.registered.get(chunk))
                    .count()
            })
            .field("chunk_count", self.chunk_count)
            .hex("registration_offset", self.registration_offset);
    }
}

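/// Mutex-protected registration state: the chunks whose registration attempt
/// failed, so they are not retried.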
#[derive(Debug)]
struct RegistrarState {
    failed: Bitmap,
}

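/// A bitmap with one bit per `GRANULARITY`-sized chunk of address space,
/// stored as 64-bit words. The `get`/`set` accessors use atomics so the bitmap
/// can be queried and updated concurrently; the `*_mut` variants rely on
/// exclusive access instead.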
#[derive(Debug)]
struct Bitmap(Vec<AtomicU64>);

impl Bitmap {
    fn new(address_space_size: u64) -> Self {
        let chunks = address_space_size.div_ceil(GRANULARITY);
        let words = chunks.div_ceil(64);
        let mut v = Vec::new();
        v.resize_with(words as usize, AtomicU64::default);
        Self(v)
    }

    fn get(&self, chunk: u64) -> bool {
        self.0[chunk as usize / 64].load(Acquire) & (1 << (chunk % 64)) != 0
    }

    fn get_mut(&mut self, chunk: u64) -> bool {
        *self.0[chunk as usize / 64].get_mut() & (1 << (chunk % 64)) != 0
    }

    fn set(&self, chunk: u64, value: bool) {
        if value {
            self.0[chunk as usize / 64].fetch_or(1 << (chunk % 64), Release);
        } else {
            self.0[chunk as usize / 64].fetch_and(!(1 << (chunk % 64)), Release);
        }
    }

    fn set_mut(&mut self, chunk: u64, value: bool) {
        if value {
            *self.0[chunk as usize / 64].get_mut() |= 1 << (chunk % 64);
        } else {
            *self.0[chunk as usize / 64].get_mut() &= !(1 << (chunk % 64));
        }
    }
}

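/// Callback used to register a range of memory with the kernel.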
pub trait RegisterMemory {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error>;
}

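// Allow any compatible closure or function to be used as the registration
// callback.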
impl<T: Fn(MemoryRange) -> Result<(), E>, E: 'static + std::error::Error> RegisterMemory for T {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
        (self)(range)
    }
}

/// Register in 2GB chunks.
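///
/// This keeps the tracking bitmaps small: for example, a 1TiB address space is
/// 512 chunks, i.e. eight `u64` words per bitmap.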
const GRANULARITY: u64 = 2 << 30;

impl<T: RegisterMemory> MemoryRegistrar<T> {
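    /// Creates a registrar for the RAM ranges described by `layout`. Ranges
    /// are offset by `registration_offset` before being passed to the
    /// `register` callback.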
    pub fn new(layout: &MemoryLayout, registration_offset: u64, register: T) -> Self {
        let address_space_size = layout.ram().last().unwrap().range.end();

        Self {
            chunk_count: address_space_size.div_ceil(GRANULARITY),
            registered: Bitmap::new(address_space_size),
            state: Mutex::new(RegistrarState {
                failed: Bitmap::new(address_space_size),
            }),
            register,
            ram: layout.ram().iter().map(|r| r.range).collect(),
            registration_offset,
        }
    }

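    /// Returns the range of chunk indices covering `range`; for example, a
    /// range spanning 1GB..3GB covers chunks `0..2`.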
    fn chunks(range: MemoryRange) -> Range<u64> {
        let start = range.start() / GRANULARITY;
        let end = range.end().div_ceil(GRANULARITY);
        start..end
    }

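    /// Ensures that every chunk overlapping `address..address + len` has been
    /// registered with the kernel, invoking the registration callback for any
    /// chunk not yet registered. On failure, returns an address identifying
    /// the registration that failed.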
    pub fn register(&self, address: u64, len: u64) -> Result<(), u64> {
        // Page align the requested range.
        let requested_range = MemoryRange::new(
            address & !(PAGE_SIZE - 1)..(address + len + (PAGE_SIZE - 1)) & !(PAGE_SIZE - 1),
        );

        // Check if the range is already registered.
        'check_registered: {
            for chunk in Self::chunks(requested_range) {
                if !self.registered.get(chunk) {
                    break 'check_registered;
                }
            }
            return Ok(());
        }

        // Register each chunk one at a time. We don't typically lock lots of
        // memory at a time, so in practice there should only be one chunk
        // anyway.
        let mut state = self.state.lock();
        for chunk in Self::chunks(requested_range) {
            if self.registered.get(chunk) {
                continue;
            }
            if state.failed.get_mut(chunk) {
                return Err(chunk * GRANULARITY);
            }
            // Register the full chunk, bounded by the RAM regions. This could
            // be more efficient, but again, we expect there to only be one
            // chunk in practice.
            let full_range = MemoryRange::new(chunk * GRANULARITY..(chunk + 1) * GRANULARITY);
            for range in overlapping_ranges([full_range], self.ram.iter().copied()) {
                let range = MemoryRange::new(
                    self.registration_offset + range.start()
                        ..self.registration_offset + range.end(),
                );
                tracing::info!(CVM_ALLOWED, %range, "registering memory");
                if let Err(err) = self.register.register_range(range) {
                    tracing::error!(CVM_ALLOWED,
                        %range,
                        registration_offset = self.registration_offset,
                        error = &err as &dyn std::error::Error,
                        "failed to register memory"
                    );
                    state.failed.set_mut(chunk, true);
                    return Err(range.start());
                }
            }
            self.registered.set(chunk, true);
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::MemoryRegistrar;
    use crate::registrar::GRANULARITY;
    use memory_range::MemoryRange;
    use std::cell::RefCell;
    use std::convert::Infallible;
    use vm_topology::memory::MemoryLayout;

    #[test]
    fn test_registrar() {
        let layout = MemoryLayout::new(
            1 << 40,
            &[
                MemoryRange::new(0x10000..0x20000),
                MemoryRange::new(1 << 40..2 << 40),
            ],
            None,
        )
        .unwrap();

        let offset = 1 << 50;
        let ranges = RefCell::new(Vec::new());
        let registrar = MemoryRegistrar::new(&layout, offset, |range| {
            println!("registering {:#x?}", range);
            ranges.borrow_mut().push(range);
            Ok::<_, Infallible>(())
        });

        for range in [
            0x1000..0x8000,
            0x20000..0x30000,
            0x100000..0x200000,
            1u64 << 33..(1u64 << 35) + 1,
        ] {
            registrar
                .register(range.start, range.end - range.start)
                .unwrap();
        }

        let mut expected = vec![
            MemoryRange::new(offset..offset | 0x10000),
            MemoryRange::new(offset | 0x20000..offset | GRANULARITY),
        ];
        expected.extend(
            (1 << 33..(1 << 35) + GRANULARITY)
                .step_by(GRANULARITY as usize)
                .map(|start| MemoryRange::new(offset | start..offset | (start + GRANULARITY))),
        );

        let ranges = ranges.take();
        assert_eq!(
            ranges.as_slice(),
            expected.as_slice(),
            "ranges: {}\n\nexpected: {}",
            ranges
                .iter()
                .map(|r| r.to_string())
                .collect::<Vec<_>>()
                .join("\n"),
            expected
                .iter()
                .map(|r| r.to_string())
                .collect::<Vec<_>>()
                .join("\n")
        );
    }
}