membacking/mapping_manager/
va_mapper.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implements the VA mapper, which maintains a linear virtual address space for
5//! all memory mapped into a partition.
6//!
7//! The VA mapper sends messages to the mapping manager to request mappings for
8//! specific address ranges, on demand. The mapping manager later sends
9//! invalidation requests back when tearing down mappings, e.g. when some device
10//! memory is unmapped from the partition.
11//!
12//! This lazy approach is taken to avoid having to keep each VA mapper
13//! up-to-date with all mappings at all times.
14//!
15//! TODO: This is a bit dubious because the backing hypervisor will not
16//! necessarily propagate a page fault. E.g., KVM will just fail the VP. So at
17//! least for the mapper used by the partition itself, this optimization
18//! probably needs to be removed and replaced with a guarantee that replacement
19//! mappings are established immediately (and atomically?) instead of just by
20//! invalidating the existing mappings.
21
22// UNSAFETY: Implementing the unsafe GuestMemoryAccess trait by calling unsafe
23// low level memory manipulation functions.
24#![expect(unsafe_code)]
25
26use super::manager::MapperId;
27use super::manager::MapperRequest;
28use super::manager::MappingParams;
29use super::manager::MappingRequest;
30use crate::RemoteProcess;
31use futures::executor::block_on;
32use guestmem::GuestMemoryAccess;
33use guestmem::PageFaultAction;
34use guestmem::PageFaultError;
35use memory_range::MemoryRange;
36use mesh::rpc::RpcError;
37use mesh::rpc::RpcSend;
38use parking_lot::Mutex;
39use sparse_mmap::SparseMapping;
40use std::ptr::NonNull;
41use std::sync::Arc;
42use std::thread::JoinHandle;
43use thiserror::Error;
44
45pub struct VaMapper {
46    inner: Arc<MapperInner>,
47    process: Option<RemoteProcess>,
48    _thread: JoinHandle<()>,
49}
50
51impl std::fmt::Debug for VaMapper {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        f.debug_struct("VaMapper")
54            .field("inner", &self.inner)
55            .field("_thread", &self._thread)
56            .finish()
57    }
58}
59
60#[derive(Debug)]
61struct MapperInner {
62    mapping: SparseMapping,
63    waiters: Mutex<Option<Vec<MapWaiter>>>,
64    req_send: mesh::Sender<MappingRequest>,
65    id: MapperId,
66}
67
68#[derive(Debug)]
69struct MapWaiter {
70    range: MemoryRange,
71    writable: bool,
72    done: mesh::OneshotSender<bool>,
73}
74
75impl MapWaiter {
76    fn complete(&mut self, range: MemoryRange, writable: Option<bool>) -> Option<bool> {
77        if range.contains_addr(self.range.start()) {
78            if writable.is_none() || (self.writable && writable == Some(false)) {
79                return Some(false);
80            }
81            let new_start = self.range.end().min(range.end());
82            let remaining = MemoryRange::new(new_start..self.range.end());
83            if remaining.is_empty() {
84                return Some(true);
85            }
86            tracing::debug!(%remaining, "waiting for more");
87            self.range = remaining;
88        }
89        None
90    }
91}
92
93struct MapperTask {
94    inner: Arc<MapperInner>,
95}
96
97impl MapperTask {
98    async fn run(mut self, mut req_recv: mesh::Receiver<MapperRequest>) {
99        while let Ok(req) = req_recv.recv().await {
100            match req {
101                MapperRequest::Unmap(rpc) => rpc.handle_sync(|range| {
102                    tracing::debug!(%range, "invalidate received");
103                    self.inner
104                        .mapping
105                        .unmap(range.start() as usize, range.len() as usize)
106                        .expect("invalidate request should be valid");
107                }),
108                MapperRequest::Map(MappingParams {
109                    range,
110                    mappable,
111                    writable,
112                    file_offset,
113                }) => {
114                    tracing::debug!(%range, "mapping received for range");
115
116                    self.inner
117                        .mapping
118                        .map_file(
119                            range.start() as usize,
120                            range.len() as usize,
121                            &mappable,
122                            file_offset,
123                            writable,
124                        )
125                        .expect("oom mapping file");
126
127                    self.wake_waiters(range, Some(writable));
128                }
129                MapperRequest::NoMapping(range) => {
130                    // Wake up waiters. They'll see a failure when they try to
131                    // access the VA.
132                    tracing::debug!(%range, "no mapping received for range");
133                    self.wake_waiters(range, None);
134                }
135            }
136        }
137        // Don't allow more waiters.
138        *self.inner.waiters.lock() = None;
139        // Invalidate everything.
140        let _ = self.inner.mapping.unmap(0, self.inner.mapping.len());
141    }
142
143    fn wake_waiters(&mut self, range: MemoryRange, writable: Option<bool>) {
144        let mut waiters = self.inner.waiters.lock();
145        let waiters = waiters.as_mut().unwrap();
146
147        let mut i = 0;
148        while i < waiters.len() {
149            if let Some(success) = waiters[i].complete(range, writable) {
150                waiters.swap_remove(i).done.send(success);
151            } else {
152                i += 1;
153            }
154        }
155    }
156}
157
158#[derive(Debug, Error)]
159pub enum VaMapperError {
160    #[error("failed to communicate with the memory manager")]
161    MemoryManagerGone(#[source] RpcError),
162    #[error("failed to reserve address space")]
163    Reserve(#[source] std::io::Error),
164}
165
166#[derive(Debug, Error)]
167#[error("no mapping for {0}")]
168pub struct NoMapping(MemoryRange);
169
170impl MapperInner {
171    async fn request_mapping(&self, range: MemoryRange, writable: bool) -> Result<(), NoMapping> {
172        let (send, recv) = mesh::oneshot();
173        self.waiters
174            .lock()
175            .as_mut()
176            .ok_or(NoMapping(range))?
177            .push(MapWaiter {
178                range,
179                writable,
180                done: send,
181            });
182
183        tracing::debug!(%range, "waiting for mappings");
184        self.req_send
185            .send(MappingRequest::SendMappings(self.id, range));
186        match recv.await {
187            Ok(true) => Ok(()),
188            Ok(false) | Err(_) => Err(NoMapping(range)),
189        }
190    }
191}
192
193impl VaMapper {
194    pub(crate) async fn new(
195        req_send: mesh::Sender<MappingRequest>,
196        len: u64,
197        remote_process: Option<RemoteProcess>,
198    ) -> Result<Self, VaMapperError> {
199        let mapping = match &remote_process {
200            None => SparseMapping::new(len as usize),
201            Some(process) => match process {
202                #[cfg(not(windows))]
203                _ => unreachable!(),
204                #[cfg(windows)]
205                process => SparseMapping::new_remote(
206                    process.as_handle().try_clone_to_owned().unwrap().into(),
207                    None,
208                    len as usize,
209                ),
210            },
211        }
212        .map_err(VaMapperError::Reserve)?;
213
214        let (send, req_recv) = mesh::channel();
215        let id = req_send
216            .call(MappingRequest::AddMapper, send)
217            .await
218            .map_err(VaMapperError::MemoryManagerGone)?;
219
220        let inner = Arc::new(MapperInner {
221            mapping,
222            waiters: Mutex::new(Some(Vec::new())),
223            req_send,
224            id,
225        });
226
227        // FUTURE: use a task once we resolve the block_ons in the
228        // GuestMemoryAccess implementation.
229        let thread = std::thread::Builder::new()
230            .name("mapper".to_owned())
231            .spawn({
232                let runner = MapperTask {
233                    inner: inner.clone(),
234                };
235                || block_on(runner.run(req_recv))
236            })
237            .unwrap();
238
239        Ok(VaMapper {
240            inner,
241            process: remote_process,
242            _thread: thread,
243        })
244    }
245
246    /// Ensures a mapping has been established for the given range.
247    pub async fn ensure_mapped(&self, range: MemoryRange) -> Result<(), NoMapping> {
248        self.inner.request_mapping(range, false).await
249    }
250
251    pub fn as_ptr(&self) -> *mut u8 {
252        self.inner.mapping.as_ptr().cast()
253    }
254
255    pub fn len(&self) -> usize {
256        self.inner.mapping.len()
257    }
258
259    pub fn process(&self) -> Option<&RemoteProcess> {
260        self.process.as_ref()
261    }
262}
263
264/// SAFETY: the underlying VA mapping is guaranteed to be valid for the lifetime
265/// of this object.
266unsafe impl GuestMemoryAccess for VaMapper {
267    fn mapping(&self) -> Option<NonNull<u8>> {
268        // No one should be using this as a GuestMemoryAccess for remote
269        // mappings, but it's convenient to have the same type for both local
270        // and remote mappings for the sake of simplicity in
271        // `PartitionRegionMapper`.
272        assert!(self.inner.mapping.is_local());
273
274        NonNull::new(self.inner.mapping.as_ptr().cast())
275    }
276
277    fn max_address(&self) -> u64 {
278        self.inner.mapping.len() as u64
279    }
280
281    fn page_fault(
282        &self,
283        address: u64,
284        len: usize,
285        write: bool,
286        bitmap_failure: bool,
287    ) -> PageFaultAction {
288        assert!(!bitmap_failure, "bitmaps are not used");
289        // `block_on` is OK to call here (will not deadlock) because this is
290        // never called from the page fault handler thread or any threads it
291        // depends on.
292        //
293        // Removing this `block_on` would make all guest memory access `async`,
294        // which would be a difficult change.
295        if let Err(err) = block_on(
296            self.inner
297                .request_mapping(MemoryRange::bounding(address..address + len as u64), write),
298        ) {
299            return PageFaultAction::Fail(PageFaultError::new(
300                guestmem::GuestMemoryErrorKind::OutOfRange,
301                err,
302            ));
303        }
304        PageFaultAction::Retry
305    }
306}