sparse_mmap/unix.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Linux implementation for memory mapping abstractions.

#![cfg(unix)]

use pal::unix::SyscallResult;
use std::ffi::c_void;
use std::fs::File;
use std::io;
use std::io::Error;
use std::os::unix::prelude::*;
use std::ptr::null_mut;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;

pub(crate) fn page_size() -> usize {
    static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);
    let s = PAGE_SIZE.load(Ordering::Relaxed);
    if s != 0 {
        s
    } else {
        // SAFETY: sysconf has no soundness preconditions.
        let s = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
        PAGE_SIZE.store(s, Ordering::Relaxed);
        s
    }
}

/// A reserved virtual address range that may be partially populated with memory
/// mappings.
#[derive(Debug)]
pub struct SparseMapping {
    address: *mut c_void,
    len: usize,
}

/// An owned handle to an OS object that can be mapped into a [`SparseMapping`].
///
/// On Windows, this is a section handle. On Linux, it is a file descriptor.
pub type Mappable = OwnedFd;

/// An object that can be mapped into a `SparseMapping`.
///
/// On Windows, this is a section handle. On Linux, it is a file descriptor.
pub use std::os::unix::io::AsFd as AsMappableRef;

/// A reference to an object that can be mapped into a [`SparseMapping`].
///
/// On Windows, this is a section handle. On Linux, it is a file descriptor.
pub type MappableRef<'a> = BorrowedFd<'a>;

/// Creates a new mappable from a file.
///
/// N.B. `writable` and `executable` have no effect on Linux.
pub fn new_mappable_from_file(
    file: &File,
    _writable: bool,
    _executable: bool,
) -> io::Result<Mappable> {
    file.as_fd().try_clone_to_owned()
}
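// Usage sketch (illustrative, not part of the original source; the path is a
// placeholder): duplicate a file's descriptor so it can later be handed to
// `SparseMapping::map_file`.
//
//     let file = std::fs::File::open("/path/to/backing/file")?;
//     let mappable = new_mappable_from_file(&file, false, false)?;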

// SAFETY: SparseMapping's internal pointer represents an owned virtual address
// range. There is no safety issue accessing this pointer across threads.
unsafe impl Send for SparseMapping {}
// SAFETY: See above comment
unsafe impl Sync for SparseMapping {}

// Thin wrapper around mmap(2) that converts failures into `io::Error`. The
// caller must uphold the usual mmap contract for the given flags and range.
unsafe fn mmap(
    addr: *mut c_void,
    len: usize,
    prot: i32,
    flags: i32,
    fd: i32,
    offset: i64,
) -> Result<*mut c_void, Error> {
    let address = unsafe { libc::mmap(addr, len, prot, flags, fd, offset) };
    if address == libc::MAP_FAILED {
        return Err(Error::last_os_error());
    }
    Ok(address)
}

// Thin wrapper around munmap(2) that converts failures into `io::Error`.
unsafe fn munmap(addr: *mut c_void, len: usize) -> Result<(), Error> {
    if unsafe { libc::munmap(addr, len) } < 0 {
        return Err(Error::last_os_error());
    }
    Ok(())
}

impl SparseMapping {
    /// Reserves a sparse mapping range with the given size.
    ///
    /// The range will be aligned to the largest system page size that's smaller
    /// than or equal to `len`.
    pub fn new(len: usize) -> Result<Self, Error> {
        super::initialize_try_copy();

        // A length of 0 returns an OS error, so we need to handle it explicitly.
        if len == 0 {
            return Err(Error::new(
                io::ErrorKind::InvalidInput,
                "length must be greater than 0",
            ));
        }

        let size_4k = 4096;
        let size_2m = 0x200000;
        let size_1g = 0x40000000;
        let alignment = if len < size_2m {
            size_4k
        } else if len < size_1g {
            size_2m
        } else {
            size_1g
        };

        // Round the length up to the chosen alignment.
        let len = len
            .checked_add(alignment - 1)
            .map(|temp| temp & !(alignment - 1))
            .ok_or_else(|| {
                Error::new(
                    io::ErrorKind::InvalidInput,
                    "length and alignment combination causes overflow",
                )
            })?;

        // Over-allocate so that an aligned sub-range of `len` bytes is
        // guaranteed to fit; mmap itself only guarantees page alignment.
        let alloc_len = len
            .checked_add(alignment)
            .map(|temp| temp - size_4k)
            .ok_or_else(|| {
                Error::new(
                    io::ErrorKind::InvalidInput,
                    "length and alignment combination causes overflow",
                )
            })?;

        // SAFETY: calling mmap to allocate a new range.
        let address = unsafe {
            mmap(
                null_mut(),
                alloc_len,
                libc::PROT_NONE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            )? as usize
        };
        let aligned_address = (address + alignment - 1) & !(alignment - 1);
        let end = address + alloc_len;
        let aligned_end = aligned_address + len;
        assert!(aligned_end <= end);

        // Trim the unaligned head and tail of the allocation.
        if address != aligned_address {
            // SAFETY: freeing VA just allocated above.
            unsafe { munmap(address as *mut _, aligned_address - address).unwrap() };
        }
        if aligned_end != end {
            // SAFETY: freeing VA just allocated above.
            unsafe { munmap(aligned_end as *mut _, end - aligned_end).unwrap() };
        }
        Ok(Self {
            address: aligned_address as *mut _,
            len,
        })
    }
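    // Worked example of the rounding above (illustrative, not from the
    // original source): for len = 0x280000 (2.5 MiB), the alignment is 2 MiB,
    // so len rounds up to 0x400000 and alloc_len = 0x400000 + 0x200000 - 0x1000.
    // mmap returns an address that is only guaranteed to be 4 KiB aligned;
    // trimming the unaligned head and tail leaves a 2 MiB-aligned,
    // 0x400000-byte reservation.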

    /// Returns true if the mapping is local to the current process.
    pub fn is_local(&self) -> bool {
        true
    }

    /// Returns the pointer to the beginning of the sparse mapping.
    pub fn as_ptr(&self) -> *mut c_void {
        self.address
    }

    /// Returns the length of the mapping, in bytes.
    pub fn len(&self) -> usize {
        self.len
    }

    fn validate_offset_len(&self, offset: usize, len: usize) -> io::Result<usize> {
        let end = offset.checked_add(len).ok_or(io::ErrorKind::InvalidInput)?;
        let page_size = page_size();
        if offset % page_size != 0 || end % page_size != 0 || end > self.len {
            return Err(io::ErrorKind::InvalidInput.into());
        }
        Ok(end)
    }

    /// Allocates private, writable memory at the given offset within the mapping.
    pub fn alloc(&self, offset: usize, len: usize) -> Result<(), Error> {
        // SAFETY: The flags passed in are guaranteed to be valid.
        unsafe {
            self.mmap_anonymous(
                offset,
                len,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE,
            )
        }
    }

    /// Maps read-only zero pages at the given offset within the mapping.
    pub fn map_zero(&self, offset: usize, len: usize) -> Result<(), Error> {
        // SAFETY: The flags passed in are guaranteed to be valid.
        unsafe { self.mmap_anonymous(offset, len, libc::PROT_READ, libc::MAP_PRIVATE) }
    }

    /// Updates the protection flags of the mapping at the given offset and length
    /// to allow or disallow writes.
    pub fn set_writable(&self, offset: usize, len: usize, allow_writes: bool) -> Result<(), Error> {
        let prot = if allow_writes {
            libc::PROT_READ | libc::PROT_WRITE
        } else {
            libc::PROT_READ
        };
        self.mprotect(offset, len, prot)
    }

    /// Calls `mprotect` on the mapping at the given offset and length, changing
    /// the protection flags to `prot`.
    fn mprotect(&self, offset: usize, len: usize, prot: i32) -> Result<(), Error> {
        self.validate_offset_len(offset, len)?;
        if prot & !(libc::PROT_READ | libc::PROT_WRITE) != 0 {
            return Err(Error::new(
                io::ErrorKind::InvalidInput,
                "unsupported protection flags",
            ));
        }
        // SAFETY: The flags and address passed in are guaranteed to be valid.
        unsafe {
            if libc::mprotect(self.address.add(offset), len, prot) < 0 {
                return Err(Error::last_os_error());
            }
        }
        Ok(())
    }

    /// Maps a portion of a file mapping at `offset`.
    pub fn map_file(
        &self,
        offset: usize,
        len: usize,
        file_mapping: impl AsFd,
        file_offset: u64,
        writable: bool,
    ) -> Result<(), Error> {
        let prot = if writable {
            libc::PROT_READ | libc::PROT_WRITE
        } else {
            libc::PROT_READ
        };

        // SAFETY: The flags passed in are guaranteed to be valid. MAP_SHARED is required.
        unsafe {
            self.mmap(
                offset,
                len,
                prot,
                libc::MAP_SHARED,
                file_mapping.as_fd(),
                file_offset as i64,
            )
        }
    }
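    // Usage sketch (illustrative, not part of the original source): map a
    // shared memory object allocated with `alloc_shared_memory` into a
    // reservation, writable.
    //
    //     let shmem = alloc_shared_memory(0x2000)?;
    //     let mapping = SparseMapping::new(0x2000)?;
    //     mapping.map_file(0, 0x2000, &shmem, 0, true)?;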

    /// Maps memory into the mapping, passing parameters through to the mmap
    /// syscall.
    ///
    /// # Safety
    ///
    /// This routine is safe to use as long as the caller ensures `map_flags` excludes
    /// any flags that render the memory region non-unmappable (e.g., `MAP_LOCKED`).
    /// Misuse may lead to system resource issues, such as falsely perceived out-of-memory
    /// conditions.
    pub unsafe fn mmap(
        &self,
        offset: usize,
        len: usize,
        prot: i32,
        map_flags: i32,
        fd: impl AsFd,
        file_offset: i64,
    ) -> Result<(), Error> {
        let _ = self.validate_offset_len(offset, len)?;

        // SAFETY: guaranteed by caller and offset + len checks above
        unsafe {
            let address = self.address.add(offset);
            let mapped_address = mmap(
                address,
                len,
                prot,
                map_flags | libc::MAP_FIXED,
                fd.as_fd().as_raw_fd(),
                file_offset,
            )?;
            assert_eq!(mapped_address, address);
        }
        Ok(())
    }

    /// Maps anonymous memory into the mapping, with parameters for the mmap syscall.
    ///
    /// # Safety
    ///
    /// This routine is safe to use as long as the caller ensures `map_flags` excludes
    /// any flags that render the memory region non-unmappable (e.g., `MAP_LOCKED`).
    /// Misuse may lead to system resource issues, such as falsely perceived out-of-memory
    /// conditions.
    pub unsafe fn mmap_anonymous(
        &self,
        offset: usize,
        len: usize,
        prot: i32,
        map_flags: i32,
    ) -> io::Result<()> {
        let _ = self.validate_offset_len(offset, len)?;

        // SAFETY: guaranteed by caller and offset + len checks above
        unsafe {
            let address = self.address.add(offset);
            let mapped_address = mmap(
                address,
                len,
                prot,
                map_flags | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
                -1,
                0,
            )?;
            assert_eq!(mapped_address, address);
        }
        Ok(())
    }

    /// Unmaps memory from the mapping.
    pub fn unmap(&self, offset: usize, len: usize) -> io::Result<()> {
        let _ = self.validate_offset_len(offset, len)?;

        // Reject zero-length requests explicitly; otherwise the mmap below
        // would fail and trip the `expect`.
        if len == 0 {
            return Err(io::ErrorKind::InvalidInput.into());
        }

        // Remap to PROT_NONE to preserve the reservation.
        // SAFETY: guaranteed by caller and offset + len checks above
        unsafe {
            let address = self.address.add(offset);
            let mapped_address = mmap(
                address,
                len,
                libc::PROT_NONE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
                -1,
                0,
            )
            .expect("remap to PROT_NONE should not fail (except for low resources)");
            assert_eq!(mapped_address, address);
        }
        Ok(())
    }
}

impl Drop for SparseMapping {
    fn drop(&mut self) {
        // SAFETY: unmapping the reservation owned by this mapping.
        unsafe {
            libc::munmap(self.address, self.len)
                .syscall_result()
                .expect("unmap should not fail");
        }
    }
}

#[cfg(target_os = "linux")]
fn new_memfd() -> io::Result<File> {
    // SAFETY: creating and truncating a new file descriptor according to
    // the documented contract.
    unsafe {
        let fd = libc::memfd_create(c"mem".as_ptr(), libc::MFD_CLOEXEC).syscall_result()?;
        Ok(File::from_raw_fd(fd))
    }
}

#[cfg(not(target_os = "linux"))]
fn new_memfd() -> io::Result<File> {
    let mut name = [0; 16];
    getrandom::fill(&mut name).unwrap();
    let mut name = format!("{:x}", u128::from_ne_bytes(name));
    // macOS limits the name length to 31 bytes, which is sufficient to ensure uniqueness.
    name.truncate(31);
    let name = std::ffi::CString::new(name).unwrap();
    // SAFETY: creating a new shared memory object with a freshly generated,
    // NUL-terminated name.
    unsafe {
        // Create a new shared memory object with owner-only permissions (the
        // mode argument is required when O_CREAT is specified).
        let fd = libc::shm_open(
            name.as_ptr(),
            libc::O_RDWR | libc::O_EXCL | libc::O_CREAT,
            0o600,
        )
        .syscall_result()?;
        // Unlink it to make it anonymous.
        let _ = libc::shm_unlink(name.as_ptr());
        Ok(File::from_raw_fd(fd))
    }
}

/// Allocates a mappable shared memory object of `size` bytes.
pub fn alloc_shared_memory(size: usize) -> io::Result<OwnedFd> {
    let fd = new_memfd()?;
    fd.set_len(size as u64)?;
    Ok(fd.into())
}
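
// Illustrative sketch, not part of the original source: a minimal test
// exercising the reserve/commit/unmap lifecycle. The module and test names
// and the constants are arbitrary.
#[cfg(test)]
mod example_usage {
    use super::SparseMapping;

    #[test]
    fn reserve_commit_unmap() {
        let page = super::page_size();
        // Reserve a 1 MiB range; nothing is committed yet.
        let mapping = SparseMapping::new(0x100000).unwrap();
        assert!(mapping.len() >= 0x100000);
        // Commit one writable page at offset 0 and write through it.
        mapping.alloc(0, page).unwrap();
        // SAFETY: the page at offset 0 was just mapped read/write.
        unsafe {
            mapping.as_ptr().cast::<u8>().write(0xab);
        }
        // Drop write access, then return the page to the PROT_NONE reservation.
        mapping.set_writable(0, page, false).unwrap();
        mapping.unmap(0, page).unwrap();
    }
}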