sparse_mmap/
unix.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Linux implementation for memory mapping abstractions.
5
6#![cfg(unix)]
7
8use pal::unix::SyscallResult;
9use std::ffi::c_void;
10use std::fs::File;
11use std::io;
12use std::io::Error;
13use std::os::unix::prelude::*;
14use std::ptr::null_mut;
15use std::sync::atomic::AtomicUsize;
16use std::sync::atomic::Ordering;
17
18pub(crate) fn page_size() -> usize {
19    static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);
20    let s = PAGE_SIZE.load(Ordering::Relaxed);
21    if s != 0 {
22        s
23    } else {
24        let s = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
25        PAGE_SIZE.store(s, Ordering::Relaxed);
26        s
27    }
28}
29
/// A reservation of virtual address space into which individual memory
/// mappings can be placed. Any part of the range may be left unpopulated.
#[derive(Debug)]
pub struct SparseMapping {
    /// Base address of the reserved range.
    address: *mut c_void,
    /// Size of the reserved range, in bytes.
    len: usize,
}
37
/// The owned form of an OS object that a [`SparseMapping`] can map.
///
/// This is a file descriptor on Linux and a section handle on Windows.
pub type Mappable = OwnedFd;

/// Trait for values that can lend out a [`MappableRef`].
///
/// Here it is a re-export of the standard library's `AsFd`; on Windows the
/// corresponding object is a section handle.
pub use std::os::unix::io::AsFd as AsMappableRef;

/// The borrowed form of an OS object that a [`SparseMapping`] can map.
///
/// This is a file descriptor on Linux and a section handle on Windows.
pub type MappableRef<'a> = BorrowedFd<'a>;
52
/// Produces an owned [`Mappable`] that refers to the same open file as `file`.
///
/// The file's descriptor is cloned, so the returned value is owned
/// independently of `file`.
///
/// N.B. `writable` and `executable` have no effect on Linux.
pub fn new_mappable_from_file(
    file: &File,
    _writable: bool,
    _executable: bool,
) -> io::Result<Mappable> {
    let borrowed = file.as_fd();
    borrowed.try_clone_to_owned()
}
63
64// SAFETY: SparseMapping's internal pointer represents an owned virtual address
65// range. There is no safety issue accessing this pointer across threads.
66unsafe impl Send for SparseMapping {}
67// SAFETY: See above comment
68unsafe impl Sync for SparseMapping {}
69
70unsafe fn mmap(
71    addr: *mut c_void,
72    len: usize,
73    prot: i32,
74    flags: i32,
75    fd: i32,
76    offset: i64,
77) -> Result<*mut c_void, Error> {
78    let address = unsafe { libc::mmap(addr, len, prot, flags, fd, offset) };
79    if address == libc::MAP_FAILED {
80        return Err(Error::last_os_error());
81    }
82    Ok(address)
83}
84
85unsafe fn munmap(addr: *mut c_void, len: usize) -> Result<(), Error> {
86    if unsafe { libc::munmap(addr, len) } < 0 {
87        return Err(Error::last_os_error());
88    }
89    Ok(())
90}
91
92impl SparseMapping {
93    /// Reserves a sparse mapping range with the given size.
94    ///
95    /// The range will be aligned to the largest system page size that's smaller
96    /// or equal to `len`.
97    pub fn new(len: usize) -> Result<Self, Error> {
98        super::initialize_try_copy();
99
100        // Length of 0 return an OS error, so we need to handle it explicitly.
101        if len == 0 {
102            return Err(Error::new(
103                io::ErrorKind::InvalidInput,
104                "length must be greater than 0",
105            ));
106        }
107
108        let size_4k = 4096;
109        let size_2m = 0x200000;
110        let size_1g = 0x40000000;
111        let alignment = if len < size_2m {
112            size_4k
113        } else if len < size_1g {
114            size_2m
115        } else {
116            size_1g
117        };
118
119        let len = len
120            .checked_add(alignment - 1)
121            .map(|temp| temp & !(alignment - 1))
122            .ok_or_else(|| {
123                Error::new(
124                    io::ErrorKind::InvalidInput,
125                    "length and alignment combination causes overflow",
126                )
127            })?;
128
129        let alloc_len = len
130            .checked_add(alignment)
131            .map(|temp| temp - size_4k)
132            .ok_or_else(|| {
133                Error::new(
134                    io::ErrorKind::InvalidInput,
135                    "length and alignment combination causes overflow",
136                )
137            })?;
138
139        // SAFETY: calling mmap to allocate a new range.
140        let address = unsafe {
141            mmap(
142                null_mut(),
143                alloc_len,
144                libc::PROT_NONE,
145                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
146                -1,
147                0,
148            )? as usize
149        };
150        let aligned_address = (address + alignment - 1) & !(alignment - 1);
151        let end = address + alloc_len;
152        let aligned_end = aligned_address + len;
153        assert!(aligned_end <= end);
154
155        if address != aligned_address {
156            // SAFETY: freeing VA just allocated above.
157            unsafe { munmap(address as *mut _, aligned_address - address).unwrap() };
158        }
159        if aligned_end != end {
160            // SAFETY: freeing VA just allocated above.
161            unsafe { munmap(aligned_end as *mut _, end - aligned_end).unwrap() };
162        }
163        Ok(Self {
164            address: aligned_address as *mut _,
165            len,
166        })
167    }
168
169    /// Returns true if the mapping is local to the current process.
170    pub fn is_local(&self) -> bool {
171        true
172    }
173
174    /// Returns the pointer to the beginning of the sparse mapping.
175    pub fn as_ptr(&self) -> *mut c_void {
176        self.address
177    }
178
179    /// Returns the length of the mapping, in bytes.
180    pub fn len(&self) -> usize {
181        self.len
182    }
183
184    fn validate_offset_len(&self, offset: usize, len: usize) -> io::Result<usize> {
185        let end = offset.checked_add(len).ok_or(io::ErrorKind::InvalidInput)?;
186        let page_size = page_size();
187        if offset % page_size != 0 || end % page_size != 0 || end > self.len {
188            return Err(io::ErrorKind::InvalidInput.into());
189        }
190        Ok(end)
191    }
192
193    /// Allocates private, writable memory at the given offset within the mapping.
194    pub fn alloc(&self, offset: usize, len: usize) -> Result<(), Error> {
195        // SAFETY: The flags passed in are guaranteed to be valid
196        unsafe {
197            self.mmap_anonymous(
198                offset,
199                len,
200                libc::PROT_READ | libc::PROT_WRITE,
201                libc::MAP_PRIVATE,
202            )
203        }
204    }
205
206    /// Maps read-only zero pages at the given offset within the mapping.
207    pub fn map_zero(&self, offset: usize, len: usize) -> Result<(), Error> {
208        // SAFETY: The flags passed in are guaranteed to be valid
209        unsafe { self.mmap_anonymous(offset, len, libc::PROT_READ, libc::MAP_PRIVATE) }
210    }
211
212    /// Maps a portion of a file mapping at `offset`.
213    pub fn map_file(
214        &self,
215        offset: usize,
216        len: usize,
217        file_mapping: impl AsFd,
218        file_offset: u64,
219        writable: bool,
220    ) -> Result<(), Error> {
221        let prot = if writable {
222            libc::PROT_READ | libc::PROT_WRITE
223        } else {
224            libc::PROT_READ
225        };
226
227        // SAFETY: The flags passed in are guaranteed to be valid. MAP_SHARED is required.
228        unsafe {
229            self.mmap(
230                offset,
231                len,
232                prot,
233                libc::MAP_SHARED,
234                file_mapping.as_fd(),
235                file_offset as i64,
236            )
237        }
238    }
239
240    /// Maps memory into the mapping, passing parameters through to the mmap
241    /// syscall.
242    ///
243    /// # Safety
244    ///
245    /// This routine is safe to use as long as the caller ensures `map_flags` excludes
246    /// any flags that render the memory region non-unmappable (e.g., `MAP_LOCKED`).
247    /// Misuse may lead to system resource issues, such as falsely perceived out-of-memory
248    /// conditions.
249    pub unsafe fn mmap(
250        &self,
251        offset: usize,
252        len: usize,
253        prot: i32,
254        map_flags: i32,
255        fd: impl AsFd,
256        file_offset: i64,
257    ) -> Result<(), Error> {
258        let _ = self.validate_offset_len(offset, len)?;
259
260        // SAFETY: guaranteed by caller and offset + len checks above
261        unsafe {
262            let address = self.address.add(offset);
263            let mapped_address = mmap(
264                address,
265                len,
266                prot,
267                map_flags | libc::MAP_FIXED,
268                fd.as_fd().as_raw_fd(),
269                file_offset,
270            )?;
271            assert_eq!(mapped_address, address);
272        }
273        Ok(())
274    }
275
276    /// Maps anonymous memory into the mapping, with parameters for the mmap syscall.
277    ///
278    /// # Safety
279    ///
280    /// This routine is safe to use as long as the caller ensures `map_flags` excludes
281    /// any flags that render the memory region non-unmappable (e.g., `MAP_LOCKED`).
282    /// Misuse may lead to system resource issues, such as falsely perceived out-of-memory
283    /// conditions.
284    pub unsafe fn mmap_anonymous(
285        &self,
286        offset: usize,
287        len: usize,
288        prot: i32,
289        map_flags: i32,
290    ) -> io::Result<()> {
291        let _ = self.validate_offset_len(offset, len)?;
292
293        // SAFETY: guaranteed by caller and offset + len checks above
294        unsafe {
295            let address = self.address.add(offset);
296            let mapped_address = mmap(
297                address,
298                len,
299                prot,
300                map_flags | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
301                -1,
302                0,
303            )?;
304            assert_eq!(mapped_address, address);
305        }
306        Ok(())
307    }
308
309    /// Unmaps memory from the mapping.
310    pub fn unmap(&self, offset: usize, len: usize) -> io::Result<()> {
311        let _ = self.validate_offset_len(offset, len)?;
312
313        // Skipping this check would result in the "expect" below
314        if len == 0 {
315            return Err(io::ErrorKind::InvalidInput.into());
316        }
317
318        // Remap to PROT_NONE to preserve the reservation.
319        // SAFETY: guaranteed by caller and offset + len checks above
320        unsafe {
321            let address = self.address.add(offset);
322            let mapped_address = mmap(
323                address,
324                len,
325                libc::PROT_NONE,
326                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
327                -1,
328                0,
329            )
330            .expect("remap to PROT_NONE should not fail (except for low resources)");
331            assert_eq!(mapped_address, address);
332        }
333        Ok(())
334    }
335}
336
337impl Drop for SparseMapping {
338    fn drop(&mut self) {
339        unsafe {
340            libc::munmap(self.address, self.len)
341                .syscall_result()
342                .expect("unmap should not fail");
343        }
344    }
345}
346#[cfg(target_os = "linux")]
347fn new_memfd() -> io::Result<File> {
348    // SAFETY: creating and truncating a new file descriptor according to
349    // the documented contract.
350    unsafe {
351        let fd = libc::memfd_create(c"mem".as_ptr(), libc::MFD_CLOEXEC).syscall_result()?;
352        Ok(File::from_raw_fd(fd))
353    }
354}
355
#[cfg(not(target_os = "linux"))]
/// Creates an anonymous shared memory object and wraps it in a [`File`].
///
/// The object is created with `shm_open` under a random name and unlinked
/// immediately afterwards, leaving it reachable only through the returned
/// file.
///
/// # Errors
///
/// Returns the OS error if `shm_open` fails.
///
/// # Panics
///
/// Panics if random bytes for the name cannot be obtained.
fn new_memfd() -> io::Result<File> {
    let mut name = [0; 16];
    getrandom::fill(&mut name).unwrap();
    let mut name = format!("{:x}", u128::from_ne_bytes(name));
    // macOS limits the name length to 31 bytes, which is sufficient to ensure uniqueness.
    name.truncate(31);
    let name = std::ffi::CString::new(name).unwrap();
    // SAFETY: `name` is a valid NUL-terminated string that outlives both
    // calls, and the descriptor returned by `shm_open` is owned by nothing
    // else before being wrapped in a `File`.
    unsafe {
        // Create a new shared memory object. `O_CREAT` requires the mode
        // argument; omitting it makes the callee read an argument that was
        // never passed. Owner read/write is enough because the object is
        // unlinked right away and only used through this descriptor.
        let fd = libc::shm_open(
            name.as_ptr(),
            libc::O_RDWR | libc::O_EXCL | libc::O_CREAT,
            0o600,
        )
        .syscall_result()?;
        // Unlink it to make it anonymous. Failure is deliberately ignored:
        // the descriptor is usable either way.
        let _ = libc::shm_unlink(name.as_ptr());
        Ok(File::from_raw_fd(fd))
    }
}
373
374/// Allocates a mappable shared memory object of `size` bytes.
375pub fn alloc_shared_memory(size: usize) -> io::Result<OwnedFd> {
376    let fd = new_memfd()?;
377    fd.set_len(size as u64)?;
378    Ok(fd.into())
379}