// virtiofs/lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4#![expect(missing_docs)]
5#![cfg(any(windows, target_os = "linux"))]
6
7mod file;
8mod inode;
9pub mod resolver;
10#[cfg(windows)]
11mod section;
12mod util;
13pub mod virtio;
14mod virtio_util;
15
16#[cfg(windows)]
17pub use section::SectionFs;
18
19use file::VirtioFsFile;
20use fuse::protocol::*;
21use fuse::*;
22use inode::VirtioFsInode;
23pub use lxutil::LxVolumeOptions;
24use parking_lot::RwLock;
25use std::collections::HashMap;
26use std::collections::hash_map::Entry;
27use std::path::Path;
28use std::path::PathBuf;
29use std::sync::Arc;
30use std::time::Duration;
31
// TODO: Make these configurable.

// Timeout applied to attributes returned to FUSE. FUSE issues getattr very frequently, so even a
// short timeout avoids excessive calls. It also lets a lookup that is immediately followed by a
// stat reuse the attributes returned by the lookup instead of issuing a separate getattr.
const ATTRIBUTE_TIMEOUT: Duration = Duration::from_millis(1);

// Timeout applied to directory entries returned to FUSE. This has to be zero: on rename, existing
// entries for the child being renamed are not updated and would stop working. A zero timeout
// forces a new lookup, which updates the path.
const ENTRY_TIMEOUT: Duration = Duration::ZERO;
42
43/// Implementation of the virtio-fs file system.
44pub struct VirtioFs {
45    inodes: RwLock<InodeMap>,
46    files: RwLock<HandleMap<Arc<VirtioFsFile>>>,
47}
48
49impl Fuse for VirtioFs {
50    fn init(&self, info: &mut SessionInfo) {
51        // Indicate we support both readdir and readdirplus.
52        if info.capable() & FUSE_DO_READDIRPLUS != 0 {
53            info.want |= FUSE_DO_READDIRPLUS;
54        }
55
56        // Using "auto" lets FUSE pick whether to use readdir or readdirplus, which can be
57        // beneficial since readdirplus needs to query every file and is therefore more expensive.
58        if info.capable() & FUSE_READDIRPLUS_AUTO != 0 {
59            info.want |= FUSE_READDIRPLUS_AUTO;
60        }
61    }
62
63    fn get_attr(&self, request: &Request, flags: u32, fh: u64) -> lx::Result<fuse_attr_out> {
64        let node_id = request.node_id();
65        // If a file handle is specified, get the attributes from the open file. This is faster on
66        // Windows and works if the file was deleted.
67        let attr = if flags & FUSE_GETATTR_FH != 0 {
68            let file = self.get_file(fh)?;
69            file.get_attr()?
70        } else {
71            let inode = self.get_inode(node_id)?;
72            inode.get_attr()?
73        };
74
75        Ok(fuse_attr_out::new(ATTRIBUTE_TIMEOUT, attr))
76    }
77
78    fn set_attr(&self, request: &Request, arg: &fuse_setattr_in) -> lx::Result<fuse_attr_out> {
79        let node_id = request.node_id();
80
81        // If a file handle is specified, set the attributes on the open file. This is faster on
82        // Windows and works if the file was deleted.
83        let attr = if arg.valid & FATTR_FH != 0 {
84            let file = self.get_file(arg.fh)?;
85            file.set_attr(arg, request.uid())?;
86            file.get_attr()?
87        } else {
88            let inode = self.get_inode(node_id)?;
89            inode.set_attr(arg, request.uid())?
90        };
91
92        Ok(fuse_attr_out::new(ATTRIBUTE_TIMEOUT, attr))
93    }
94
95    fn lookup(&self, request: &Request, name: &lx::LxStr) -> lx::Result<fuse_entry_out> {
96        let inode = self.get_inode(request.node_id())?;
97        self.lookup_helper(&inode, name)
98    }
99
100    fn forget(&self, node_id: u64, lookup_count: u64) {
101        // This must be done under lock so an inode can't be resurrected between the lookup count
102        // reaching zero and removing it from the list.
103        let mut inodes = self.inodes.write();
104        if let Some(inode) = inodes.get(node_id) {
105            if inode.forget(node_id, lookup_count) == 0 {
106                tracing::trace!(node_id, "Removing inode");
107                inodes.remove(node_id);
108            }
109        }
110    }
111
112    fn open(&self, request: &Request, flags: u32) -> lx::Result<fuse_open_out> {
113        let inode = self.get_inode(request.node_id())?;
114        let file = inode.open(flags)?;
115        let fh = self.insert_file(file);
116
117        // TODO: Optionally allow caching.
118        Ok(fuse_open_out::new(fh, FOPEN_DIRECT_IO))
119    }
120
121    fn create(
122        &self,
123        request: &Request,
124        name: &lx::LxStr,
125        arg: &fuse_create_in,
126    ) -> lx::Result<CreateOut> {
127        let inode = self.get_inode(request.node_id())?;
128        let (new_inode, attr, file) =
129            inode.create(name, arg.flags, arg.mode, request.uid(), request.gid())?;
130
131        // Insert the newly created inode; this can return an existing inode if it found a match
132        // on the inode number (if this is a non-exclusive create), so make sure to associate the
133        // file with the returned inode.
134        let (new_inode, node_id) = self.insert_inode(new_inode);
135        let file = VirtioFsFile::new(file, new_inode);
136        let fh = self.insert_file(file);
137        Ok(CreateOut {
138            entry: fuse_entry_out::new(node_id, ENTRY_TIMEOUT, ATTRIBUTE_TIMEOUT, attr),
139            open: fuse_open_out::new(fh, FOPEN_DIRECT_IO),
140        })
141    }
142
143    fn mkdir(
144        &self,
145        request: &Request,
146        name: &lx::LxStr,
147        arg: &fuse_mkdir_in,
148    ) -> lx::Result<fuse_entry_out> {
149        let inode = self.get_inode(request.node_id())?;
150        let (new_inode, attr) = inode.mkdir(name, arg.mode, request.uid(), request.gid())?;
151        let (_, node_id) = self.insert_inode(new_inode);
152        Ok(fuse_entry_out::new(
153            node_id,
154            ENTRY_TIMEOUT,
155            ATTRIBUTE_TIMEOUT,
156            attr,
157        ))
158    }
159
160    fn mknod(
161        &self,
162        request: &Request,
163        name: &lx::LxStr,
164        arg: &fuse_mknod_in,
165    ) -> lx::Result<fuse_entry_out> {
166        let inode = self.get_inode(request.node_id())?;
167        let (new_inode, attr) =
168            inode.mknod(name, arg.mode, request.uid(), request.gid(), arg.rdev)?;
169
170        let (_, node_id) = self.insert_inode(new_inode);
171        Ok(fuse_entry_out::new(
172            node_id,
173            ENTRY_TIMEOUT,
174            ATTRIBUTE_TIMEOUT,
175            attr,
176        ))
177    }
178
179    fn symlink(
180        &self,
181        request: &Request,
182        name: &lx::LxStr,
183        target: &lx::LxStr,
184    ) -> lx::Result<fuse_entry_out> {
185        let inode = self.get_inode(request.node_id())?;
186        let (new_inode, attr) = inode.symlink(name, target, request.uid(), request.gid())?;
187
188        let (_, node_id) = self.insert_inode(new_inode);
189        Ok(fuse_entry_out::new(
190            node_id,
191            ENTRY_TIMEOUT,
192            ATTRIBUTE_TIMEOUT,
193            attr,
194        ))
195    }
196
197    fn link(&self, request: &Request, name: &lx::LxStr, target: u64) -> lx::Result<fuse_entry_out> {
198        let inode = self.get_inode(request.node_id())?;
199        let target_inode = self.get_inode(target)?;
200        let attr = inode.link(name, &target_inode)?;
201
202        // Use the target inode as the reply, with refreshed attributes.
203        Ok(fuse_entry_out::new(
204            target,
205            ENTRY_TIMEOUT,
206            ATTRIBUTE_TIMEOUT,
207            attr,
208        ))
209    }
210
211    fn read_link(&self, request: &Request) -> lx::Result<lx::LxString> {
212        let inode = self.get_inode(request.node_id())?;
213        inode.read_link()
214    }
215
216    fn read(&self, _request: &Request, arg: &fuse_read_in) -> lx::Result<Vec<u8>> {
217        let file = self.get_file(arg.fh)?;
218        let mut buffer = vec![0u8; arg.size as usize];
219        let size = file.read(&mut buffer, arg.offset)?;
220        buffer.truncate(size);
221        Ok(buffer)
222    }
223
224    fn write(&self, request: &Request, arg: &fuse_write_in, data: &[u8]) -> lx::Result<usize> {
225        let file = self.get_file(arg.fh)?;
226        file.write(data, arg.offset, request.uid())
227    }
228
229    fn release(&self, _request: &Request, arg: &fuse_release_in) -> lx::Result<()> {
230        self.remove_file(arg.fh);
231        Ok(())
232    }
233
234    fn open_dir(&self, request: &Request, flags: u32) -> lx::Result<fuse_open_out> {
235        // There is no special handling for directories, so just call open.
236        self.open(request, flags)
237    }
238
239    fn read_dir(&self, _request: &Request, arg: &fuse_read_in) -> lx::Result<Vec<u8>> {
240        let file = self.get_file(arg.fh)?;
241        file.read_dir(self, arg.offset, arg.size, false)
242    }
243
244    fn read_dir_plus(&self, _request: &Request, arg: &fuse_read_in) -> lx::Result<Vec<u8>> {
245        let file = self.get_file(arg.fh)?;
246        file.read_dir(self, arg.offset, arg.size, true)
247    }
248
249    fn release_dir(&self, request: &Request, arg: &fuse_release_in) -> lx::Result<()> {
250        self.release(request, arg)
251    }
252
253    fn unlink(&self, request: &Request, name: &lx::LxStr) -> lx::Result<()> {
254        self.unlink_helper(request, name, 0)
255    }
256
257    fn rmdir(&self, request: &Request, name: &lx::LxStr) -> lx::Result<()> {
258        self.unlink_helper(request, name, lx::AT_REMOVEDIR)
259    }
260
261    fn rename(
262        &self,
263        request: &Request,
264        name: &lx::LxStr,
265        new_dir: u64,
266        new_name: &lx::LxStr,
267        flags: u32,
268    ) -> lx::Result<()> {
269        let inode = self.get_inode(request.node_id())?;
270        let new_inode = self.get_inode(new_dir)?;
271        inode.rename(name, &new_inode, new_name, flags)
272    }
273
274    fn statfs(&self, request: &Request) -> lx::Result<fuse_kstatfs> {
275        let inode = self.get_inode(request.node_id())?;
276        inode.stat_fs()
277    }
278
279    fn fsync(&self, _request: &Request, fh: u64, flags: u32) -> lx::Result<()> {
280        let file = self.get_file(fh)?;
281        let data_only = flags & FUSE_FSYNC_FDATASYNC != 0;
282        file.fsync(data_only)
283    }
284
285    fn fsync_dir(&self, request: &Request, fh: u64, flags: u32) -> lx::Result<()> {
286        self.fsync(request, fh, flags)
287    }
288
289    fn get_xattr(&self, request: &Request, name: &lx::LxStr, size: u32) -> lx::Result<Vec<u8>> {
290        let inode = self.get_inode(request.node_id())?;
291        let mut value = vec![0u8; size as usize];
292        let size = inode.get_xattr(name, Some(&mut value))?;
293        value.truncate(size);
294        Ok(value)
295    }
296
297    fn get_xattr_size(&self, request: &Request, name: &lx::LxStr) -> lx::Result<u32> {
298        let inode = self.get_inode(request.node_id())?;
299        let size = inode.get_xattr(name, None)?;
300        let size = size.try_into().map_err(|_| lx::Error::E2BIG)?;
301        Ok(size)
302    }
303
304    fn set_xattr(
305        &self,
306        request: &Request,
307        name: &lx::LxStr,
308        value: &[u8],
309        flags: u32,
310    ) -> lx::Result<()> {
311        let inode = self.get_inode(request.node_id())?;
312        inode.set_xattr(name, value, flags)
313    }
314
315    fn list_xattr(&self, request: &Request, size: u32) -> lx::Result<Vec<u8>> {
316        let inode = self.get_inode(request.node_id())?;
317        let mut list = vec![0u8; size as usize];
318        let size = inode.list_xattr(Some(&mut list))?;
319        list.truncate(size);
320        Ok(list)
321    }
322
323    fn list_xattr_size(&self, request: &Request) -> lx::Result<u32> {
324        let inode = self.get_inode(request.node_id())?;
325        let size = inode.list_xattr(None)?;
326        let size = size.try_into().map_err(|_| lx::Error::E2BIG)?;
327        Ok(size)
328    }
329
330    fn remove_xattr(&self, request: &Request, name: &lx::LxStr) -> lx::Result<()> {
331        let inode = self.get_inode(request.node_id())?;
332        inode.remove_xattr(name)
333    }
334
335    fn destroy(&self) {
336        // To get the file system ready for re-mount, clean out any open files and leaked inodes.
337        self.files.write().clear();
338        self.inodes.write().clear();
339    }
340}
341
342impl VirtioFs {
343    /// Create a new virtio-fs for the specified root path.
344    pub fn new(
345        root_path: impl AsRef<Path>,
346        mount_options: Option<&LxVolumeOptions>,
347    ) -> lx::Result<Self> {
348        let volume = if let Some(mount_options) = mount_options {
349            mount_options.new_volume(root_path)
350        } else {
351            lxutil::LxVolume::new(root_path)
352        }?;
353        let mut inodes = InodeMap::new(volume.supports_stable_file_id());
354        let (root_inode, _) = VirtioFsInode::new(Arc::new(volume), PathBuf::new())?;
355        assert!(inodes.insert(root_inode).1 == FUSE_ROOT_ID);
356        Ok(Self {
357            inodes: RwLock::new(inodes),
358            files: RwLock::new(HandleMap::new()),
359        })
360    }
361
362    /// Perform lookup on a specified directory inode.
363    fn lookup_helper(&self, inode: &VirtioFsInode, name: &lx::LxStr) -> lx::Result<fuse_entry_out> {
364        let (new_inode, attr) = inode.lookup_child(name)?;
365        let (_, new_inode_nr) = self.insert_inode(new_inode);
366        Ok(fuse_entry_out::new(
367            new_inode_nr,
368            ENTRY_TIMEOUT,
369            ATTRIBUTE_TIMEOUT,
370            attr,
371        ))
372    }
373
374    /// Removes a file or directory.
375    fn unlink_helper(&self, request: &Request, name: &lx::LxStr, flags: i32) -> lx::Result<()> {
376        let inode = self.get_inode(request.node_id())?;
377        inode.unlink(name, flags)
378    }
379
380    /// Retrieve the inode with the specified node ID.
381    fn get_inode(&self, node_id: u64) -> lx::Result<Arc<VirtioFsInode>> {
382        self.inodes.read().get(node_id).ok_or_else(|| {
383            tracing::warn!(node_id, "request for unknown inode");
384            lx::Error::EINVAL
385        })
386    }
387
388    /// Insert a new inode, and returns the assigned node ID as well as a reference to the inode.
389    ///
390    /// If the file system supports stable inode numbers and an inode already existed with this
391    /// number, the existing inode is returned, not the passed in one.
392    fn insert_inode(&self, inode: VirtioFsInode) -> (Arc<VirtioFsInode>, u64) {
393        self.inodes.write().insert(inode)
394    }
395
396    /// Retrieve the file object with the specified file handle.
397    fn get_file(&self, fh: u64) -> lx::Result<Arc<VirtioFsFile>> {
398        let files = self.files.read();
399        let file = files.get(fh).ok_or_else(|| {
400            tracing::warn!(fh, "Request for unknown file");
401            lx::Error::EBADF
402        })?;
403
404        Ok(Arc::clone(file))
405    }
406
407    /// Insert a new file object, and return the assigned file handle.
408    fn insert_file(&self, file: VirtioFsFile) -> u64 {
409        self.files.write().insert(Arc::new(file))
410    }
411
412    /// Remove the file with the specified node ID.
413    fn remove_file(&self, fh: u64) {
414        self.files.write().remove(fh);
415    }
416}
417
/// A key/value map where the keys are automatically incremented identifiers.
///
/// Handles are assigned sequentially, beginning with the value the map was created with.
struct HandleMap<T> {
    /// The stored values, keyed by their handle.
    values: HashMap<u64, T>,
    /// The handle the next call to `insert` will assign.
    next_handle: u64,
    /// The first handle this map assigns; `clear` rewinds `next_handle` to it.
    first_handle: u64,
}

impl<T> HandleMap<T> {
    /// Create a new `HandleMap` whose first handle is 1.
    pub fn new() -> Self {
        Self::starting_at(1)
    }

    /// Create a new `HandleMap` starting with handle value `next_handle`.
    pub fn starting_at(next_handle: u64) -> Self {
        Self {
            values: HashMap::new(),
            next_handle,
            first_handle: next_handle,
        }
    }

    /// Inserts an item into the map, and returns the assigned handle.
    ///
    /// # Panics
    ///
    /// Panics if the handle about to be assigned is already present in the map.
    pub fn insert(&mut self, value: T) -> u64 {
        let handle = self.next_handle;
        if self.values.insert(handle, value).is_some() {
            // The map is used for more than inodes, so the message names the handle generically.
            panic!("Handle value reused.");
        }

        self.next_handle += 1;
        handle
    }

    /// Retrieves a shared reference to a value from the map.
    pub fn get(&self, handle: u64) -> Option<&T> {
        self.values.get(&handle)
    }

    /// Retrieves a mutable reference to a value from the map.
    #[cfg_attr(not(windows), expect(dead_code))]
    pub fn get_mut(&mut self, handle: u64) -> Option<&mut T> {
        self.values.get_mut(&handle)
    }

    /// Removes a value from the map, returning it if it was present.
    pub fn remove(&mut self, handle: u64) -> Option<T> {
        self.values.remove(&handle)
    }

    /// Clears the map and rewinds handle assignment to the map's starting handle.
    pub fn clear(&mut self) {
        self.values.clear();
        // Rewind to the value given at construction rather than a fixed 1, so a map created with
        // `starting_at` keeps handing out handles from its own range.
        self.next_handle = self.first_handle;
    }
}
471
472/// Assigns node IDs to inodes, and keeps track of in-use inodes by their actual inode number.
473///
474/// We cannot use the real inode number as the FUSE node ID:
475/// - FUSE node ID 1 is reserved for the root, so this would break if a file system used that inode
476///   number.
477/// - When we want to support multiple volumes in a single file system, node IDs still need to be
478///   globally unique, whereas inode numbers are per-volume.
479struct InodeMap {
480    inodes_by_node_id: HandleMap<Arc<VirtioFsInode>>,
481    inodes_by_inode_nr: Option<HashMap<lx::ino_t, (Arc<VirtioFsInode>, u64)>>,
482}
483
484impl InodeMap {
485    /// Create a new `InodeMap`.
486    pub fn new(supports_stable_file_id: bool) -> Self {
487        // TODO: Once multiple volumes are supported, the inodes_by_inode_nr map should be per
488        // volume.
489        Self {
490            inodes_by_node_id: HandleMap::new(),
491            inodes_by_inode_nr: if supports_stable_file_id {
492                Some(HashMap::new())
493            } else {
494                None
495            },
496        }
497    }
498
499    /// Get an inode with the specified FUSE node ID.
500    pub fn get(&self, node_id: u64) -> Option<Arc<VirtioFsInode>> {
501        let inode = self.inodes_by_node_id.get(node_id)?;
502        Some(Arc::clone(inode))
503    }
504
505    /// Insert an inode into the map, returning its node ID.
506    pub fn insert(&mut self, inode: VirtioFsInode) -> (Arc<VirtioFsInode>, u64) {
507        // If stable inode numbers are supported, look for the inode by its number.
508        if let Some(inodes_by_inode_nr) = self.inodes_by_inode_nr.as_mut() {
509            match inodes_by_inode_nr.entry(inode.inode_nr()) {
510                Entry::Occupied(entry) => {
511                    // Inode found; increment its count and return the existing FUSE node ID.
512                    let new_path = inode.clone_path();
513                    let (inode, node_id) = entry.get();
514                    inode.lookup(new_path);
515                    return (Arc::clone(inode), *node_id);
516                }
517                Entry::Vacant(entry) => {
518                    // Inode not found, so insert it into both maps.
519                    let inode = Arc::new(inode);
520                    let node_id = self.inodes_by_node_id.insert(Arc::clone(&inode));
521                    entry.insert((Arc::clone(&inode), node_id));
522                    return (inode, node_id);
523                }
524            }
525        }
526
527        // No support for stable inode numbers, so just use node ID.
528        let inode = Arc::new(inode);
529        let node_id = self.inodes_by_node_id.insert(Arc::clone(&inode));
530        (inode, node_id)
531    }
532
533    /// Remove an inode with the specified FUSE node ID from the map.
534    pub fn remove(&mut self, node_id: u64) {
535        let inode = self.inodes_by_node_id.remove(node_id).unwrap();
536        if let Some(inodes_by_inode_nr) = self.inodes_by_inode_nr.as_mut() {
537            inodes_by_inode_nr.remove(&inode.inode_nr());
538        }
539    }
540
541    /// Clears the map, preserving the root inode.
542    pub fn clear(&mut self) {
543        let root_inode = Arc::clone(self.inodes_by_node_id.get(FUSE_ROOT_ID).unwrap());
544        self.inodes_by_node_id.clear();
545
546        // Re-insert the root inode.
547        assert!(self.inodes_by_node_id.insert(Arc::clone(&root_inode)) == FUSE_ROOT_ID);
548
549        // Clear the inode number map if it's supported.
550        if let Some(inodes_by_inode_nr) = self.inodes_by_inode_nr.as_mut() {
551            inodes_by_inode_nr.clear();
552            inodes_by_inode_nr.insert(root_inode.inode_nr(), (root_inode, FUSE_ROOT_ID));
553        }
554    }
555}