disk_backend/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Defines the [`Disk`] type, which provides an interface to a block
5//! device, used for different disk frontends (such as the floppy disk, IDE,
6//! SCSI, or NVMe emulators) as well as direct disk access for other purposes
7//! (such as the VMGS file system).
8//!
9//! `Disk`s are backed by a [`DiskIo`] implementation. Specific disk
10//! backends should be in their own crates.
11
12#![forbid(unsafe_code)]
13
14pub mod pr;
15pub mod resolve;
16pub mod sync_wrapper;
17
18use guestmem::AccessError;
19use inspect::Inspect;
20use scsi_buffers::RequestBuffers;
21use stackfuture::StackFuture;
22use std::fmt::Debug;
23use std::future::Future;
24use std::future::ready;
25use std::pin::Pin;
26use std::sync::Arc;
27use thiserror::Error;
28
29/// A disk operation error.
30#[derive(Debug, Error)]
31pub enum DiskError {
32    /// The request failed due to a preempt and abort status.
33    #[error("aborted command")]
34    AbortDueToPreemptAndAbort,
35    /// The LBA was out of range.
36    #[error("illegal request")]
37    IllegalBlock,
38    /// The request failed due to invalid input.
39    #[error("invalid input")]
40    InvalidInput,
41    /// The request failed due to an unrecovered IO error.
42    #[error("io error")]
43    Io(#[source] std::io::Error),
44    /// The request failed due to a reportable medium error.
45    #[error("medium error")]
46    MediumError(#[source] std::io::Error, MediumErrorDetails),
47    /// The request failed due to a failure to access the specified buffers.
48    #[error("failed to access guest memory")]
49    MemoryAccess(#[from] AccessError),
50    /// The request failed because the disk is read-only.
51    #[error("attempt to write to read-only disk/range")]
52    ReadOnly,
53    /// The request failed due to a persistent reservation conflict.
54    #[error("reservation conflict")]
55    ReservationConflict,
56    /// The request failed because eject is not supported.
57    #[error("unsupported eject")]
58    UnsupportedEject,
59}
60
/// Failure details for [`DiskError::MediumError`].
///
/// This is a plain, field-less classification, so it is cheap to copy and can
/// be compared directly (for example when mapping the failure to a
/// frontend-specific status code).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediumErrorDetails {
    /// The medium had an application tag check failure.
    ApplicationTagCheckFailed,
    /// The medium had a guard check failure.
    GuardCheckFailed,
    /// The medium had a reference tag check failure.
    ReferenceTagCheckFailed,
    /// The medium had an unrecovered read error.
    UnrecoveredReadError,
    /// The medium had a write fault.
    WriteFault,
}
75
76/// Disk metadata and IO operations.
77pub trait DiskIo: 'static + Send + Sync + Inspect {
78    /// Returns the disk type name as a string.
79    ///
80    /// This is used for diagnostic purposes.
81    fn disk_type(&self) -> &str;
82
83    /// Returns the current sector count.
84    ///
85    /// For some backing stores, this may change at runtime. If it does, then
86    /// the backing store must also implement [`DiskIo::wait_resize`].
87    fn sector_count(&self) -> u64;
88
89    /// Returns the logical sector size of the backing store.
90    ///
91    /// This must not change at runtime.
92    fn sector_size(&self) -> u32;
93
94    /// Optionally returns a 16-byte identifier for the disk, if there is a
95    /// natural one for this backing store.
96    ///
97    /// This may be exposed to the guest as a unique disk identifier.
98    /// This must not change at runtime.
99    fn disk_id(&self) -> Option<[u8; 16]>;
100
101    /// Returns the physical sector size of the backing store.
102    ///
103    /// This must not change at runtime.
104    fn physical_sector_size(&self) -> u32;
105
106    /// Returns true if the `fua` parameter to [`DiskIo::write_vectored`] is
107    /// respected by the backing store by ensuring that the IO is immediately
108    /// committed to disk.
109    fn is_fua_respected(&self) -> bool;
110
111    /// Returns true if the disk is read only.
112    fn is_read_only(&self) -> bool;
113
114    /// Unmap sectors from the layer.
115    fn unmap(
116        &self,
117        sector: u64,
118        count: u64,
119        block_level_only: bool,
120    ) -> impl Future<Output = Result<(), DiskError>> + Send;
121
122    /// Returns the behavior of the unmap operation.
123    fn unmap_behavior(&self) -> UnmapBehavior;
124
125    /// Returns the optimal granularity for unmaps, in sectors.
126    fn optimal_unmap_sectors(&self) -> u32 {
127        1
128    }
129
130    /// Optionally returns a trait object to issue persistent reservation
131    /// requests.
132    fn pr(&self) -> Option<&dyn pr::PersistentReservation> {
133        None
134    }
135
136    /// Issues an asynchronous eject media operation to the disk.
137    fn eject(&self) -> impl Future<Output = Result<(), DiskError>> + Send {
138        ready(Err(DiskError::UnsupportedEject))
139    }
140
141    /// Issues an asynchronous read-scatter operation to the disk.
142    ///
143    /// # Arguments
144    /// * `buffers` - An object representing the data buffers into which the disk data will be transferred.
145    /// * `sector` - The logical sector at which the read operation starts.
146    fn read_vectored(
147        &self,
148        buffers: &RequestBuffers<'_>,
149        sector: u64,
150    ) -> impl Future<Output = Result<(), DiskError>> + Send;
151
152    /// Issues an asynchronous write-gather operation to the disk.
153    /// # Arguments
154    /// * `buffers` - An object representing the data buffers containing the data to transfer to the disk.
155    /// * `sector` - The logical sector at which the write operation starts.
156    /// * `fua` - A flag indicates if FUA (force unit access) is requested.
157    fn write_vectored(
158        &self,
159        buffers: &RequestBuffers<'_>,
160        sector: u64,
161        fua: bool,
162    ) -> impl Future<Output = Result<(), DiskError>> + Send;
163
164    /// Issues an asynchronous flush operation to the disk.
165    fn sync_cache(&self) -> impl Future<Output = Result<(), DiskError>> + Send;
166
167    /// Waits for the disk sector size to be different than the specified value.
168    fn wait_resize(&self, sector_count: u64) -> impl Future<Output = u64> + Send {
169        let _ = sector_count;
170        std::future::pending()
171    }
172}
173
174/// An asynchronous block device.
175///
176/// This type is cheap to clone, for sharing the disk among multiple concurrent
177/// users.
178#[derive(Inspect, Clone)]
179#[inspect(extra = "Self::inspect_extra")]
180pub struct Disk(#[inspect(flatten)] Arc<DiskInner>);
181
182impl Disk {
183    fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
184        resp.field("disk_type", self.0.disk.disk_type())
185            .field("sector_count", self.0.disk.sector_count())
186            .field("supports_pr", self.0.disk.pr().is_some());
187    }
188}
189
190impl Debug for Disk {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        f.debug_tuple("Disk").finish()
193    }
194}
195
196#[derive(Inspect)]
197#[inspect(bound = "T: DynDisk")]
198struct DiskInner<T: ?Sized = dyn DynDisk> {
199    sector_size: u32,
200    sector_shift: u32,
201    physical_sector_size: u32,
202    disk_id: Option<[u8; 16]>,
203    is_fua_respected: bool,
204    is_read_only: bool,
205    unmap_behavior: UnmapBehavior,
206    optimal_unmap_sectors: u32,
207    disk: T,
208}
209
210/// Errors that can occur when creating a `Disk`.
211#[derive(Debug, Error)]
212pub enum InvalidDisk {
213    /// The sector size is invalid.
214    #[error("invalid sector size: {0}")]
215    InvalidSectorSize(u32),
216    /// The physical sector size is invalid.
217    #[error("invalid physical sector size: {0}")]
218    InvalidPhysicalSectorSize(u32),
219}
220
221impl Disk {
222    /// Returns a new disk wrapping the given backing object.
223    pub fn new(disk: impl 'static + DiskIo) -> Result<Self, InvalidDisk> {
224        // Cache the metadata locally to validate it and so that it can be
225        // accessed without needing to go through the trait object. This is more
226        // efficient and ensures the backing disk does not change these values
227        // during the lifetime of the disk.
228        let sector_size = disk.sector_size();
229        if !sector_size.is_power_of_two() || sector_size < 512 {
230            return Err(InvalidDisk::InvalidSectorSize(sector_size));
231        }
232        let physical_sector_size = disk.physical_sector_size();
233        if !physical_sector_size.is_power_of_two() || physical_sector_size < sector_size {
234            return Err(InvalidDisk::InvalidPhysicalSectorSize(physical_sector_size));
235        }
236        Ok(Self(Arc::new(DiskInner {
237            sector_size,
238            sector_shift: sector_size.trailing_zeros(),
239            physical_sector_size,
240            disk_id: disk.disk_id(),
241            is_fua_respected: disk.is_fua_respected(),
242            is_read_only: disk.is_read_only(),
243            optimal_unmap_sectors: disk.optimal_unmap_sectors(),
244            unmap_behavior: disk.unmap_behavior(),
245            disk,
246        })))
247    }
248
249    /// Returns the current sector count.
250    ///
251    /// For some backing stores, this may change at runtime. Use
252    /// [`wait_resize`](Self::wait_resize) to detect this change.
253    pub fn sector_count(&self) -> u64 {
254        self.0.disk.sector_count()
255    }
256
257    /// Returns the logical sector size of the backing store.
258    pub fn sector_size(&self) -> u32 {
259        self.0.sector_size
260    }
261
262    /// Returns log2 of the logical sector size of the backing store.
263    pub fn sector_shift(&self) -> u32 {
264        self.0.sector_shift
265    }
266
267    /// Optionally returns a 16-byte identifier for the disk, if there is a
268    /// natural one for this backing store.
269    ///
270    /// This may be exposed to the guest as a unique disk identifier.
271    pub fn disk_id(&self) -> Option<[u8; 16]> {
272        self.0.disk_id
273    }
274
275    /// Returns the physical sector size of the backing store.
276    pub fn physical_sector_size(&self) -> u32 {
277        self.0.physical_sector_size
278    }
279
280    /// Returns true if the `fua` parameter to
281    /// [`write_vectored`](Self::write_vectored) is respected by the backing
282    /// store by ensuring that the IO is immediately committed to disk.
283    pub fn is_fua_respected(&self) -> bool {
284        self.0.is_fua_respected
285    }
286
287    /// Returns true if the disk is read only.
288    pub fn is_read_only(&self) -> bool {
289        self.0.is_read_only
290    }
291
292    /// Unmap sectors from the disk.
293    pub fn unmap(
294        &self,
295        sector: u64,
296        count: u64,
297        block_level_only: bool,
298    ) -> impl use<'_> + Future<Output = Result<(), DiskError>> + Send {
299        self.0.disk.unmap(sector, count, block_level_only)
300    }
301
302    /// Returns the behavior of the unmap operation.
303    pub fn unmap_behavior(&self) -> UnmapBehavior {
304        self.0.unmap_behavior
305    }
306
307    /// Returns the optimal granularity for unmaps, in sectors.
308    pub fn optimal_unmap_sectors(&self) -> u32 {
309        self.0.optimal_unmap_sectors
310    }
311
312    /// Optionally returns a trait object to issue persistent reservation
313    /// requests.
314    pub fn pr(&self) -> Option<&dyn pr::PersistentReservation> {
315        self.0.disk.pr()
316    }
317
318    /// Issues an asynchronous eject media operation to the disk.
319    pub fn eject(&self) -> impl use<'_> + Future<Output = Result<(), DiskError>> + Send {
320        self.0.disk.eject()
321    }
322
323    /// Issues an asynchronous read-scatter operation to the disk.
324    ///
325    /// # Arguments
326    ///
327    /// * `buffers` - An object representing the data buffers into which the disk data will be transferred.
328    /// * `sector` - The logical sector at which the read operation starts.
329    pub fn read_vectored<'a>(
330        &'a self,
331        buffers: &'a RequestBuffers<'_>,
332        sector: u64,
333    ) -> impl use<'a> + Future<Output = Result<(), DiskError>> + Send {
334        self.0.disk.read_vectored(buffers, sector)
335    }
336
337    /// Issues an asynchronous write-gather operation to the disk.
338    ///
339    /// # Arguments
340    ///
341    /// * `buffers` - An object representing the data buffers containing the data to transfer to the disk.
342    /// * `sector` - The logical sector at which the write operation starts.
343    /// * `fua` - A flag indicates if FUA (force unit access) is requested.
344    ///
345    /// # Panics
346    ///
347    /// The caller must pass a buffer with an integer number of sectors.
348    pub fn write_vectored<'a>(
349        &'a self,
350        buffers: &'a RequestBuffers<'_>,
351        sector: u64,
352        fua: bool,
353    ) -> impl use<'a> + Future<Output = Result<(), DiskError>> + Send {
354        self.0.disk.write_vectored(buffers, sector, fua)
355    }
356
357    /// Issues an asynchronous flush operation to the disk.
358    pub fn sync_cache(&self) -> impl use<'_> + Future<Output = Result<(), DiskError>> + Send {
359        self.0.disk.sync_cache()
360    }
361
362    /// Waits for the disk sector size to be different than the specified value.
363    pub fn wait_resize(&self, sector_count: u64) -> impl use<'_> + Future<Output = u64> {
364        self.0.disk.wait_resize(sector_count)
365    }
366}
367
368/// The behavior of unmap.
369#[derive(Clone, Copy, Debug, PartialEq, Eq, Inspect)]
370pub enum UnmapBehavior {
371    /// Unmap may or may not change the content, and not necessarily to zero.
372    Unspecified,
373    /// Unmaps are guaranteed to be ignored.
374    Ignored,
375    /// Unmap will deterministically zero the content.
376    Zeroes,
377}
378
/// The number of bytes reserved inline for a `DiskIo` future.
///
/// The value was picked empirically: run
/// `cargo test -p storvsp -- --no-capture` and read the required size out of
/// the failure message.
const ASYNC_DISK_STACK_SIZE: usize = 1256;
384
385type IoFuture<'a> = StackFuture<'a, Result<(), DiskError>, { ASYNC_DISK_STACK_SIZE }>;
386
387trait DynDisk: Send + Sync + Inspect {
388    fn disk_type(&self) -> &str;
389    fn sector_count(&self) -> u64;
390
391    fn unmap(&self, sector_offset: u64, sector_count: u64, block_level_only: bool) -> IoFuture<'_>;
392
393    fn pr(&self) -> Option<&dyn pr::PersistentReservation>;
394    fn eject(&self) -> IoFuture<'_>;
395
396    fn read_vectored<'a>(&'a self, buffers: &'a RequestBuffers<'_>, sector: u64) -> IoFuture<'a>;
397
398    fn write_vectored<'a>(
399        &'a self,
400        buffers: &'a RequestBuffers<'_>,
401        sector: u64,
402        fua: bool,
403    ) -> IoFuture<'a>;
404
405    fn sync_cache(&self) -> IoFuture<'_>;
406
407    fn wait_resize<'a>(
408        &'a self,
409        sector_count: u64,
410    ) -> Pin<Box<dyn 'a + Send + Future<Output = u64>>> {
411        let _ = sector_count;
412        Box::pin(std::future::pending())
413    }
414}
415
416impl<T: DiskIo> DynDisk for T {
417    fn disk_type(&self) -> &str {
418        self.disk_type()
419    }
420
421    fn sector_count(&self) -> u64 {
422        self.sector_count()
423    }
424
425    fn unmap(
426        &self,
427        sector_offset: u64,
428        sector_count: u64,
429        block_level_only: bool,
430    ) -> StackFuture<'_, Result<(), DiskError>, { ASYNC_DISK_STACK_SIZE }> {
431        StackFuture::from_or_box(self.unmap(sector_offset, sector_count, block_level_only))
432    }
433
434    fn pr(&self) -> Option<&dyn pr::PersistentReservation> {
435        self.pr()
436    }
437
438    fn eject(&self) -> IoFuture<'_> {
439        StackFuture::from_or_box(self.eject())
440    }
441
442    fn read_vectored<'a>(&'a self, buffers: &'a RequestBuffers<'_>, sector: u64) -> IoFuture<'a> {
443        StackFuture::from_or_box(self.read_vectored(buffers, sector))
444    }
445
446    fn write_vectored<'a>(
447        &'a self,
448        buffers: &'a RequestBuffers<'a>,
449        sector: u64,
450        fua: bool,
451    ) -> IoFuture<'a> {
452        StackFuture::from_or_box(self.write_vectored(buffers, sector, fua))
453    }
454
455    fn sync_cache(&self) -> IoFuture<'_> {
456        StackFuture::from_or_box(self.sync_cache())
457    }
458}