disk_striped/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implements the [`DiskIo`] trait for virtual disks backed by multiple raw
5//! block devices.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10use async_trait::async_trait;
11use disk_backend::Disk;
12use disk_backend::DiskError;
13use disk_backend::DiskIo;
14use disk_backend::UnmapBehavior;
15use disk_backend::resolve::ResolveDiskParameters;
16use disk_backend::resolve::ResolvedDisk;
17use disk_backend_resources::StripedDiskHandle;
18use futures::future::join_all;
19use futures::future::try_join_all;
20use inspect::Inspect;
21use scsi_buffers::RequestBuffers;
22use std::fmt::Debug;
23use thiserror::Error;
24use vm_resource::AsyncResolveResource;
25use vm_resource::ResourceResolver;
26use vm_resource::declare_static_async_resolver;
27use vm_resource::kind::DiskHandleKind;
28
/// Resource resolver that builds a [`StripedDisk`] from a
/// [`StripedDiskHandle`].
pub struct StripedDiskResolver;
// Declares `StripedDiskResolver` as the resolver for the
// `(DiskHandleKind, StripedDiskHandle)` pair.
declare_static_async_resolver!(StripedDiskResolver, (DiskHandleKind, StripedDiskHandle));
31
32#[async_trait]
33impl AsyncResolveResource<DiskHandleKind, StripedDiskHandle> for StripedDiskResolver {
34    type Output = ResolvedDisk;
35    type Error = anyhow::Error;
36
37    async fn resolve(
38        &self,
39        resolver: &ResourceResolver,
40        rsrc: StripedDiskHandle,
41        input: ResolveDiskParameters<'_>,
42    ) -> Result<Self::Output, Self::Error> {
43        let disks = try_join_all(
44            rsrc.devices
45                .into_iter()
46                .map(async |device| resolver.resolve(device, input).await.map(|r| r.0)),
47        )
48        .await?;
49        Ok(ResolvedDisk::new(StripedDisk::new(
50            disks,
51            rsrc.chunk_size_in_bytes,
52            rsrc.logic_sector_count,
53        )?)?)
54    }
55}
56
/// A virtual disk whose sectors are distributed across several backing disks
/// in fixed-size chunks.
#[derive(Debug, Inspect)]
pub struct StripedDisk {
    /// The backing disks; chunk `i` lives on disk `i % block_devices.len()`.
    #[inspect(iter_by_index)]
    block_devices: Vec<Disk>,
    /// Sector size in bytes, as reported by the backing disks.
    sector_size: u32,
    /// `sector_size.trailing_zeros()`; used to convert between byte lengths
    /// and sector counts with shifts.
    sector_shift: u32,
    /// Number of sectors exposed by the striped disk.
    sector_count: u64,
    /// Whether the striped disk is read-only, as reported by the backing disks.
    read_only: bool,
    /// Number of sectors in one chunk.
    sector_count_per_chunk: u32,
    /// Unmap behavior reported for the striped disk as a whole.
    unmap_behavior: UnmapBehavior,
}
68
/// Default chunk size in bytes (128 KiB), used when no chunk size is given to
/// `StripedDisk::new`.
const CHUNK_SIZE_128K: u32 = 128 * 1024;
70
/// Errors returned by [`StripedDisk::new`] when the backing devices or the
/// requested geometry cannot form a striped disk.
#[derive(Error, Debug)]
pub enum NewDeviceError {
    // No backing devices were supplied.
    #[error("Can't create a striping disk since the input device list is empty")]
    EmptyDeviceList,
    // A backing device differs from the first device in sector size, sector
    // count, or read-only state.
    // NOTE(review): the read-only state is not part of the message, so a
    // read-only mismatch is reported with equal sizes and counts.
    #[error(
        "The files are not compatible to form a striping disk: sector_size-{sector_size} != cur_sector_size-{cur_sector_size} OR sector_count-{sector_count} != cur_sector_count-{cur_sector_count}"
    )]
    DeviceNotCompatible {
        sector_size: u32,
        cur_sector_size: u32,
        sector_count: u64,
        cur_sector_count: u64,
    },
    // The chunk size (first value) is not a multiple of the sector size
    // (second value).
    #[error(
        "Invalid chunk size: chunk_size_in_bytes-{0} is not multiple of logical_sector_size-{1}"
    )]
    InvalidChunkSize(u32, u32),
    // The requested sector count (first value) exceeds the usable sector
    // count of the backing devices (second value).
    #[error(
        "logic_sector_count is out of range: logic_sector_count.unwrap_or(total_sector_count)-{0} > total_sector_count-{1}"
    )]
    InvalidLogicSectorCount(u64, u64),
    // The sector count (first value) is not a multiple of
    // `device count * sectors per chunk` (second value).
    // NOTE(review): the message prints `!=` although the check is a
    // divisibility test.
    #[error(
        "The striping disk size must be multiple of chunk size * number of disks. logic_sector_count-{0} != {1}."
    )]
    InvalidStripingDiskSize(u64, u64),
}
97
/// Internal I/O errors of the striped disk; converted into [`DiskError`]
/// before being returned to callers.
#[derive(Debug, Error)]
enum IoError {
    /// The chunks produced for a read or write do not add up to the size of
    /// the request buffer.
    #[error("cur_buf_offset-{cur_buf_offset} != buf_total_size -{buf_total_size}")]
    InternalErrorBufferLengthMismatch {
        cur_buf_offset: usize,
        buf_total_size: usize,
    },
    /// The chunks produced for an unmap do not add up to the requested
    /// sector count.
    #[error("trimmed_sectors-{trimmed_sectors} != sector_count -{sector_count}")]
    InternalErrorTrimLengthMismatch {
        trimmed_sectors: u64,
        sector_count: u64,
    },
    /// The requested sector range ends beyond the end of the striped disk.
    #[error(
        "Sector out of range: start_sector-{start_sector}, end_sector-{end_sector}, self.sector_count-{disk_sector_count}"
    )]
    IOInvalidSector {
        start_sector: u64,
        end_sector: u64,
        disk_sector_count: u64,
    },
    /// A backing disk failed; `index` is its position in the device list and
    /// `err` is the error it returned.
    #[error("error in lower disk {index}")]
    LowerError {
        index: usize,
        #[source]
        err: DiskError,
    },
}
125
126impl From<IoError> for DiskError {
127    fn from(err: IoError) -> Self {
128        DiskError::Io(std::io::Error::other(err))
129    }
130}
131
/// One contiguous piece of a request that lives entirely on a single disk.
struct Chunk {
    /// Index of the disk that holds this chunk.
    disk_index: usize,
    /// First sector of the piece, relative to the start of that disk.
    disk_sector_index: u64,
    /// Length of the piece in sectors. May be shorter than
    /// `sector_count_per_chunk` for the first and last chunk of a request.
    chunk_length_in_sectors: u32,
}

/// Iterator that splits the sector range `[start_sector, end_sector)` of the
/// striped disk into per-disk [`Chunk`]s.
#[derive(Debug, Clone)]
struct ChunkIter {
    /// Number of disks the data is striped across.
    disk_count: usize,
    /// Number of sectors in a full chunk.
    sector_count_per_chunk: u32,
    /// First sector of the request (inclusive).
    start_sector: u64,
    /// End sector of the request (exclusive).
    end_sector: u64,
    /// Index of the chunk containing `start_sector`.
    start_chunk_index: u64,
    /// Index one past the last chunk touched by the request.
    end_chunk_index: u64,
    /// Index of the chunk returned by the next call to `next`.
    cur_chunk_index: u64,
}

impl Iterator for ChunkIter {
    type Item = Chunk;

    fn next(&mut self) -> Option<Chunk> {
        let cur = self.cur_chunk_index;

        // Chunks are visited in the half-open range
        // [start_chunk_index, end_chunk_index).
        assert!(
            cur >= self.start_chunk_index,
            "self.cur_chunk_index-[{}] < self.start_chunk_index-[{}] should never happen.",
            self.cur_chunk_index,
            self.start_chunk_index
        );
        if cur >= self.end_chunk_index {
            return None;
        }

        let sectors_per_chunk = self.sector_count_per_chunk as u64;

        // Only the first chunk may begin part-way into a chunk.
        let head_skip = if cur == self.start_chunk_index {
            self.start_sector % sectors_per_chunk
        } else {
            0
        };

        let disk_count = self.disk_count as u64;
        let disk_index = (cur % disk_count) as usize;

        // Sector on the target disk where this chunk's stripe begins.
        let stripe_base = (cur / disk_count) * sectors_per_chunk;
        let disk_sector_index = stripe_base + head_skip;

        // Only the last chunk may end before the chunk boundary.
        let tail_end = if cur == self.end_chunk_index - 1 {
            self.end_sector - sectors_per_chunk * cur
        } else {
            sectors_per_chunk
        };
        let disk_end_offset_in_sectors = stripe_base + tail_end;

        let chunk_length_in_sectors = (disk_end_offset_in_sectors - disk_sector_index) as u32;

        self.cur_chunk_index = cur + 1;

        Some(Chunk {
            disk_index,
            disk_sector_index,
            chunk_length_in_sectors,
        })
    }
}
201
202impl StripedDisk {
203    fn get_chunk_iter(&self, start_sector: u64, end_sector: u64) -> Result<ChunkIter, DiskError> {
204        // The valid range is [start_sector, end_sector).
205        if end_sector > self.sector_count {
206            let err = IoError::IOInvalidSector {
207                start_sector,
208                end_sector,
209                disk_sector_count: self.sector_count,
210            };
211            tracelimit::error_ratelimited!(err = ?err);
212            return Err(err.into());
213        }
214
215        let start_chunk_index = start_sector / self.sector_count_per_chunk as u64;
216        let end_chunk_index = end_sector.div_ceil(self.sector_count_per_chunk as u64);
217
218        let chunk_iter = ChunkIter {
219            disk_count: self.block_devices.len(),
220            sector_count_per_chunk: self.sector_count_per_chunk,
221            start_sector,
222            end_sector,
223            start_chunk_index,
224            end_chunk_index,
225            cur_chunk_index: start_chunk_index,
226        };
227
228        Ok(chunk_iter)
229    }
230}
231
232impl StripedDisk {
233    /// Constructs a new `StripedDisk` backed by the vector of file.
234    ///
235    /// # Arguments
236    /// * `devices` - The backing devices opened for raw access.
237    /// * 'chunk_size_in_bytes' - The chunk size of the striped disk, and the default value is 128K.
238    /// * 'logic_sector_count' - The sector count of the striped disk, and the default value is the sum of the sector count of the backing devices.
239    ///
240    pub fn new(
241        devices: Vec<Disk>,
242        chunk_size_in_bytes: Option<u32>,
243        logic_sector_count: Option<u64>,
244    ) -> Result<Self, NewDeviceError> {
245        if devices.is_empty() {
246            return Err(NewDeviceError::EmptyDeviceList);
247        }
248
249        let mut total_sector_count = 0;
250        let sector_size = devices[0].sector_size();
251        let sector_count = devices[0].sector_count();
252        let read_only = devices[0].is_read_only();
253        let chunk_size_in_bytes = chunk_size_in_bytes.unwrap_or(CHUNK_SIZE_128K);
254        if chunk_size_in_bytes % sector_size != 0 {
255            return Err(NewDeviceError::InvalidChunkSize(
256                chunk_size_in_bytes,
257                sector_size,
258            ));
259        }
260
261        let sector_count_per_chunk = (chunk_size_in_bytes / sector_size) as u64;
262
263        for device in &devices {
264            let cur_sector_size = device.sector_size();
265            let cur_sector_count = device.sector_count();
266            let cur_read_only = device.is_read_only();
267
268            if sector_size != cur_sector_size
269                || sector_count != cur_sector_count
270                || read_only != cur_read_only
271            {
272                return Err(NewDeviceError::DeviceNotCompatible {
273                    sector_size,
274                    cur_sector_size,
275                    sector_count,
276                    cur_sector_count,
277                });
278            }
279
280            total_sector_count +=
281                (cur_sector_count / sector_count_per_chunk) * sector_count_per_chunk;
282        }
283
284        if total_sector_count % (devices.len() as u64 * sector_count_per_chunk) != 0 {
285            return Err(NewDeviceError::InvalidStripingDiskSize(
286                total_sector_count,
287                devices.len() as u64 * sector_count_per_chunk,
288            ));
289        }
290
291        let logic_sector_count = logic_sector_count.unwrap_or(total_sector_count);
292        if logic_sector_count > total_sector_count {
293            return Err(NewDeviceError::InvalidLogicSectorCount(
294                logic_sector_count,
295                total_sector_count,
296            ));
297        }
298
299        if logic_sector_count % (devices.len() as u64 * sector_count_per_chunk) != 0 {
300            return Err(NewDeviceError::InvalidStripingDiskSize(
301                logic_sector_count,
302                devices.len() as u64 * sector_count_per_chunk,
303            ));
304        }
305
306        // Unify the unmap behavior of all devices. If all disks specify the
307        // same behavior, use it. Otherwise, report unspecified behavior and
308        // send unmap to all disks.
309        let unmap_behavior = devices.iter().fold(UnmapBehavior::Zeroes, |rest, d| {
310            match (rest, d.unmap_behavior()) {
311                (UnmapBehavior::Zeroes, UnmapBehavior::Zeroes) => UnmapBehavior::Zeroes,
312                (UnmapBehavior::Ignored, UnmapBehavior::Ignored) => UnmapBehavior::Ignored,
313                _ => UnmapBehavior::Unspecified,
314            }
315        });
316
317        let stripped_block_device = StripedDisk {
318            block_devices: devices,
319            sector_size,
320            sector_shift: sector_size.trailing_zeros(),
321            sector_count: logic_sector_count,
322            read_only,
323            sector_count_per_chunk: (sector_count_per_chunk as u32),
324            unmap_behavior,
325        };
326
327        tracing::info!("stripped block device start completed.");
328        Ok(stripped_block_device)
329    }
330}
331
332impl DiskIo for StripedDisk {
333    fn disk_type(&self) -> &str {
334        "striped"
335    }
336
337    fn sector_count(&self) -> u64 {
338        self.sector_count
339    }
340
341    fn sector_size(&self) -> u32 {
342        self.sector_size
343    }
344
345    fn is_read_only(&self) -> bool {
346        self.read_only
347    }
348
349    fn disk_id(&self) -> Option<[u8; 16]> {
350        None
351    }
352
353    fn physical_sector_size(&self) -> u32 {
354        self.block_devices
355            .iter()
356            .map(|d| d.physical_sector_size())
357            .max()
358            .unwrap()
359    }
360
361    fn is_fua_respected(&self) -> bool {
362        self.block_devices.iter().all(|d| d.is_fua_respected())
363    }
364
365    async fn eject(&self) -> Result<(), DiskError> {
366        let mut futures = Vec::new();
367        for disk in &self.block_devices {
368            futures.push(disk.eject());
369        }
370        await_all_and_check(futures).await?;
371        Ok(())
372    }
373
374    async fn read_vectored(
375        &self,
376        buffers: &RequestBuffers<'_>,
377        start_sector: u64,
378    ) -> Result<(), DiskError> {
379        let buf_total_size = buffers.len();
380        let end_sector = start_sector + ((buf_total_size as u64) >> self.sector_shift);
381        if end_sector > self.sector_count {
382            return Err(DiskError::IllegalBlock);
383        }
384        let chunk_iter = self.get_chunk_iter(start_sector, end_sector)?;
385
386        let mut all_futures = Vec::new();
387        let mut cur_buf_offset: usize = 0;
388
389        for chunk in chunk_iter {
390            let disk = &self.block_devices[chunk.disk_index];
391
392            let buf_len = (chunk.chunk_length_in_sectors as usize) << self.sector_shift;
393
394            let sub_buffers = buffers.subrange(cur_buf_offset, buf_len);
395            cur_buf_offset += buf_len;
396
397            all_futures.push(async move {
398                disk.read_vectored(&sub_buffers, chunk.disk_sector_index)
399                    .await
400                    .map_err(|err| IoError::LowerError {
401                        index: chunk.disk_index,
402                        err,
403                    })
404            });
405        }
406
407        if cur_buf_offset != buf_total_size {
408            return Err(IoError::InternalErrorBufferLengthMismatch {
409                cur_buf_offset,
410                buf_total_size,
411            }
412            .into());
413        }
414
415        await_all_and_check(all_futures).await?;
416        Ok(())
417    }
418
419    async fn write_vectored(
420        &self,
421        buffers: &RequestBuffers<'_>,
422        start_sector: u64,
423        fua: bool,
424    ) -> Result<(), DiskError> {
425        let buf_total_size = buffers.len();
426        let end_sector = start_sector + ((buf_total_size as u64) >> self.sector_shift);
427        if end_sector > self.sector_count {
428            return Err(DiskError::IllegalBlock);
429        }
430        let chunk_iter = self.get_chunk_iter(start_sector, end_sector)?;
431
432        let mut all_futures = Vec::new();
433        let mut cur_buf_offset: usize = 0;
434
435        for chunk in chunk_iter {
436            let disk = &self.block_devices[chunk.disk_index];
437
438            let buf_len = (chunk.chunk_length_in_sectors as usize) << self.sector_shift;
439
440            let sub_buffers = buffers.subrange(cur_buf_offset, buf_len);
441            cur_buf_offset += buf_len;
442
443            all_futures.push(async move {
444                disk.write_vectored(&sub_buffers, chunk.disk_sector_index, fua)
445                    .await
446                    .map_err(|err| IoError::LowerError {
447                        index: chunk.disk_index,
448                        err,
449                    })
450            });
451        }
452
453        if cur_buf_offset != buf_total_size {
454            return Err(IoError::InternalErrorBufferLengthMismatch {
455                cur_buf_offset,
456                buf_total_size,
457            }
458            .into());
459        }
460
461        await_all_and_check(all_futures).await?;
462        Ok(())
463    }
464
465    async fn sync_cache(&self) -> Result<(), DiskError> {
466        let mut all_futures = Vec::new();
467        for (disk_index, disk) in self.block_devices.iter().enumerate() {
468            all_futures.push(async move {
469                disk.sync_cache().await.map_err(|err| IoError::LowerError {
470                    index: disk_index,
471                    err,
472                })
473            });
474        }
475        await_all_and_check(all_futures).await?;
476        Ok(())
477    }
478
479    async fn unmap(
480        &self,
481        start_sector: u64,
482        sector_count: u64,
483        block_level_only: bool,
484    ) -> Result<(), DiskError> {
485        let end_sector = start_sector + sector_count;
486
487        if end_sector > self.sector_count {
488            return Err(DiskError::IllegalBlock);
489        }
490
491        let chunk_iter = match self.get_chunk_iter(start_sector, end_sector) {
492            Ok(iter) => iter,
493            Err(err) => {
494                return Err(err);
495            }
496        };
497
498        // Create a vector to group chunks by disk index
499        let mut disk_sectors: Vec<(u64, u64)> = vec![(0, 0); self.block_devices.len()];
500        let mut trimmed_sectors: u64 = 0;
501
502        for chunk in chunk_iter {
503            let start = chunk.disk_sector_index;
504            let length = chunk.chunk_length_in_sectors as u64;
505            let (disk_start, disk_len) = &mut disk_sectors[chunk.disk_index];
506            if *disk_len == 0 {
507                *disk_start = start; // set the start of the unmap operation
508            }
509            *disk_len += length; // add the length to the total
510
511            trimmed_sectors += length;
512        }
513
514        if trimmed_sectors != sector_count {
515            return Err(IoError::InternalErrorTrimLengthMismatch {
516                trimmed_sectors,
517                sector_count,
518            }
519            .into());
520        }
521
522        // Create a future for each disk's combined unmap operations
523        let mut all_futures = Vec::new();
524
525        for (disk_index, &(start, length)) in disk_sectors.iter().enumerate() {
526            let disk = &self.block_devices[disk_index];
527            // Check if the length is non-zero before pushing to all_futures
528            if length > 0 {
529                all_futures.push(async move { disk.unmap(start, length, block_level_only).await });
530            }
531        }
532        await_all_and_check(all_futures).await?;
533        Ok(())
534    }
535
536    fn unmap_behavior(&self) -> UnmapBehavior {
537        self.unmap_behavior
538    }
539
540    fn optimal_unmap_sectors(&self) -> u32 {
541        self.block_devices
542            .iter()
543            .map(|disk| disk.optimal_unmap_sectors())
544            .max()
545            .unwrap_or(1)
546    }
547}
548
549async fn await_all_and_check<T, E>(futures: T) -> Result<(), E>
550where
551    T: IntoIterator,
552    T::Item: core::future::Future<Output = Result<(), E>>,
553{
554    // Use join_all to wait for all IOs even if one fails. This is necessary to
555    // avoid dropping IOs while they are in flight.
556    let results = join_all(futures).await;
557    for result in results {
558        result?;
559    }
560    Ok(())
561}
562
563#[cfg(test)]
564mod tests {
565    use super::*;
566    use guestmem::GuestMemory;
567    use hvdef::HV_PAGE_SIZE;
568    use pal_async::async_test;
569    use scsi_buffers::OwnedRequestBuffers;
570
571    fn new_strip_device(
572        disk_count: u8,
573        disk_size_in_bytes: Option<u64>,
574        chunk_size_in_bytes: Option<u32>,
575        logic_sector_count: Option<u64>,
576    ) -> StripedDisk {
577        let mut devices = Vec::new();
578
579        for _i in 0..disk_count {
580            let ramdisk =
581                disklayer_ram::ram_disk(disk_size_in_bytes.unwrap_or(1024 * 1024 * 64), false)
582                    .unwrap();
583            devices.push(ramdisk);
584        }
585
586        StripedDisk::new(devices, chunk_size_in_bytes, logic_sector_count).unwrap()
587    }
588
589    fn create_guest_mem(size: usize) -> GuestMemory {
590        let mem = GuestMemory::allocate(size);
591
592        let mut index: usize = 0;
593        while index < size - 3 {
594            mem.write_at(
595                index as u64,
596                &[
597                    (index % 255) as u8,
598                    ((index >> 8) % 255) as u8,
599                    ((index >> 16) % 255) as u8,
600                    ((index >> 24) % 255) as u8,
601                ],
602            )
603            .unwrap();
604
605            index += 4;
606        }
607
608        mem
609    }
610
611    async fn validate_async_striping_disk_ios(
612        disk: &StripedDisk,
613        start_sectors: &[u64],
614        offset: &[usize],
615        length: usize,
616        write_gpns: &[u64],
617        read_gpns: &[u64],
618    ) {
619        for (start_sector, offset) in start_sectors.iter().zip(offset) {
620            validate_async_striping_disk_io(
621                disk,
622                *start_sector,
623                *offset,
624                length,
625                write_gpns,
626                read_gpns,
627            )
628            .await;
629        }
630    }
631
632    /// Validate the async strip disk I/O.
633    ///
634    /// # Arguments
635    /// * `disk` - The strip block device.
636    /// * `start_sector` - The sector index where the I/O shall start.
637    /// * `offset` - The I/O buffer offset.
638    /// * `length` - The total I/O length.
639    /// * `write_gpns` - The write GPN index.
640    /// * `read_gpns` - The read GPN index.
641    ///
642    async fn validate_async_striping_disk_io(
643        disk: &StripedDisk,
644        start_sector: u64,
645        offset: usize,
646        length: usize,
647        write_gpns: &[u64],
648        read_gpns: &[u64],
649    ) {
650        let page_count = (offset + length).div_ceil(HV_PAGE_SIZE as usize);
651        // Create continuous guest memory pages and initialize them with random data.
652        let guest_mem = create_guest_mem(page_count * 2 * HV_PAGE_SIZE as usize);
653        assert_eq!(write_gpns.len(), page_count);
654        assert_eq!(read_gpns.len(), page_count);
655
656        // Get the write buffer from guest memory, which has random data.
657        let write_buffers = OwnedRequestBuffers::new_unaligned(write_gpns, offset, length);
658        // Write the random data to disk.
659        disk.write_vectored(&write_buffers.buffer(&guest_mem), start_sector, false)
660            .await
661            .unwrap();
662
663        disk.sync_cache().await.unwrap();
664
665        // Get the read buffer from guest memory, which has random data.
666        let read_buffers = OwnedRequestBuffers::new_unaligned(read_gpns, offset, length);
667        // Read the data from disk back to read buffers.
668        disk.read_vectored(&read_buffers.buffer(&guest_mem), start_sector)
669            .await
670            .unwrap();
671
672        // Validate if the source and target match.
673        let mut source = vec![0u8; page_count * HV_PAGE_SIZE as usize];
674        guest_mem.read_at(0, &mut source).unwrap();
675
676        let mut target = vec![255u8; page_count * HV_PAGE_SIZE as usize];
677        guest_mem
678            .read_at(page_count as u64 * HV_PAGE_SIZE, &mut target)
679            .unwrap();
680
681        assert_eq!(
682            source[offset..(offset + length - 1)],
683            target[offset..(offset + length - 1)]
684        );
685
686        // async_trim test
687        // Since the discard function doesn't trim the file content, the test doesn't check if the file content is ZERO after the trim.
688        disk.unmap(
689            start_sector,
690            (length / disk.sector_size() as usize) as u64,
691            true,
692        )
693        .await
694        .unwrap();
695    }
696
697    #[async_test]
698    async fn run_async_striping_disk_io() {
699        // Create a striping disk with two disks, set the chunk size to 4K and total size to 256K.
700        let disk = new_strip_device(2, Some(128 * 1024), Some(4096), None);
701        assert_eq!(disk.sector_size, 512);
702        assert_eq!(disk.sector_count_per_chunk, 4096 / 512);
703        assert_eq!(disk.sector_count(), 128 * 1024 * 2 / 512);
704
705        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
706        validate_async_striping_disk_ios(
707            &disk,
708            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
709            &[0, 0, 0],
710            1024,
711            &[0],
712            &[1],
713        )
714        .await;
715
716        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
717        validate_async_striping_disk_ios(
718            &disk,
719            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
720            &[0, 0, 0],
721            512,
722            &[0],
723            &[1],
724        )
725        .await;
726
727        // Read 16K data from the beginning, middle, and end of the disk using paged aligned buffers.
728        validate_async_striping_disk_ios(
729            &disk,
730            &[0, disk.sector_count() / 2 - 16, disk.sector_count() - 32],
731            &[0, 0, 0],
732            16 * 1024,
733            &[0, 1, 2, 3],
734            &[4, 5, 6, 7],
735        )
736        .await;
737
738        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
739        validate_async_striping_disk_ios(
740            &disk,
741            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
742            &[512, 513, 1028],
743            512,
744            &[0],
745            &[1],
746        )
747        .await;
748
749        // Read 5K data from the beginning, middle, and end of the disk using un-aligned buffers.
750        validate_async_striping_disk_ios(
751            &disk,
752            &[0, disk.sector_count() / 2 - 5, disk.sector_count() - 10],
753            &[512, 513, 1028],
754            5 * 1024,
755            &[0, 1],
756            &[2, 3],
757        )
758        .await;
759    }
760
761    #[async_test]
762    async fn run_async_128k_striping_disk_io() {
763        // Create a striping disk with four disks, set the chunk size to 128K and total size to 4M.
764        let disk = new_strip_device(4, Some(1024 * 1024), Some(128 * 1024), None);
765        assert_eq!(disk.sector_size, 512);
766        assert_eq!(disk.sector_count_per_chunk, 128 * 1024 / 512);
767        assert_eq!(disk.sector_count(), 1024 * 1024 * 4 / 512);
768
769        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
770        validate_async_striping_disk_ios(
771            &disk,
772            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
773            &[0, 0, 0],
774            1024,
775            &[0],
776            &[1],
777        )
778        .await;
779
780        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
781        validate_async_striping_disk_ios(
782            &disk,
783            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
784            &[0, 0, 0],
785            512,
786            &[0],
787            &[1],
788        )
789        .await;
790
791        // Read 256K data from the beginning, middle, and end of the disk using paged aligned buffers.
792        let mut write_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
793            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
794        for (i, write_gpn) in write_gpns.iter_mut().enumerate() {
795            *write_gpn = i as u64;
796        }
797
798        let mut read_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
799            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
800        for (i, read_gpn) in read_gpns.iter_mut().enumerate() {
801            *read_gpn = (i + write_gpns.len()) as u64;
802        }
803
804        validate_async_striping_disk_ios(
805            &disk,
806            &[0, disk.sector_count() / 2 - 256, disk.sector_count() - 512],
807            &[0, 0, 0],
808            256 * 1024,
809            &write_gpns,
810            &read_gpns,
811        )
812        .await;
813
814        // Read 9K data from the beginning, middle, and end of the disk using un-aligned buffers.
815        validate_async_striping_disk_ios(
816            &disk,
817            &[0, disk.sector_count() / 2 - 9, disk.sector_count() - 18],
818            &[512, 513, 1028],
819            9 * 1024,
820            &[0, 1, 2],
821            &[3, 4, 5],
822        )
823        .await;
824
825        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
826        validate_async_striping_disk_ios(
827            &disk,
828            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
829            &[512, 513, 1028],
830            512,
831            &[0],
832            &[1],
833        )
834        .await;
835    }
836
837    #[async_test]
838    async fn run_async_64k_striping_disk_io() {
839        // Create a striping disk with thirty two disks, set the chunk size to 64K and total size to 32M.
840        let disk = new_strip_device(32, Some(1024 * 1024), Some(64 * 1024), None);
841        assert_eq!(disk.sector_size, 512);
842        assert_eq!(disk.sector_count_per_chunk, 64 * 1024 / 512);
843        assert_eq!(disk.sector_count(), 1024 * 1024 * 32 / 512);
844
845        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
846        validate_async_striping_disk_ios(
847            &disk,
848            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
849            &[0, 0, 0],
850            1024,
851            &[0],
852            &[1],
853        )
854        .await;
855
856        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
857        validate_async_striping_disk_ios(
858            &disk,
859            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 1],
860            &[0, 0, 0],
861            512,
862            &[0],
863            &[1],
864        )
865        .await;
866
867        // Read 256K data from the beginning, middle, and end of the disk using paged aligned buffers.
868        let mut write_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
869            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
870        for (i, write_gpn) in write_gpns.iter_mut().enumerate() {
871            *write_gpn = i as u64;
872        }
873
874        let mut read_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
875            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
876        for (i, read_gpn) in read_gpns.iter_mut().enumerate() {
877            *read_gpn = (i + write_gpns.len()) as u64;
878        }
879
880        validate_async_striping_disk_ios(
881            &disk,
882            &[0, disk.sector_count() / 2 - 256, disk.sector_count() - 512],
883            &[0, 0, 0],
884            256 * 1024,
885            &write_gpns,
886            &read_gpns,
887        )
888        .await;
889
890        // Read 9K data from the beginning, middle, and end of the disk using un-aligned buffers.
891        validate_async_striping_disk_ios(
892            &disk,
893            &[0, disk.sector_count() / 2 - 9, disk.sector_count() - 18],
894            &[512, 513, 1028],
895            9 * 1024,
896            &[0, 1, 2],
897            &[3, 4, 5],
898        )
899        .await;
900
901        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
902        validate_async_striping_disk_ios(
903            &disk,
904            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
905            &[512, 513, 1028],
906            512,
907            &[0],
908            &[1],
909        )
910        .await;
911    }
912
913    #[async_test]
914    async fn run_async_striping_disk_negative() {
915        // Creating striping disk using incompatible files shall fail.
916        let mut devices = Vec::new();
917        for i in 0..2 {
918            let ramdisk = disklayer_ram::ram_disk(1024 * 1024 + i * 64 * 1024, false).unwrap();
919            devices.push(ramdisk);
920        }
921
922        match StripedDisk::new(devices, None, None) {
923            Err(err) => {
924                println!(
925                    "Expected failure since underlying files are not compatible: {}",
926                    err
927                );
928            }
929            Ok(strip_disk) => panic!("{:?}", strip_disk),
930        }
931
932        // Creating striping disk using invalid chunk size shall fail.
933        let mut block_devices = Vec::new();
934        for _ in 0..2 {
935            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
936            block_devices.push(ramdisk);
937        }
938
939        match StripedDisk::new(block_devices, Some(4 * 1024 + 1), None) {
940            Err(err) => {
941                println!("Expected failure since chunk size is invalid: {}", err);
942            }
943            Ok(strip_disk) => panic!("{:?}", strip_disk),
944        }
945
946        // Creating striping disk using invalid logic sector count shall fail.
947        let mut block_devices = Vec::new();
948        for _ in 0..2 {
949            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
950            block_devices.push(ramdisk);
951        }
952
953        match StripedDisk::new(
954            block_devices,
955            Some(4 * 1024),
956            Some(1024 * 1024 * 2 / 512 + 1),
957        ) {
958            Err(err) => {
959                println!(
960                    "Expected failure since logic sector count is invalid: {}",
961                    err
962                );
963            }
964            Ok(strip_disk) => panic!("{:?}", strip_disk),
965        }
966
967        // Create a simple striping disk.
968        let mut block_devices = Vec::new();
969        for _ in 0..2 {
970            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
971            block_devices.push(ramdisk);
972        }
973
974        let disk = match StripedDisk::new(block_devices, Some(8 * 1024), None) {
975            Err(err) => panic!("{}", err),
976            Ok(strip_disk) => strip_disk,
977        };
978
979        assert_eq!(disk.sector_size, 512);
980        assert_eq!(disk.sector_count_per_chunk, 8 * 1024 / 512);
981        assert_eq!(disk.sector_count(), 1024 * 1024 * 2 / 512);
982
983        // write 1 sector off shall be caught.
984        let guest_mem = create_guest_mem(2 * HV_PAGE_SIZE as usize);
985        let write_buffers = OwnedRequestBuffers::new(&[0]);
986        let buf_sector_count = write_buffers.len().div_ceil(disk.sector_size as usize);
987        match disk
988            .write_vectored(
989                &write_buffers.buffer(&guest_mem),
990                disk.sector_count() - buf_sector_count as u64 + 1,
991                false,
992            )
993            .await
994        {
995            Ok(_) => {
996                panic!("{:?}", disk);
997            }
998            Err(err) => {
999                println!("Expected write failure because of 1 sector off: {:?}", err);
1000            }
1001        }
1002
1003        // read 1 sector off shall be caught.
1004        let guest_mem = create_guest_mem(2 * HV_PAGE_SIZE as usize);
1005        let read_buffers = OwnedRequestBuffers::new(&[1]);
1006        let buf_sector_count = read_buffers.len().div_ceil(disk.sector_size as usize);
1007        match disk
1008            .read_vectored(
1009                &write_buffers.buffer(&guest_mem),
1010                disk.sector_count() - buf_sector_count as u64 + 1,
1011            )
1012            .await
1013        {
1014            Ok(_) => {
1015                panic!("{:?}", disk);
1016            }
1017            Err(err) => {
1018                println!("Expected read failure because of 1 sector off: {:?}", err);
1019            }
1020        }
1021
1022        match disk
1023            .unmap(
1024                (disk.sector_count() - 2) * disk.sector_size as u64,
1025                disk.sector_size as u64 * 3,
1026                true,
1027            )
1028            .await
1029        {
1030            Ok(_) => {
1031                panic!("{:?}", disk);
1032            }
1033            Err(err) => {
1034                println!("Expected failure because of 1 sector off: {:?}", err);
1035            }
1036        }
1037
1038        // write 1 byte off shall be caught.
1039        let write_buffers =
1040            OwnedRequestBuffers::new_unaligned(&[0], 0, disk.sector_size as usize + 1);
1041        let buf_sector_count = write_buffers.len().div_ceil(disk.sector_size as usize);
1042        match disk
1043            .write_vectored(
1044                &write_buffers.buffer(&guest_mem),
1045                disk.sector_count() - buf_sector_count as u64 + 1,
1046                false,
1047            )
1048            .await
1049        {
1050            Ok(_) => {
1051                panic!("{:?}", disk);
1052            }
1053            Err(err) => {
1054                println!("Expected failure because of write 1 byte off: {:?}", err);
1055            }
1056        }
1057
1058        // read 1 byte off shall be caught.
1059        let read_buffers =
1060            OwnedRequestBuffers::new_unaligned(&[1], 0, disk.sector_size as usize + 1);
1061        let buf_sector_count = read_buffers.len().div_ceil(disk.sector_size as usize);
1062        match disk
1063            .read_vectored(
1064                &read_buffers.buffer(&guest_mem),
1065                disk.sector_count() - buf_sector_count as u64 + 1,
1066            )
1067            .await
1068        {
1069            Ok(_) => {
1070                panic!("{:?}", disk);
1071            }
1072            Err(err) => {
1073                println!("Expected failure because of read 1 byte off: {:?}", err);
1074            }
1075        }
1076
1077        match disk
1078            .unmap(
1079                (disk.sector_count() - 2) * disk.sector_size as u64,
1080                disk.sector_size as u64 * 2 + 1,
1081                true,
1082            )
1083            .await
1084        {
1085            Ok(_) => {
1086                panic!("{:?}", disk);
1087            }
1088            Err(err) => {
1089                println!("Expected failure because of 1 byte off: {:?}", err);
1090            }
1091        }
1092    }
1093
1094    #[async_test]
1095    async fn run_async_striping_disk_unmap() {
1096        let disk = new_strip_device(2, Some(128 * 1024 * 1024), Some(4096), None);
1097        assert_eq!(disk.sector_size, 512);
1098        assert_eq!(disk.sector_count_per_chunk, 4096 / 512);
1099        assert_eq!(disk.sector_count(), 128 * 1024 * 1024 * 2 / 512); //sector_count =  524288
1100        disk.unmap(0, 1, false).await.unwrap();
1101        disk.unmap(0, 524288, false).await.unwrap();
1102        disk.unmap(8, 524280, false).await.unwrap();
1103        disk.unmap(disk.sector_count() / 2 - 512, 1024, false)
1104            .await
1105            .unwrap();
1106        disk.unmap(disk.sector_count() - 1024, 1024, false)
1107            .await
1108            .unwrap();
1109        disk.unmap(0, disk.sector_count() / 2, false).await.unwrap();
1110        disk.unmap(disk.sector_count() / 2, disk.sector_count() / 2, false)
1111            .await
1112            .unwrap();
1113        disk.unmap(disk.sector_count() / 2 - 500, 1000, false)
1114            .await
1115            .unwrap();
1116        //this one should fail, out of bounds
1117        assert!(disk.unmap(disk.sector_count(), 100, false).await.is_err());
1118        //unmap zero sector
1119        disk.unmap(1000, 0, false).await.unwrap();
1120    }
1121}