disk_striped/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implements the [`DiskIo`] trait for virtual disks backed by multiple raw
5//! block devices.
6
7#![expect(missing_docs)]
8#![forbid(unsafe_code)]
9
10use async_trait::async_trait;
11use disk_backend::Disk;
12use disk_backend::DiskError;
13use disk_backend::DiskIo;
14use disk_backend::UnmapBehavior;
15use disk_backend::resolve::ResolveDiskParameters;
16use disk_backend::resolve::ResolvedDisk;
17use disk_backend_resources::StripedDiskHandle;
18use futures::future::try_join_all;
19use inspect::Inspect;
20use scsi_buffers::RequestBuffers;
21use std::fmt::Debug;
22use thiserror::Error;
23use vm_resource::AsyncResolveResource;
24use vm_resource::ResourceResolver;
25use vm_resource::declare_static_async_resolver;
26use vm_resource::kind::DiskHandleKind;
27
/// Resource resolver that turns a [`StripedDiskHandle`] into a disk backed by
/// a [`StripedDisk`].
pub struct StripedDiskResolver;
// Declares `StripedDiskResolver` as the static async resolver for
// `StripedDiskHandle` resources of kind `DiskHandleKind`.
declare_static_async_resolver!(StripedDiskResolver, (DiskHandleKind, StripedDiskHandle));
30
31#[async_trait]
32impl AsyncResolveResource<DiskHandleKind, StripedDiskHandle> for StripedDiskResolver {
33    type Output = ResolvedDisk;
34    type Error = anyhow::Error;
35
36    async fn resolve(
37        &self,
38        resolver: &ResourceResolver,
39        rsrc: StripedDiskHandle,
40        input: ResolveDiskParameters<'_>,
41    ) -> Result<Self::Output, Self::Error> {
42        let disks = try_join_all(
43            rsrc.devices
44                .into_iter()
45                .map(async |device| resolver.resolve(device, input).await.map(|r| r.0)),
46        )
47        .await?;
48        Ok(ResolvedDisk::new(StripedDisk::new(
49            disks,
50            rsrc.chunk_size_in_bytes,
51            rsrc.logic_sector_count,
52        )?)?)
53    }
54}
55
/// A virtual disk whose sectors are striped across multiple backing disks in
/// fixed-size chunks.
#[derive(Debug, Inspect)]
pub struct StripedDisk {
    /// The backing disks. Consecutive chunks are assigned to them round-robin.
    #[inspect(iter_by_index)]
    block_devices: Vec<Disk>,
    /// Sector size in bytes, identical for every backing disk.
    sector_size: u32,
    /// `sector_size.trailing_zeros()`; used to convert between byte counts and
    /// sector counts with shifts.
    sector_shift: u32,
    /// Number of sectors exposed by the striped disk.
    sector_count: u64,
    /// Read-only state, identical for every backing disk.
    read_only: bool,
    /// Number of sectors in one chunk.
    sector_count_per_chunk: u32,
    /// Unmap behavior reported for the striped disk as a whole.
    unmap_behavior: UnmapBehavior,
}
67
/// Default chunk size in bytes (128 KiB), used when the caller does not
/// specify one.
const CHUNK_SIZE_128K: u32 = 128 * 1024;
69
/// Errors returned by [`StripedDisk::new`].
#[derive(Error, Debug)]
pub enum NewDeviceError {
    /// No backing devices were supplied.
    #[error("Can't create a striping disk since the input device list is empty")]
    EmptyDeviceList,
    /// A backing device differs from the first device in sector size, sector
    /// count, or read-only state.
    // NOTE(review): the message only reports sector size and sector count; a
    // read-only mismatch also produces this error but is not mentioned.
    #[error(
        "The files are not compatible to form a striping disk: sector_size-{sector_size} != cur_sector_size-{cur_sector_size} OR sector_count-{sector_count} != cur_sector_count-{cur_sector_count}"
    )]
    DeviceNotCompatible {
        sector_size: u32,
        cur_sector_size: u32,
        sector_count: u64,
        cur_sector_count: u64,
    },
    /// The chunk size (first value) is zero or not a multiple of the sector
    /// size (second value).
    #[error(
        "Invalid chunk size: chunk_size_in_bytes-{0} is not multiple of logical_sector_size-{1}"
    )]
    InvalidChunkSize(u32, u32),
    /// The requested sector count (first value) exceeds the usable sector
    /// count of the backing devices (second value).
    #[error(
        "logic_sector_count is out of range: logic_sector_count.unwrap_or(total_sector_count)-{0} > total_sector_count-{1}"
    )]
    InvalidLogicSectorCount(u64, u64),
    /// The sector count (first value) is not a multiple of the stripe width in
    /// sectors, i.e. sectors per chunk times the number of disks (second
    /// value).
    #[error(
        "The striping disk size must be multiple of chunk size * number of disks. logic_sector_count-{0} != {1}."
    )]
    InvalidStripingDiskSize(u64, u64),
}
96
/// An error reported by one of the backing disks, tagged with the index of
/// that disk.
#[derive(Debug, Error)]
#[error("error in lower disk {index}")]
struct LowerError {
    /// Index of the failing disk within the striped disk's device list.
    index: usize,
    /// The error returned by that disk.
    #[source]
    err: DiskError,
}
104
105impl From<LowerError> for DiskError {
106    fn from(err: LowerError) -> Self {
107        // Treat all lower disk errors as IO errors--we don't currently handle
108        // specific errors from lower disks in a striped configuration.
109        DiskError::Io(std::io::Error::other(err))
110    }
111}
112
/// One piece of a striped-disk sector range that lies on a single backing
/// disk.
struct Chunk {
    /// The index of the disk that holds the chunk.
    disk_index: usize,
    /// The sector on that disk at which this piece starts.
    disk_sector_index: u64,
    /// The length of this piece in sectors. It can be less than
    /// `sector_count_per_chunk` for the first and last chunk.
    chunk_length_in_sectors: u32,
}
122
123impl StripedDisk {
124    fn get_chunk_iter(
125        &self,
126        start_sector: u64,
127        end_sector: u64,
128    ) -> Result<impl 'static + Iterator<Item = Chunk>, DiskError> {
129        if end_sector > self.sector_count {
130            return Err(DiskError::IllegalBlock);
131        }
132
133        let start_chunk_index = start_sector / self.sector_count_per_chunk as u64;
134        let end_chunk_index = end_sector.div_ceil(self.sector_count_per_chunk as u64);
135
136        // Use `Iterator::map` on `Range` so that the iterator implements
137        // `TrustedLen`, which ensures `Vec::from_iter` (i.e., `.collect()`) is
138        // optimized to use a single allocation and in-place construction.
139        let sector_count_per_chunk = self.sector_count_per_chunk;
140        let disk_count = self.block_devices.len();
141        let iter = (start_chunk_index..end_chunk_index).map(move |i| {
142            // The sector can be in middle of a chunk for the first chunk.
143            let sector_offset_in_chunk = if i == start_chunk_index {
144                start_sector % sector_count_per_chunk as u64
145            } else {
146                0
147            };
148
149            let disk_index = (i % (disk_count as u64)) as usize;
150            let disk_sector_index =
151                (i / disk_count as u64) * sector_count_per_chunk as u64 + sector_offset_in_chunk;
152
153            // The disk end offset can be in middle of the chunk for the last
154            // chunk.
155            let disk_end_offset_in_sectors = (i / disk_count as u64)
156                * sector_count_per_chunk as u64
157                + if i == end_chunk_index - 1 {
158                    end_sector - sector_count_per_chunk as u64 * i
159                } else {
160                    sector_count_per_chunk as u64
161                };
162
163            // The chunk length can be less than the sector_count_per_chunk for
164            // the first and last chunk.
165            let chunk_length_in_sectors = (disk_end_offset_in_sectors - disk_sector_index) as u32;
166
167            Chunk {
168                disk_index,
169                disk_sector_index,
170                chunk_length_in_sectors,
171            }
172        });
173
174        Ok(iter)
175    }
176}
177
178impl StripedDisk {
179    /// Constructs a new `StripedDisk` backed by the vector of file.
180    ///
181    /// # Arguments
182    /// * `devices` - The backing devices opened for raw access.
183    /// * 'chunk_size_in_bytes' - The chunk size of the striped disk, and the default value is 128K.
184    /// * 'logic_sector_count' - The sector count of the striped disk, and the default value is the sum of the sector count of the backing devices.
185    ///
186    pub fn new(
187        devices: Vec<Disk>,
188        chunk_size_in_bytes: Option<u32>,
189        logic_sector_count: Option<u64>,
190    ) -> Result<Self, NewDeviceError> {
191        if devices.is_empty() {
192            return Err(NewDeviceError::EmptyDeviceList);
193        }
194
195        let mut total_sector_count = 0;
196        let sector_size = devices[0].sector_size();
197        let sector_count = devices[0].sector_count();
198        let read_only = devices[0].is_read_only();
199        let chunk_size_in_bytes = chunk_size_in_bytes.unwrap_or(CHUNK_SIZE_128K);
200        if chunk_size_in_bytes == 0 || !chunk_size_in_bytes.is_multiple_of(sector_size) {
201            return Err(NewDeviceError::InvalidChunkSize(
202                chunk_size_in_bytes,
203                sector_size,
204            ));
205        }
206
207        let sector_count_per_chunk = (chunk_size_in_bytes / sector_size) as u64;
208
209        for device in &devices {
210            let cur_sector_size = device.sector_size();
211            let cur_sector_count = device.sector_count();
212            let cur_read_only = device.is_read_only();
213
214            if sector_size != cur_sector_size
215                || sector_count != cur_sector_count
216                || read_only != cur_read_only
217            {
218                return Err(NewDeviceError::DeviceNotCompatible {
219                    sector_size,
220                    cur_sector_size,
221                    sector_count,
222                    cur_sector_count,
223                });
224            }
225
226            total_sector_count +=
227                (cur_sector_count / sector_count_per_chunk) * sector_count_per_chunk;
228        }
229
230        if total_sector_count % (devices.len() as u64 * sector_count_per_chunk) != 0 {
231            return Err(NewDeviceError::InvalidStripingDiskSize(
232                total_sector_count,
233                devices.len() as u64 * sector_count_per_chunk,
234            ));
235        }
236
237        let logic_sector_count = logic_sector_count.unwrap_or(total_sector_count);
238        if logic_sector_count > total_sector_count {
239            return Err(NewDeviceError::InvalidLogicSectorCount(
240                logic_sector_count,
241                total_sector_count,
242            ));
243        }
244
245        if !logic_sector_count.is_multiple_of(devices.len() as u64 * sector_count_per_chunk) {
246            return Err(NewDeviceError::InvalidStripingDiskSize(
247                logic_sector_count,
248                devices.len() as u64 * sector_count_per_chunk,
249            ));
250        }
251
252        // Unify the unmap behavior of all devices. If all disks specify the
253        // same behavior, use it. Otherwise, report unspecified behavior and
254        // send unmap to all disks.
255        let unmap_behavior = devices.iter().fold(UnmapBehavior::Zeroes, |rest, d| {
256            match (rest, d.unmap_behavior()) {
257                (UnmapBehavior::Zeroes, UnmapBehavior::Zeroes) => UnmapBehavior::Zeroes,
258                (UnmapBehavior::Ignored, UnmapBehavior::Ignored) => UnmapBehavior::Ignored,
259                _ => UnmapBehavior::Unspecified,
260            }
261        });
262
263        let stripped_block_device = StripedDisk {
264            block_devices: devices,
265            sector_size,
266            sector_shift: sector_size.trailing_zeros(),
267            sector_count: logic_sector_count,
268            read_only,
269            sector_count_per_chunk: (sector_count_per_chunk as u32),
270            unmap_behavior,
271        };
272
273        tracing::info!("stripped block device start completed.");
274        Ok(stripped_block_device)
275    }
276}
277
278impl DiskIo for StripedDisk {
279    fn disk_type(&self) -> &str {
280        "striped"
281    }
282
283    fn sector_count(&self) -> u64 {
284        self.sector_count
285    }
286
287    fn sector_size(&self) -> u32 {
288        self.sector_size
289    }
290
291    fn is_read_only(&self) -> bool {
292        self.read_only
293    }
294
295    fn disk_id(&self) -> Option<[u8; 16]> {
296        None
297    }
298
299    fn physical_sector_size(&self) -> u32 {
300        self.block_devices
301            .iter()
302            .map(|d| d.physical_sector_size())
303            .max()
304            .unwrap()
305    }
306
307    fn is_fua_respected(&self) -> bool {
308        self.block_devices.iter().all(|d| d.is_fua_respected())
309    }
310
311    async fn eject(&self) -> Result<(), DiskError> {
312        let futures = self.block_devices.iter().map(|disk| disk.eject()).collect();
313        await_all_and_check(futures).await?;
314        Ok(())
315    }
316
317    async fn read_vectored(
318        &self,
319        buffers: &RequestBuffers<'_>,
320        start_sector: u64,
321    ) -> Result<(), DiskError> {
322        let buf_total_size = buffers.len();
323        let end_sector = start_sector + ((buf_total_size as u64) >> self.sector_shift);
324        let chunk_iter = self.get_chunk_iter(start_sector, end_sector)?;
325
326        let mut cur_buf_offset: usize = 0;
327        let all_futures = chunk_iter
328            .map(|chunk| {
329                let disk = &self.block_devices[chunk.disk_index];
330
331                let buf_len = (chunk.chunk_length_in_sectors as usize) << self.sector_shift;
332
333                let sub_buffers = buffers.subrange(cur_buf_offset, buf_len);
334                cur_buf_offset += buf_len;
335
336                async move {
337                    disk.read_vectored(&sub_buffers, chunk.disk_sector_index)
338                        .await
339                        .map_err(|err| LowerError {
340                            index: chunk.disk_index,
341                            err,
342                        })
343                }
344            })
345            .collect();
346
347        assert_eq!(cur_buf_offset, buf_total_size);
348
349        await_all_and_check(all_futures).await?;
350        Ok(())
351    }
352
353    async fn write_vectored(
354        &self,
355        buffers: &RequestBuffers<'_>,
356        start_sector: u64,
357        fua: bool,
358    ) -> Result<(), DiskError> {
359        let buf_total_size = buffers.len();
360        let end_sector = start_sector + ((buf_total_size as u64) >> self.sector_shift);
361        let chunk_iter = self.get_chunk_iter(start_sector, end_sector)?;
362
363        let mut cur_buf_offset: usize = 0;
364        let all_futures = chunk_iter
365            .map(|chunk| {
366                let disk = &self.block_devices[chunk.disk_index];
367
368                let buf_len = (chunk.chunk_length_in_sectors as usize) << self.sector_shift;
369
370                let sub_buffers = buffers.subrange(cur_buf_offset, buf_len);
371                cur_buf_offset += buf_len;
372
373                async move {
374                    disk.write_vectored(&sub_buffers, chunk.disk_sector_index, fua)
375                        .await
376                        .map_err(|err| LowerError {
377                            index: chunk.disk_index,
378                            err,
379                        })
380                }
381            })
382            .collect();
383
384        assert_eq!(cur_buf_offset, buf_total_size);
385
386        await_all_and_check(all_futures).await?;
387        Ok(())
388    }
389
390    async fn sync_cache(&self) -> Result<(), DiskError> {
391        let all_futures = self
392            .block_devices
393            .iter()
394            .enumerate()
395            .map(|(disk_index, disk)| async move {
396                disk.sync_cache().await.map_err(|err| LowerError {
397                    index: disk_index,
398                    err,
399                })
400            })
401            .collect();
402        await_all_and_check(all_futures).await?;
403        Ok(())
404    }
405
406    async fn unmap(
407        &self,
408        start_sector: u64,
409        sector_count: u64,
410        block_level_only: bool,
411    ) -> Result<(), DiskError> {
412        let end_sector = start_sector + sector_count;
413        let chunk_iter = match self.get_chunk_iter(start_sector, end_sector) {
414            Ok(iter) => iter,
415            Err(err) => {
416                return Err(err);
417            }
418        };
419
420        // Create a vector to group chunks by disk index
421        let mut disk_sectors: Vec<(u64, u64)> = vec![(0, 0); self.block_devices.len()];
422        let mut trimmed_sectors: u64 = 0;
423
424        for chunk in chunk_iter {
425            let start = chunk.disk_sector_index;
426            let length = chunk.chunk_length_in_sectors as u64;
427            let (disk_start, disk_len) = &mut disk_sectors[chunk.disk_index];
428            if *disk_len == 0 {
429                *disk_start = start; // set the start of the unmap operation
430            }
431            *disk_len += length; // add the length to the total
432
433            trimmed_sectors += length;
434        }
435
436        assert_eq!(trimmed_sectors, sector_count);
437
438        // Create a future for each disk's combined unmap operations
439        let all_futures = disk_sectors
440            .iter()
441            .enumerate()
442            .map(|(disk_index, &(start, length))| {
443                let disk = &self.block_devices[disk_index];
444                async move {
445                    if length > 0 {
446                        disk.unmap(start, length, block_level_only).await
447                    } else {
448                        Ok(())
449                    }
450                }
451            })
452            .collect();
453
454        await_all_and_check(all_futures).await?;
455        Ok(())
456    }
457
458    fn unmap_behavior(&self) -> UnmapBehavior {
459        self.unmap_behavior
460    }
461
462    fn optimal_unmap_sectors(&self) -> u32 {
463        self.block_devices
464            .iter()
465            .map(|disk| disk.optimal_unmap_sectors())
466            .max()
467            .unwrap_or(1)
468    }
469}
470
471/// Waits for all IOs to complete and checks for errors.
472///
473/// Use `JoinAll` to wait for all IOs even if one fails. This is necessary to
474/// avoid dropping IOs while they are in flight.
475async fn await_all_and_check<F, E>(futures: futures::future::JoinAll<F>) -> Result<(), E>
476where
477    F: Future<Output = Result<(), E>>,
478{
479    for result in futures.await {
480        result?;
481    }
482    Ok(())
483}
484
485#[cfg(test)]
486mod tests {
487    use super::*;
488    use guestmem::GuestMemory;
489    use hvdef::HV_PAGE_SIZE;
490    use pal_async::async_test;
491    use scsi_buffers::OwnedRequestBuffers;
492
493    fn new_strip_device(
494        disk_count: u8,
495        disk_size_in_bytes: Option<u64>,
496        chunk_size_in_bytes: Option<u32>,
497        logic_sector_count: Option<u64>,
498    ) -> StripedDisk {
499        let mut devices = Vec::new();
500
501        for _i in 0..disk_count {
502            let ramdisk =
503                disklayer_ram::ram_disk(disk_size_in_bytes.unwrap_or(1024 * 1024 * 64), false)
504                    .unwrap();
505            devices.push(ramdisk);
506        }
507
508        StripedDisk::new(devices, chunk_size_in_bytes, logic_sector_count).unwrap()
509    }
510
511    fn create_guest_mem(size: usize) -> GuestMemory {
512        let mem = GuestMemory::allocate(size);
513
514        let mut index: usize = 0;
515        while index < size - 3 {
516            mem.write_at(
517                index as u64,
518                &[
519                    (index % 255) as u8,
520                    ((index >> 8) % 255) as u8,
521                    ((index >> 16) % 255) as u8,
522                    ((index >> 24) % 255) as u8,
523                ],
524            )
525            .unwrap();
526
527            index += 4;
528        }
529
530        mem
531    }
532
533    async fn validate_async_striping_disk_ios(
534        disk: &StripedDisk,
535        start_sectors: &[u64],
536        offset: &[usize],
537        length: usize,
538        write_gpns: &[u64],
539        read_gpns: &[u64],
540    ) {
541        for (start_sector, offset) in start_sectors.iter().zip(offset) {
542            validate_async_striping_disk_io(
543                disk,
544                *start_sector,
545                *offset,
546                length,
547                write_gpns,
548                read_gpns,
549            )
550            .await;
551        }
552    }
553
554    /// Validate the async strip disk I/O.
555    ///
556    /// # Arguments
557    /// * `disk` - The strip block device.
558    /// * `start_sector` - The sector index where the I/O shall start.
559    /// * `offset` - The I/O buffer offset.
560    /// * `length` - The total I/O length.
561    /// * `write_gpns` - The write GPN index.
562    /// * `read_gpns` - The read GPN index.
563    ///
564    async fn validate_async_striping_disk_io(
565        disk: &StripedDisk,
566        start_sector: u64,
567        offset: usize,
568        length: usize,
569        write_gpns: &[u64],
570        read_gpns: &[u64],
571    ) {
572        let page_count = (offset + length).div_ceil(HV_PAGE_SIZE as usize);
573        // Create continuous guest memory pages and initialize them with random data.
574        let guest_mem = create_guest_mem(page_count * 2 * HV_PAGE_SIZE as usize);
575        assert_eq!(write_gpns.len(), page_count);
576        assert_eq!(read_gpns.len(), page_count);
577
578        // Get the write buffer from guest memory, which has random data.
579        let write_buffers = OwnedRequestBuffers::new_unaligned(write_gpns, offset, length);
580        // Write the random data to disk.
581        disk.write_vectored(&write_buffers.buffer(&guest_mem), start_sector, false)
582            .await
583            .unwrap();
584
585        disk.sync_cache().await.unwrap();
586
587        // Get the read buffer from guest memory, which has random data.
588        let read_buffers = OwnedRequestBuffers::new_unaligned(read_gpns, offset, length);
589        // Read the data from disk back to read buffers.
590        disk.read_vectored(&read_buffers.buffer(&guest_mem), start_sector)
591            .await
592            .unwrap();
593
594        // Validate if the source and target match.
595        let mut source = vec![0u8; page_count * HV_PAGE_SIZE as usize];
596        guest_mem.read_at(0, &mut source).unwrap();
597
598        let mut target = vec![255u8; page_count * HV_PAGE_SIZE as usize];
599        guest_mem
600            .read_at(page_count as u64 * HV_PAGE_SIZE, &mut target)
601            .unwrap();
602
603        assert_eq!(
604            source[offset..(offset + length - 1)],
605            target[offset..(offset + length - 1)]
606        );
607
608        // async_trim test
609        // Since the discard function doesn't trim the file content, the test doesn't check if the file content is ZERO after the trim.
610        disk.unmap(
611            start_sector,
612            (length / disk.sector_size() as usize) as u64,
613            true,
614        )
615        .await
616        .unwrap();
617    }
618
619    #[async_test]
620    async fn run_async_striping_disk_io() {
621        // Create a striping disk with two disks, set the chunk size to 4K and total size to 256K.
622        let disk = new_strip_device(2, Some(128 * 1024), Some(4096), None);
623        assert_eq!(disk.sector_size, 512);
624        assert_eq!(disk.sector_count_per_chunk, 4096 / 512);
625        assert_eq!(disk.sector_count(), 128 * 1024 * 2 / 512);
626
627        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
628        validate_async_striping_disk_ios(
629            &disk,
630            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
631            &[0, 0, 0],
632            1024,
633            &[0],
634            &[1],
635        )
636        .await;
637
638        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
639        validate_async_striping_disk_ios(
640            &disk,
641            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
642            &[0, 0, 0],
643            512,
644            &[0],
645            &[1],
646        )
647        .await;
648
649        // Read 16K data from the beginning, middle, and end of the disk using paged aligned buffers.
650        validate_async_striping_disk_ios(
651            &disk,
652            &[0, disk.sector_count() / 2 - 16, disk.sector_count() - 32],
653            &[0, 0, 0],
654            16 * 1024,
655            &[0, 1, 2, 3],
656            &[4, 5, 6, 7],
657        )
658        .await;
659
660        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
661        validate_async_striping_disk_ios(
662            &disk,
663            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
664            &[512, 513, 1028],
665            512,
666            &[0],
667            &[1],
668        )
669        .await;
670
671        // Read 5K data from the beginning, middle, and end of the disk using un-aligned buffers.
672        validate_async_striping_disk_ios(
673            &disk,
674            &[0, disk.sector_count() / 2 - 5, disk.sector_count() - 10],
675            &[512, 513, 1028],
676            5 * 1024,
677            &[0, 1],
678            &[2, 3],
679        )
680        .await;
681    }
682
683    #[async_test]
684    async fn run_async_128k_striping_disk_io() {
685        // Create a striping disk with four disks, set the chunk size to 128K and total size to 4M.
686        let disk = new_strip_device(4, Some(1024 * 1024), Some(128 * 1024), None);
687        assert_eq!(disk.sector_size, 512);
688        assert_eq!(disk.sector_count_per_chunk, 128 * 1024 / 512);
689        assert_eq!(disk.sector_count(), 1024 * 1024 * 4 / 512);
690
691        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
692        validate_async_striping_disk_ios(
693            &disk,
694            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
695            &[0, 0, 0],
696            1024,
697            &[0],
698            &[1],
699        )
700        .await;
701
702        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
703        validate_async_striping_disk_ios(
704            &disk,
705            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
706            &[0, 0, 0],
707            512,
708            &[0],
709            &[1],
710        )
711        .await;
712
713        // Read 256K data from the beginning, middle, and end of the disk using paged aligned buffers.
714        let mut write_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
715            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
716        for (i, write_gpn) in write_gpns.iter_mut().enumerate() {
717            *write_gpn = i as u64;
718        }
719
720        let mut read_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
721            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
722        for (i, read_gpn) in read_gpns.iter_mut().enumerate() {
723            *read_gpn = (i + write_gpns.len()) as u64;
724        }
725
726        validate_async_striping_disk_ios(
727            &disk,
728            &[0, disk.sector_count() / 2 - 256, disk.sector_count() - 512],
729            &[0, 0, 0],
730            256 * 1024,
731            &write_gpns,
732            &read_gpns,
733        )
734        .await;
735
736        // Read 9K data from the beginning, middle, and end of the disk using un-aligned buffers.
737        validate_async_striping_disk_ios(
738            &disk,
739            &[0, disk.sector_count() / 2 - 9, disk.sector_count() - 18],
740            &[512, 513, 1028],
741            9 * 1024,
742            &[0, 1, 2],
743            &[3, 4, 5],
744        )
745        .await;
746
747        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
748        validate_async_striping_disk_ios(
749            &disk,
750            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
751            &[512, 513, 1028],
752            512,
753            &[0],
754            &[1],
755        )
756        .await;
757    }
758
759    #[async_test]
760    async fn run_async_64k_striping_disk_io() {
761        // Create a striping disk with thirty two disks, set the chunk size to 64K and total size to 32M.
762        let disk = new_strip_device(32, Some(1024 * 1024), Some(64 * 1024), None);
763        assert_eq!(disk.sector_size, 512);
764        assert_eq!(disk.sector_count_per_chunk, 64 * 1024 / 512);
765        assert_eq!(disk.sector_count(), 1024 * 1024 * 32 / 512);
766
767        // Read 1K data from the beginning, middle, and end of the disk using paged aligned buffers.
768        validate_async_striping_disk_ios(
769            &disk,
770            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 2],
771            &[0, 0, 0],
772            1024,
773            &[0],
774            &[1],
775        )
776        .await;
777
778        // Read 512 bytes data from the beginning, middle, and end of the disk using aligned buffers.
779        validate_async_striping_disk_ios(
780            &disk,
781            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 1],
782            &[0, 0, 0],
783            512,
784            &[0],
785            &[1],
786        )
787        .await;
788
789        // Read 256K data from the beginning, middle, and end of the disk using paged aligned buffers.
790        let mut write_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
791            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
792        for (i, write_gpn) in write_gpns.iter_mut().enumerate() {
793            *write_gpn = i as u64;
794        }
795
796        let mut read_gpns: [u64; 256 * 1024 / HV_PAGE_SIZE as usize] =
797            [0; 256 * 1024 / HV_PAGE_SIZE as usize];
798        for (i, read_gpn) in read_gpns.iter_mut().enumerate() {
799            *read_gpn = (i + write_gpns.len()) as u64;
800        }
801
802        validate_async_striping_disk_ios(
803            &disk,
804            &[0, disk.sector_count() / 2 - 256, disk.sector_count() - 512],
805            &[0, 0, 0],
806            256 * 1024,
807            &write_gpns,
808            &read_gpns,
809        )
810        .await;
811
812        // Read 9K data from the beginning, middle, and end of the disk using un-aligned buffers.
813        validate_async_striping_disk_ios(
814            &disk,
815            &[0, disk.sector_count() / 2 - 9, disk.sector_count() - 18],
816            &[512, 513, 1028],
817            9 * 1024,
818            &[0, 1, 2],
819            &[3, 4, 5],
820        )
821        .await;
822
823        // Read 512 bytes data from the beginning, middle, and end of the disk using un-aligned buffers.
824        validate_async_striping_disk_ios(
825            &disk,
826            &[0, disk.sector_count() / 2 - 1, disk.sector_count() - 4],
827            &[512, 513, 1028],
828            512,
829            &[0],
830            &[1],
831        )
832        .await;
833    }
834
835    #[async_test]
836    async fn run_async_striping_disk_negative() {
837        // Creating striping disk using incompatible files shall fail.
838        let mut devices = Vec::new();
839        for i in 0..2 {
840            let ramdisk = disklayer_ram::ram_disk(1024 * 1024 + i * 64 * 1024, false).unwrap();
841            devices.push(ramdisk);
842        }
843
844        StripedDisk::new(devices, None, None)
845            .expect_err("Expected failure because of incompatible files");
846
847        // Creating striping disk using invalid chunk size shall fail.
848        let mut block_devices = Vec::new();
849        for _ in 0..2 {
850            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
851            block_devices.push(ramdisk);
852        }
853
854        StripedDisk::new(block_devices, Some(4 * 1024 + 1), None)
855            .expect_err("Expected failure since chunk size is invalid");
856
857        // Creating striping disk using invalid logic sector count shall fail.
858        let mut block_devices = Vec::new();
859        for _ in 0..2 {
860            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
861            block_devices.push(ramdisk);
862        }
863
864        StripedDisk::new(
865            block_devices,
866            Some(4 * 1024),
867            Some(1024 * 1024 * 2 / 512 + 1),
868        )
869        .expect_err("Expected failure since logic sector count is invalid");
870
871        // Create a simple striping disk.
872        let mut block_devices = Vec::new();
873        for _ in 0..2 {
874            let ramdisk = disklayer_ram::ram_disk(1024 * 1024, false).unwrap();
875            block_devices.push(ramdisk);
876        }
877
878        let disk = StripedDisk::new(block_devices, Some(8 * 1024), None)
879            .expect("Failed to create striping disk");
880
881        assert_eq!(disk.sector_size, 512);
882        assert_eq!(disk.sector_count_per_chunk, 8 * 1024 / 512);
883        assert_eq!(disk.sector_count(), 1024 * 1024 * 2 / 512);
884
885        // write 1 sector off shall be caught.
886        let guest_mem = create_guest_mem(2 * HV_PAGE_SIZE as usize);
887        let write_buffers = OwnedRequestBuffers::new(&[0]);
888        let buf_sector_count = write_buffers.len().div_ceil(disk.sector_size as usize);
889        disk.write_vectored(
890            &write_buffers.buffer(&guest_mem),
891            disk.sector_count() - buf_sector_count as u64 + 1,
892            false,
893        )
894        .await
895        .expect_err("Expected write failure because of 1 sector off");
896
897        // read 1 sector off shall be caught.
898        let guest_mem = create_guest_mem(2 * HV_PAGE_SIZE as usize);
899        let read_buffers = OwnedRequestBuffers::new(&[1]);
900        let buf_sector_count = read_buffers.len().div_ceil(disk.sector_size as usize);
901        disk.read_vectored(
902            &read_buffers.buffer(&guest_mem),
903            disk.sector_count() - buf_sector_count as u64 + 1,
904        )
905        .await
906        .expect_err("Expected read failure because of 1 sector off");
907
908        disk.unmap(disk.sector_count() - 2, 3, true)
909            .await
910            .expect_err("Expected unmap failure because of 1 sector off");
911    }
912
913    #[async_test]
914    async fn run_async_striping_disk_unmap() {
915        let disk = new_strip_device(2, Some(128 * 1024 * 1024), Some(4096), None);
916        assert_eq!(disk.sector_size, 512);
917        assert_eq!(disk.sector_count_per_chunk, 4096 / 512);
918        assert_eq!(disk.sector_count(), 128 * 1024 * 1024 * 2 / 512); //sector_count =  524288
919        disk.unmap(0, 1, false).await.unwrap();
920        disk.unmap(0, 524288, false).await.unwrap();
921        disk.unmap(8, 524280, false).await.unwrap();
922        disk.unmap(disk.sector_count() / 2 - 512, 1024, false)
923            .await
924            .unwrap();
925        disk.unmap(disk.sector_count() - 1024, 1024, false)
926            .await
927            .unwrap();
928        disk.unmap(0, disk.sector_count() / 2, false).await.unwrap();
929        disk.unmap(disk.sector_count() / 2, disk.sector_count() / 2, false)
930            .await
931            .unwrap();
932        disk.unmap(disk.sector_count() / 2 - 500, 1000, false)
933            .await
934            .unwrap();
935        //this one should fail, out of bounds
936        assert!(disk.unmap(disk.sector_count(), 100, false).await.is_err());
937        //unmap zero sector
938        disk.unmap(1000, 0, false).await.unwrap();
939    }
940}