1#![expect(missing_docs)]
5#![cfg(target_os = "linux")]
6
7#![expect(unsafe_code)]
12
13mod ioctl;
14mod nvme;
15pub mod resolver;
16
17use anyhow::Context;
18use blocking::unblock;
19use disk_backend::DiskError;
20use disk_backend::DiskIo;
21use disk_backend::UnmapBehavior;
22use disk_backend::pr::PersistentReservation;
23use disk_backend::pr::ReservationCapabilities;
24use disk_backend::pr::ReservationReport;
25use disk_backend::pr::ReservationType;
26use fs_err::PathExt;
27use guestmem::MemoryRead;
28use guestmem::MemoryWrite;
29use inspect::Inspect;
30use io_uring::opcode;
31use io_uring::types;
32use nvme::check_nvme_status;
33use nvme_spec::nvm;
34use pal::unix::affinity;
35use pal_async::driver::Driver;
36use scsi_buffers::BounceBuffer;
37use scsi_buffers::BounceBufferTracker;
38use scsi_buffers::RequestBuffers;
39use std::fmt::Debug;
40use std::fs;
41use std::os::unix::io::AsRawFd;
42use std::os::unix::prelude::FileTypeExt;
43use std::os::unix::prelude::MetadataExt;
44use std::path::Path;
45use std::path::PathBuf;
46use std::str::FromStr;
47use std::sync::Arc;
48use std::sync::atomic::AtomicU64;
49use std::sync::atomic::Ordering;
50use thiserror::Error;
51use uevent::CallbackHandle;
52use uevent::UeventListener;
53
54pub fn open_file_for_block(
57 path: &Path,
58 read_only: bool,
59 direct: bool,
60) -> std::io::Result<fs::File> {
61 use std::os::unix::prelude::*;
62
63 tracing::debug!(?path, read_only, direct, "open_file_for_block");
64 let mut opts = fs::OpenOptions::new();
65 opts.read(true).write(!read_only);
66 if direct {
67 opts.custom_flags(libc::O_DIRECT);
68 }
69 opts.open(path)
70}
71
/// A bounce buffer that was either acquired through a
/// [`BounceBufferTracker`] or allocated on its own.
enum MaybeBounceBuffer<'a> {
    /// Buffer handed out by the device's bounce buffer tracker.
    Tracked(scsi_buffers::TrackedBounceBuffer<'a>),
    /// Standalone buffer, used when the device has no tracker configured.
    Untracked(BounceBuffer),
}
78
79impl MaybeBounceBuffer<'_> {
80 fn io_vecs(&self) -> &[scsi_buffers::IoBuffer<'_>] {
81 match self {
82 Self::Tracked(t) => t.buffer.io_vecs(),
83 Self::Untracked(b) => b.io_vecs(),
84 }
85 }
86
87 fn as_mut_bytes(&mut self) -> &mut [u8] {
88 match self {
89 Self::Tracked(t) => t.buffer.as_mut_bytes(),
90 Self::Untracked(b) => b.as_mut_bytes(),
91 }
92 }
93}
94
/// A disk backed by a Linux block device or a regular file, with I/O issued
/// through io_uring.
#[derive(Inspect)]
#[inspect(extra = "BlockDevice::inspect_extra")]
pub struct BlockDevice {
    /// The opened backing file or block device. Shared so that blocking
    /// ioctl work can hold its own handle.
    file: Arc<fs::File>,
    /// Logical sector size in bytes.
    sector_size: u32,
    /// Physical sector size in bytes; never smaller than `sector_size`.
    physical_sector_size: u32,
    /// `log2(sector_size)`, used to convert between sectors and bytes.
    sector_shift: u32,
    /// Cached disk size in sectors; refreshed when a resize is observed.
    sector_count: AtomicU64,
    /// Discard granularity expressed in sectors; zero means unmap requests
    /// are ignored.
    optimal_unmap_sectors: u32,
    /// Whether the disk is reported as read-only.
    read_only: bool,
    /// Driver used to submit io_uring operations.
    #[inspect(skip)]
    driver: Box<dyn Driver>,
    /// What kind of device backs this disk.
    #[inspect(flatten)]
    device_type: DeviceType,
    /// Whether persistent reservations are exposed through `pr()`.
    supports_pr: bool,
    /// Whether the device reports FUA support.
    supports_fua: bool,
    /// Keeps the uevent resize callback registered for the lifetime of the
    /// device (the handle is never read).
    #[inspect(skip)]
    _uevent_filter: Option<CallbackHandle>,
    /// Counter bumped by the uevent callback each time a resize is reported.
    resize_epoch: Arc<ResizeEpoch>,
    /// The most recent resize epoch for which `sector_count` was refreshed.
    resized_acked: AtomicU64,
    /// Optional tracker from which bounce buffers are acquired.
    #[inspect(skip)]
    bounce_buffer_tracker: Option<Arc<BounceBufferTracker>>,
    /// When set, every read and write goes through a bounce buffer even if
    /// the request buffers are aligned.
    always_bounce: bool,
}
120
/// Resize notification state shared between the uevent callback and the
/// device.
#[derive(Inspect, Debug, Default)]
#[inspect(transparent)]
struct ResizeEpoch {
    /// Incremented once per reported resize.
    epoch: AtomicU64,
    /// Notified after `epoch` is incremented so that resize waiters wake up.
    #[inspect(skip)]
    event: event_listener::Event,
}
128
/// The kind of object backing a [`BlockDevice`].
#[derive(Debug, Copy, Clone, Inspect)]
#[inspect(tag = "device_type")]
enum DeviceType {
    /// A regular file.
    File {
        /// Size of the file in 512-byte sectors, captured when the device
        /// was created.
        sector_count: u64,
    },
    /// A block device that was not identified as an NVMe namespace.
    UnknownBlock,
    /// An NVMe namespace.
    NVMe {
        /// Namespace ID read from sysfs.
        ns_id: u32,
        /// Reservation capabilities taken from the identify namespace data.
        rescap: nvm::ReservationCapabilities,
    },
}
141
142impl BlockDevice {
143 fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
144 match self.device_type {
145 DeviceType::NVMe { .. } => {
146 resp.field_mut_with("interrupt_aggregation", |new_value| {
147 self.inspect_interrupt_coalescing(new_value)
148 });
149 }
150 DeviceType::UnknownBlock => {}
151 DeviceType::File { .. } => {}
152 }
153 }
154
155 fn inspect_interrupt_coalescing(&self, new_value: Option<&str>) -> anyhow::Result<String> {
156 let coalescing = if let Some(new_value) = new_value {
157 let coalescing = (|| {
158 let (threshold, time) = new_value.split_once(' ')?;
159 Some(
160 nvme::InterruptCoalescing::new()
161 .with_aggregation_threshold(threshold.parse().ok()?)
162 .with_aggregation_time(time.parse().ok()?),
163 )
164 })()
165 .context("expected `<aggregation_threshold> <aggregation_time>`")?;
166 nvme::nvme_set_features_interrupt_coalescing(&self.file, coalescing)?;
167 coalescing
168 } else if let Ok(coalescing) = nvme::nvme_get_features_interrupt_coalescing(&self.file) {
169 coalescing
170 } else {
171 return Ok("not supported".into());
172 };
173 Ok(format!(
174 "{} {}",
175 coalescing.aggregation_threshold(),
176 coalescing.aggregation_time()
177 ))
178 }
179}
180
/// Errors returned by [`BlockDevice::new`].
#[derive(Debug, Error)]
pub enum NewDeviceError {
    /// A disk-level error occurred while setting up the device, such as
    /// failing to read the file's metadata.
    #[error("block device ioctl error")]
    IoctlError(#[from] DiskError),
    /// The device or file metadata could not be read or failed validation.
    #[error("failed to read device metadata")]
    DeviceMetadata(#[source] anyhow::Error),
    /// The file is neither a regular file nor a block device.
    #[error("invalid file type, not a file or block device")]
    InvalidFileType,
    /// The disk size is invalid.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("invalid disk size {0:#x}")]
    InvalidDiskSize(u64),
    /// The driver's io_uring probe reported the read opcode as unsupported.
    #[error("driver does not support io-uring")]
    NoIoUring,
}
195
196impl BlockDevice {
197 pub async fn new(
205 file: fs::File,
206 read_only: bool,
207 driver: impl Driver,
208 uevent_listener: Option<&UeventListener>,
209 bounce_buffer_tracker: Option<Arc<BounceBufferTracker>>,
210 always_bounce: bool,
211 ) -> Result<BlockDevice, NewDeviceError> {
212 if !driver.io_uring_probe(opcode::Read::CODE) {
213 return Err(NewDeviceError::NoIoUring);
214 }
215 assert!(driver.io_uring_probe(opcode::Write::CODE));
216 assert!(driver.io_uring_probe(opcode::Readv::CODE));
217 assert!(driver.io_uring_probe(opcode::Writev::CODE));
218 assert!(driver.io_uring_probe(opcode::Fsync::CODE));
219
220 let metadata = file.metadata().map_err(DiskError::Io)?;
221
222 let mut uevent_filter = None;
223 let resize_epoch = Arc::new(ResizeEpoch::default());
224
225 let devmeta = if metadata.file_type().is_block_device() {
226 let rdev = metadata.rdev();
227 let (major, minor) = (libc::major(rdev), libc::minor(rdev));
228
229 if let Some(uevent_listener) = uevent_listener {
231 let resize_epoch = resize_epoch.clone();
232 uevent_filter = Some(
233 uevent_listener
234 .add_block_resize_callback(major, minor, {
235 move || {
236 tracing::info!(major, minor, "disk resized");
237 resize_epoch.epoch.fetch_add(1, Ordering::SeqCst);
238 resize_epoch.event.notify(usize::MAX);
239 }
240 })
241 .await,
242 );
243 }
244
245 DeviceMetadata::from_block_device(&file, major, minor)
246 .map_err(NewDeviceError::DeviceMetadata)?
247 } else if metadata.file_type().is_file() {
248 DeviceMetadata::from_file(&metadata).map_err(NewDeviceError::DeviceMetadata)?
249 } else {
250 return Err(NewDeviceError::InvalidFileType);
251 };
252
253 let sector_size = devmeta.logical_block_size;
254 let sector_shift = sector_size.trailing_zeros();
255 let physical_sector_size = devmeta.physical_block_size.max(sector_size);
256 let sector_count = devmeta.disk_size >> sector_shift;
257 let unmap_granularity = devmeta.discard_granularity >> sector_shift;
258 let file = Arc::new(file);
259 let device = BlockDevice {
260 file,
261 sector_size,
262 physical_sector_size,
263 sector_shift: sector_size.trailing_zeros(),
264 sector_count: sector_count.into(),
265 optimal_unmap_sectors: unmap_granularity,
266 read_only,
267 driver: Box::new(driver),
268 device_type: devmeta.device_type,
269 supports_pr: devmeta.supports_pr,
270 supports_fua: devmeta.fua,
271 _uevent_filter: uevent_filter,
272 resize_epoch,
273 resized_acked: 0.into(),
274 bounce_buffer_tracker,
275 always_bounce,
276 };
277
278 Ok(device)
279 }
280
281 async fn acquire_bounce_buffer(&self, size: usize) -> Box<MaybeBounceBuffer<'_>> {
284 Box::new(if let Some(tracker) = &self.bounce_buffer_tracker {
285 MaybeBounceBuffer::Tracked(
286 tracker
287 .acquire_bounce_buffers(size, affinity::get_cpu_number() as usize)
288 .await,
289 )
290 } else {
291 MaybeBounceBuffer::Untracked(BounceBuffer::new(size))
292 })
293 }
294
295 fn handle_resize(&self) {
296 if let Err(err) = self.handle_resize_inner() {
297 tracing::error!(
298 error = &err as &dyn std::error::Error,
299 "failed to update disk size"
300 );
301 }
302 }
303
304 fn handle_resize_inner(&self) -> std::io::Result<()> {
305 let mut acked = self.resized_acked.load(Ordering::SeqCst);
306 loop {
307 let epoch = self.resize_epoch.epoch.load(Ordering::SeqCst);
308 if acked == epoch {
309 break Ok(());
310 }
311
312 let size_in_bytes = ioctl::query_block_device_size_in_bytes(&self.file)?;
313
314 let new_sector_count = size_in_bytes / self.sector_size as u64;
315 let original_sector_count = self.sector_count.load(Ordering::SeqCst);
316
317 tracing::debug!(original_sector_count, new_sector_count, "resize");
318 if original_sector_count != new_sector_count {
319 tracing::info!(
320 original_sector_count,
321 new_sector_count,
322 "Disk size updating..."
323 );
324 self.sector_count.store(new_sector_count, Ordering::SeqCst);
325 }
326
327 acked = self
328 .resized_acked
329 .compare_exchange(acked, epoch, Ordering::SeqCst, Ordering::SeqCst)
330 .unwrap_or_else(|x| x);
331 }
332 }
333
334 fn map_io_error(&self, err: std::io::Error) -> DiskError {
335 if !matches!(self.device_type, DeviceType::File { .. }) {
336 match err.raw_os_error() {
337 Some(libc::EBADE) => return DiskError::ReservationConflict,
338 Some(libc::ENOSPC) => return DiskError::IllegalBlock,
339 _ => {}
340 }
341 }
342 DiskError::Io(err)
343 }
344}
345
/// Properties of the backing device or file gathered while opening it.
struct DeviceMetadata {
    /// The kind of backing object.
    device_type: DeviceType,
    /// Total size in bytes.
    disk_size: u64,
    /// Logical block (sector) size in bytes.
    logical_block_size: u32,
    /// Physical block size in bytes.
    physical_block_size: u32,
    /// Discard granularity in bytes; zero for regular files.
    discard_granularity: u32,
    /// Whether persistent reservations are supported.
    supports_pr: bool,
    /// Whether the device reports FUA support.
    fua: bool,
}
355
356impl DeviceMetadata {
357 fn from_block_device(file: &fs::File, major: u32, minor: u32) -> anyhow::Result<Self> {
358 let devpath = PathBuf::from(format!("/sys/dev/block/{major}:{minor}"));
360 devpath
361 .fs_err_metadata()
362 .context("could not find sysfs path for block device")?;
363
364 let mut supports_pr = false;
365
366 let device_type = match fs_err::read_to_string(devpath.join("nsid")) {
368 Ok(ns_id) => {
369 let ns_id = ns_id
370 .trim()
371 .parse()
372 .context("failed to parse NVMe namespace ID")?;
373
374 let rescap = nvme::nvme_identify_namespace_data(file, ns_id)?.rescap;
375 let oncs = nvme::nvme_identify_controller_data(file)?.oncs;
376 tracing::debug!(rescap = ?rescap, oncs = ?oncs, "get identify data");
377 supports_pr = oncs.reservations() && u8::from(rescap) != 0;
378 Some(DeviceType::NVMe { ns_id, rescap })
379 }
380 Err(err) if err.kind() == std::io::ErrorKind::NotFound => None,
381 Err(err) => Err(err).context("failed to read NVMe namespace ID")?,
382 };
383
384 let device_type = device_type.unwrap_or(DeviceType::UnknownBlock);
386
387 fn read_val<T: FromStr>(devpath: &Path, path: &str, msg: &str) -> anyhow::Result<T>
388 where
389 T::Err: 'static + std::error::Error + Send + Sync,
390 {
391 fs_err::read_to_string(devpath.join(path))
392 .with_context(|| format!("failed to read {msg}"))?
393 .trim()
394 .parse()
395 .with_context(|| format!("failed to parse {msg}"))
396 }
397
398 let logical_block_size = read_val(&devpath, "queue/logical_block_size", "sector size")?;
399 let physical_block_size = read_val(
400 &devpath,
401 "queue/physical_block_size",
402 "physical sector size",
403 )?;
404
405 let disk_size = read_val::<u64>(&devpath, "size", "disk size")? * 512;
408 let discard_granularity =
409 read_val(&devpath, "queue/discard_granularity", "discard granularity")?;
410
411 let fua = read_val::<u8>(&devpath, "queue/fua", "fua")? != 0;
412
413 Self {
414 device_type,
415 disk_size,
416 logical_block_size,
417 physical_block_size,
418 discard_granularity,
419 supports_pr,
420 fua,
421 }
422 .validate()
423 }
424
425 fn from_file(metadata: &fs::Metadata) -> anyhow::Result<Self> {
426 let logical_block_size = 512;
427 Self {
428 device_type: DeviceType::File {
429 sector_count: metadata.len() / logical_block_size as u64,
430 },
431 disk_size: metadata.size(),
432 logical_block_size,
433 physical_block_size: metadata.blksize() as u32,
434 discard_granularity: 0,
435 supports_pr: false,
436 fua: false,
437 }
438 .validate()
439 }
440
441 fn validate(self) -> anyhow::Result<Self> {
442 let Self {
443 device_type: _,
444 disk_size,
445 logical_block_size,
446 physical_block_size,
447 discard_granularity,
448 supports_pr: _,
449 fua: _,
450 } = self;
451 if logical_block_size < 512 || !logical_block_size.is_power_of_two() {
452 anyhow::bail!("invalid sector size {logical_block_size}");
453 }
454 if !physical_block_size.is_power_of_two() {
455 anyhow::bail!("invalid physical sector size {physical_block_size}");
456 }
457 if disk_size % logical_block_size as u64 != 0 {
458 anyhow::bail!("invalid disk size {disk_size:#x}");
459 }
460 if discard_granularity % logical_block_size != 0 {
461 anyhow::bail!("invalid discard granularity {discard_granularity}");
462 }
463 Ok(self)
464 }
465}
466
467impl DiskIo for BlockDevice {
    /// Returns the fixed identifier for this disk implementation.
    fn disk_type(&self) -> &str {
        "block_device"
    }
471
472 fn sector_count(&self) -> u64 {
473 if self.resize_epoch.epoch.load(Ordering::Relaxed)
474 != self.resized_acked.load(Ordering::Relaxed)
475 {
476 self.handle_resize();
477 }
478 self.sector_count.load(Ordering::Relaxed)
479 }
480
    /// Returns the logical sector size in bytes.
    fn sector_size(&self) -> u32 {
        self.sector_size
    }

    /// This backend does not report a disk ID.
    fn disk_id(&self) -> Option<[u8; 16]> {
        None
    }

    /// Returns the physical sector size in bytes.
    fn physical_sector_size(&self) -> u32 {
        self.physical_sector_size
    }

    /// Returns whether the backing device reported FUA support when it was
    /// opened.
    fn is_fua_respected(&self) -> bool {
        self.supports_fua
    }

    /// Returns the read-only flag supplied when the device was created.
    fn is_read_only(&self) -> bool {
        self.read_only
    }
500
501 fn pr(&self) -> Option<&dyn PersistentReservation> {
502 if self.supports_pr { Some(self) } else { None }
503 }
504
505 async fn eject(&self) -> Result<(), DiskError> {
506 let file = self.file.clone();
507 unblock(move || {
508 ioctl::lockdoor(&file, false)?;
509 ioctl::eject(&file)
510 })
511 .await
512 .map_err(|err| self.map_io_error(err))?;
513 Ok(())
514 }
515
516 async fn read_vectored(
517 &self,
518 buffers: &RequestBuffers<'_>,
519 sector: u64,
520 ) -> Result<(), DiskError> {
521 let io_size = buffers.len();
522 tracing::trace!(sector, io_size, "read_vectored");
523
524 let mut bounce_buffer = None;
525 let locked;
526 let should_bounce = self.always_bounce || !buffers.is_aligned(self.sector_size() as usize);
527 let io_vecs = if !should_bounce {
528 locked = buffers.lock(true)?;
529 locked.io_vecs()
530 } else {
531 tracing::trace!("double buffering IO");
532
533 bounce_buffer
534 .insert(self.acquire_bounce_buffer(buffers.len()).await)
535 .io_vecs()
536 };
537
538 let bytes_read = unsafe {
543 self.driver.io_uring_submit(
544 opcode::Readv::new(
545 types::Fd(self.file.as_raw_fd()),
546 io_vecs.as_ptr().cast(),
547 io_vecs.len() as u32,
548 )
549 .offset((sector * self.sector_size() as u64) as _)
550 .build(),
551 )
552 }
553 .await
554 .map_err(|err| self.map_io_error(err))?;
555 tracing::trace!(bytes_read, "read_vectored");
556 if bytes_read != io_size as i32 {
557 return Err(DiskError::IllegalBlock);
558 }
559
560 if let Some(mut bounce_buffer) = bounce_buffer {
561 buffers.writer().write(bounce_buffer.as_mut_bytes())?;
562 }
563 Ok(())
564 }
565
566 async fn write_vectored(
567 &self,
568 buffers: &RequestBuffers<'_>,
569 sector: u64,
570 fua: bool,
571 ) -> Result<(), DiskError> {
572 let io_size = buffers.len();
573 tracing::trace!(sector, io_size, "write_vectored");
574
575 if let DeviceType::File { sector_count } = self.device_type {
577 if sector + (io_size as u64 >> self.sector_shift) > sector_count {
578 return Err(DiskError::IllegalBlock);
579 }
580 }
581
582 let mut bounce_buffer;
583 let locked;
584 let should_bounce = self.always_bounce || !buffers.is_aligned(self.sector_size() as usize);
585 let io_vecs = if !should_bounce {
586 locked = buffers.lock(false)?;
587 locked.io_vecs()
588 } else {
589 tracing::trace!("double buffering IO");
590 bounce_buffer = self.acquire_bounce_buffer(buffers.len()).await;
591 buffers.reader().read(bounce_buffer.as_mut_bytes())?;
592 bounce_buffer.io_vecs()
593 };
594
595 let bytes_written = unsafe {
600 self.driver.io_uring_submit(
601 opcode::Writev::new(
602 types::Fd(self.file.as_raw_fd()),
603 io_vecs.as_ptr().cast::<libc::iovec>(),
604 io_vecs.len() as _,
605 )
606 .offset((sector * self.sector_size() as u64) as _)
607 .rw_flags(if fua { libc::RWF_DSYNC } else { 0 })
608 .build(),
609 )
610 }
611 .await
612 .map_err(|err| self.map_io_error(err))?;
613 tracing::trace!(bytes_written, "write_vectored");
614 if bytes_written != io_size as i32 {
615 return Err(DiskError::IllegalBlock);
616 }
617
618 Ok(())
619 }
620
621 async fn sync_cache(&self) -> Result<(), DiskError> {
622 unsafe {
624 self.driver
625 .io_uring_submit(opcode::Fsync::new(types::Fd(self.file.as_raw_fd())).build())
626 }
627 .await
628 .map_err(|err| self.map_io_error(err))?;
629 Ok(())
630 }
631
    /// Waits until the disk's sector count differs from `sector_count` and
    /// returns the new count.
    ///
    /// Returns immediately if the current count already differs.
    async fn wait_resize(&self, sector_count: u64) -> u64 {
        loop {
            // Create the listener before sampling the size, presumably so a
            // notification that fires between the sample and the await
            // below is not missed.
            let listen = self.resize_epoch.event.listen();
            // `sector_count()` also refreshes the cached size if a resize is
            // pending.
            let current = self.sector_count();
            if current != sector_count {
                break current;
            }
            listen.await;
        }
    }
642
643 async fn unmap(
644 &self,
645 sector_offset: u64,
646 sector_count: u64,
647 _block_level_only: bool,
648 ) -> Result<(), DiskError> {
649 if self.optimal_unmap_sectors == 0 {
653 return Ok(());
654 }
655
656 let file = self.file.clone();
657 let file_offset = sector_offset << self.sector_shift;
658 let length = sector_count << self.sector_shift;
659 tracing::debug!(file = ?file, file_offset, length, "unmap_async");
660 match unblock(move || ioctl::discard(&file, file_offset, length)).await {
661 Ok(()) => {}
662 Err(_) if sector_offset + sector_count > self.sector_count() => {
663 return Err(DiskError::IllegalBlock);
664 }
665 Err(err)
666 if matches!(
667 err.raw_os_error(),
668 Some(libc::ENOTTY | libc::EOPNOTSUPP | libc::ENOSYS)
669 ) =>
670 {
671 tracing::debug!(
672 error = &err as &dyn std::error::Error,
673 "discard not supported; ignoring"
674 );
675 }
676 Err(err) => return Err(self.map_io_error(err)),
677 }
678 Ok(())
679 }
680
681 fn unmap_behavior(&self) -> UnmapBehavior {
682 if self.optimal_unmap_sectors == 0 {
683 UnmapBehavior::Ignored
684 } else {
685 UnmapBehavior::Unspecified
686 }
687 }
688
    /// Returns the device's discard granularity in sectors (zero when unmap
    /// is not supported).
    fn optimal_unmap_sectors(&self) -> u32 {
        self.optimal_unmap_sectors
    }
692}
693
694#[async_trait::async_trait]
695impl PersistentReservation for BlockDevice {
696 fn capabilities(&self) -> ReservationCapabilities {
697 match &self.device_type {
698 &DeviceType::NVMe { rescap, .. } => {
699 nvme_common::from_nvme_reservation_capabilities(rescap)
700 }
701 DeviceType::File { .. } | DeviceType::UnknownBlock => unreachable!(),
702 }
703 }
704
705 async fn report(&self) -> Result<ReservationReport, DiskError> {
706 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
707 self.nvme_persistent_reservation_report()
708 .await
709 .map_err(|err| self.map_io_error(err))
710 }
711
712 async fn register(
713 &self,
714 current_key: Option<u64>,
715 new_key: u64,
716 ptpl: Option<bool>,
717 ) -> Result<(), DiskError> {
718 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
719
720 if ptpl == Some(false) {
724 tracing::warn!("ignoring guest request to disable persist through power loss");
725 }
726
727 let file = self.file.clone();
728 unblock(move || {
729 ioctl::pr_register(
730 &file,
731 current_key.unwrap_or(0),
732 new_key,
733 if current_key.is_none() {
734 ioctl::PR_FL_IGNORE_KEY
735 } else {
736 0
737 },
738 )
739 })
740 .await
741 .and_then(check_nvme_status)
742 .map_err(|err| self.map_io_error(err))?;
743 Ok(())
744 }
745
746 async fn reserve(&self, key: u64, reservation_type: ReservationType) -> Result<(), DiskError> {
747 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
748 let file = self.file.clone();
749 unblock(move || ioctl::pr_reserve(&file, reservation_type, key))
750 .await
751 .and_then(check_nvme_status)
752 .map_err(|err| self.map_io_error(err))?;
753 Ok(())
754 }
755
756 async fn release(&self, key: u64, reservation_type: ReservationType) -> Result<(), DiskError> {
757 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
758 let file = self.file.clone();
759 unblock(move || ioctl::pr_release(&file, reservation_type, key))
760 .await
761 .and_then(check_nvme_status)
762 .map_err(|err| self.map_io_error(err))?;
763 Ok(())
764 }
765
766 async fn clear(&self, key: u64) -> Result<(), DiskError> {
767 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
768 let file = self.file.clone();
769 unblock(move || ioctl::pr_clear(&file, key))
770 .await
771 .and_then(check_nvme_status)
772 .map_err(|err| self.map_io_error(err))?;
773 Ok(())
774 }
775
776 async fn preempt(
777 &self,
778 current_key: u64,
779 preempt_key: u64,
780 reservation_type: ReservationType,
781 abort: bool,
782 ) -> Result<(), DiskError> {
783 assert!(matches!(self.device_type, DeviceType::NVMe { .. }));
784 let file = self.file.clone();
785 unblock(move || {
786 ioctl::pr_preempt(&file, reservation_type, current_key, preempt_key, abort)
787 })
788 .await
789 .and_then(check_nvme_status)
790 .map_err(|err| self.map_io_error(err))?;
791 Ok(())
792 }
793}
794
795#[cfg(test)]
796mod tests {
797 use super::*;
798 use futures::executor::block_on;
799 use guestmem::GuestMemory;
800 use hvdef::HV_PAGE_SIZE;
801 use hvdef::HV_PAGE_SIZE_USIZE;
802 use once_cell::sync::OnceCell;
803 use pal_async::async_test;
804 use pal_uring::IoUringPool;
805 use pal_uring::PoolClient;
806 use scsi_buffers::OwnedRequestBuffers;
807
808 fn is_buggy_kernel() -> bool {
809 let output = String::from_utf8(
811 std::process::Command::new("uname")
812 .arg("-r")
813 .output()
814 .unwrap()
815 .stdout,
816 )
817 .unwrap();
818
819 output.contains("5.13")
820 }
821
    /// Creates a [`BlockDevice`] backed by a 64 KiB temporary file.
    ///
    /// A failure to create the io_uring pool is reported as
    /// `NewDeviceError::IoctlError(DiskError::Io(..))`.
    fn new_block_device() -> Result<BlockDevice, NewDeviceError> {
        // One io_uring pool is shared by every test in the process.
        static POOL: OnceCell<PoolClient> = OnceCell::new();

        let client = POOL
            .get_or_try_init(|| {
                let pool = IoUringPool::new("test", 16)?;
                let client = pool.client().clone();
                // Run the pool on a background thread.
                std::thread::spawn(|| pool.run());
                Ok(client)
            })
            .map_err(|err| NewDeviceError::IoctlError(DiskError::Io(err)))?;

        let test_file = tempfile::tempfile().unwrap();
        test_file.set_len(1024 * 64).unwrap();
        block_on(BlockDevice::new(
            test_file.try_clone().unwrap(),
            false,
            client.initiator().clone(),
            None,
            None,
            false,
        ))
    }
846
    /// Evaluates to a test [`BlockDevice`], or returns from the enclosing
    /// test function when the environment cannot run it: the kernel is a
    /// known-buggy version, or device creation fails with `ENOSYS`.
    /// Any other creation error panics.
    macro_rules! get_block_device_or_skip {
        () => {
            match new_block_device() {
                Ok(pool) => {
                    if is_buggy_kernel() {
                        println!("Test case skipped (buggy kernel version)");
                        return;
                    }

                    pool
                }
                // `ENOSYS` is taken to mean io_uring is unavailable.
                Err(NewDeviceError::IoctlError(DiskError::Io(err)))
                    if err.raw_os_error() == Some(libc::ENOSYS) =>
                {
                    println!("Test case skipped (no IO-Uring support)");
                    return;
                }
                Err(err) => panic!("{}", err),
            }
        };
    }
868
869 async fn run_async_disk_io(fua: bool) {
870 let disk = get_block_device_or_skip!();
871
872 let test_guest_mem = GuestMemory::allocate(0x8000);
873 test_guest_mem
874 .write_at(0, &(0..0x8000).map(|x| x as u8).collect::<Vec<_>>())
875 .unwrap();
876
877 let write_buffers = OwnedRequestBuffers::new(&[3, 2, 1, 0]);
878 disk.write_vectored(&write_buffers.buffer(&test_guest_mem), 0, fua)
879 .await
880 .unwrap();
881
882 if !fua {
883 disk.sync_cache().await.unwrap();
884 }
885
886 let read_buffers = OwnedRequestBuffers::new(&[7, 6, 5, 4]);
887 disk.read_vectored(&read_buffers.buffer(&test_guest_mem), 0)
888 .await
889 .unwrap();
890
891 let mut source = vec![0u8; 4 * HV_PAGE_SIZE_USIZE];
892 test_guest_mem.read_at(0, &mut source).unwrap();
893 let mut target = vec![0u8; 4 * HV_PAGE_SIZE_USIZE];
894 test_guest_mem
895 .read_at(4 * HV_PAGE_SIZE, &mut target)
896 .unwrap();
897 assert_eq!(source, target);
898 }
899
    /// Aligned round trip with an explicit cache sync instead of FUA.
    #[async_test]
    async fn test_async_disk_io() {
        run_async_disk_io(false).await;
    }

    /// Aligned round trip with FUA writes.
    #[async_test]
    async fn test_async_disk_io_fua() {
        run_async_disk_io(true).await;
    }
909
910 async fn run_async_disk_io_unaligned(fua: bool) {
911 let disk = get_block_device_or_skip!();
912
913 let test_guest_mem = GuestMemory::allocate(0x8000);
914 test_guest_mem
915 .write_at(0, &(0..0x8000).map(|x| x as u8).collect::<Vec<_>>())
916 .unwrap();
917
918 let write_buffers =
919 OwnedRequestBuffers::new_unaligned(&[0, 1, 2, 3], 512, 3 * HV_PAGE_SIZE_USIZE);
920
921 disk.write_vectored(&write_buffers.buffer(&test_guest_mem), 0, fua)
922 .await
923 .unwrap();
924
925 if !fua {
926 disk.sync_cache().await.unwrap();
927 }
928
929 let read_buffers =
930 OwnedRequestBuffers::new_unaligned(&[4, 5, 6, 7], 512, 3 * HV_PAGE_SIZE_USIZE);
931 disk.read_vectored(&read_buffers.buffer(&test_guest_mem), 0)
932 .await
933 .unwrap();
934
935 let mut source = vec![0u8; 3 * HV_PAGE_SIZE_USIZE];
936 test_guest_mem.read_at(512, &mut source).unwrap();
937 let mut target = vec![0u8; 3 * HV_PAGE_SIZE_USIZE];
938 test_guest_mem
939 .read_at(4 * HV_PAGE_SIZE + 512, &mut target)
940 .unwrap();
941 assert_eq!(source, target);
942 }
943
    /// Unaligned round trip with an explicit cache sync instead of FUA.
    #[async_test]
    async fn test_async_disk_io_unaligned() {
        run_async_disk_io_unaligned(false).await;
    }

    /// Unaligned round trip with FUA writes.
    #[async_test]
    async fn test_async_disk_io_unaligned_fua() {
        run_async_disk_io_unaligned(true).await;
    }
953
954 #[async_test]
955 async fn test_illegal_lba() {
956 let disk = get_block_device_or_skip!();
957 let gm = GuestMemory::allocate(512);
958 match disk
959 .write_vectored(
960 &OwnedRequestBuffers::linear(0, 512, true).buffer(&gm),
961 i64::MAX as u64 / 512,
962 false,
963 )
964 .await
965 {
966 Err(DiskError::IllegalBlock) => {}
967 r => panic!("unexpected result: {:?}", r),
968 }
969 }
970}