1#![cfg(any(windows, target_os = "linux"))]
7#![forbid(unsafe_code)]
8#![expect(missing_docs)]
9
10use async_trait::async_trait;
11use disk_backend::DiskError;
12use disk_backend::DiskIo;
13use disk_backend::MediumErrorDetails;
14use disk_backend::pr;
15use inspect::Inspect;
16use nvme_common::from_nvme_reservation_report;
17use nvme_spec::Status;
18use nvme_spec::nvm;
19#[cfg(target_os = "linux")]
20use pal::unix::affinity::get_cpu_number;
21#[cfg(windows)]
22use pal::windows::affinity::get_cpu_number;
23use std::io;
24
/// A disk backend implemented on top of an NVMe namespace.
#[derive(Debug, Inspect)]
pub struct NvmeDisk {
    /// The underlying NVMe namespace all I/O is issued against.
    #[inspect(flatten)]
    namespace: nvme_driver::NamespaceHandle,
    /// log2 of the namespace block size, cached at construction so that
    /// byte<->block conversions are cheap shifts.
    #[inspect(skip)]
    block_shift: u32,
}
33
34impl NvmeDisk {
35 pub fn new(namespace: nvme_driver::NamespaceHandle) -> Self {
36 Self {
37 block_shift: namespace.block_size().trailing_zeros(),
38 namespace,
39 }
40 }
41}
42
43impl DiskIo for NvmeDisk {
44 fn disk_type(&self) -> &str {
45 "nvme"
46 }
47
48 fn sector_count(&self) -> u64 {
49 self.namespace.block_count()
50 }
51
52 fn sector_size(&self) -> u32 {
53 self.namespace.block_size()
54 }
55
56 fn disk_id(&self) -> Option<[u8; 16]> {
57 None }
59
60 fn physical_sector_size(&self) -> u32 {
61 4096 }
63
64 fn is_fua_respected(&self) -> bool {
65 true
67 }
68
69 fn is_read_only(&self) -> bool {
70 false }
72
73 fn pr(&self) -> Option<&dyn pr::PersistentReservation> {
74 (u8::from(self.namespace.reservation_capabilities()) != 0).then_some(self)
75 }
76
77 async fn read_vectored(
78 &self,
79 buffers: &scsi_buffers::RequestBuffers<'_>,
80 sector: u64,
81 ) -> Result<(), DiskError> {
82 let block_count = buffers.len() as u64 >> self.block_shift;
83 let mut block_offset = 0;
84 while block_offset < block_count {
85 let this_block_count = (block_count - block_offset)
86 .min(self.namespace.max_transfer_block_count().into())
87 as u32;
88
89 self.namespace
90 .read(
91 get_cpu_number(),
92 sector + block_offset,
93 this_block_count,
94 buffers.guest_memory(),
95 buffers.range().subrange(
96 (block_offset as usize) << self.block_shift,
97 (this_block_count as usize) << self.block_shift,
98 ),
99 )
100 .await
101 .map_err(map_nvme_error)?;
102
103 block_offset += this_block_count as u64;
104 }
105 Ok(())
106 }
107
108 async fn write_vectored(
109 &self,
110 buffers: &scsi_buffers::RequestBuffers<'_>,
111 sector: u64,
112 fua: bool,
113 ) -> Result<(), DiskError> {
114 let block_count = buffers.len() as u64 >> self.block_shift;
115 let mut block_offset = 0;
116 while block_offset < block_count {
117 let this_block_count = (block_count - block_offset)
118 .min(self.namespace.max_transfer_block_count().into())
119 as u32;
120
121 self.namespace
122 .write(
123 get_cpu_number(),
124 sector + block_offset,
125 this_block_count,
126 fua,
127 buffers.guest_memory(),
128 buffers.range().subrange(
129 (block_offset as usize) << self.block_shift,
130 (this_block_count as usize) << self.block_shift,
131 ),
132 )
133 .await
134 .map_err(map_nvme_error)?;
135
136 block_offset += this_block_count as u64;
137 }
138 Ok(())
139 }
140
141 async fn sync_cache(&self) -> Result<(), DiskError> {
142 self.namespace
143 .flush(get_cpu_number())
144 .await
145 .map_err(map_nvme_error)?;
146 Ok(())
147 }
148
149 async fn wait_resize(&self, sector_count: u64) -> u64 {
150 self.namespace.wait_resize(sector_count).await
151 }
152
153 async fn unmap(
154 &self,
155 sector_offset: u64,
156 sector_count: u64,
157 _block_level_only: bool,
158 ) -> Result<(), DiskError> {
159 if !self.namespace.supports_dataset_management() {
160 return Ok(());
161 }
162 let mut processed = 0;
163 let max = self.namespace.dataset_management_range_size_limit();
164 while processed < sector_count {
165 let lba_count = (sector_count - processed).min(max.into());
166 self.namespace
167 .deallocate(
168 get_cpu_number(),
169 &[nvm::DsmRange {
170 context_attributes: 0,
171 lba_count: lba_count as u32,
172 starting_lba: sector_offset + processed,
173 }],
174 )
175 .await
176 .map_err(map_nvme_error)?;
177
178 processed += lba_count;
179 }
180 Ok(())
181 }
182
183 fn unmap_behavior(&self) -> disk_backend::UnmapBehavior {
184 if self.namespace.supports_dataset_management() {
185 disk_backend::UnmapBehavior::Unspecified
186 } else {
187 disk_backend::UnmapBehavior::Ignored
188 }
189 }
190
191 fn optimal_unmap_sectors(&self) -> u32 {
192 self.namespace.preferred_deallocate_granularity().into()
193 }
194}
195
196#[async_trait]
197impl pr::PersistentReservation for NvmeDisk {
198 fn capabilities(&self) -> pr::ReservationCapabilities {
199 nvme_common::from_nvme_reservation_capabilities(self.namespace.reservation_capabilities())
200 }
201
202 async fn report(&self) -> Result<pr::ReservationReport, DiskError> {
203 let (report, controllers) = self
204 .namespace
205 .reservation_report_extended(get_cpu_number())
206 .await
207 .map_err(map_nvme_error)?;
208
209 from_nvme_reservation_report(&report.report, &controllers)
210 .map_err(|err| DiskError::Io(io::Error::new(io::ErrorKind::InvalidInput, err)))
211 }
212
213 async fn register(
214 &self,
215 current_key: Option<u64>,
216 new_key: u64,
217 ptpl: Option<bool>,
218 ) -> Result<(), DiskError> {
219 let action = if new_key == 0 {
220 nvm::ReservationRegisterAction::UNREGISTER
221 } else if current_key.is_some() {
222 nvm::ReservationRegisterAction::REPLACE
223 } else {
224 nvm::ReservationRegisterAction::REGISTER
225 };
226 self.namespace
227 .reservation_register(get_cpu_number(), action, current_key, new_key, ptpl)
228 .await
229 .map_err(map_nvme_error)?;
230
231 Ok(())
232 }
233
234 async fn reserve(
235 &self,
236 key: u64,
237 reservation_type: pr::ReservationType,
238 ) -> Result<(), DiskError> {
239 self.namespace
240 .reservation_acquire(
241 get_cpu_number(),
242 nvm::ReservationAcquireAction::ACQUIRE,
243 key,
244 0,
245 nvme_common::to_nvme_reservation_type(reservation_type),
246 )
247 .await
248 .map_err(map_nvme_error)?;
249
250 Ok(())
251 }
252
253 async fn release(
254 &self,
255 key: u64,
256 reservation_type: pr::ReservationType,
257 ) -> Result<(), DiskError> {
258 self.namespace
259 .reservation_release(
260 get_cpu_number(),
261 nvm::ReservationReleaseAction::RELEASE,
262 key,
263 nvme_common::to_nvme_reservation_type(reservation_type),
264 )
265 .await
266 .map_err(map_nvme_error)?;
267
268 Ok(())
269 }
270
271 async fn clear(&self, key: u64) -> Result<(), DiskError> {
272 self.namespace
273 .reservation_release(
274 get_cpu_number(),
275 nvm::ReservationReleaseAction::CLEAR,
276 key,
277 nvm::ReservationType(0),
278 )
279 .await
280 .map_err(map_nvme_error)?;
281
282 Ok(())
283 }
284
285 async fn preempt(
286 &self,
287 current_key: u64,
288 preempt_key: u64,
289 reservation_type: pr::ReservationType,
290 abort: bool,
291 ) -> Result<(), DiskError> {
292 self.namespace
293 .reservation_acquire(
294 get_cpu_number(),
295 if abort {
296 nvm::ReservationAcquireAction::PREEMPT_AND_ABORT
297 } else {
298 nvm::ReservationAcquireAction::PREEMPT
299 },
300 current_key,
301 preempt_key,
302 nvme_common::to_nvme_reservation_type(reservation_type),
303 )
304 .await
305 .map_err(map_nvme_error)?;
306
307 Ok(())
308 }
309}
310
311fn map_nvme_error(err: nvme_driver::RequestError) -> DiskError {
312 match err {
313 err @ nvme_driver::RequestError::Gone(_) => {
314 DiskError::Io(io::Error::new(io::ErrorKind::NotConnected, err))
315 }
316 nvme_driver::RequestError::Nvme(err) => {
317 match err.status() {
318 Status::RESERVATION_CONFLICT => DiskError::ReservationConflict,
319
320 Status::INVALID_FIELD_IN_COMMAND => DiskError::InvalidInput,
321
322 Status::LBA_OUT_OF_RANGE => DiskError::IllegalBlock,
323
324 Status::DATA_TRANSFER_ERROR | Status::CAPACITY_EXCEEDED => {
326 DiskError::Io(io::Error::other(err))
327 }
328 Status::MEDIA_WRITE_FAULT => {
329 DiskError::MediumError(io::Error::other(err), MediumErrorDetails::WriteFault)
330 }
331 Status::MEDIA_UNRECOVERED_READ_ERROR => DiskError::MediumError(
332 io::Error::other(err),
333 MediumErrorDetails::UnrecoveredReadError,
334 ),
335 Status::MEDIA_END_TO_END_GUARD_CHECK_ERROR => DiskError::MediumError(
336 io::Error::other(err),
337 MediumErrorDetails::GuardCheckFailed,
338 ),
339 Status::MEDIA_END_TO_END_APPLICATION_TAG_CHECK_ERROR => DiskError::MediumError(
340 io::Error::other(err),
341 MediumErrorDetails::ApplicationTagCheckFailed,
342 ),
343 Status::MEDIA_END_TO_END_REFERENCE_TAG_CHECK_ERROR => DiskError::MediumError(
344 io::Error::other(err),
345 MediumErrorDetails::ReferenceTagCheckFailed,
346 ),
347
348 Status::COMMAND_ABORTED_DUE_TO_PREEMPT_AND_ABORT => {
349 DiskError::AbortDueToPreemptAndAbort
350 }
351
352 _ => DiskError::Io(io::Error::other(err)),
353 }
354 }
355 nvme_driver::RequestError::Memory(err) => DiskError::MemoryAccess(err.into()),
356 err @ nvme_driver::RequestError::TooLarge => {
357 DiskError::Io(io::Error::new(io::ErrorKind::InvalidInput, err))
358 }
359 }
360}