1#![cfg(target_os = "linux")]
7#![forbid(unsafe_code)]
8#![expect(missing_docs)]
9
10use async_trait::async_trait;
11use disk_backend::DiskError;
12use disk_backend::DiskIo;
13use disk_backend::MediumErrorDetails;
14use disk_backend::pr;
15use inspect::Inspect;
16use nvme_common::from_nvme_reservation_report;
17use nvme_spec::Status;
18use nvme_spec::nvm;
19use pal::unix::affinity::get_cpu_number;
20use std::io;
21
/// A disk backend backed by an NVMe namespace.
#[derive(Debug, Inspect)]
pub struct NvmeDisk {
    // The underlying NVMe namespace used for all I/O.
    #[inspect(flatten)]
    namespace: nvme_driver::Namespace,
    // log2 of the namespace block size, used to convert between byte
    // lengths/offsets and block counts via shifts.
    #[inspect(skip)]
    block_shift: u32,
}
30
31impl NvmeDisk {
32 pub fn new(namespace: nvme_driver::Namespace) -> Self {
33 Self {
34 block_shift: namespace.block_size().trailing_zeros(),
35 namespace,
36 }
37 }
38}
39
40impl DiskIo for NvmeDisk {
41 fn disk_type(&self) -> &str {
42 "nvme"
43 }
44
45 fn sector_count(&self) -> u64 {
46 self.namespace.block_count()
47 }
48
49 fn sector_size(&self) -> u32 {
50 self.namespace.block_size()
51 }
52
53 fn disk_id(&self) -> Option<[u8; 16]> {
54 None }
56
57 fn physical_sector_size(&self) -> u32 {
58 4096 }
60
61 fn is_fua_respected(&self) -> bool {
62 true
64 }
65
66 fn is_read_only(&self) -> bool {
67 false }
69
70 fn pr(&self) -> Option<&dyn pr::PersistentReservation> {
71 (u8::from(self.namespace.reservation_capabilities()) != 0).then_some(self)
72 }
73
74 async fn read_vectored(
75 &self,
76 buffers: &scsi_buffers::RequestBuffers<'_>,
77 sector: u64,
78 ) -> Result<(), DiskError> {
79 let block_count = buffers.len() as u64 >> self.block_shift;
80 let mut block_offset = 0;
81 while block_offset < block_count {
82 let this_block_count = (block_count - block_offset)
83 .min(self.namespace.max_transfer_block_count().into())
84 as u32;
85
86 self.namespace
87 .read(
88 get_cpu_number(),
89 sector + block_offset,
90 this_block_count,
91 buffers.guest_memory(),
92 buffers.range().subrange(
93 (block_offset as usize) << self.block_shift,
94 (this_block_count as usize) << self.block_shift,
95 ),
96 )
97 .await
98 .map_err(map_nvme_error)?;
99
100 block_offset += this_block_count as u64;
101 }
102 Ok(())
103 }
104
105 async fn write_vectored(
106 &self,
107 buffers: &scsi_buffers::RequestBuffers<'_>,
108 sector: u64,
109 fua: bool,
110 ) -> Result<(), DiskError> {
111 let block_count = buffers.len() as u64 >> self.block_shift;
112 let mut block_offset = 0;
113 while block_offset < block_count {
114 let this_block_count = (block_count - block_offset)
115 .min(self.namespace.max_transfer_block_count().into())
116 as u32;
117
118 self.namespace
119 .write(
120 get_cpu_number(),
121 sector + block_offset,
122 this_block_count,
123 fua,
124 buffers.guest_memory(),
125 buffers.range().subrange(
126 (block_offset as usize) << self.block_shift,
127 (this_block_count as usize) << self.block_shift,
128 ),
129 )
130 .await
131 .map_err(map_nvme_error)?;
132
133 block_offset += this_block_count as u64;
134 }
135 Ok(())
136 }
137
138 async fn sync_cache(&self) -> Result<(), DiskError> {
139 self.namespace
140 .flush(get_cpu_number())
141 .await
142 .map_err(map_nvme_error)?;
143 Ok(())
144 }
145
146 async fn wait_resize(&self, sector_count: u64) -> u64 {
147 self.namespace.wait_resize(sector_count).await
148 }
149
150 async fn unmap(
151 &self,
152 sector_offset: u64,
153 sector_count: u64,
154 _block_level_only: bool,
155 ) -> Result<(), DiskError> {
156 if !self.namespace.supports_dataset_management() {
157 return Ok(());
158 }
159 let mut processed = 0;
160 let max = self.namespace.dataset_management_range_size_limit();
161 while processed < sector_count {
162 let lba_count = (sector_count - processed).min(max.into());
163 self.namespace
164 .deallocate(
165 get_cpu_number(),
166 &[nvm::DsmRange {
167 context_attributes: 0,
168 lba_count: lba_count as u32,
169 starting_lba: sector_offset + processed,
170 }],
171 )
172 .await
173 .map_err(map_nvme_error)?;
174
175 processed += lba_count;
176 }
177 Ok(())
178 }
179
180 fn unmap_behavior(&self) -> disk_backend::UnmapBehavior {
181 if self.namespace.supports_dataset_management() {
182 disk_backend::UnmapBehavior::Unspecified
183 } else {
184 disk_backend::UnmapBehavior::Ignored
185 }
186 }
187
188 fn optimal_unmap_sectors(&self) -> u32 {
189 self.namespace.preferred_deallocate_granularity().into()
190 }
191}
192
193#[async_trait]
194impl pr::PersistentReservation for NvmeDisk {
195 fn capabilities(&self) -> pr::ReservationCapabilities {
196 nvme_common::from_nvme_reservation_capabilities(self.namespace.reservation_capabilities())
197 }
198
199 async fn report(&self) -> Result<pr::ReservationReport, DiskError> {
200 let (report, controllers) = self
201 .namespace
202 .reservation_report_extended(get_cpu_number())
203 .await
204 .map_err(map_nvme_error)?;
205
206 from_nvme_reservation_report(&report.report, &controllers)
207 .map_err(|err| DiskError::Io(io::Error::new(io::ErrorKind::InvalidInput, err)))
208 }
209
210 async fn register(
211 &self,
212 current_key: Option<u64>,
213 new_key: u64,
214 ptpl: Option<bool>,
215 ) -> Result<(), DiskError> {
216 let action = if new_key == 0 {
217 nvm::ReservationRegisterAction::UNREGISTER
218 } else if current_key.is_some() {
219 nvm::ReservationRegisterAction::REPLACE
220 } else {
221 nvm::ReservationRegisterAction::REGISTER
222 };
223 self.namespace
224 .reservation_register(get_cpu_number(), action, current_key, new_key, ptpl)
225 .await
226 .map_err(map_nvme_error)?;
227
228 Ok(())
229 }
230
231 async fn reserve(
232 &self,
233 key: u64,
234 reservation_type: pr::ReservationType,
235 ) -> Result<(), DiskError> {
236 self.namespace
237 .reservation_acquire(
238 get_cpu_number(),
239 nvm::ReservationAcquireAction::ACQUIRE,
240 key,
241 0,
242 nvme_common::to_nvme_reservation_type(reservation_type),
243 )
244 .await
245 .map_err(map_nvme_error)?;
246
247 Ok(())
248 }
249
250 async fn release(
251 &self,
252 key: u64,
253 reservation_type: pr::ReservationType,
254 ) -> Result<(), DiskError> {
255 self.namespace
256 .reservation_release(
257 get_cpu_number(),
258 nvm::ReservationReleaseAction::RELEASE,
259 key,
260 nvme_common::to_nvme_reservation_type(reservation_type),
261 )
262 .await
263 .map_err(map_nvme_error)?;
264
265 Ok(())
266 }
267
268 async fn clear(&self, key: u64) -> Result<(), DiskError> {
269 self.namespace
270 .reservation_release(
271 get_cpu_number(),
272 nvm::ReservationReleaseAction::CLEAR,
273 key,
274 nvm::ReservationType(0),
275 )
276 .await
277 .map_err(map_nvme_error)?;
278
279 Ok(())
280 }
281
282 async fn preempt(
283 &self,
284 current_key: u64,
285 preempt_key: u64,
286 reservation_type: pr::ReservationType,
287 abort: bool,
288 ) -> Result<(), DiskError> {
289 self.namespace
290 .reservation_acquire(
291 get_cpu_number(),
292 if abort {
293 nvm::ReservationAcquireAction::PREEMPT_AND_ABORT
294 } else {
295 nvm::ReservationAcquireAction::PREEMPT
296 },
297 current_key,
298 preempt_key,
299 nvme_common::to_nvme_reservation_type(reservation_type),
300 )
301 .await
302 .map_err(map_nvme_error)?;
303
304 Ok(())
305 }
306}
307
308fn map_nvme_error(err: nvme_driver::RequestError) -> DiskError {
309 match err {
310 err @ nvme_driver::RequestError::Gone(_) => {
311 DiskError::Io(io::Error::new(io::ErrorKind::NotConnected, err))
312 }
313 nvme_driver::RequestError::Nvme(err) => {
314 match err.status() {
315 Status::RESERVATION_CONFLICT => DiskError::ReservationConflict,
316
317 Status::INVALID_FIELD_IN_COMMAND => DiskError::InvalidInput,
318
319 Status::LBA_OUT_OF_RANGE => DiskError::IllegalBlock,
320
321 Status::DATA_TRANSFER_ERROR | Status::CAPACITY_EXCEEDED => {
323 DiskError::Io(io::Error::other(err))
324 }
325 Status::MEDIA_WRITE_FAULT => {
326 DiskError::MediumError(io::Error::other(err), MediumErrorDetails::WriteFault)
327 }
328 Status::MEDIA_UNRECOVERED_READ_ERROR => DiskError::MediumError(
329 io::Error::other(err),
330 MediumErrorDetails::UnrecoveredReadError,
331 ),
332 Status::MEDIA_END_TO_END_GUARD_CHECK_ERROR => DiskError::MediumError(
333 io::Error::other(err),
334 MediumErrorDetails::GuardCheckFailed,
335 ),
336 Status::MEDIA_END_TO_END_APPLICATION_TAG_CHECK_ERROR => DiskError::MediumError(
337 io::Error::other(err),
338 MediumErrorDetails::ApplicationTagCheckFailed,
339 ),
340 Status::MEDIA_END_TO_END_REFERENCE_TAG_CHECK_ERROR => DiskError::MediumError(
341 io::Error::other(err),
342 MediumErrorDetails::ReferenceTagCheckFailed,
343 ),
344
345 Status::COMMAND_ABORTED_DUE_TO_PREEMPT_AND_ABORT => {
346 DiskError::AbortDueToPreemptAndAbort
347 }
348
349 _ => DiskError::Io(io::Error::other(err)),
350 }
351 }
352 nvme_driver::RequestError::Memory(err) => DiskError::MemoryAccess(err.into()),
353 err @ nvme_driver::RequestError::TooLarge => {
354 DiskError::Io(io::Error::new(io::ErrorKind::InvalidInput, err))
355 }
356 }
357}