1use crate::BAR0_LEN;
7use crate::DEVICE_ID;
8use crate::DOORBELL_STRIDE_BITS;
9use crate::IOCQES;
10use crate::IOSQES;
11use crate::MAX_QES;
12use crate::NVME_VERSION;
13use crate::NvmeControllerClient;
14use crate::PAGE_MASK;
15use crate::VENDOR_ID;
16use crate::spec;
17use crate::workers::IoQueueEntrySizes;
18use crate::workers::NvmeWorkers;
19use chipset_device::ChipsetDevice;
20use chipset_device::io::IoError;
21use chipset_device::io::IoError::InvalidRegister;
22use chipset_device::io::IoResult;
23use chipset_device::mmio::MmioIntercept;
24use chipset_device::mmio::RegisterMmioIntercept;
25use chipset_device::pci::PciConfigSpace;
26use device_emulators::ReadWriteRequestType;
27use device_emulators::read_as_u32_chunks;
28use device_emulators::write_as_u32_chunks;
29use guid::Guid;
30use inspect::Inspect;
31use inspect::InspectMut;
32use parking_lot::Mutex;
33use pci_core::capabilities::msix::MsixEmulator;
34use pci_core::capabilities::pci_express::PciExpressCapability;
35use pci_core::cfg_space_emu::BarMemoryKind;
36use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
37use pci_core::cfg_space_emu::DeviceBars;
38use pci_core::dma::DmaTarget;
39use pci_core::spec::hwid::ClassCode;
40use pci_core::spec::hwid::HardwareIds;
41use pci_core::spec::hwid::ProgrammingInterface;
42use pci_core::spec::hwid::Subclass;
43use std::sync::Arc;
44use vmcore::device_state::ChangeDeviceState;
45use vmcore::save_restore::SaveError;
46use vmcore::save_restore::SaveRestore;
47use vmcore::save_restore::SavedStateNotSupported;
48use vmcore::vm_task::VmTaskDriverSource;
49
50#[derive(InspectMut)]
52pub struct NvmeController {
53 cfg_space: ConfigSpaceType0Emulator,
54 #[inspect(skip)]
55 msix: MsixEmulator,
56
57 registers: RegState,
58 #[inspect(skip)]
59 qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
60 #[inspect(flatten, mut)]
61 workers: NvmeWorkers,
62}
63
64#[derive(Inspect)]
65struct RegState {
66 #[inspect(hex)]
67 interrupt_mask: u32,
68 cc: spec::Cc,
69 csts: spec::Csts,
70 aqa: spec::Aqa,
71 #[inspect(hex)]
72 asq: u64,
73 #[inspect(hex)]
74 acq: u64,
75}
76
77impl RegState {
78 fn new() -> Self {
79 Self {
80 interrupt_mask: 0,
81 cc: spec::Cc::new(),
82 csts: spec::Csts::new(),
83 aqa: spec::Aqa::new(),
84 asq: 0,
85 acq: 0,
86 }
87 }
88}
89
90const CAP: spec::Cap = spec::Cap::new()
91 .with_dstrd(DOORBELL_STRIDE_BITS - 2)
92 .with_mqes_z(MAX_QES - 1)
93 .with_cqr(true)
94 .with_css_nvm(true)
95 .with_to(!0);
96
97#[derive(Debug, Copy, Clone)]
99pub struct NvmeControllerCaps {
100 pub msix_count: u16,
102 pub max_io_queues: u16,
104 pub subsystem_id: Guid,
107}
108
109impl NvmeController {
110 pub fn new(
112 driver_source: &VmTaskDriverSource,
113 dma_target: &DmaTarget,
114 register_mmio: &mut dyn RegisterMmioIntercept,
115 caps: NvmeControllerCaps,
116 ) -> Self {
117 let msi_target = dma_target.msi_target();
118 let guest_memory = dma_target.guest_memory().clone();
119 let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
120 let bars = DeviceBars::new()
121 .bar0(
122 BAR0_LEN,
123 BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
124 )
125 .bar4(
126 msix.bar_len(),
127 BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
128 );
129
130 let cfg_space = ConfigSpaceType0Emulator::new(
131 HardwareIds {
132 vendor_id: VENDOR_ID,
133 device_id: DEVICE_ID,
134 revision_id: 0,
135 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
136 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
137 base_class: ClassCode::MASS_STORAGE_CONTROLLER,
138 type0_sub_vendor_id: 0,
139 type0_sub_system_id: 0,
140 },
141 vec![
142 Box::new(msix_cap),
143 Box::new(PciExpressCapability::new(
144 pci_core::spec::caps::pci_express::DevicePortType::Endpoint,
145 None,
146 )),
147 ],
148 Vec::new(),
149 bars,
150 );
151
152 let interrupts = (0..caps.msix_count)
153 .map(|i| msix.interrupt(i).unwrap())
154 .collect();
155
156 let qe_sizes = Arc::new(Default::default());
157 let admin = NvmeWorkers::new(
158 driver_source,
159 guest_memory,
160 interrupts,
161 caps.max_io_queues,
162 caps.max_io_queues,
163 Arc::clone(&qe_sizes),
164 caps.subsystem_id,
165 );
166
167 Self {
168 cfg_space,
169 msix,
170 registers: RegState::new(),
171 workers: admin,
172 qe_sizes,
173 }
174 }
175
176 pub fn client(&self) -> NvmeControllerClient {
178 self.workers.client()
179 }
180
181 pub fn read_bar0(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
183 if data.len() < 4 {
184 return IoResult::Err(IoError::InvalidAccessSize);
185 }
186 if addr & (data.len() as u64 - 1) != 0 {
187 return IoResult::Err(IoError::UnalignedAccess);
188 }
189
190 let d: Option<u64> = match spec::Register(addr & !7) {
192 spec::Register::CAP => Some(CAP.into()),
193 spec::Register::ASQ => Some(self.registers.asq),
194 spec::Register::ACQ => Some(self.registers.acq),
195 spec::Register::BPMBL => Some(0),
196 _ => None,
197 };
198 if let Some(d) = d {
199 if data.len() == 8 {
200 data.copy_from_slice(&d.to_ne_bytes());
201 } else if addr & 7 == 0 {
202 data.copy_from_slice(&(d as u32).to_ne_bytes());
203 } else {
204 data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
205 }
206 return IoResult::Ok;
207 }
208
209 if data.len() != 4 {
210 return IoResult::Err(IoError::InvalidAccessSize);
211 }
212
213 let d: u32 = match spec::Register(addr) {
215 spec::Register::VS => NVME_VERSION,
216 spec::Register::INTMS => self.registers.interrupt_mask,
217 spec::Register::INTMC => self.registers.interrupt_mask,
218 spec::Register::CC => self.registers.cc.into(),
219 spec::Register::RESERVED => 0,
220 spec::Register::CSTS => self.get_csts(),
221 spec::Register::NSSR => 0,
222 spec::Register::AQA => self.registers.aqa.into(),
223 spec::Register::CMBLOC => 0,
224 spec::Register::CMBSZ => 0,
225 spec::Register::BPINFO => 0,
226 spec::Register::BPRSEL => 0,
227 _ => return IoResult::Err(InvalidRegister),
228 };
229 data.copy_from_slice(&d.to_ne_bytes());
230 IoResult::Ok
231 }
232
233 pub fn write_bar0(&mut self, addr: u64, data: &[u8]) -> IoResult {
235 if addr >= 0x1000 {
236 let base = addr - 0x1000;
238 let db_id = base >> DOORBELL_STRIDE_BITS;
239 if (db_id << DOORBELL_STRIDE_BITS) != base {
240 return IoResult::Err(InvalidRegister);
241 }
242 let Ok(data) = data.try_into() else {
243 return IoResult::Err(IoError::InvalidAccessSize);
244 };
245 let value = u32::from_ne_bytes(data);
246 let db_id = match u16::try_from(db_id) {
247 Ok(id) => id,
248 Err(_) => return IoResult::Err(InvalidRegister),
249 };
250 self.workers.doorbell(db_id, value);
251 return IoResult::Ok;
252 }
253
254 if data.len() < 4 {
255 return IoResult::Err(IoError::InvalidAccessSize);
256 }
257 if addr & (data.len() as u64 - 1) != 0 {
258 return IoResult::Err(IoError::UnalignedAccess);
259 }
260
261 let update_reg = |x: u64| {
262 if data.len() == 8 {
263 u64::from_ne_bytes(data.try_into().unwrap())
264 } else {
265 let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
266 if addr & 7 == 0 {
267 (x & !(u32::MAX as u64)) | data
268 } else {
269 (x & u32::MAX as u64) | (data << 32)
270 }
271 }
272 };
273
274 let handled = match spec::Register(addr & !7) {
276 spec::Register::ASQ => {
277 if !self.registers.cc.en() {
278 self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
279 } else {
280 tracelimit::warn_ratelimited!("attempt to set asq while enabled");
281 }
282 true
283 }
284 spec::Register::ACQ => {
285 if !self.registers.cc.en() {
286 self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
287 } else {
288 tracelimit::warn_ratelimited!("attempt to set acq while enabled");
289 }
290 true
291 }
292 _ => false,
293 };
294 if handled {
295 return IoResult::Ok;
296 }
297
298 let Ok(data) = data.try_into() else {
299 return IoResult::Err(IoError::InvalidAccessSize);
300 };
301 let data = u32::from_ne_bytes(data);
302
303 match spec::Register(addr) {
305 spec::Register::INTMS => self.registers.interrupt_mask |= data,
306 spec::Register::INTMC => self.registers.interrupt_mask &= !data,
307 spec::Register::CC => self.set_cc(data.into()),
308 spec::Register::AQA => self.registers.aqa = data.into(),
309 _ => return IoResult::Err(InvalidRegister),
310 }
311 IoResult::Ok
312 }
313
314 fn set_cc(&mut self, cc: spec::Cc) {
315 tracing::debug!(?cc, "set cc");
316
317 if cc.mps() != 0 {
318 tracelimit::warn_ratelimited!(
319 "This implementation only supports memory page sizes of 4K."
320 );
321 self.fatal_error();
322 return;
323 }
324
325 if cc.css() != 0 {
326 tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
327 self.fatal_error();
328 return;
329 }
330
331 if let 2..=6 = cc.ams() {
332 tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
333 self.fatal_error();
334 }
335
336 let mask: u32 = u32::from(
337 spec::Cc::new()
338 .with_en(true)
339 .with_shn(0b11)
340 .with_iosqes(0b1111)
341 .with_iocqes(0b1111),
342 );
343 let mut cc: spec::Cc = (u32::from(cc) & mask).into();
344
345 if cc.shn() != 0 {
346 self.registers.csts.set_shst(0b10);
350 }
351
352 if cc.en() != self.registers.cc.en() {
353 if cc.en() {
354 if cc.iocqes() == 0 {
356 cc.set_iocqes(IOCQES);
357 } else if cc.iocqes() != IOCQES {
358 tracelimit::warn_ratelimited!(
359 "This implementation only supports CQEs of the default size."
360 );
361 self.fatal_error();
362 return;
363 }
364
365 if cc.iosqes() == 0 {
366 cc.set_iosqes(IOSQES);
367 } else if cc.iosqes() != IOSQES {
368 tracelimit::warn_ratelimited!(
369 "This implementation only supports SQEs of the default size."
370 );
371 self.fatal_error();
372 return;
373 }
374
375 if self.registers.csts.rdy() {
376 tracelimit::warn_ratelimited!("enabling during reset");
377 return;
378 }
379 if cc.shn() == 0 {
380 self.registers.csts.set_shst(0);
381 }
382
383 self.workers.enable(
384 self.registers.asq,
385 self.registers.aqa.asqs_z().max(1) + 1,
386 self.registers.acq,
387 self.registers.aqa.acqs_z().max(1) + 1,
388 );
389 } else if self.registers.csts.rdy() {
390 self.workers.controller_reset();
391 } else {
392 tracelimit::warn_ratelimited!("disabling while not ready");
393 return;
394 }
395 }
396
397 self.registers.cc = cc;
398 *self.qe_sizes.lock() = IoQueueEntrySizes {
399 sqe_bits: cc.iosqes(),
400 cqe_bits: cc.iocqes(),
401 };
402 }
403
404 fn get_csts(&mut self) -> u32 {
405 if !self.registers.cc.en() && self.registers.csts.rdy() {
406 if self.workers.poll_controller_reset() {
408 self.registers.csts = 0.into();
410 self.registers.cc = 0.into();
411 self.registers.interrupt_mask = 0;
412 }
413 } else if self.registers.cc.en() && !self.registers.csts.rdy() {
414 if self.workers.poll_enabled() {
415 self.registers.csts.set_rdy(true);
416 }
417 }
418
419 let csts = self.registers.csts;
420 tracing::debug!(?csts, "get csts");
421 csts.into()
422 }
423
424 pub fn fatal_error(&mut self) {
427 self.registers.csts.set_cfs(true);
428 }
429}
430
431impl ChangeDeviceState for NvmeController {
432 fn start(&mut self) {}
433
434 async fn stop(&mut self) {}
435
436 async fn reset(&mut self) {
437 let Self {
438 cfg_space,
439 msix: _,
440 registers,
441 qe_sizes,
442 workers,
443 } = self;
444 workers.reset().await;
445 cfg_space.reset();
446 *registers = RegState::new();
447 *qe_sizes.lock() = Default::default();
448 }
449}
450
451impl ChipsetDevice for NvmeController {
452 fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
453 Some(self)
454 }
455
456 fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
457 Some(self)
458 }
459}
460
461impl MmioIntercept for NvmeController {
462 fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
463 match self.cfg_space.find_bar(addr) {
464 Some((0, offset)) => self.read_bar0(offset, data),
465 Some((4, offset)) => {
466 read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
467 IoResult::Ok
468 }
469 _ => IoResult::Err(InvalidRegister),
470 }
471 }
472
473 fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
474 match self.cfg_space.find_bar(addr) {
475 Some((0, offset)) => self.write_bar0(offset, data),
476 Some((4, offset)) => {
477 write_as_u32_chunks(offset, data, |offset, ty| match ty {
478 ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
479 ReadWriteRequestType::Write(val) => {
480 self.msix.write_u32(offset, val);
481 None
482 }
483 });
484 IoResult::Ok
485 }
486 _ => IoResult::Err(InvalidRegister),
487 }
488 }
489}
490
491impl PciConfigSpace for NvmeController {
492 fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
493 self.cfg_space.read_u32(offset, value)
494 }
495
496 fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
497 self.cfg_space.write_u32(offset, value)
498 }
499}
500
501impl SaveRestore for NvmeController {
502 type SavedState = SavedStateNotSupported;
503
504 fn save(&mut self) -> Result<Self::SavedState, SaveError> {
505 Err(SaveError::NotSupported)
506 }
507
508 fn restore(
509 &mut self,
510 state: Self::SavedState,
511 ) -> Result<(), vmcore::save_restore::RestoreError> {
512 match state {}
513 }
514}