1use crate::BAR0_LEN;
7use crate::DOORBELL_STRIDE_BITS;
8use crate::IOCQES;
9use crate::IOSQES;
10use crate::MAX_QES;
11use crate::NVME_VERSION;
12use crate::NvmeControllerClient;
13use crate::PAGE_MASK;
14use crate::VENDOR_ID;
15use crate::spec;
16use crate::workers::IoQueueEntrySizes;
17use crate::workers::NvmeWorkers;
18use chipset_device::ChipsetDevice;
19use chipset_device::io::IoError;
20use chipset_device::io::IoError::InvalidRegister;
21use chipset_device::io::IoResult;
22use chipset_device::mmio::MmioIntercept;
23use chipset_device::mmio::RegisterMmioIntercept;
24use chipset_device::pci::PciConfigSpace;
25use device_emulators::ReadWriteRequestType;
26use device_emulators::read_as_u32_chunks;
27use device_emulators::write_as_u32_chunks;
28use guestmem::GuestMemory;
29use guid::Guid;
30use inspect::Inspect;
31use inspect::InspectMut;
32use parking_lot::Mutex;
33use pci_core::capabilities::msix::MsixEmulator;
34use pci_core::capabilities::pci_express::PciExpressCapability;
35use pci_core::cfg_space_emu::BarMemoryKind;
36use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
37use pci_core::cfg_space_emu::DeviceBars;
38use pci_core::msi::MsiTarget;
39use pci_core::spec::hwid::ClassCode;
40use pci_core::spec::hwid::HardwareIds;
41use pci_core::spec::hwid::ProgrammingInterface;
42use pci_core::spec::hwid::Subclass;
43use std::sync::Arc;
44use vmcore::device_state::ChangeDeviceState;
45use vmcore::save_restore::SaveError;
46use vmcore::save_restore::SaveRestore;
47use vmcore::save_restore::SavedStateNotSupported;
48use vmcore::vm_task::VmTaskDriverSource;
49
50#[derive(InspectMut)]
52pub struct NvmeController {
53 cfg_space: ConfigSpaceType0Emulator,
54 #[inspect(skip)]
55 msix: MsixEmulator,
56
57 registers: RegState,
58 #[inspect(skip)]
59 qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
60 #[inspect(flatten, mut)]
61 workers: NvmeWorkers,
62}
63
64#[derive(Inspect)]
65struct RegState {
66 #[inspect(hex)]
67 interrupt_mask: u32,
68 cc: spec::Cc,
69 csts: spec::Csts,
70 aqa: spec::Aqa,
71 #[inspect(hex)]
72 asq: u64,
73 #[inspect(hex)]
74 acq: u64,
75}
76
77impl RegState {
78 fn new() -> Self {
79 Self {
80 interrupt_mask: 0,
81 cc: spec::Cc::new(),
82 csts: spec::Csts::new(),
83 aqa: spec::Aqa::new(),
84 asq: 0,
85 acq: 0,
86 }
87 }
88}
89
90const CAP: spec::Cap = spec::Cap::new()
91 .with_dstrd(DOORBELL_STRIDE_BITS - 2)
92 .with_mqes_z(MAX_QES - 1)
93 .with_cqr(true)
94 .with_css_nvm(true)
95 .with_to(!0);
96
97#[derive(Debug, Copy, Clone)]
99pub struct NvmeControllerCaps {
100 pub msix_count: u16,
102 pub max_io_queues: u16,
104 pub subsystem_id: Guid,
107}
108
109impl NvmeController {
110 pub fn new(
112 driver_source: &VmTaskDriverSource,
113 guest_memory: GuestMemory,
114 msi_target: &MsiTarget,
115 register_mmio: &mut dyn RegisterMmioIntercept,
116 caps: NvmeControllerCaps,
117 ) -> Self {
118 let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
119 let bars = DeviceBars::new()
120 .bar0(
121 BAR0_LEN,
122 BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
123 )
124 .bar4(
125 msix.bar_len(),
126 BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
127 );
128
129 let cfg_space = ConfigSpaceType0Emulator::new(
130 HardwareIds {
131 vendor_id: VENDOR_ID,
132 device_id: 0x00a9,
133 revision_id: 0,
134 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
135 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
136 base_class: ClassCode::MASS_STORAGE_CONTROLLER,
137 type0_sub_vendor_id: 0,
138 type0_sub_system_id: 0,
139 },
140 vec![
141 Box::new(msix_cap),
142 Box::new(PciExpressCapability::new(
143 pci_core::spec::caps::pci_express::DevicePortType::Endpoint,
144 None,
145 )),
146 ],
147 Vec::new(),
148 bars,
149 );
150
151 let interrupts = (0..caps.msix_count)
152 .map(|i| msix.interrupt(i).unwrap())
153 .collect();
154
155 let qe_sizes = Arc::new(Default::default());
156 let admin = NvmeWorkers::new(
157 driver_source,
158 guest_memory,
159 interrupts,
160 caps.max_io_queues,
161 caps.max_io_queues,
162 Arc::clone(&qe_sizes),
163 caps.subsystem_id,
164 );
165
166 Self {
167 cfg_space,
168 msix,
169 registers: RegState::new(),
170 workers: admin,
171 qe_sizes,
172 }
173 }
174
175 pub fn client(&self) -> NvmeControllerClient {
177 self.workers.client()
178 }
179
180 pub fn read_bar0(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
182 if data.len() < 4 {
183 return IoResult::Err(IoError::InvalidAccessSize);
184 }
185 if addr & (data.len() as u64 - 1) != 0 {
186 return IoResult::Err(IoError::UnalignedAccess);
187 }
188
189 let d: Option<u64> = match spec::Register(addr & !7) {
191 spec::Register::CAP => Some(CAP.into()),
192 spec::Register::ASQ => Some(self.registers.asq),
193 spec::Register::ACQ => Some(self.registers.acq),
194 spec::Register::BPMBL => Some(0),
195 _ => None,
196 };
197 if let Some(d) = d {
198 if data.len() == 8 {
199 data.copy_from_slice(&d.to_ne_bytes());
200 } else if addr & 7 == 0 {
201 data.copy_from_slice(&(d as u32).to_ne_bytes());
202 } else {
203 data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
204 }
205 return IoResult::Ok;
206 }
207
208 if data.len() != 4 {
209 return IoResult::Err(IoError::InvalidAccessSize);
210 }
211
212 let d: u32 = match spec::Register(addr) {
214 spec::Register::VS => NVME_VERSION,
215 spec::Register::INTMS => self.registers.interrupt_mask,
216 spec::Register::INTMC => self.registers.interrupt_mask,
217 spec::Register::CC => self.registers.cc.into(),
218 spec::Register::RESERVED => 0,
219 spec::Register::CSTS => self.get_csts(),
220 spec::Register::NSSR => 0,
221 spec::Register::AQA => self.registers.aqa.into(),
222 spec::Register::CMBLOC => 0,
223 spec::Register::CMBSZ => 0,
224 spec::Register::BPINFO => 0,
225 spec::Register::BPRSEL => 0,
226 _ => return IoResult::Err(InvalidRegister),
227 };
228 data.copy_from_slice(&d.to_ne_bytes());
229 IoResult::Ok
230 }
231
232 pub fn write_bar0(&mut self, addr: u64, data: &[u8]) -> IoResult {
234 if addr >= 0x1000 {
235 let base = addr - 0x1000;
237 let db_id = base >> DOORBELL_STRIDE_BITS;
238 if (db_id << DOORBELL_STRIDE_BITS) != base {
239 return IoResult::Err(InvalidRegister);
240 }
241 let Ok(data) = data.try_into() else {
242 return IoResult::Err(IoError::InvalidAccessSize);
243 };
244 let value = u32::from_ne_bytes(data);
245 let db_id = match u16::try_from(db_id) {
246 Ok(id) => id,
247 Err(_) => return IoResult::Err(InvalidRegister),
248 };
249 self.workers.doorbell(db_id, value);
250 return IoResult::Ok;
251 }
252
253 if data.len() < 4 {
254 return IoResult::Err(IoError::InvalidAccessSize);
255 }
256 if addr & (data.len() as u64 - 1) != 0 {
257 return IoResult::Err(IoError::UnalignedAccess);
258 }
259
260 let update_reg = |x: u64| {
261 if data.len() == 8 {
262 u64::from_ne_bytes(data.try_into().unwrap())
263 } else {
264 let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
265 if addr & 7 == 0 {
266 (x & !(u32::MAX as u64)) | data
267 } else {
268 (x & u32::MAX as u64) | (data << 32)
269 }
270 }
271 };
272
273 let handled = match spec::Register(addr & !7) {
275 spec::Register::ASQ => {
276 if !self.registers.cc.en() {
277 self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
278 } else {
279 tracelimit::warn_ratelimited!("attempt to set asq while enabled");
280 }
281 true
282 }
283 spec::Register::ACQ => {
284 if !self.registers.cc.en() {
285 self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
286 } else {
287 tracelimit::warn_ratelimited!("attempt to set acq while enabled");
288 }
289 true
290 }
291 _ => false,
292 };
293 if handled {
294 return IoResult::Ok;
295 }
296
297 let Ok(data) = data.try_into() else {
298 return IoResult::Err(IoError::InvalidAccessSize);
299 };
300 let data = u32::from_ne_bytes(data);
301
302 match spec::Register(addr) {
304 spec::Register::INTMS => self.registers.interrupt_mask |= data,
305 spec::Register::INTMC => self.registers.interrupt_mask &= !data,
306 spec::Register::CC => self.set_cc(data.into()),
307 spec::Register::AQA => self.registers.aqa = data.into(),
308 _ => return IoResult::Err(InvalidRegister),
309 }
310 IoResult::Ok
311 }
312
313 fn set_cc(&mut self, cc: spec::Cc) {
314 tracing::debug!(?cc, "set cc");
315
316 if cc.mps() != 0 {
317 tracelimit::warn_ratelimited!(
318 "This implementation only supports memory page sizes of 4K."
319 );
320 self.fatal_error();
321 return;
322 }
323
324 if cc.css() != 0 {
325 tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
326 self.fatal_error();
327 return;
328 }
329
330 if let 2..=6 = cc.ams() {
331 tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
332 self.fatal_error();
333 }
334
335 let mask: u32 = u32::from(
336 spec::Cc::new()
337 .with_en(true)
338 .with_shn(0b11)
339 .with_iosqes(0b1111)
340 .with_iocqes(0b1111),
341 );
342 let mut cc: spec::Cc = (u32::from(cc) & mask).into();
343
344 if cc.shn() != 0 {
345 self.registers.csts.set_shst(0b10);
349 }
350
351 if cc.en() != self.registers.cc.en() {
352 if cc.en() {
353 if cc.iocqes() == 0 {
355 cc.set_iocqes(IOCQES);
356 } else if cc.iocqes() != IOCQES {
357 tracelimit::warn_ratelimited!(
358 "This implementation only supports CQEs of the default size."
359 );
360 self.fatal_error();
361 return;
362 }
363
364 if cc.iosqes() == 0 {
365 cc.set_iosqes(IOSQES);
366 } else if cc.iosqes() != IOSQES {
367 tracelimit::warn_ratelimited!(
368 "This implementation only supports SQEs of the default size."
369 );
370 self.fatal_error();
371 return;
372 }
373
374 if self.registers.csts.rdy() {
375 tracelimit::warn_ratelimited!("enabling during reset");
376 return;
377 }
378 if cc.shn() == 0 {
379 self.registers.csts.set_shst(0);
380 }
381
382 self.workers.enable(
383 self.registers.asq,
384 self.registers.aqa.asqs_z().max(1) + 1,
385 self.registers.acq,
386 self.registers.aqa.acqs_z().max(1) + 1,
387 );
388 } else if self.registers.csts.rdy() {
389 self.workers.controller_reset();
390 } else {
391 tracelimit::warn_ratelimited!("disabling while not ready");
392 return;
393 }
394 }
395
396 self.registers.cc = cc;
397 *self.qe_sizes.lock() = IoQueueEntrySizes {
398 sqe_bits: cc.iosqes(),
399 cqe_bits: cc.iocqes(),
400 };
401 }
402
403 fn get_csts(&mut self) -> u32 {
404 if !self.registers.cc.en() && self.registers.csts.rdy() {
405 if self.workers.poll_controller_reset() {
407 self.registers.csts = 0.into();
409 self.registers.cc = 0.into();
410 self.registers.interrupt_mask = 0;
411 }
412 } else if self.registers.cc.en() && !self.registers.csts.rdy() {
413 if self.workers.poll_enabled() {
414 self.registers.csts.set_rdy(true);
415 }
416 }
417
418 let csts = self.registers.csts;
419 tracing::debug!(?csts, "get csts");
420 csts.into()
421 }
422
423 pub fn fatal_error(&mut self) {
426 self.registers.csts.set_cfs(true);
427 }
428}
429
430impl ChangeDeviceState for NvmeController {
431 fn start(&mut self) {}
432
433 async fn stop(&mut self) {}
434
435 async fn reset(&mut self) {
436 let Self {
437 cfg_space,
438 msix: _,
439 registers,
440 qe_sizes,
441 workers,
442 } = self;
443 workers.reset().await;
444 cfg_space.reset();
445 *registers = RegState::new();
446 *qe_sizes.lock() = Default::default();
447 }
448}
449
450impl ChipsetDevice for NvmeController {
451 fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
452 Some(self)
453 }
454
455 fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
456 Some(self)
457 }
458}
459
460impl MmioIntercept for NvmeController {
461 fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
462 match self.cfg_space.find_bar(addr) {
463 Some((0, offset)) => self.read_bar0(offset, data),
464 Some((4, offset)) => {
465 read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
466 IoResult::Ok
467 }
468 _ => IoResult::Err(InvalidRegister),
469 }
470 }
471
472 fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
473 match self.cfg_space.find_bar(addr) {
474 Some((0, offset)) => self.write_bar0(offset, data),
475 Some((4, offset)) => {
476 write_as_u32_chunks(offset, data, |offset, ty| match ty {
477 ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
478 ReadWriteRequestType::Write(val) => {
479 self.msix.write_u32(offset, val);
480 None
481 }
482 });
483 IoResult::Ok
484 }
485 _ => IoResult::Err(InvalidRegister),
486 }
487 }
488}
489
490impl PciConfigSpace for NvmeController {
491 fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
492 self.cfg_space.read_u32(offset, value)
493 }
494
495 fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
496 self.cfg_space.write_u32(offset, value)
497 }
498}
499
500impl SaveRestore for NvmeController {
501 type SavedState = SavedStateNotSupported;
502
503 fn save(&mut self) -> Result<Self::SavedState, SaveError> {
504 Err(SaveError::NotSupported)
505 }
506
507 fn restore(
508 &mut self,
509 state: Self::SavedState,
510 ) -> Result<(), vmcore::save_restore::RestoreError> {
511 match state {}
512 }
513}