use crate::BAR0_LEN;
use crate::DOORBELL_STRIDE_BITS;
use crate::IOCQES;
use crate::IOSQES;
use crate::MAX_QES;
use crate::NVME_VERSION;
use crate::NvmeControllerClient;
use crate::PAGE_MASK;
use crate::VENDOR_ID;
use crate::spec;
use crate::workers::IoQueueEntrySizes;
use crate::workers::NvmeWorkers;
use chipset_device::ChipsetDevice;
use chipset_device::io::IoError;
use chipset_device::io::IoError::InvalidRegister;
use chipset_device::io::IoResult;
use chipset_device::mmio::MmioIntercept;
use chipset_device::mmio::RegisterMmioIntercept;
use chipset_device::pci::PciConfigSpace;
use device_emulators::ReadWriteRequestType;
use device_emulators::read_as_u32_chunks;
use device_emulators::write_as_u32_chunks;
use guestmem::GuestMemory;
use guid::Guid;
use inspect::Inspect;
use inspect::InspectMut;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::cfg_space_emu::BarMemoryKind;
use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
use pci_core::cfg_space_emu::DeviceBars;
use pci_core::msi::MsiTarget;
use pci_core::spec::hwid::ClassCode;
use pci_core::spec::hwid::HardwareIds;
use pci_core::spec::hwid::ProgrammingInterface;
use pci_core::spec::hwid::Subclass;
use std::sync::Arc;
use vmcore::device_state::ChangeDeviceState;
use vmcore::save_restore::SaveError;
use vmcore::save_restore::SaveRestore;
use vmcore::save_restore::SavedStateNotSupported;
use vmcore::vm_task::VmTaskDriverSource;

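/// An emulated NVMe controller, exposed as a PCI device with MMIO-based
/// controller registers and MSI-X interrupts.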
#[derive(InspectMut)]
pub struct NvmeController {
    cfg_space: ConfigSpaceType0Emulator,
    #[inspect(skip)]
    msix: MsixEmulator,

    registers: RegState,
    #[inspect(skip)]
    qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
    #[inspect(flatten, mut)]
    workers: NvmeWorkers,
}

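/// The controller register state.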
#[derive(Inspect)]
struct RegState {
    #[inspect(hex)]
    interrupt_mask: u32,
    cc: spec::Cc,
    csts: spec::Csts,
    aqa: spec::Aqa,
    #[inspect(hex)]
    asq: u64,
    #[inspect(hex)]
    acq: u64,
}

impl RegState {
    fn new() -> Self {
        Self {
            interrupt_mask: 0,
            cc: spec::Cc::new(),
            csts: spec::Csts::new(),
            aqa: spec::Aqa::new(),
            asq: 0,
            acq: 0,
        }
    }
}

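/// The controller capabilities (CAP) register value: the configured doorbell
/// stride, the zero-based maximum queue size, contiguous queues required, NVM
/// command set support, and the maximum ready timeout.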
const CAP: spec::Cap = spec::Cap::new()
    .with_dstrd(DOORBELL_STRIDE_BITS - 2)
    .with_mqes_z(MAX_QES - 1)
    .with_cqr(true)
    .with_css_nvm(true)
    .with_to(!0);

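/// Capabilities and configuration for an [`NvmeController`].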
#[derive(Debug, Copy, Clone)]
pub struct NvmeControllerCaps {
    /// The number of MSI-X table entries to expose.
    pub msix_count: u16,
    /// The maximum number of IO queue pairs the controller supports.
    pub max_io_queues: u16,
    /// The NVM subsystem identifier.
    pub subsystem_id: Guid,
}

impl NvmeController {
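    /// Creates a new NVMe controller that reports the capabilities in `caps`,
    /// registering its BAR0 and MSI-X MMIO regions with `register_mmio`.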
    pub fn new(
        driver_source: &VmTaskDriverSource,
        guest_memory: GuestMemory,
        msi_target: &MsiTarget,
        register_mmio: &mut dyn RegisterMmioIntercept,
        caps: NvmeControllerCaps,
    ) -> Self {
        let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
        let bars = DeviceBars::new()
            .bar0(
                BAR0_LEN,
                BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
            )
            .bar4(
                msix.bar_len(),
                BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
            );

        let cfg_space = ConfigSpaceType0Emulator::new(
            HardwareIds {
                vendor_id: VENDOR_ID,
                device_id: 0x00a9,
                revision_id: 0,
                prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
                sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
                base_class: ClassCode::MASS_STORAGE_CONTROLLER,
                type0_sub_vendor_id: 0,
                type0_sub_system_id: 0,
            },
            vec![Box::new(msix_cap)],
            bars,
        );

        let interrupts = (0..caps.msix_count)
            .map(|i| msix.interrupt(i).unwrap())
            .collect();

        let qe_sizes = Arc::new(Default::default());
        let admin = NvmeWorkers::new(
            driver_source,
            guest_memory,
            interrupts,
            caps.max_io_queues,
            caps.max_io_queues,
            Arc::clone(&qe_sizes),
            caps.subsystem_id,
        );

        Self {
            cfg_space,
            msix,
            registers: RegState::new(),
            workers: admin,
            qe_sizes,
        }
    }

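    /// Returns a client handle for interacting with the controller.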
    pub fn client(&self) -> NvmeControllerClient {
        self.workers.client()
    }

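    /// Reads from the BAR0 register space. Accesses must be 4 or 8 bytes and
    /// naturally aligned.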
    pub fn read_bar0(&mut self, addr: u16, data: &mut [u8]) -> IoResult {
        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() - 1) as u16 != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

        // Handle the 64-bit registers first, allowing either a full 8-byte
        // access or a 4-byte access to either half.
        let d: Option<u64> = match spec::Register(addr & !7) {
            spec::Register::CAP => Some(CAP.into()),
            spec::Register::ASQ => Some(self.registers.asq),
            spec::Register::ACQ => Some(self.registers.acq),
            spec::Register::BPMBL => Some(0),
            _ => None,
        };
        if let Some(d) = d {
            if data.len() == 8 {
                data.copy_from_slice(&d.to_ne_bytes());
            } else if addr & 7 == 0 {
                data.copy_from_slice(&(d as u32).to_ne_bytes());
            } else {
                data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
            }
            return IoResult::Ok;
        }

        if data.len() != 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }

        // The remaining registers are 32 bits wide.
        let d: u32 = match spec::Register(addr) {
            spec::Register::VS => NVME_VERSION,
            spec::Register::INTMS => self.registers.interrupt_mask,
            spec::Register::INTMC => self.registers.interrupt_mask,
            spec::Register::CC => self.registers.cc.into(),
            spec::Register::RESERVED => 0,
            spec::Register::CSTS => self.get_csts(),
            spec::Register::NSSR => 0,
            spec::Register::AQA => self.registers.aqa.into(),
            spec::Register::CMBLOC => 0,
            spec::Register::CMBSZ => 0,
            spec::Register::BPINFO => 0,
            spec::Register::BPRSEL => 0,
            _ => return IoResult::Err(InvalidRegister),
        };
        data.copy_from_slice(&d.to_ne_bytes());
        IoResult::Ok
    }

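    /// Writes to the BAR0 register space. Offsets at `0x1000` and beyond are
    /// doorbell writes; the rest are controller registers.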
    pub fn write_bar0(&mut self, addr: u16, data: &[u8]) -> IoResult {
        if addr >= 0x1000 {
            // This is a doorbell write. It must be a 4-byte write to a
            // doorbell-stride-aligned offset.
            let base = addr - 0x1000;
            let db_id = base >> DOORBELL_STRIDE_BITS;
            if (db_id << DOORBELL_STRIDE_BITS) != base {
                return IoResult::Err(InvalidRegister);
            }
            let Ok(data) = data.try_into() else {
                return IoResult::Err(IoError::InvalidAccessSize);
            };
            let value = u32::from_ne_bytes(data);
            self.workers.doorbell(db_id, value);
            return IoResult::Ok;
        }

        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() - 1) as u16 != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

        // Computes the new value of a 64-bit register, merging a 4-byte write
        // into the appropriate half when necessary.
        let update_reg = |x: u64| {
            if data.len() == 8 {
                u64::from_ne_bytes(data.try_into().unwrap())
            } else {
                let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
                if addr & 7 == 0 {
                    (x & !(u32::MAX as u64)) | data
                } else {
                    (x & u32::MAX as u64) | (data << 32)
                }
            }
        };

        // Handle the 64-bit registers first.
        let handled = match spec::Register(addr & !7) {
            spec::Register::ASQ => {
                if !self.registers.cc.en() {
                    self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set asq while enabled");
                }
                true
            }
            spec::Register::ACQ => {
                if !self.registers.cc.en() {
                    self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set acq while enabled");
                }
                true
            }
            _ => false,
        };
        if handled {
            return IoResult::Ok;
        }

        // The remaining registers are 32 bits wide.
        let Ok(data) = data.try_into() else {
            return IoResult::Err(IoError::InvalidAccessSize);
        };
        let data = u32::from_ne_bytes(data);

        match spec::Register(addr) {
            spec::Register::INTMS => self.registers.interrupt_mask |= data,
            spec::Register::INTMC => self.registers.interrupt_mask &= !data,
            spec::Register::CC => self.set_cc(data.into()),
            spec::Register::AQA => self.registers.aqa = data.into(),
            _ => return IoResult::Err(InvalidRegister),
        }
        IoResult::Ok
    }

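    /// Handles a write to the controller configuration (CC) register,
    /// validating it and enabling, resetting, or shutting down the controller
    /// as requested.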
    fn set_cc(&mut self, cc: spec::Cc) {
        tracing::debug!(?cc, "set cc");

        if cc.mps() != 0 {
            tracelimit::warn_ratelimited!(
                "This implementation only supports memory page sizes of 4K."
            );
            self.fatal_error();
            return;
        }

        if cc.css() != 0 {
            tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
            self.fatal_error();
            return;
        }

        // AMS values 2 through 6 are reserved.
        if let 2..=6 = cc.ams() {
            tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
            self.fatal_error();
            return;
        }

        // Mask off the bits this implementation doesn't support.
        let mask: u32 = u32::from(
            spec::Cc::new()
                .with_en(true)
                .with_shn(0b11)
                .with_iosqes(0b1111)
                .with_iocqes(0b1111),
        );
        let mut cc: spec::Cc = (u32::from(cc) & mask).into();

        if cc.shn() != 0 {
            // Report that shutdown processing completed immediately.
            self.registers.csts.set_shst(0b10);
        }

        if cc.en() != self.registers.cc.en() {
            if cc.en() {
                if cc.iocqes() == 0 {
                    cc.set_iocqes(IOCQES);
                } else if cc.iocqes() != IOCQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports CQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if cc.iosqes() == 0 {
                    cc.set_iosqes(IOSQES);
                } else if cc.iosqes() != IOSQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports SQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if self.registers.csts.rdy() {
                    tracelimit::warn_ratelimited!("enabling during reset");
                    return;
                }
                if cc.shn() == 0 {
                    self.registers.csts.set_shst(0);
                }

                // The AQA queue sizes are zero-based.
                self.workers.enable(
                    self.registers.asq,
                    self.registers.aqa.asqs_z().max(1) + 1,
                    self.registers.acq,
                    self.registers.aqa.acqs_z().max(1) + 1,
                );
            } else if self.registers.csts.rdy() {
                self.workers.controller_reset();
            } else {
                tracelimit::warn_ratelimited!("disabling while not ready");
                return;
            }
        }

        self.registers.cc = cc;
        *self.qe_sizes.lock() = IoQueueEntrySizes {
            sqe_bits: cc.iosqes(),
            cqe_bits: cc.iocqes(),
        };
    }

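    /// Computes the current CSTS value, polling the workers so that pending
    /// enable or controller reset transitions can complete.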
    fn get_csts(&mut self) -> u32 {
        if !self.registers.cc.en() && self.registers.csts.rdy() {
            // A controller reset is in progress. If it has finished, return
            // the registers to their reset state.
            if self.workers.poll_controller_reset() {
                self.registers.csts = 0.into();
                self.registers.cc = 0.into();
                self.registers.interrupt_mask = 0;
            }
        } else if self.registers.cc.en() && !self.registers.csts.rdy() {
            // The controller is being enabled. Report ready once the workers
            // have finished starting up.
            if self.workers.poll_enabled() {
                self.registers.csts.set_rdy(true);
            }
        }

        let csts = self.registers.csts;
        tracing::debug!(?csts, "get csts");
        csts.into()
    }

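    /// Fails the controller by setting CSTS.CFS (controller fatal status).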
    pub fn fatal_error(&mut self) {
        self.registers.csts.set_cfs(true);
    }
}

impl ChangeDeviceState for NvmeController {
    fn start(&mut self) {}

    async fn stop(&mut self) {}

    async fn reset(&mut self) {
        let Self {
            cfg_space,
            msix: _,
            registers,
            qe_sizes,
            workers,
        } = self;
        workers.reset().await;
        cfg_space.reset();
        *registers = RegState::new();
        *qe_sizes.lock() = Default::default();
    }
}

impl ChipsetDevice for NvmeController {
    fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
        Some(self)
    }

    fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
        Some(self)
    }
}

impl MmioIntercept for NvmeController {
    fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.read_bar0(offset, data),
            Some((4, offset)) => {
                read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }

    fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.write_bar0(offset, data),
            Some((4, offset)) => {
                write_as_u32_chunks(offset, data, |offset, ty| match ty {
                    ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
                    ReadWriteRequestType::Write(val) => {
                        self.msix.write_u32(offset, val);
                        None
                    }
                });
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }
}

impl PciConfigSpace for NvmeController {
    fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
        self.cfg_space.read_u32(offset, value)
    }

    fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
        self.cfg_space.write_u32(offset, value)
    }
}

impl SaveRestore for NvmeController {
    type SavedState = SavedStateNotSupported;

    fn save(&mut self) -> Result<Self::SavedState, SaveError> {
        Err(SaveError::NotSupported)
    }

    fn restore(
        &mut self,
        state: Self::SavedState,
    ) -> Result<(), vmcore::save_restore::RestoreError> {
        match state {}
    }
}