use crate::BAR0_LEN;
use crate::DOORBELL_STRIDE_BITS;
use crate::IOCQES;
use crate::IOSQES;
use crate::MAX_QES;
use crate::NVME_VERSION;
use crate::NvmeFaultControllerClient;
use crate::PAGE_MASK;
use crate::VENDOR_ID;
use crate::spec;
use crate::workers::IoQueueEntrySizes;
use crate::workers::NvmeWorkers;
use chipset_device::ChipsetDevice;
use chipset_device::io::IoError;
use chipset_device::io::IoError::InvalidRegister;
use chipset_device::io::IoResult;
use chipset_device::mmio::MmioIntercept;
use chipset_device::mmio::RegisterMmioIntercept;
use chipset_device::pci::PciConfigSpace;
use device_emulators::ReadWriteRequestType;
use device_emulators::read_as_u32_chunks;
use device_emulators::write_as_u32_chunks;
use guestmem::GuestMemory;
use guid::Guid;
use inspect::Inspect;
use inspect::InspectMut;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultBehavior;
use nvme_resources::fault::PciFaultConfig;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::cfg_space_emu::BarMemoryKind;
use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
use pci_core::cfg_space_emu::DeviceBars;
use pci_core::msi::MsiTarget;
use pci_core::spec::hwid::ClassCode;
use pci_core::spec::hwid::HardwareIds;
use pci_core::spec::hwid::ProgrammingInterface;
use pci_core::spec::hwid::Subclass;
use std::sync::Arc;
use tdisp::TdispHostDeviceTarget;
use vmcore::device_state::ChangeDeviceState;
use vmcore::save_restore::SaveError;
use vmcore::save_restore::SaveRestore;
use vmcore::save_restore::SavedStateNotSupported;
use vmcore::vm_task::VmTaskDriverSource;

#[derive(InspectMut)]
/// An emulated NVMe controller with configurable fault injection.
pub struct NvmeFaultController {
    cfg_space: ConfigSpaceType0Emulator,
    #[inspect(skip)]
    msix: MsixEmulator,
    registers: RegState,
    #[inspect(skip)]
    qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
    #[inspect(flatten, mut)]
    workers: NvmeWorkers,
    #[inspect(skip)]
    pci_fault_config: PciFaultConfig,
    #[inspect(skip)]
    fault_active: mesh::Cell<bool>,
    #[inspect(skip)]
    /// Optional interface for handling TDISP device security requests.
    tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
}

#[derive(Inspect)]
struct RegState {
    #[inspect(hex)]
    interrupt_mask: u32,
    cc: spec::Cc,
    csts: spec::Csts,
    aqa: spec::Aqa,
    #[inspect(hex)]
    asq: u64,
    #[inspect(hex)]
    acq: u64,
}

impl RegState {
    fn new() -> Self {
        Self {
            interrupt_mask: 0,
            cc: spec::Cc::new(),
            csts: spec::Csts::new(),
            aqa: spec::Aqa::new(),
            asq: 0,
            acq: 0,
        }
    }
}

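/// The controller capabilities (CAP) register value. CAP.DSTRD encodes the
/// doorbell stride as `4 << DSTRD` bytes, so `DOORBELL_STRIDE_BITS - 2` is the
/// encoded value; MQES is the zero-based maximum queue size; physically
/// contiguous queues are required (CQR); only the NVM command set is
/// advertised; and the ready timeout (TO) is set to its maximum.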
const CAP: spec::Cap = spec::Cap::new()
    .with_dstrd(DOORBELL_STRIDE_BITS - 2)
    .with_mqes_z(MAX_QES - 1)
    .with_cqr(true)
    .with_css_nvm(true)
    .with_to(!0);

#[derive(Debug, Copy, Clone)]
/// Construction parameters for an [`NvmeFaultController`].
pub struct NvmeFaultControllerCaps {
    /// The number of MSI-X interrupts to advertise.
    pub msix_count: u16,
    /// The maximum number of I/O queues supported.
    pub max_io_queues: u16,
    /// The NVM subsystem identifier.
    pub subsystem_id: Guid,
}

impl NvmeFaultController {
    /// Creates a new fault-capable NVMe controller.
    pub fn new(
        driver_source: &VmTaskDriverSource,
        guest_memory: GuestMemory,
        msi_target: &MsiTarget,
        register_mmio: &mut dyn RegisterMmioIntercept,
        caps: NvmeFaultControllerCaps,
        mut fault_configuration: FaultConfiguration,
        tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
    ) -> Self {
        let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
        // BAR0 holds the controller registers and doorbells; BAR4 holds the
        // MSI-X tables.
        let bars = DeviceBars::new()
            .bar0(
                BAR0_LEN,
                BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
            )
            .bar4(
                msix.bar_len(),
                BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
            );

        let cfg_space = ConfigSpaceType0Emulator::new(
            HardwareIds {
                vendor_id: VENDOR_ID,
                device_id: 0x00a9,
                revision_id: 0,
                prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
                sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
                base_class: ClassCode::MASS_STORAGE_CONTROLLER,
                type0_sub_vendor_id: 0,
                type0_sub_system_id: 0,
            },
            vec![Box::new(msix_cap)],
            bars,
        );

        let interrupts = (0..caps.msix_count)
            .map(|i| msix.interrupt(i).unwrap())
            .collect();

        // Split off the PCI-level fault configuration, which this layer
        // handles; the rest is passed through to the queue workers.
        let pci_fault_config = fault_configuration
            .pci_fault
            .take()
            .unwrap_or(PciFaultConfig::new());

        let fault_active = fault_configuration.fault_active.clone();

        let qe_sizes = Arc::new(Default::default());
        let admin = NvmeWorkers::new(
            driver_source,
            guest_memory,
            interrupts,
            caps.max_io_queues,
            caps.max_io_queues,
            Arc::clone(&qe_sizes),
            caps.subsystem_id,
            fault_configuration,
        );

        Self {
            cfg_space,
            msix,
            registers: RegState::new(),
            workers: admin,
            qe_sizes,
            pci_fault_config,
            fault_active,
            tdisp_interface,
        }
    }

    /// Returns a client for interacting with the controller.
    pub fn client(&self) -> NvmeFaultControllerClient {
        self.workers.client()
    }

    /// Reads from the BAR0 register space.
    pub fn read_bar0(&mut self, addr: u16, data: &mut [u8]) -> IoResult {
        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() - 1) as u16 != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

        // 64-bit registers. These can be read whole or in 32-bit halves, so
        // match on the 8-byte-aligned register offset.
        let d: Option<u64> = match spec::Register(addr & !7) {
            spec::Register::CAP => {
                // If a fault overrides the maximum queue size, report it
                // (zero-based) in CAP.MQES.
                if let Some(mqes) = self.pci_fault_config.max_queue_size {
                    Some(CAP.with_mqes_z(mqes - 1).into())
                } else {
                    Some(CAP.into())
                }
            }
            spec::Register::ASQ => Some(self.registers.asq),
            spec::Register::ACQ => Some(self.registers.acq),
            spec::Register::BPMBL => Some(0),
            _ => None,
        };
        if let Some(d) = d {
            if data.len() == 8 {
                data.copy_from_slice(&d.to_ne_bytes());
            } else if addr & 7 == 0 {
                // Low dword of a 64-bit register.
                data.copy_from_slice(&(d as u32).to_ne_bytes());
            } else {
                // High dword of a 64-bit register.
                data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
            }
            return IoResult::Ok;
        }

        if data.len() != 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }

        // 32-bit registers.
        let d: u32 = match spec::Register(addr) {
            spec::Register::VS => NVME_VERSION,
            spec::Register::INTMS => self.registers.interrupt_mask,
            spec::Register::INTMC => self.registers.interrupt_mask,
            spec::Register::CC => self.registers.cc.into(),
            spec::Register::RESERVED => 0,
            spec::Register::CSTS => self.get_csts(),
            spec::Register::NSSR => 0,
            spec::Register::AQA => self.registers.aqa.into(),
            spec::Register::CMBLOC => 0,
            spec::Register::CMBSZ => 0,
            spec::Register::BPINFO => 0,
            spec::Register::BPRSEL => 0,
            _ => return IoResult::Err(InvalidRegister),
        };
        data.copy_from_slice(&d.to_ne_bytes());
        IoResult::Ok
    }

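    /// Writes to the BAR0 register space. Offsets 0x1000 and above are the
    /// queue doorbells, spaced `1 << DOORBELL_STRIDE_BITS` bytes apart per the
    /// advertised CAP.DSTRD; these are forwarded to the queue workers. Lower
    /// offsets are controller registers.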
    pub fn write_bar0(&mut self, addr: u16, data: &[u8]) -> IoResult {
        if addr >= 0x1000 {
            let base = addr - 0x1000;
            // Doorbell writes must land exactly on a doorbell slot.
            let db_id = base >> DOORBELL_STRIDE_BITS;
            if (db_id << DOORBELL_STRIDE_BITS) != base {
                return IoResult::Err(InvalidRegister);
            }
            let Ok(data) = data.try_into() else {
                return IoResult::Err(IoError::InvalidAccessSize);
            };
            let value = u32::from_ne_bytes(data);
            self.workers.doorbell(db_id, value);
            return IoResult::Ok;
        }

        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() - 1) as u16 != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

        // Computes the new value of a 64-bit register, handling both full
        // 8-byte writes and 4-byte writes to either half.
        let update_reg = |x: u64| {
            if data.len() == 8 {
                u64::from_ne_bytes(data.try_into().unwrap())
            } else {
                let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
                if addr & 7 == 0 {
                    (x & !(u32::MAX as u64)) | data
                } else {
                    (x & u32::MAX as u64) | (data << 32)
                }
            }
        };

        // 64-bit registers. The admin queue base addresses may only change
        // while the controller is disabled.
        let handled = match spec::Register(addr & !7) {
            spec::Register::ASQ => {
                if !self.registers.cc.en() {
                    self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set asq while enabled");
                }
                true
            }
            spec::Register::ACQ => {
                if !self.registers.cc.en() {
                    self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set acq while enabled");
                }
                true
            }
            _ => false,
        };
        if handled {
            return IoResult::Ok;
        }

        let Ok(data) = data.try_into() else {
            return IoResult::Err(IoError::InvalidAccessSize);
        };
        let data = u32::from_ne_bytes(data);

        // 32-bit registers.
        match spec::Register(addr) {
            spec::Register::INTMS => self.registers.interrupt_mask |= data,
            spec::Register::INTMC => self.registers.interrupt_mask &= !data,
            spec::Register::CC => self.set_cc(data.into()),
            spec::Register::AQA => self.registers.aqa = data.into(),
            _ => return IoResult::Err(InvalidRegister),
        }
        IoResult::Ok
    }

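    /// Handles a write to the CC register: validates the requested
    /// configuration, applies any configured controller-enable fault, and
    /// starts or resets the queue workers on enable/disable transitions.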
    fn set_cc(&mut self, cc: spec::Cc) {
        tracing::debug!(?cc, "set cc");

        if cc.mps() != 0 {
            tracelimit::warn_ratelimited!(
                "This implementation only supports memory page sizes of 4K."
            );
            self.fatal_error();
            return;
        }

        if cc.css() != 0 {
            tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
            self.fatal_error();
            return;
        }

        if let 2..=6 = cc.ams() {
            tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
            self.fatal_error();
            return;
        }

        // Mask off CC fields the controller does not implement.
        let mask: u32 = u32::from(
            spec::Cc::new()
                .with_en(true)
                .with_shn(0b11)
                .with_iosqes(0b1111)
                .with_iocqes(0b1111),
        );
        let mut cc: spec::Cc = (u32::from(cc) & mask).into();

        if cc.shn() != 0 {
            // There is nothing to flush, so report shutdown processing as
            // complete immediately (SHST 0b10).
            self.registers.csts.set_shst(0b10);
        }

        if cc.en() != self.registers.cc.en() {
            if cc.en() {
                if self.fault_active.get() {
                    // A fault is active; apply the configured enable behavior
                    // (a delay, a verification signal, or nothing).
                    match &mut self.pci_fault_config.controller_management_fault_enable {
                        PciFaultBehavior::Delay(duration) => {
                            std::thread::sleep(*duration);
                        }
                        PciFaultBehavior::Default => {}
                        PciFaultBehavior::Verify(send) => {
                            if let Some(send) = send.take() {
                                send.send(());
                            }
                        }
                    }
                }

                // Fill in default queue entry sizes if unspecified; only the
                // default sizes are supported.
                if cc.iocqes() == 0 {
                    cc.set_iocqes(IOCQES);
                } else if cc.iocqes() != IOCQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports CQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if cc.iosqes() == 0 {
                    cc.set_iosqes(IOSQES);
                } else if cc.iosqes() != IOSQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports SQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if self.registers.csts.rdy() {
                    tracelimit::warn_ratelimited!("enabling during reset");
                    return;
                }
                if cc.shn() == 0 {
                    self.registers.csts.set_shst(0);
                }

                // AQA queue sizes are zero-based; clamp to the spec minimum
                // queue size of two entries.
                self.workers.enable(
                    self.registers.asq,
                    self.registers.aqa.asqs_z().max(1) + 1,
                    self.registers.acq,
                    self.registers.aqa.acqs_z().max(1) + 1,
                );
            } else if self.registers.csts.rdy() {
                self.workers.controller_reset();
            } else {
                tracelimit::warn_ratelimited!("disabling while not ready");
                return;
            }
        }

        // Commit the accepted CC value and publish the queue entry sizes to
        // the workers.
        self.registers.cc = cc;
        *self.qe_sizes.lock() = IoQueueEntrySizes {
            sqe_bits: cc.iosqes(),
            cqe_bits: cc.iocqes(),
        };
    }

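    /// Computes the current CSTS value, first polling the workers to complete
    /// any in-flight enable or controller reset transition.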
    fn get_csts(&mut self) -> u32 {
        if !self.registers.cc.en() && self.registers.csts.rdy() {
            // A disable was requested; once the workers finish the controller
            // reset, clear the register state.
            if self.workers.poll_controller_reset() {
                self.registers.csts = 0.into();
                self.registers.cc = 0.into();
                self.registers.interrupt_mask = 0;
            }
        } else if self.registers.cc.en() && !self.registers.csts.rdy() {
            // An enable was requested; set RDY once the workers are running.
            if self.workers.poll_enabled() {
                self.registers.csts.set_rdy(true);
            }
        }

        let csts = self.registers.csts;
        tracing::debug!(?csts, "get csts");
        csts.into()
    }

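    /// Fails the controller by setting CSTS.CFS (controller fatal status).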
    pub fn fatal_error(&mut self) {
        self.registers.csts.set_cfs(true);
    }
}

impl ChangeDeviceState for NvmeFaultController {
    fn start(&mut self) {}

    async fn stop(&mut self) {}

    async fn reset(&mut self) {
        // Destructure so that adding a field forces a decision about whether
        // it needs to be reset.
        let Self {
            cfg_space,
            msix: _,
            registers,
            qe_sizes,
            workers,
            pci_fault_config: _,
            fault_active: _,
            tdisp_interface: _,
        } = self;
        workers.reset().await;
        cfg_space.reset();
        *registers = RegState::new();
        *qe_sizes.lock() = Default::default();
    }
}

impl ChipsetDevice for NvmeFaultController {
    fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
        Some(self)
    }

    fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
        Some(self)
    }

    fn supports_tdisp(&mut self) -> Option<&mut dyn TdispHostDeviceTarget> {
        tracing::debug!(
            supported = self.tdisp_interface.is_some(),
            "fault controller TDISP support in ChipsetDevice"
        );

        match &mut self.tdisp_interface {
            Some(tdisp) => Some(tdisp.as_mut()),
            None => None,
        }
    }
}

impl MmioIntercept for NvmeFaultController {
    fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.read_bar0(offset, data),
            Some((4, offset)) => {
                read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }

    fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.write_bar0(offset, data),
            Some((4, offset)) => {
                write_as_u32_chunks(offset, data, |offset, ty| match ty {
                    ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
                    ReadWriteRequestType::Write(val) => {
                        self.msix.write_u32(offset, val);
                        None
                    }
                });
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }
}

impl PciConfigSpace for NvmeFaultController {
    fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
        self.cfg_space.read_u32(offset, value)
    }

    fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
        self.cfg_space.write_u32(offset, value)
    }
}

impl SaveRestore for NvmeFaultController {
    type SavedState = SavedStateNotSupported;

    fn save(&mut self) -> Result<Self::SavedState, SaveError> {
        Err(SaveError::NotSupported)
    }

    fn restore(
        &mut self,
        state: Self::SavedState,
    ) -> Result<(), vmcore::save_restore::RestoreError> {
        // `SavedStateNotSupported` is uninhabited, so restore can never be
        // called.
        match state {}
    }
}