use crate::BAR0_LEN;
use crate::DOORBELL_STRIDE_BITS;
use crate::IOCQES;
use crate::IOSQES;
use crate::MAX_QES;
use crate::NVME_VERSION;
use crate::NvmeFaultControllerClient;
use crate::PAGE_MASK;
use crate::VENDOR_ID;
use crate::spec;
use crate::workers::IoQueueEntrySizes;
use crate::workers::NvmeWorkers;
use chipset_device::ChipsetDevice;
use chipset_device::io::IoError;
use chipset_device::io::IoError::InvalidRegister;
use chipset_device::io::IoResult;
use chipset_device::mmio::MmioIntercept;
use chipset_device::mmio::RegisterMmioIntercept;
use chipset_device::pci::PciConfigSpace;
use device_emulators::ReadWriteRequestType;
use device_emulators::read_as_u32_chunks;
use device_emulators::write_as_u32_chunks;
use guestmem::GuestMemory;
use guid::Guid;
use inspect::Inspect;
use inspect::InspectMut;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultBehavior;
use nvme_resources::fault::PciFaultConfig;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::cfg_space_emu::BarMemoryKind;
use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
use pci_core::cfg_space_emu::DeviceBars;
use pci_core::msi::MsiTarget;
use pci_core::spec::hwid::ClassCode;
use pci_core::spec::hwid::HardwareIds;
use pci_core::spec::hwid::ProgrammingInterface;
use pci_core::spec::hwid::Subclass;
use std::sync::Arc;
use tdisp::TdispHostDeviceTarget;
use vmcore::device_state::ChangeDeviceState;
use vmcore::save_restore::SaveError;
use vmcore::save_restore::SaveRestore;
use vmcore::save_restore::SavedStateNotSupported;
use vmcore::vm_task::VmTaskDriverSource;

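/// An NVMe controller emulator with configurable fault injection.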
#[derive(InspectMut)]
pub struct NvmeFaultController {
    cfg_space: ConfigSpaceType0Emulator,
    #[inspect(skip)]
    msix: MsixEmulator,
    registers: RegState,
    #[inspect(skip)]
    qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
    #[inspect(flatten, mut)]
    workers: NvmeWorkers,
    #[inspect(skip)]
    pci_fault_config: PciFaultConfig,
    #[inspect(skip)]
    fault_active: mesh::Cell<bool>,
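    /// The TDISP interface for this device, if any.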
    #[inspect(skip)]
    tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
}

#[derive(Inspect)]
struct RegState {
    #[inspect(hex)]
    interrupt_mask: u32,
    cc: spec::Cc,
    csts: spec::Csts,
    aqa: spec::Aqa,
    #[inspect(hex)]
    asq: u64,
    #[inspect(hex)]
    acq: u64,
}

impl RegState {
    fn new() -> Self {
        Self {
            interrupt_mask: 0,
            cc: spec::Cc::new(),
            csts: spec::Csts::new(),
            aqa: spec::Aqa::new(),
            asq: 0,
            acq: 0,
        }
    }
}

const CAP: spec::Cap = spec::Cap::new()
    .with_dstrd(DOORBELL_STRIDE_BITS - 2)
    .with_mqes_z(MAX_QES - 1)
    .with_cqr(true)
    .with_css_nvm(true)
    .with_to(!0);

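/// Construction parameters for an NVMe fault controller.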
#[derive(Debug, Copy, Clone)]
pub struct NvmeFaultControllerCaps {
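    /// The number of MSI-X interrupts supported by the controller.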
    pub msix_count: u16,
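    /// The maximum number of IO queues supported by the controller.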
    pub max_io_queues: u16,
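    /// The NVM subsystem identifier.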
    pub subsystem_id: Guid,
}

impl NvmeFaultController {
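    /// Creates a new NVMe fault controller.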
    pub fn new(
        driver_source: &VmTaskDriverSource,
        guest_memory: GuestMemory,
        msi_target: &MsiTarget,
        register_mmio: &mut dyn RegisterMmioIntercept,
        caps: NvmeFaultControllerCaps,
        mut fault_configuration: FaultConfiguration,
        tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
    ) -> Self {
        let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
        let bars = DeviceBars::new()
            .bar0(
                BAR0_LEN,
                BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
            )
            .bar4(
                msix.bar_len(),
                BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
            );

        let cfg_space = ConfigSpaceType0Emulator::new(
            HardwareIds {
                vendor_id: VENDOR_ID,
                device_id: 0x00a9,
                revision_id: 0,
                prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
                sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
                base_class: ClassCode::MASS_STORAGE_CONTROLLER,
                type0_sub_vendor_id: 0,
                type0_sub_system_id: 0,
            },
            vec![Box::new(msix_cap)],
            bars,
        );

        let interrupts = (0..caps.msix_count)
            .map(|i| msix.interrupt(i).unwrap())
            .collect();

        let pci_fault_config = fault_configuration
            .pci_fault
            .take()
            .unwrap_or(PciFaultConfig::new());

        let fault_active = fault_configuration.fault_active.clone();

        let qe_sizes = Arc::new(Default::default());
        let admin = NvmeWorkers::new(
            driver_source,
            guest_memory,
            interrupts,
            caps.max_io_queues,
            caps.max_io_queues,
            Arc::clone(&qe_sizes),
            caps.subsystem_id,
            fault_configuration,
        );

        Self {
            cfg_space,
            msix,
            registers: RegState::new(),
            workers: admin,
            qe_sizes,
            pci_fault_config,
            fault_active,
            tdisp_interface,
        }
    }

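    /// Returns a client for interacting with the controller.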
    pub fn client(&self) -> NvmeFaultControllerClient {
        self.workers.client()
    }

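    /// Reads from the controller's BAR0 register space.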
    pub fn read_bar0(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() as u64 - 1) != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

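        // 64-bit registers.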
        let d: Option<u64> = match spec::Register(addr & !7) {
            spec::Register::CAP => {
                if let Some(mqes) = self.pci_fault_config.max_queue_size {
                    Some(CAP.with_mqes_z(mqes - 1).into())
                } else {
                    Some(CAP.into())
                }
            }
            spec::Register::ASQ => Some(self.registers.asq),
            spec::Register::ACQ => Some(self.registers.acq),
            spec::Register::BPMBL => Some(0),
            _ => None,
        };
        if let Some(d) = d {
            if data.len() == 8 {
                data.copy_from_slice(&d.to_ne_bytes());
            } else if addr & 7 == 0 {
                data.copy_from_slice(&(d as u32).to_ne_bytes());
            } else {
                data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
            }
            return IoResult::Ok;
        }

        if data.len() != 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }

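        // 32-bit registers.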
        let d: u32 = match spec::Register(addr) {
            spec::Register::VS => NVME_VERSION,
            spec::Register::INTMS => self.registers.interrupt_mask,
            spec::Register::INTMC => self.registers.interrupt_mask,
            spec::Register::CC => self.registers.cc.into(),
            spec::Register::RESERVED => 0,
            spec::Register::CSTS => self.get_csts(),
            spec::Register::NSSR => 0,
            spec::Register::AQA => self.registers.aqa.into(),
            spec::Register::CMBLOC => 0,
            spec::Register::CMBSZ => 0,
            spec::Register::BPINFO => 0,
            spec::Register::BPRSEL => 0,
            _ => return IoResult::Err(InvalidRegister),
        };
        data.copy_from_slice(&d.to_ne_bytes());
        IoResult::Ok
    }

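    /// Writes to the controller's BAR0 register space.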
    pub fn write_bar0(&mut self, addr: u64, data: &[u8]) -> IoResult {
        if addr >= 0x1000 {
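            // Doorbell write.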
            let base = addr - 0x1000;
            let db_id = base >> DOORBELL_STRIDE_BITS;
            if (db_id << DOORBELL_STRIDE_BITS) != base {
                return IoResult::Err(InvalidRegister);
            }
            let Ok(db_id) = u16::try_from(db_id) else {
                return IoResult::Err(InvalidRegister);
            };
            let Ok(data) = data.try_into() else {
                return IoResult::Err(IoError::InvalidAccessSize);
            };
            let value = u32::from_ne_bytes(data);
            self.workers.doorbell(db_id, value);
            return IoResult::Ok;
        }

        if data.len() < 4 {
            return IoResult::Err(IoError::InvalidAccessSize);
        }
        if addr & (data.len() as u64 - 1) != 0 {
            return IoResult::Err(IoError::UnalignedAccess);
        }

        let update_reg = |x: u64| {
            if data.len() == 8 {
                u64::from_ne_bytes(data.try_into().unwrap())
            } else {
                let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
                if addr & 7 == 0 {
                    (x & !(u32::MAX as u64)) | data
                } else {
                    (x & u32::MAX as u64) | (data << 32)
                }
            }
        };

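        // 64-bit registers.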
        let handled = match spec::Register(addr & !7) {
            spec::Register::ASQ => {
                if !self.registers.cc.en() {
                    self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set asq while enabled");
                }
                true
            }
            spec::Register::ACQ => {
                if !self.registers.cc.en() {
                    self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
                } else {
                    tracelimit::warn_ratelimited!("attempt to set acq while enabled");
                }
                true
            }
            _ => false,
        };
        if handled {
            return IoResult::Ok;
        }

        let Ok(data) = data.try_into() else {
            return IoResult::Err(IoError::InvalidAccessSize);
        };
        let data = u32::from_ne_bytes(data);

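        // 32-bit registers.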
        match spec::Register(addr) {
            spec::Register::INTMS => self.registers.interrupt_mask |= data,
            spec::Register::INTMC => self.registers.interrupt_mask &= !data,
            spec::Register::CC => self.set_cc(data.into()),
            spec::Register::AQA => self.registers.aqa = data.into(),
            _ => return IoResult::Err(InvalidRegister),
        }
        IoResult::Ok
    }

    fn set_cc(&mut self, cc: spec::Cc) {
        tracing::debug!(?cc, "set cc");

        if cc.mps() != 0 {
            tracelimit::warn_ratelimited!(
                "This implementation only supports memory page sizes of 4K."
            );
            self.fatal_error();
            return;
        }

        if cc.css() != 0 {
            tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
            self.fatal_error();
            return;
        }

        if let 2..=6 = cc.ams() {
            tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
            self.fatal_error();
        }

        let mask: u32 = u32::from(
            spec::Cc::new()
                .with_en(true)
                .with_shn(0b11)
                .with_iosqes(0b1111)
                .with_iocqes(0b1111),
        );
        let mut cc: spec::Cc = (u32::from(cc) & mask).into();

        if cc.shn() != 0 {
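            // Nothing to flush, so report that shutdown processing has
            // completed immediately.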
            self.registers.csts.set_shst(0b10);
        }

        if cc.en() != self.registers.cc.en() {
            if cc.en() {
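                // Apply any configured PCI fault behavior for controller enable.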
                if self.fault_active.get() {
                    match &mut self.pci_fault_config.controller_management_fault_enable {
                        PciFaultBehavior::Delay(duration) => {
                            std::thread::sleep(*duration);
                        }
                        PciFaultBehavior::Default => {}
                        PciFaultBehavior::Verify(send) => {
                            if let Some(send) = send.take() {
                                send.send(());
                            }
                        }
                    }
                }

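                // If the driver leaves the queue entry sizes at zero, fall back to the defaults.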
                if cc.iocqes() == 0 {
                    cc.set_iocqes(IOCQES);
                } else if cc.iocqes() != IOCQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports CQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if cc.iosqes() == 0 {
                    cc.set_iosqes(IOSQES);
                } else if cc.iosqes() != IOSQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports SQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                if self.registers.csts.rdy() {
                    tracelimit::warn_ratelimited!("enabling during reset");
                    return;
                }
                if cc.shn() == 0 {
                    self.registers.csts.set_shst(0);
                }

                self.workers.enable(
                    self.registers.asq,
                    self.registers.aqa.asqs_z().max(1) + 1,
                    self.registers.acq,
                    self.registers.aqa.acqs_z().max(1) + 1,
                );
            } else if self.registers.csts.rdy() {
                self.workers.controller_reset();
            } else {
                tracelimit::warn_ratelimited!("disabling while not ready");
                return;
            }
        }

        self.registers.cc = cc;
        *self.qe_sizes.lock() = IoQueueEntrySizes {
            sqe_bits: cc.iosqes(),
            cqe_bits: cc.iocqes(),
        };
    }

    fn get_csts(&mut self) -> u32 {
        if !self.registers.cc.en() && self.registers.csts.rdy() {
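            // The controller is being disabled. Check whether the pending reset has finished.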
            if self.workers.poll_controller_reset() {
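                // The reset completed, so clear the registers.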
                self.registers.csts = 0.into();
                self.registers.cc = 0.into();
                self.registers.interrupt_mask = 0;
            }
        } else if self.registers.cc.en() && !self.registers.csts.rdy() {
            if self.workers.poll_enabled() {
                self.registers.csts.set_rdy(true);
            }
        }

        let csts = self.registers.csts;
        tracing::debug!(?csts, "get csts");
        csts.into()
    }

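    /// Signals a controller fatal error by setting CSTS.CFS.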
    pub fn fatal_error(&mut self) {
        self.registers.csts.set_cfs(true);
    }
}

impl ChangeDeviceState for NvmeFaultController {
    fn start(&mut self) {}

    async fn stop(&mut self) {}

    async fn reset(&mut self) {
        let Self {
            cfg_space,
            msix: _,
            registers,
            qe_sizes,
            workers,
            pci_fault_config: _,
            fault_active: _,
            tdisp_interface: _,
        } = self;
        workers.reset().await;
        cfg_space.reset();
        *registers = RegState::new();
        *qe_sizes.lock() = Default::default();
    }
}

impl ChipsetDevice for NvmeFaultController {
    fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
        Some(self)
    }

    fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
        Some(self)
    }

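    // TDISP support is only advertised when a TDISP interface was provided at construction.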
    fn supports_tdisp(&mut self) -> Option<&mut dyn TdispHostDeviceTarget> {
        tracing::debug!(
            supported = self.tdisp_interface.is_some(),
            "fault controller TDISP support in ChipsetDevice"
        );

        match &mut self.tdisp_interface {
            Some(tdisp) => Some(tdisp.as_mut()),
            None => None,
        }
    }
}

impl MmioIntercept for NvmeFaultController {
    fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.read_bar0(offset, data),
            Some((4, offset)) => {
                read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }

    fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
        match self.cfg_space.find_bar(addr) {
            Some((0, offset)) => self.write_bar0(offset, data),
            Some((4, offset)) => {
                write_as_u32_chunks(offset, data, |offset, ty| match ty {
                    ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
                    ReadWriteRequestType::Write(val) => {
                        self.msix.write_u32(offset, val);
                        None
                    }
                });
                IoResult::Ok
            }
            _ => IoResult::Err(InvalidRegister),
        }
    }
}

impl PciConfigSpace for NvmeFaultController {
    fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
        self.cfg_space.read_u32(offset, value)
    }

    fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
        self.cfg_space.write_u32(offset, value)
    }
}

impl SaveRestore for NvmeFaultController {
    type SavedState = SavedStateNotSupported;

    fn save(&mut self) -> Result<Self::SavedState, SaveError> {
        Err(SaveError::NotSupported)
    }

    fn restore(
        &mut self,
        state: Self::SavedState,
    ) -> Result<(), vmcore::save_restore::RestoreError> {
        match state {}
    }
}