1use crate::BAR0_LEN;
7use crate::DEVICE_ID;
8use crate::DOORBELL_STRIDE_BITS;
9use crate::IOCQES;
10use crate::IOSQES;
11use crate::MAX_QES;
12use crate::NVME_VERSION;
13use crate::NvmeFaultControllerClient;
14use crate::PAGE_MASK;
15use crate::VENDOR_ID;
16use crate::spec;
17use crate::workers::IoQueueEntrySizes;
18use crate::workers::NvmeWorkers;
19use chipset_device::ChipsetDevice;
20use chipset_device::io::IoError;
21use chipset_device::io::IoError::InvalidRegister;
22use chipset_device::io::IoResult;
23use chipset_device::mmio::MmioIntercept;
24use chipset_device::mmio::RegisterMmioIntercept;
25use chipset_device::pci::PciConfigSpace;
26use device_emulators::ReadWriteRequestType;
27use device_emulators::read_as_u32_chunks;
28use device_emulators::write_as_u32_chunks;
29use guestmem::GuestMemory;
30use guid::Guid;
31use inspect::Inspect;
32use inspect::InspectMut;
33use nvme_resources::fault::FaultConfiguration;
34use nvme_resources::fault::PciFaultBehavior;
35use nvme_resources::fault::PciFaultConfig;
36use parking_lot::Mutex;
37use pci_core::capabilities::msix::MsixEmulator;
38use pci_core::cfg_space_emu::BarMemoryKind;
39use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
40use pci_core::cfg_space_emu::DeviceBars;
41use pci_core::msi::MsiTarget;
42use pci_core::spec::hwid::ClassCode;
43use pci_core::spec::hwid::HardwareIds;
44use pci_core::spec::hwid::ProgrammingInterface;
45use pci_core::spec::hwid::Subclass;
46use std::sync::Arc;
47use tdisp::TdispHostDeviceTarget;
48use vmcore::device_state::ChangeDeviceState;
49use vmcore::save_restore::SaveError;
50use vmcore::save_restore::SaveRestore;
51use vmcore::save_restore::SavedStateNotSupported;
52use vmcore::vm_task::VmTaskDriverSource;
53
/// An emulated NVMe controller with fault-injection hooks.
///
/// Bundles the PCI configuration space emulator, the MSI-X emulator, the
/// NVMe controller registers exposed through BAR0, and the [`NvmeWorkers`]
/// that are enabled, reset, and notified of doorbell writes by this type.
#[derive(InspectMut)]
pub struct NvmeFaultController {
    /// PCI type 0 configuration space emulator for the device.
    cfg_space: ConfigSpaceType0Emulator,
    /// MSI-X emulator; its register window is served through BAR4.
    #[inspect(skip)]
    msix: MsixEmulator,
    /// Controller register state backing BAR0 reads and writes.
    registers: RegState,
    /// IO queue entry sizes shared with the workers; refreshed whenever the
    /// CC register is updated.
    #[inspect(skip)]
    qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
    /// Workers that receive enable, reset, and doorbell requests.
    #[inspect(flatten, mut)]
    workers: NvmeWorkers,
    /// PCI-level fault configuration: consulted for the reported maximum
    /// queue size and for the behavior injected when the controller is enabled.
    #[inspect(skip)]
    pci_fault_config: PciFaultConfig,
    /// Gate for fault injection: the controller-enable fault is only applied
    /// while this cell reads `true`.
    #[inspect(skip)]
    fault_active: mesh::Cell<bool>,
    /// Optional TDISP target handed out by `supports_tdisp`.
    #[inspect(skip)]
    tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
}
73
/// Guest-visible NVMe controller register state.
#[derive(Inspect)]
struct RegState {
    /// Interrupt mask: bits are set by INTMS writes and cleared by INTMC
    /// writes; both registers read back this value.
    #[inspect(hex)]
    interrupt_mask: u32,
    /// Controller configuration (CC) register.
    cc: spec::Cc,
    /// Controller status (CSTS) register.
    csts: spec::Csts,
    /// Admin queue attributes (AQA) register.
    aqa: spec::Aqa,
    /// Admin submission queue base address (ASQ); stored masked with
    /// `PAGE_MASK`.
    #[inspect(hex)]
    asq: u64,
    /// Admin completion queue base address (ACQ); stored masked with
    /// `PAGE_MASK`.
    #[inspect(hex)]
    acq: u64,
}
86
87impl RegState {
88 fn new() -> Self {
89 Self {
90 interrupt_mask: 0,
91 cc: spec::Cc::new(),
92 csts: spec::Csts::new(),
93 aqa: spec::Aqa::new(),
94 asq: 0,
95 acq: 0,
96 }
97 }
98}
99
/// Default value of the CAP register returned by BAR0 reads.
///
/// The MQES field may be overridden at read time by the PCI fault
/// configuration.
const CAP: spec::Cap = spec::Cap::new()
    // The doorbell stride field is encoded relative to a 4-byte minimum
    // stride (per the NVMe spec), hence the `- 2`.
    .with_dstrd(DOORBELL_STRIDE_BITS - 2)
    // MQES is a zero-based value.
    .with_mqes_z(MAX_QES - 1)
    .with_cqr(true)
    .with_css_nvm(true)
    // All-ones, i.e. the largest value the timeout field can hold.
    .with_to(!0);
106
/// Capabilities used to construct an [`NvmeFaultController`].
#[derive(Debug, Copy, Clone)]
pub struct NvmeFaultControllerCaps {
    /// Number of MSI-X interrupts to create for the device.
    pub msix_count: u16,
    /// Maximum number of IO queues; passed to the workers for both of their
    /// queue-count limits (presumably submission and completion queues —
    /// confirm against `NvmeWorkers::new`).
    pub max_io_queues: u16,
    /// Subsystem identifier forwarded to the workers.
    pub subsystem_id: Guid,
}
118
119impl NvmeFaultController {
120 pub fn new(
122 driver_source: &VmTaskDriverSource,
123 guest_memory: GuestMemory,
124 msi_target: &MsiTarget,
125 register_mmio: &mut dyn RegisterMmioIntercept,
126 caps: NvmeFaultControllerCaps,
127 mut fault_configuration: FaultConfiguration,
128 tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
129 ) -> Self {
130 let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
131 let bars = DeviceBars::new()
132 .bar0(
133 BAR0_LEN,
134 BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
135 )
136 .bar4(
137 msix.bar_len(),
138 BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
139 );
140
141 let hardware_config_fault = fault_configuration.hardware_config_fault.take();
145 let vendor_id = hardware_config_fault
146 .and_then(|f| f.vendor_id)
147 .unwrap_or(VENDOR_ID);
148 let device_id = hardware_config_fault
149 .and_then(|f| f.device_id)
150 .unwrap_or(DEVICE_ID);
151
152 let cfg_space = ConfigSpaceType0Emulator::new(
153 HardwareIds {
154 vendor_id,
155 device_id,
156 revision_id: 0,
157 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
158 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
159 base_class: ClassCode::MASS_STORAGE_CONTROLLER,
160 type0_sub_vendor_id: 0,
161 type0_sub_system_id: 0,
162 },
163 vec![Box::new(msix_cap)],
164 Vec::new(),
165 bars,
166 );
167
168 let interrupts = (0..caps.msix_count)
169 .map(|i| msix.interrupt(i).unwrap())
170 .collect();
171
172 let pci_fault_config = fault_configuration
173 .pci_fault
174 .take()
175 .unwrap_or(PciFaultConfig::new());
176
177 let fault_active = fault_configuration.fault_active.clone();
178
179 let qe_sizes = Arc::new(Default::default());
180 let admin = NvmeWorkers::new(
181 driver_source,
182 guest_memory,
183 interrupts,
184 caps.max_io_queues,
185 caps.max_io_queues,
186 Arc::clone(&qe_sizes),
187 caps.subsystem_id,
188 fault_configuration,
189 );
190
191 Self {
192 cfg_space,
193 msix,
194 registers: RegState::new(),
195 workers: admin,
196 qe_sizes,
197 pci_fault_config,
198 fault_active,
199 tdisp_interface,
200 }
201 }
202
203 pub fn client(&self) -> NvmeFaultControllerClient {
205 self.workers.client()
206 }
207
208 pub fn read_bar0(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
210 if data.len() < 4 {
211 return IoResult::Err(IoError::InvalidAccessSize);
212 }
213 if addr & (data.len() as u64 - 1) != 0 {
214 return IoResult::Err(IoError::UnalignedAccess);
215 }
216
217 let d: Option<u64> = match spec::Register(addr & !7) {
219 spec::Register::CAP => {
220 if let Some(mqes) = self.pci_fault_config.max_queue_size {
221 Some(CAP.with_mqes_z(mqes - 1).into())
222 } else {
223 Some(CAP.into())
224 }
225 }
226 spec::Register::ASQ => Some(self.registers.asq),
227 spec::Register::ACQ => Some(self.registers.acq),
228 spec::Register::BPMBL => Some(0),
229 _ => None,
230 };
231 if let Some(d) = d {
232 if data.len() == 8 {
233 data.copy_from_slice(&d.to_ne_bytes());
234 } else if addr & 7 == 0 {
235 data.copy_from_slice(&(d as u32).to_ne_bytes());
236 } else {
237 data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
238 }
239 return IoResult::Ok;
240 }
241
242 if data.len() != 4 {
243 return IoResult::Err(IoError::InvalidAccessSize);
244 }
245
246 let d: u32 = match spec::Register(addr) {
248 spec::Register::VS => NVME_VERSION,
249 spec::Register::INTMS => self.registers.interrupt_mask,
250 spec::Register::INTMC => self.registers.interrupt_mask,
251 spec::Register::CC => self.registers.cc.into(),
252 spec::Register::RESERVED => 0,
253 spec::Register::CSTS => self.get_csts(),
254 spec::Register::NSSR => 0,
255 spec::Register::AQA => self.registers.aqa.into(),
256 spec::Register::CMBLOC => 0,
257 spec::Register::CMBSZ => 0,
258 spec::Register::BPINFO => 0,
259 spec::Register::BPRSEL => 0,
260 _ => return IoResult::Err(InvalidRegister),
261 };
262 data.copy_from_slice(&d.to_ne_bytes());
263 IoResult::Ok
264 }
265
266 pub fn write_bar0(&mut self, addr: u64, data: &[u8]) -> IoResult {
268 if addr >= 0x1000 {
269 let base = addr - 0x1000;
271 let db_id = base >> DOORBELL_STRIDE_BITS;
272 if (db_id << DOORBELL_STRIDE_BITS) != base {
273 return IoResult::Err(InvalidRegister);
274 }
275 let Ok(db_id) = u16::try_from(db_id) else {
276 return IoResult::Err(InvalidRegister);
277 };
278 let Ok(data) = data.try_into() else {
279 return IoResult::Err(IoError::InvalidAccessSize);
280 };
281 let value = u32::from_ne_bytes(data);
282 self.workers.doorbell(db_id, value);
283 return IoResult::Ok;
284 }
285
286 if data.len() < 4 {
287 return IoResult::Err(IoError::InvalidAccessSize);
288 }
289 if addr & (data.len() as u64 - 1) != 0 {
290 return IoResult::Err(IoError::UnalignedAccess);
291 }
292
293 let update_reg = |x: u64| {
294 if data.len() == 8 {
295 u64::from_ne_bytes(data.try_into().unwrap())
296 } else {
297 let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
298 if addr & 7 == 0 {
299 (x & !(u32::MAX as u64)) | data
300 } else {
301 (x & u32::MAX as u64) | (data << 32)
302 }
303 }
304 };
305
306 let handled = match spec::Register(addr & !7) {
308 spec::Register::ASQ => {
309 if !self.registers.cc.en() {
310 self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
311 } else {
312 tracelimit::warn_ratelimited!("attempt to set asq while enabled");
313 }
314 true
315 }
316 spec::Register::ACQ => {
317 if !self.registers.cc.en() {
318 self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
319 } else {
320 tracelimit::warn_ratelimited!("attempt to set acq while enabled");
321 }
322 true
323 }
324 _ => false,
325 };
326 if handled {
327 return IoResult::Ok;
328 }
329
330 let Ok(data) = data.try_into() else {
331 return IoResult::Err(IoError::InvalidAccessSize);
332 };
333 let data = u32::from_ne_bytes(data);
334
335 match spec::Register(addr) {
337 spec::Register::INTMS => self.registers.interrupt_mask |= data,
338 spec::Register::INTMC => self.registers.interrupt_mask &= !data,
339 spec::Register::CC => self.set_cc(data.into()),
340 spec::Register::AQA => self.registers.aqa = data.into(),
341 _ => return IoResult::Err(InvalidRegister),
342 }
343 IoResult::Ok
344 }
345
346 fn set_cc(&mut self, cc: spec::Cc) {
347 tracing::debug!(?cc, "set cc");
348
349 if cc.mps() != 0 {
350 tracelimit::warn_ratelimited!(
351 "This implementation only supports memory page sizes of 4K."
352 );
353 self.fatal_error();
354 return;
355 }
356
357 if cc.css() != 0 {
358 tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
359 self.fatal_error();
360 return;
361 }
362
363 if let 2..=6 = cc.ams() {
364 tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
365 self.fatal_error();
366 }
367
368 let mask: u32 = u32::from(
369 spec::Cc::new()
370 .with_en(true)
371 .with_shn(0b11)
372 .with_iosqes(0b1111)
373 .with_iocqes(0b1111),
374 );
375 let mut cc: spec::Cc = (u32::from(cc) & mask).into();
376
377 if cc.shn() != 0 {
378 self.registers.csts.set_shst(0b10);
382 }
383
384 if cc.en() != self.registers.cc.en() {
385 if cc.en() {
386 if self.fault_active.get() {
388 match &mut self.pci_fault_config.controller_management_fault_enable {
389 PciFaultBehavior::Delay(duration) => {
390 std::thread::sleep(*duration);
391 }
392 PciFaultBehavior::Default => {}
393 PciFaultBehavior::Verify(send) => {
394 if let Some(send) = send.take() {
395 send.send(());
396 }
397 }
398 }
399 }
400
401 if cc.iocqes() == 0 {
403 cc.set_iocqes(IOCQES);
404 } else if cc.iocqes() != IOCQES {
405 tracelimit::warn_ratelimited!(
406 "This implementation only supports CQEs of the default size."
407 );
408 self.fatal_error();
409 return;
410 }
411
412 if cc.iosqes() == 0 {
413 cc.set_iosqes(IOSQES);
414 } else if cc.iosqes() != IOSQES {
415 tracelimit::warn_ratelimited!(
416 "This implementation only supports SQEs of the default size."
417 );
418 self.fatal_error();
419 return;
420 }
421
422 if self.registers.csts.rdy() {
423 tracelimit::warn_ratelimited!("enabling during reset");
424 return;
425 }
426 if cc.shn() == 0 {
427 self.registers.csts.set_shst(0);
428 }
429
430 self.workers.enable(
431 self.registers.asq,
432 self.registers.aqa.asqs_z().max(1) + 1,
433 self.registers.acq,
434 self.registers.aqa.acqs_z().max(1) + 1,
435 );
436 } else if self.registers.csts.rdy() {
437 self.workers.controller_reset();
438 } else {
439 tracelimit::warn_ratelimited!("disabling while not ready");
440 return;
441 }
442 }
443
444 self.registers.cc = cc;
445 *self.qe_sizes.lock() = IoQueueEntrySizes {
446 sqe_bits: cc.iosqes(),
447 cqe_bits: cc.iocqes(),
448 };
449 }
450
451 fn get_csts(&mut self) -> u32 {
452 if !self.registers.cc.en() && self.registers.csts.rdy() {
453 if self.workers.poll_controller_reset() {
455 self.registers.csts = 0.into();
457 self.registers.cc = 0.into();
458 self.registers.interrupt_mask = 0;
459 }
460 } else if self.registers.cc.en() && !self.registers.csts.rdy() {
461 if self.workers.poll_enabled() {
462 self.registers.csts.set_rdy(true);
463 }
464 }
465
466 let csts = self.registers.csts;
467 tracing::debug!(?csts, "get csts");
468 csts.into()
469 }
470
471 pub fn fatal_error(&mut self) {
474 self.registers.csts.set_cfs(true);
475 }
476}
477
478impl ChangeDeviceState for NvmeFaultController {
479 fn start(&mut self) {}
480
481 async fn stop(&mut self) {}
482
483 async fn reset(&mut self) {
484 let Self {
485 cfg_space,
486 msix: _,
487 registers,
488 qe_sizes,
489 workers,
490 pci_fault_config: _,
491 fault_active: _,
492 tdisp_interface: _,
493 } = self;
494 workers.reset().await;
495 cfg_space.reset();
496 *registers = RegState::new();
497 *qe_sizes.lock() = Default::default();
498 }
499}
500
501impl ChipsetDevice for NvmeFaultController {
502 fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
503 Some(self)
504 }
505
506 fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
507 Some(self)
508 }
509
510 fn supports_tdisp(&mut self) -> Option<&mut dyn TdispHostDeviceTarget> {
512 tracing::debug!(
513 supported = self.tdisp_interface.is_some(),
514 "fault controller TDISP support in ChipsetDevice"
515 );
516
517 match &mut self.tdisp_interface {
518 Some(tdisp) => Some(tdisp.as_mut()),
519 None => None,
520 }
521 }
522}
523
524impl MmioIntercept for NvmeFaultController {
525 fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
526 match self.cfg_space.find_bar(addr) {
527 Some((0, offset)) => self.read_bar0(offset, data),
528 Some((4, offset)) => {
529 read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
530 IoResult::Ok
531 }
532 _ => IoResult::Err(InvalidRegister),
533 }
534 }
535
536 fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
537 match self.cfg_space.find_bar(addr) {
538 Some((0, offset)) => self.write_bar0(offset, data),
539 Some((4, offset)) => {
540 write_as_u32_chunks(offset, data, |offset, ty| match ty {
541 ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
542 ReadWriteRequestType::Write(val) => {
543 self.msix.write_u32(offset, val);
544 None
545 }
546 });
547 IoResult::Ok
548 }
549 _ => IoResult::Err(InvalidRegister),
550 }
551 }
552}
553
/// PCI configuration space accesses are delegated to the config space
/// emulator.
impl PciConfigSpace for NvmeFaultController {
    /// Reads the 32-bit config space value at `offset` into `value`.
    fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
        self.cfg_space.read_u32(offset, value)
    }

    /// Writes the 32-bit `value` to config space at `offset`.
    fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
        self.cfg_space.write_u32(offset, value)
    }
}
563
/// Save/restore is not supported for the fault controller.
impl SaveRestore for NvmeFaultController {
    type SavedState = SavedStateNotSupported;

    /// Always fails with `SaveError::NotSupported`.
    fn save(&mut self) -> Result<Self::SavedState, SaveError> {
        Err(SaveError::NotSupported)
    }

    /// Restores from `state`. The body is an empty match on `state`, so the
    /// saved state type has no values and this can never actually run.
    fn restore(
        &mut self,
        state: Self::SavedState,
    ) -> Result<(), vmcore::save_restore::RestoreError> {
        match state {}
    }
}