1use crate::BAR0_LEN;
7use crate::DOORBELL_STRIDE_BITS;
8use crate::IOCQES;
9use crate::IOSQES;
10use crate::MAX_QES;
11use crate::NVME_VERSION;
12use crate::NvmeFaultControllerClient;
13use crate::PAGE_MASK;
14use crate::VENDOR_ID;
15use crate::spec;
16use crate::workers::IoQueueEntrySizes;
17use crate::workers::NvmeWorkers;
18use chipset_device::ChipsetDevice;
19use chipset_device::io::IoError;
20use chipset_device::io::IoError::InvalidRegister;
21use chipset_device::io::IoResult;
22use chipset_device::mmio::MmioIntercept;
23use chipset_device::mmio::RegisterMmioIntercept;
24use chipset_device::pci::PciConfigSpace;
25use device_emulators::ReadWriteRequestType;
26use device_emulators::read_as_u32_chunks;
27use device_emulators::write_as_u32_chunks;
28use guestmem::GuestMemory;
29use guid::Guid;
30use inspect::Inspect;
31use inspect::InspectMut;
32use nvme_resources::fault::FaultConfiguration;
33use nvme_resources::fault::PciFaultBehavior;
34use nvme_resources::fault::PciFaultConfig;
35use parking_lot::Mutex;
36use pci_core::capabilities::msix::MsixEmulator;
37use pci_core::cfg_space_emu::BarMemoryKind;
38use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
39use pci_core::cfg_space_emu::DeviceBars;
40use pci_core::msi::MsiTarget;
41use pci_core::spec::hwid::ClassCode;
42use pci_core::spec::hwid::HardwareIds;
43use pci_core::spec::hwid::ProgrammingInterface;
44use pci_core::spec::hwid::Subclass;
45use std::sync::Arc;
46use tdisp::TdispHostDeviceTarget;
47use vmcore::device_state::ChangeDeviceState;
48use vmcore::save_restore::SaveError;
49use vmcore::save_restore::SaveRestore;
50use vmcore::save_restore::SavedStateNotSupported;
51use vmcore::vm_task::VmTaskDriverSource;
52
/// An emulated NVMe PCI controller with fault-injection hooks.
///
/// Owns the PCI configuration space, the MSI-X emulator, the BAR0 register
/// state and the queue workers, and consults the PCI fault configuration when
/// servicing certain register accesses.
#[derive(InspectMut)]
pub struct NvmeFaultController {
    /// PCI type 0 configuration space emulator; also used to map MMIO
    /// addresses onto a BAR index and offset.
    cfg_space: ConfigSpaceType0Emulator,
    /// MSI-X emulator; its registers are served from BAR 4.
    #[inspect(skip)]
    msix: MsixEmulator,
    /// Controller register state exposed through BAR0.
    registers: RegState,
    /// IO queue entry sizes, shared with the workers and refreshed whenever
    /// CC is stored.
    #[inspect(skip)]
    qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
    /// Workers that receive doorbell writes and enable/reset requests.
    #[inspect(flatten, mut)]
    workers: NvmeWorkers,
    /// PCI-level fault behavior (CAP.MQES override, controller-enable fault).
    #[inspect(skip)]
    pci_fault_config: PciFaultConfig,
    /// Flag checked before the controller-enable fault is applied.
    #[inspect(skip)]
    fault_active: mesh::Cell<bool>,
    /// Optional TDISP target handed out by `supports_tdisp`.
    #[inspect(skip)]
    tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
}
72
/// Guest-visible controller register state backing BAR0.
#[derive(Inspect)]
struct RegState {
    /// Interrupt mask; bits are set by INTMS writes and cleared by INTMC
    /// writes, and both registers read back this value.
    #[inspect(hex)]
    interrupt_mask: u32,
    /// Controller configuration (CC) register.
    cc: spec::Cc,
    /// Controller status (CSTS) register.
    csts: spec::Csts,
    /// Admin queue attributes (AQA) register.
    aqa: spec::Aqa,
    /// Admin submission queue base address (ASQ); stored masked with
    /// `PAGE_MASK`.
    #[inspect(hex)]
    asq: u64,
    /// Admin completion queue base address (ACQ); stored masked with
    /// `PAGE_MASK`.
    #[inspect(hex)]
    acq: u64,
}
85
86impl RegState {
87 fn new() -> Self {
88 Self {
89 interrupt_mask: 0,
90 cc: spec::Cc::new(),
91 csts: spec::Csts::new(),
92 aqa: spec::Aqa::new(),
93 asq: 0,
94 acq: 0,
95 }
96 }
97}
98
99const CAP: spec::Cap = spec::Cap::new()
100 .with_dstrd(DOORBELL_STRIDE_BITS - 2)
101 .with_mqes_z(MAX_QES - 1)
102 .with_cqr(true)
103 .with_css_nvm(true)
104 .with_to(!0);
105
/// Construction-time capabilities for [`NvmeFaultController`].
#[derive(Debug, Copy, Clone)]
pub struct NvmeFaultControllerCaps {
    /// Number of MSI-X interrupts to expose; one interrupt object is created
    /// for each index below this count.
    pub msix_count: u16,
    /// Queue limit handed to the workers (passed for two consecutive
    /// arguments; presumably the IO submission and completion queue
    /// limits - confirm against `NvmeWorkers::new`).
    pub max_io_queues: u16,
    /// Subsystem identifier forwarded to the workers.
    pub subsystem_id: Guid,
}
117
118impl NvmeFaultController {
119 pub fn new(
121 driver_source: &VmTaskDriverSource,
122 guest_memory: GuestMemory,
123 msi_target: &MsiTarget,
124 register_mmio: &mut dyn RegisterMmioIntercept,
125 caps: NvmeFaultControllerCaps,
126 mut fault_configuration: FaultConfiguration,
127 tdisp_interface: Option<Box<dyn TdispHostDeviceTarget>>,
128 ) -> Self {
129 let (msix, msix_cap) = MsixEmulator::new(4, caps.msix_count, msi_target);
130 let bars = DeviceBars::new()
131 .bar0(
132 BAR0_LEN,
133 BarMemoryKind::Intercept(register_mmio.new_io_region("bar0", BAR0_LEN)),
134 )
135 .bar4(
136 msix.bar_len(),
137 BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
138 );
139
140 let hardware_config_fault = fault_configuration.hardware_config_fault.take();
144 let vendor_id = hardware_config_fault
145 .and_then(|f| f.vendor_id)
146 .unwrap_or(VENDOR_ID);
147 let device_id = hardware_config_fault
148 .and_then(|f| f.device_id)
149 .unwrap_or(0x00a9);
150
151 let cfg_space = ConfigSpaceType0Emulator::new(
152 HardwareIds {
153 vendor_id,
154 device_id,
155 revision_id: 0,
156 prog_if: ProgrammingInterface::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY_NVME,
157 sub_class: Subclass::MASS_STORAGE_CONTROLLER_NON_VOLATILE_MEMORY,
158 base_class: ClassCode::MASS_STORAGE_CONTROLLER,
159 type0_sub_vendor_id: 0,
160 type0_sub_system_id: 0,
161 },
162 vec![Box::new(msix_cap)],
163 Vec::new(),
164 bars,
165 );
166
167 let interrupts = (0..caps.msix_count)
168 .map(|i| msix.interrupt(i).unwrap())
169 .collect();
170
171 let pci_fault_config = fault_configuration
172 .pci_fault
173 .take()
174 .unwrap_or(PciFaultConfig::new());
175
176 let fault_active = fault_configuration.fault_active.clone();
177
178 let qe_sizes = Arc::new(Default::default());
179 let admin = NvmeWorkers::new(
180 driver_source,
181 guest_memory,
182 interrupts,
183 caps.max_io_queues,
184 caps.max_io_queues,
185 Arc::clone(&qe_sizes),
186 caps.subsystem_id,
187 fault_configuration,
188 );
189
190 Self {
191 cfg_space,
192 msix,
193 registers: RegState::new(),
194 workers: admin,
195 qe_sizes,
196 pci_fault_config,
197 fault_active,
198 tdisp_interface,
199 }
200 }
201
    /// Returns the client handle produced by the workers.
    pub fn client(&self) -> NvmeFaultControllerClient {
        self.workers.client()
    }
206
207 pub fn read_bar0(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
209 if data.len() < 4 {
210 return IoResult::Err(IoError::InvalidAccessSize);
211 }
212 if addr & (data.len() as u64 - 1) != 0 {
213 return IoResult::Err(IoError::UnalignedAccess);
214 }
215
216 let d: Option<u64> = match spec::Register(addr & !7) {
218 spec::Register::CAP => {
219 if let Some(mqes) = self.pci_fault_config.max_queue_size {
220 Some(CAP.with_mqes_z(mqes - 1).into())
221 } else {
222 Some(CAP.into())
223 }
224 }
225 spec::Register::ASQ => Some(self.registers.asq),
226 spec::Register::ACQ => Some(self.registers.acq),
227 spec::Register::BPMBL => Some(0),
228 _ => None,
229 };
230 if let Some(d) = d {
231 if data.len() == 8 {
232 data.copy_from_slice(&d.to_ne_bytes());
233 } else if addr & 7 == 0 {
234 data.copy_from_slice(&(d as u32).to_ne_bytes());
235 } else {
236 data.copy_from_slice(&((d >> 32) as u32).to_ne_bytes());
237 }
238 return IoResult::Ok;
239 }
240
241 if data.len() != 4 {
242 return IoResult::Err(IoError::InvalidAccessSize);
243 }
244
245 let d: u32 = match spec::Register(addr) {
247 spec::Register::VS => NVME_VERSION,
248 spec::Register::INTMS => self.registers.interrupt_mask,
249 spec::Register::INTMC => self.registers.interrupt_mask,
250 spec::Register::CC => self.registers.cc.into(),
251 spec::Register::RESERVED => 0,
252 spec::Register::CSTS => self.get_csts(),
253 spec::Register::NSSR => 0,
254 spec::Register::AQA => self.registers.aqa.into(),
255 spec::Register::CMBLOC => 0,
256 spec::Register::CMBSZ => 0,
257 spec::Register::BPINFO => 0,
258 spec::Register::BPRSEL => 0,
259 _ => return IoResult::Err(InvalidRegister),
260 };
261 data.copy_from_slice(&d.to_ne_bytes());
262 IoResult::Ok
263 }
264
265 pub fn write_bar0(&mut self, addr: u64, data: &[u8]) -> IoResult {
267 if addr >= 0x1000 {
268 let base = addr - 0x1000;
270 let db_id = base >> DOORBELL_STRIDE_BITS;
271 if (db_id << DOORBELL_STRIDE_BITS) != base {
272 return IoResult::Err(InvalidRegister);
273 }
274 let Ok(db_id) = u16::try_from(db_id) else {
275 return IoResult::Err(InvalidRegister);
276 };
277 let Ok(data) = data.try_into() else {
278 return IoResult::Err(IoError::InvalidAccessSize);
279 };
280 let value = u32::from_ne_bytes(data);
281 self.workers.doorbell(db_id, value);
282 return IoResult::Ok;
283 }
284
285 if data.len() < 4 {
286 return IoResult::Err(IoError::InvalidAccessSize);
287 }
288 if addr & (data.len() as u64 - 1) != 0 {
289 return IoResult::Err(IoError::UnalignedAccess);
290 }
291
292 let update_reg = |x: u64| {
293 if data.len() == 8 {
294 u64::from_ne_bytes(data.try_into().unwrap())
295 } else {
296 let data = u32::from_ne_bytes(data.try_into().unwrap()) as u64;
297 if addr & 7 == 0 {
298 (x & !(u32::MAX as u64)) | data
299 } else {
300 (x & u32::MAX as u64) | (data << 32)
301 }
302 }
303 };
304
305 let handled = match spec::Register(addr & !7) {
307 spec::Register::ASQ => {
308 if !self.registers.cc.en() {
309 self.registers.asq = update_reg(self.registers.asq) & PAGE_MASK;
310 } else {
311 tracelimit::warn_ratelimited!("attempt to set asq while enabled");
312 }
313 true
314 }
315 spec::Register::ACQ => {
316 if !self.registers.cc.en() {
317 self.registers.acq = update_reg(self.registers.acq) & PAGE_MASK;
318 } else {
319 tracelimit::warn_ratelimited!("attempt to set acq while enabled");
320 }
321 true
322 }
323 _ => false,
324 };
325 if handled {
326 return IoResult::Ok;
327 }
328
329 let Ok(data) = data.try_into() else {
330 return IoResult::Err(IoError::InvalidAccessSize);
331 };
332 let data = u32::from_ne_bytes(data);
333
334 match spec::Register(addr) {
336 spec::Register::INTMS => self.registers.interrupt_mask |= data,
337 spec::Register::INTMC => self.registers.interrupt_mask &= !data,
338 spec::Register::CC => self.set_cc(data.into()),
339 spec::Register::AQA => self.registers.aqa = data.into(),
340 _ => return IoResult::Err(InvalidRegister),
341 }
342 IoResult::Ok
343 }
344
    /// Applies a guest write to the controller configuration (CC) register.
    ///
    /// Validates the requested configuration, handles shutdown notification,
    /// and drives the enable/disable transition through the workers. On
    /// success the masked value is stored in `registers.cc` and the IO queue
    /// entry sizes shared with the workers are refreshed. Several failure
    /// paths return early without storing the new value.
    fn set_cc(&mut self, cc: spec::Cc) {
        tracing::debug!(?cc, "set cc");

        // Only MPS == 0 is accepted; anything else is a fatal error.
        if cc.mps() != 0 {
            tracelimit::warn_ratelimited!(
                "This implementation only supports memory page sizes of 4K."
            );
            self.fatal_error();
            return;
        }

        // Only CSS == 0 is accepted; anything else is a fatal error.
        if cc.css() != 0 {
            tracelimit::warn_ratelimited!("This implementation only supports the NVM command set.");
            self.fatal_error();
            return;
        }

        // NOTE(review): unlike the MPS and CSS checks above, this branch sets
        // the fatal status but does not return, so processing continues with
        // the written value. Confirm whether that is intentional.
        if let 2..=6 = cc.ams() {
            tracelimit::warn_ratelimited!("Undefined arbitration mechanism.");
            self.fatal_error();
        }

        // Keep only the EN, SHN, IOSQES and IOCQES fields of the written
        // value; every other bit is dropped.
        let mask: u32 = u32::from(
            spec::Cc::new()
                .with_en(true)
                .with_shn(0b11)
                .with_iosqes(0b1111)
                .with_iocqes(0b1111),
        );
        let mut cc: spec::Cc = (u32::from(cc) & mask).into();

        // Any shutdown notification is answered immediately by setting
        // CSTS.SHST to 0b10 (the NVMe "shutdown processing complete"
        // encoding).
        if cc.shn() != 0 {
            self.registers.csts.set_shst(0b10);
        }

        // Only act on a change of the enable bit.
        if cc.en() != self.registers.cc.en() {
            if cc.en() {
                // Apply the configured controller-enable fault, if faults are
                // currently active.
                if self.fault_active.get() {
                    match &mut self.pci_fault_config.controller_management_fault_enable {
                        PciFaultBehavior::Delay(duration) => {
                            // NOTE(review): this blocks the calling thread for
                            // the whole delay.
                            std::thread::sleep(*duration);
                        }
                        PciFaultBehavior::Default => {}
                        PciFaultBehavior::Verify(send) => {
                            // The sender is taken, so the notification is sent
                            // at most once.
                            if let Some(send) = send.take() {
                                send.send(());
                            }
                        }
                    }
                }

                // A zero completion entry size is replaced with the default;
                // any other non-default size is a fatal error.
                if cc.iocqes() == 0 {
                    cc.set_iocqes(IOCQES);
                } else if cc.iocqes() != IOCQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports CQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                // Same treatment for the submission entry size.
                if cc.iosqes() == 0 {
                    cc.set_iosqes(IOSQES);
                } else if cc.iosqes() != IOSQES {
                    tracelimit::warn_ratelimited!(
                        "This implementation only supports SQEs of the default size."
                    );
                    self.fatal_error();
                    return;
                }

                // RDY still set while the stored EN is clear means an earlier
                // reset has not been observed as complete yet; ignore the
                // enable request.
                if self.registers.csts.rdy() {
                    tracelimit::warn_ratelimited!("enabling during reset");
                    return;
                }
                // Clear the shutdown status unless a shutdown was requested in
                // the same write.
                if cc.shn() == 0 {
                    self.registers.csts.set_shst(0);
                }

                // AQA sizes are zero-based; the `max(1) + 1` yields a queue
                // length of at least two entries.
                self.workers.enable(
                    self.registers.asq,
                    self.registers.aqa.asqs_z().max(1) + 1,
                    self.registers.acq,
                    self.registers.aqa.acqs_z().max(1) + 1,
                );
            } else if self.registers.csts.rdy() {
                // Disabling a ready controller starts a controller reset;
                // completion is picked up later in `get_csts`.
                self.workers.controller_reset();
            } else {
                tracelimit::warn_ratelimited!("disabling while not ready");
                return;
            }
        }

        // Commit the masked value and publish the entry sizes to the workers.
        self.registers.cc = cc;
        *self.qe_sizes.lock() = IoQueueEntrySizes {
            sqe_bits: cc.iosqes(),
            cqe_bits: cc.iocqes(),
        };
    }
449
450 fn get_csts(&mut self) -> u32 {
451 if !self.registers.cc.en() && self.registers.csts.rdy() {
452 if self.workers.poll_controller_reset() {
454 self.registers.csts = 0.into();
456 self.registers.cc = 0.into();
457 self.registers.interrupt_mask = 0;
458 }
459 } else if self.registers.cc.en() && !self.registers.csts.rdy() {
460 if self.workers.poll_enabled() {
461 self.registers.csts.set_rdy(true);
462 }
463 }
464
465 let csts = self.registers.csts;
466 tracing::debug!(?csts, "get csts");
467 csts.into()
468 }
469
    /// Marks the controller as fatally failed by setting the CFS bit in CSTS.
    pub fn fatal_error(&mut self) {
        self.registers.csts.set_cfs(true);
    }
475}
476
/// Device lifecycle hooks. Start and stop do nothing; reset restores the
/// controller's mutable state.
impl ChangeDeviceState for NvmeFaultController {
    /// No-op.
    fn start(&mut self) {}

    /// No-op.
    async fn stop(&mut self) {}

    /// Resets the workers, the PCI configuration space, the BAR0 register
    /// state and the shared queue entry sizes.
    async fn reset(&mut self) {
        // Destructure without `..` so that adding a field to the struct forces
        // a decision here about whether it needs resetting.
        let Self {
            cfg_space,
            msix: _,
            registers,
            qe_sizes,
            workers,
            pci_fault_config: _,
            fault_active: _,
            tdisp_interface: _,
        } = self;
        // The workers are reset first, before any register state is cleared.
        workers.reset().await;
        cfg_space.reset();
        *registers = RegState::new();
        *qe_sizes.lock() = Default::default();
    }
}
499
500impl ChipsetDevice for NvmeFaultController {
501 fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
502 Some(self)
503 }
504
505 fn supports_pci(&mut self) -> Option<&mut dyn PciConfigSpace> {
506 Some(self)
507 }
508
509 fn supports_tdisp(&mut self) -> Option<&mut dyn TdispHostDeviceTarget> {
511 tracing::debug!(
512 supported = self.tdisp_interface.is_some(),
513 "fault controller TDISP support in ChipsetDevice"
514 );
515
516 match &mut self.tdisp_interface {
517 Some(tdisp) => Some(tdisp.as_mut()),
518 None => None,
519 }
520 }
521}
522
523impl MmioIntercept for NvmeFaultController {
524 fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> IoResult {
525 match self.cfg_space.find_bar(addr) {
526 Some((0, offset)) => self.read_bar0(offset, data),
527 Some((4, offset)) => {
528 read_as_u32_chunks(offset, data, |offset| self.msix.read_u32(offset));
529 IoResult::Ok
530 }
531 _ => IoResult::Err(InvalidRegister),
532 }
533 }
534
535 fn mmio_write(&mut self, addr: u64, data: &[u8]) -> IoResult {
536 match self.cfg_space.find_bar(addr) {
537 Some((0, offset)) => self.write_bar0(offset, data),
538 Some((4, offset)) => {
539 write_as_u32_chunks(offset, data, |offset, ty| match ty {
540 ReadWriteRequestType::Read => Some(self.msix.read_u32(offset)),
541 ReadWriteRequestType::Write(val) => {
542 self.msix.write_u32(offset, val);
543 None
544 }
545 });
546 IoResult::Ok
547 }
548 _ => IoResult::Err(InvalidRegister),
549 }
550 }
551}
552
/// PCI configuration space accesses are forwarded unchanged to the
/// configuration space emulator.
impl PciConfigSpace for NvmeFaultController {
    /// Reads the 32-bit configuration register at `offset` into `value`.
    fn pci_cfg_read(&mut self, offset: u16, value: &mut u32) -> IoResult {
        self.cfg_space.read_u32(offset, value)
    }

    /// Writes `value` to the 32-bit configuration register at `offset`.
    fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
        self.cfg_space.write_u32(offset, value)
    }
}
562
/// Save/restore is not supported for the fault controller.
impl SaveRestore for NvmeFaultController {
    type SavedState = SavedStateNotSupported;

    /// Always fails with `SaveError::NotSupported`.
    fn save(&mut self) -> Result<Self::SavedState, SaveError> {
        Err(SaveError::NotSupported)
    }

    /// Has no reachable body: the empty `match` only type-checks because the
    /// saved-state type has no values, so there is never a state to restore.
    fn restore(
        &mut self,
        state: Self::SavedState,
    ) -> Result<(), vmcore::save_restore::RestoreError> {
        match state {}
    }
}
576}