pci_core/capabilities/
msix.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! MSI-X Capability.
5
6use super::PciCapability;
7use crate::msi::MsiTarget;
8use crate::spec::caps::CapabilityId;
9use crate::spec::caps::msix::MsixCapabilityHeader;
10use crate::spec::caps::msix::MsixTableEntryIdx;
11use inspect::Inspect;
12use inspect::InspectMut;
13use parking_lot::Mutex;
14use std::fmt::Debug;
15use std::sync::Arc;
16use vmcore::interrupt::Interrupt;
17
18#[derive(Debug, Inspect)]
19struct MsiTableLocation {
20    #[inspect(hex)]
21    // MSI-X table offsets are, per spec, no larger than 32 bits.
22    offset: u32,
23    bar: u8,
24}
25
26impl MsiTableLocation {
27    fn new(bar: u8, offset: u32) -> Self {
28        assert!(bar < 6);
29        assert!(offset & 7 == 0);
30        Self { offset, bar }
31    }
32
33    fn read_u32(&self) -> u32 {
34        self.offset | self.bar as u32
35    }
36}
37
38#[derive(Inspect)]
39struct MsixCapability {
40    count: u16,
41    #[inspect(with = "|x| inspect::adhoc(|req| x.lock().inspect_mut(req))")]
42    state: Arc<Mutex<MsixState>>,
43    config_table_location: MsiTableLocation,
44    pending_bits_location: MsiTableLocation,
45}
46
47impl PciCapability for MsixCapability {
48    fn label(&self) -> &str {
49        "msi-x"
50    }
51
52    fn capability_id(&self) -> CapabilityId {
53        CapabilityId::MSIX
54    }
55
56    fn len(&self) -> usize {
57        12
58    }
59
60    fn read_u32(&self, offset: u16) -> u32 {
61        match MsixCapabilityHeader(offset) {
62            MsixCapabilityHeader::CONTROL_CAPS => {
63                CapabilityId::MSIX.0 as u32
64                    | ((self.count as u32 - 1) | if self.state.lock().enabled { 0x8000 } else { 0 })
65                        << 16
66            }
67            MsixCapabilityHeader::OFFSET_TABLE => self.config_table_location.read_u32(),
68            MsixCapabilityHeader::OFFSET_PBA => self.pending_bits_location.read_u32(),
69            _ => panic!("Unreachable read offset {}", offset),
70        }
71    }
72
73    fn write_u32(&mut self, offset: u16, val: u32) {
74        match MsixCapabilityHeader(offset) {
75            MsixCapabilityHeader::CONTROL_CAPS => {
76                let enabled = val & 0x80000000 != 0;
77                let mut state = self.state.lock();
78                let was_enabled = state.enabled;
79                state.enabled = enabled;
80                if was_enabled && !enabled {
81                    for entry in &mut state.vectors {
82                        if entry.is_enabled(true) {
83                            entry.msi.disable();
84                        }
85                    }
86                } else if enabled && !was_enabled {
87                    for entry in &mut state.vectors {
88                        if entry.is_enabled(true) {
89                            entry.msi.enable(
90                                entry.state.address,
91                                entry.state.data,
92                                entry.state.is_pending,
93                            );
94                            entry.state.is_pending = false;
95                        }
96                    }
97                }
98            }
99            MsixCapabilityHeader::OFFSET_TABLE | MsixCapabilityHeader::OFFSET_PBA => {
100                tracelimit::warn_ratelimited!(
101                    "Unexpected write offset {:?}",
102                    MsixCapabilityHeader(offset)
103                )
104            }
105            _ => panic!("Unreachable write offset {}", offset),
106        }
107    }
108
109    fn reset(&mut self) {
110        let mut state = self.state.lock();
111        state.enabled = false;
112        for vector in &mut state.vectors {
113            vector.state = EntryState::new();
114        }
115    }
116}
117
118#[derive(Clone, Inspect, Debug)]
119pub(crate) struct MsiInterrupt(#[inspect(flatten)] Arc<Mutex<MsiInterruptInner>>);
120
121#[derive(Inspect, Debug)]
122struct MsiInterruptInner {
123    target: MsiTarget,
124    pending: bool,
125    enabled: bool,
126    address: u64,
127    data: u32,
128}
129
130impl MsiInterrupt {
131    pub fn new(target: MsiTarget) -> Self {
132        Self(Arc::new(Mutex::new(MsiInterruptInner {
133            target,
134            pending: false,
135            enabled: false,
136            address: 0,
137            data: 0,
138        })))
139    }
140
141    pub fn enable(&self, address: u64, data: u32, set_pending: bool) {
142        let mut state = self.0.lock();
143        state.pending |= set_pending;
144        state.address = address;
145        state.data = data;
146        state.enabled = true;
147        if state.pending {
148            state.target.signal_msi(0, address, data);
149            state.pending = false;
150        }
151    }
152
153    pub fn disable(&self) {
154        let mut state = self.0.lock();
155        state.enabled = false;
156    }
157
158    pub fn drain_pending(&self) -> bool {
159        let mut state = self.0.lock();
160        let was_pending = state.pending;
161        state.pending = false;
162        was_pending
163    }
164
165    pub fn interrupt(&self) -> Interrupt {
166        let state = self.0.clone();
167        Interrupt::from_fn(move || {
168            let mut state = state.lock();
169            if state.enabled {
170                state.target.signal_msi(0, state.address, state.data);
171            } else {
172                state.pending = true;
173            }
174        })
175    }
176}
177
178struct MsixMessageTableEntry {
179    msi: MsiInterrupt,
180    state: EntryState,
181}
182
183impl InspectMut for MsixMessageTableEntry {
184    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
185        req.respond()
186            .hex("address", self.state.address)
187            .hex("data", self.state.data)
188            .hex("control", self.state.control)
189            .field("enabled", self.state.control & 1 == 0)
190            .field("is_pending", self.check_is_pending(true));
191    }
192}
193
/// Register contents of one vector table row, plus its pending bit.
#[derive(Debug)]
struct EntryState {
    /// 64-bit message address (low and high dwords combined).
    address: u64,
    /// Message data dword.
    data: u32,
    /// Vector control dword; the row counts as masked while bit 0 is set.
    control: u32,
    /// Pending bit reported for this row in the pending bit array.
    is_pending: bool,
}
201
202impl EntryState {
203    fn new() -> Self {
204        Self {
205            address: 0,
206            data: 0,
207            control: 1,
208            is_pending: false,
209        }
210    }
211}
212
213impl MsixMessageTableEntry {
214    fn new(msi: MsiInterrupt) -> Self {
215        Self {
216            msi,
217            state: EntryState::new(),
218        }
219    }
220
221    fn read_u32(&self, offset: u64) -> u32 {
222        match MsixTableEntryIdx(offset) {
223            MsixTableEntryIdx::MSG_ADDR_LO => self.state.address as u32,
224            MsixTableEntryIdx::MSG_ADDR_HI => (self.state.address >> 32) as u32,
225            MsixTableEntryIdx::MSG_DATA => self.state.data,
226            MsixTableEntryIdx::VECTOR_CTL => self.state.control,
227            _ => panic!("Unexpected read offset {}", offset),
228        }
229    }
230
231    fn write_u32(&mut self, offset: u64, val: u32) {
232        match MsixTableEntryIdx(offset) {
233            MsixTableEntryIdx::MSG_ADDR_LO => {
234                self.state.address = (self.state.address & 0xffffffff00000000) | val as u64
235            }
236            MsixTableEntryIdx::MSG_ADDR_HI => {
237                self.state.address = (val as u64) << 32 | self.state.address & 0xffffffff
238            }
239            MsixTableEntryIdx::MSG_DATA => self.state.data = val,
240            MsixTableEntryIdx::VECTOR_CTL => self.state.control = val,
241            _ => panic!("Unexpected write offset {}", offset),
242        }
243    }
244
245    fn is_enabled(&self, global_enabled: bool) -> bool {
246        global_enabled && self.state.control & 1 == 0
247    }
248
249    fn check_is_pending(&mut self, global_enabled: bool) -> bool {
250        if !self.state.is_pending && !self.is_enabled(global_enabled) {
251            self.state.is_pending = self.msi.drain_pending();
252        }
253        self.state.is_pending
254    }
255}
256
257#[derive(InspectMut)]
258struct MsixState {
259    enabled: bool,
260    #[inspect(mut, with = "inspect_entries")]
261    vectors: Vec<MsixMessageTableEntry>,
262}
263
264fn inspect_entries(entries: &mut [MsixMessageTableEntry]) -> impl '_ + InspectMut {
265    inspect::adhoc_mut(|req| {
266        let mut resp = req.respond();
267        for (i, entry) in entries.iter_mut().enumerate() {
268            resp.field_mut(&i.to_string(), entry);
269        }
270    })
271}
272
273/// Emulator for the hardware-level interface required to configure and trigger
274/// MSI-X interrupts on a PCI device.
275#[derive(Clone)]
276pub struct MsixEmulator {
277    state: Arc<Mutex<MsixState>>,
278    // PBA offsets, per spec, are no larger than 32 bits.
279    pending_bits_offset: u32,
280    pending_bits_dword_count: u16,
281}
282
283impl MsixEmulator {
284    /// Create a new [`MsixEmulator`] instance, along with with its associated
285    /// [`PciCapability`] structure.
286    ///
287    /// This implementation of MSI-X expects a dedicated BAR to store the vector
288    /// and pending tables.
289    ///
290    /// * * *
291    ///
292    /// DEVNOTE: This current implementation of MSI-X isn't particularly
293    /// "flexible" with respect to the various ways the PCI spec allows MSI-X to
294    /// be implemented. e.g: it uses a shared BAR for the table and BPA, with
295    /// fixed offsets into the BAR for both of those tables. It would be nice to
296    /// re-visit this code and make it more flexible.
297    pub fn new(bar: u8, count: u16, msi_target: &MsiTarget) -> (Self, impl PciCapability + use<>) {
298        let state = MsixState {
299            enabled: false,
300            vectors: (0..count)
301                .map(|_| MsixMessageTableEntry::new(MsiInterrupt::new(msi_target.clone())))
302                .collect(),
303        };
304        let state = Arc::new(Mutex::new(state));
305        let pending_bits_offset = count as u32 * 16;
306        (
307            Self {
308                state: state.clone(),
309                pending_bits_offset,
310                pending_bits_dword_count: count.div_ceil(32),
311            },
312            MsixCapability {
313                count,
314                state,
315                config_table_location: MsiTableLocation::new(bar, 0),
316                pending_bits_location: MsiTableLocation::new(bar, pending_bits_offset),
317            },
318        )
319    }
320
321    /// Return the total length of the MSI-X BAR
322    /// (Actually, the notion that there is an "MSI-X BAR" is an issue to fix sometime.
323    /// MSI-X tables are often in the same bar as other things.)
324    pub fn bar_len(&self) -> u64 {
325        self.pending_bits_offset as u64 + self.pending_bits_dword_count as u64 * 4
326    }
327
328    /// Read a `u32` from the MSI-X BAR at the given offset.
329    pub fn read_u32(&self, offset: u64) -> u32 {
330        let mut state = self.state.lock();
331        let state: &mut MsixState = &mut state;
332        if offset < self.pending_bits_offset as u64 {
333            let index = offset / 16;
334            if let Some(entry) = state.vectors.get(index as usize) {
335                return entry.read_u32(offset & 0xf);
336            }
337        } else {
338            let dword = (offset - self.pending_bits_offset as u64) / 4;
339            let start = dword as usize * 32;
340            if start < state.vectors.len() {
341                let end = (start + 32).min(state.vectors.len());
342                let mut val = 0u32;
343                for (i, entry) in state.vectors[start..end].iter_mut().enumerate() {
344                    if entry.check_is_pending(state.enabled) {
345                        val |= 1 << i;
346                    }
347                }
348                return val;
349            }
350        }
351        tracelimit::warn_ratelimited!(offset, "Unexpected read offset");
352        0
353    }
354
355    /// Write a `u32` to the MSI-X BAR at the given offset.
356    pub fn write_u32(&mut self, offset: u64, val: u32) {
357        let mut state = self.state.lock();
358        if offset < self.pending_bits_offset as u64 {
359            let index = offset / 16;
360            let global = state.enabled;
361            if let Some(entry) = state.vectors.get_mut(index as usize) {
362                let was_enabled = entry.is_enabled(global);
363                entry.write_u32(offset & 0xf, val);
364                let is_enabled = entry.is_enabled(global);
365                if is_enabled && !was_enabled {
366                    entry.msi.enable(
367                        entry.state.address,
368                        entry.state.data,
369                        entry.state.is_pending,
370                    );
371                    entry.state.is_pending = false;
372                } else if was_enabled && !is_enabled {
373                    entry.msi.disable();
374                }
375                return;
376            }
377        } else if offset - (self.pending_bits_offset as u64)
378            < self.pending_bits_dword_count as u64 * 4
379        {
380            return;
381        }
382        tracelimit::warn_ratelimited!(offset, "Unexpected write offset");
383    }
384
385    /// Return an [`Interrupt`] associated with the particular MSI-X vector, or
386    /// `None` if the index is out of bounds.
387    pub fn interrupt(&self, index: u16) -> Option<Interrupt> {
388        Some(
389            self.state
390                .lock()
391                .vectors
392                .get_mut(index as usize)?
393                .msi
394                .interrupt(),
395        )
396    }
397
398    #[cfg(test)]
399    fn clear_pending_bit(&self, index: u8) {
400        let mut state = self.state.lock();
401        state.vectors[index as usize].state.is_pending = false;
402    }
403
404    #[cfg(test)]
405    fn set_pending_bit(&self, index: u8) {
406        let mut state = self.state.lock();
407        state.vectors[index as usize].state.is_pending = true;
408    }
409}
410
411mod save_restore {
412    use super::*;
413    use thiserror::Error;
414    use vmcore::save_restore::RestoreError;
415    use vmcore::save_restore::SaveError;
416    use vmcore::save_restore::SaveRestore;
417
418    mod state {
419        use mesh::payload::Protobuf;
420        use vmcore::save_restore::SavedStateRoot;
421
422        #[derive(Debug, Protobuf)]
423        #[mesh(package = "pci.caps.msix")]
424        pub struct SavedMsixMessageTableEntryState {
425            #[mesh(1)]
426            pub address: u64,
427            #[mesh(2)]
428            pub data: u32,
429            #[mesh(3)]
430            pub control: u32,
431            #[mesh(4)]
432            pub is_pending: bool,
433        }
434
435        #[derive(Debug, Protobuf, SavedStateRoot)]
436        #[mesh(package = "pci.caps.msix")]
437        pub struct SavedState {
438            #[mesh(2)]
439            pub enabled: bool,
440            #[mesh(3)]
441            pub vectors: Vec<SavedMsixMessageTableEntryState>,
442        }
443    }
444
445    #[derive(Debug, Error)]
446    enum MsixRestoreError {
447        #[error("mismatched vector lengths: current:{0}, saved:{1}")]
448        MismatchedTableLengths(usize, usize),
449    }
450
451    impl SaveRestore for MsixCapability {
452        type SavedState = state::SavedState;
453
454        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
455            let state = self.state.lock();
456            let saved_state = state::SavedState {
457                enabled: state.enabled,
458                vectors: {
459                    state
460                        .vectors
461                        .iter()
462                        .map(|vec| {
463                            let EntryState {
464                                address,
465                                data,
466                                control,
467                                is_pending,
468                            } = vec.state;
469
470                            state::SavedMsixMessageTableEntryState {
471                                address,
472                                data,
473                                control,
474                                is_pending,
475                            }
476                        })
477                        .collect()
478                },
479            };
480            Ok(saved_state)
481        }
482
483        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
484            let state::SavedState { enabled, vectors } = state;
485
486            let mut state = self.state.lock();
487            state.enabled = enabled;
488
489            if vectors.len() != state.vectors.len() {
490                return Err(RestoreError::InvalidSavedState(
491                    MsixRestoreError::MismatchedTableLengths(vectors.len(), state.vectors.len())
492                        .into(),
493                ));
494            }
495
496            for (new_vec, vec) in vectors.into_iter().zip(state.vectors.iter_mut()) {
497                vec.state = EntryState {
498                    address: new_vec.address,
499                    data: new_vec.data,
500                    control: new_vec.control,
501                    is_pending: new_vec.is_pending,
502                }
503            }
504
505            Ok(())
506        }
507    }
508}
509
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{msi::MsiConnection, test_helpers::TestPciInterruptController};

    /// Reads each listed BAR offset, in order, and checks the returned dword.
    fn expect_bar(msix: &MsixEmulator, expected: &[(u64, u32)]) {
        for &(offset, value) in expected {
            assert_eq!(msix.read_u32(offset), value);
        }
    }

    #[test]
    fn msix_check() {
        let msi_conn = MsiConnection::new();
        let (mut msix, mut cap) = MsixEmulator::new(2, 64, msi_conn.target());
        let msi_controller = TestPciInterruptController::new();
        msi_conn.connect(msi_controller.signal_msi());

        // Capability dwords: 64 vectors in BAR 2, PBA right after the table.
        // Writing all ones only sticks in the enable bit.
        assert_eq!(cap.read_u32(0), 0x3f0011);
        assert_eq!(cap.read_u32(4), 2);
        assert_eq!(cap.read_u32(8), 0x402);
        cap.write_u32(0, 0xffffffff);
        assert_eq!(cap.read_u32(0), 0x803f0011);

        // Vector table: the first and the last row start out masked and
        // zeroed, and every register reads back what was written.
        const PATTERN: [u32; 4] = [0x12345678, 0x9abcdef0, 0x123, 0x456];
        for base in [0u64, 0x3f0] {
            expect_bar(
                &msix,
                &[(base, 0), (base + 4, 0), (base + 8, 0), (base + 12, 1)],
            );
            for (i, val) in PATTERN.into_iter().enumerate() {
                msix.write_u32(base + 4 * i as u64, val);
            }
            for (i, val) in PATTERN.into_iter().enumerate() {
                assert_eq!(msix.read_u32(base + 4 * i as u64), val);
            }
        }

        // Pending bit array: each vector maps to bit `n % 32` of dword `n / 32`.
        expect_bar(&msix, &[(0x400, 0), (0x404, 0)]);
        msix.set_pending_bit(1);
        expect_bar(&msix, &[(0x400, 2), (0x404, 0)]);
        msix.set_pending_bit(33);
        expect_bar(&msix, &[(0x400, 2), (0x404, 2)]);
        msix.set_pending_bit(63);
        msix.set_pending_bit(31);
        expect_bar(&msix, &[(0x400, 0x80000002), (0x404, 0x80000002)]);
        msix.clear_pending_bit(1);
        expect_bar(&msix, &[(0x400, 0x80000000), (0x404, 0x80000002)]);
    }
}