Skip to main content

virt_support_apic/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Local APIC emulator.
5//!
6//! This emulates the local APIC, as documented by the Intel SDM. It supports
7//! both legacy (MMIO) and X2APIC (MSR) modes.
8
9#![forbid(unsafe_code)]
10
11use bitfield_struct::bitfield;
12use inspect::Inspect;
13use inspect_counters::Counter;
14use parking_lot::RwLock;
15use std::sync::Arc;
16use std::sync::atomic::AtomicU32;
17use std::sync::atomic::Ordering;
18use std::time::Duration;
19use thiserror::Error;
20use virt::x86::MsrError;
21use virt::x86::vp::ApicRegisters;
22use vm_topology::processor::VpIndex;
23use vm_topology::processor::x86::X86VpInfo;
24use vmcore::vmtime::VmTime;
25use vmcore::vmtime::VmTimeAccess;
26use x86defs::X86X_MSR_APIC_BASE;
27use x86defs::apic::APIC_BASE_PAGE;
28use x86defs::apic::ApicBase;
29use x86defs::apic::ApicRegister;
30use x86defs::apic::Dcr;
31use x86defs::apic::DeliveryMode;
32use x86defs::apic::DestinationShorthand;
33use x86defs::apic::Dfr;
34use x86defs::apic::Icr;
35use x86defs::apic::Lvt;
36use x86defs::apic::Svr;
37use x86defs::apic::TimerMode;
38use x86defs::apic::X2APIC_MSR_BASE;
39use x86defs::apic::X2APIC_MSR_END;
40use x86defs::apic::X2ApicLogicalId;
41use x86defs::apic::XApicClusterLogicalId;
42use x86defs::msi::MsiAddress;
43use x86defs::msi::MsiData;
44
/// Duration of one APIC timer tick, in nanoseconds.
45const NANOS_PER_TICK: u64 = 5; // 200 MHz
/// APIC timer clock rate in Hz, derived from the tick duration. This is the
/// value returned by `LocalApicSet::frequency` and by reads of
/// `HV_X64_MSR_APIC_FREQUENCY`.
46const TIMER_FREQUENCY: u64 = 1_000_000_000 / NANOS_PER_TICK;
/// Initial contents of each APIC's version register.
47const APIC_VERSION: u32 = 0x50014;
48
/// The ICR fields, other than the destination, that are preserved when the
/// guest writes the ICR through an MSR. All other bits are masked off.
49const ICR_LOW_MASK: Icr = Icr::new()
50    .with_vector(!0)
51    .with_delivery_mode(0b111)
52    .with_destination_mode_logical(true)
53    .with_level_assert(true)
54    .with_trigger_mode_level(true)
55    .with_destination_shorthand(0b11);
56
/// [`ICR_LOW_MASK`] plus the xAPIC destination field.
57const ICR_XAPIC_MASK: Icr = ICR_LOW_MASK.with_xapic_mda(!0);
/// [`ICR_LOW_MASK`] plus the x2APIC destination field.
58const ICR_X2APIC_MASK: Icr = ICR_LOW_MASK.with_x2apic_mda(!0);
59
60/// An individual local APIC for a processor.
///
/// Instances are created by [`LocalApicSet::add_apic`]. Register and MSR
/// accesses go through [`LocalApicAccess`], which pairs the APIC with an
/// [`ApicClient`].
61#[derive(Inspect)]
62pub struct LocalApic {
    /// Per-APIC state that is also referenced from this APIC's slot in the
    /// set.
63    #[inspect(flatten)]
64    shared: Arc<SharedState>,
    /// State common to every APIC created from the same [`LocalApicSet`].
65    #[inspect(skip)]
66    global: Arc<GlobalState>,
67
    /// Current value of the APIC base MSR.
68    #[inspect(hex)]
69    apic_base: u64,
70    #[inspect(hex)]
71    base_address: Option<u64>,
    /// APIC ID, taken from the VP's topology information.
72    #[inspect(hex)]
73    id: u32,
    /// Value returned for reads of the version register.
74    #[inspect(hex)]
75    version: u32,
76    #[inspect(hex)]
77    ldr: u32,
    /// Whether the last DFR write selected the cluster model.
78    cluster_mode: bool,
79    #[inspect(hex)]
80    svr: u32,
    /// In-service vectors, kept as a stack ordered by priority.
81    #[inspect(hex, with = "|x| inspect::iter_by_index(x.to_bits())")]
82    isr: IsrStack,
83    #[inspect(hex, iter_by_index)]
84    irr: [u32; 8],
    /// Trigger mode bits. If a vector's bit is set when it is EOIed, the EOI
    /// is forwarded to the client.
85    #[inspect(hex, iter_by_index)]
86    tmr: [u32; 8],
87    #[inspect(hex, iter_by_index)]
88    auto_eoi: [u32; 8],
89    next_irr: Option<u8>,
90    #[inspect(hex)]
91    esr: u32,
    /// Last value written to the ICR, after masking. Held as 64 bits; xAPIC
    /// reads see it as two 32-bit halves.
92    #[inspect(hex)]
93    icr: u64,
94    #[inspect(hex)]
95    lvt_timer: u32,
96    #[inspect(hex)]
97    lvt_thermal: u32,
98    #[inspect(hex)]
99    lvt_pmc: u32,
100    #[inspect(hex, iter_by_index)]
101    lvt_lint: [u32; 2],
102    #[inspect(hex)]
103    lvt_error: u32,
104    #[inspect(hex)]
105    timer_icr: u32,
    /// Timer current count. It is refreshed via `eval_time` before a guest
    /// read of the register returns it.
106    #[inspect(hex)]
107    timer_ccr: u32,
108    last_time: VmTime,
109    next_timeout: Option<VmTime>,
110    #[inspect(hex)]
111    timer_dcr: u32,
112    active_auto_eoi: bool,
113    is_offloaded: bool,
114    needs_offload_reeval: bool,
115    scan_irr: bool,
116
    /// Event counters for this APIC.
117    #[inspect(safe)]
118    stats: Stats,
119}
120
/// Event counters for a single local APIC.
121#[derive(Inspect, Default)]
122struct Stats {
    /// Non-lazy EOIs that retired an in-service vector.
123    #[inspect(safe)]
124    eoi: Counter,
    /// EOIs (lazy or not) for a vector whose TMR bit was set, which are
    /// forwarded to the client.
125    #[inspect(safe)]
126    eoi_level: Counter,
    /// EOIs received while no vector was in service.
127    #[inspect(safe)]
128    spurious_eoi: Counter,
    /// EOIs delivered through `LocalApicAccess::lazy_eoi` that retired an
    /// in-service vector.
129    #[inspect(safe)]
130    lazy_eoi: Counter,
131    #[inspect(safe)]
132    interrupt: Counter,
133    #[inspect(safe)]
134    nmi: Counter,
135    #[inspect(safe)]
136    extint: Counter,
137    #[inspect(safe)]
138    init: Counter,
139    #[inspect(safe)]
140    sipi: Counter,
141    #[inspect(safe)]
142    self_ipi: Counter,
143    #[inspect(safe)]
144    broadcast_ipi: Counter,
145    #[inspect(safe)]
146    other_ipi: Counter,
147    #[inspect(safe)]
148    offload_push: Counter,
149    #[inspect(safe)]
150    offload_pull: Counter,
151}
152
/// Returns the priority class of an interrupt vector, i.e. its upper four
/// bits. Each class therefore covers sixteen consecutive vectors.
fn priority(v: u8) -> u8 {
    v / 16
}
156
157fn dcr_divider_shift(dcr: Dcr) -> u8 {
158    let value = dcr.value_low() | (dcr.value_high() << 2);
159    value.wrapping_add(1) & 0b111
160}
161
/// Locates a vector inside an array of eight 32-bit banks.
///
/// Returns the bank index together with the single-bit mask for the vector
/// within that bank.
fn bank_mask(vector: u8) -> (usize, u32) {
    let bank = usize::from(vector >> 5);
    let bit = vector & 0x1f;
    (bank, 1u32 << bit)
}
165
166fn cluster_mode(value: u32) -> bool {
167    match Dfr(value | 0x0fff_ffff) {
168        Dfr::CLUSTERED_MODE => true,
169        Dfr::FLAT_MODE => false,
170        _ => unreachable!("Unknown DFR value {value}"),
171    }
172}
173
174#[derive(Debug)]
175struct IsrStack(Vec<u8>);
176
177impl IsrStack {
178    fn new() -> Self {
179        Self(Vec::with_capacity(16))
180    }
181
182    fn push(&mut self, v: u8) {
183        assert!(v >= 16);
184        assert!(self.0.len() < 16);
185        assert!(priority(self.top().unwrap_or(0)) < priority(v));
186
187        self.0.push(v);
188    }
189
190    fn to_bits(&self) -> [u32; 8] {
191        let mut bits = [0; 8];
192        for &v in &self.0 {
193            bits[v as usize / 32] |= 1 << (v % 32);
194        }
195        bits
196    }
197
198    fn load_from_bits(&mut self, bits: [u32; 8]) {
199        // Only restore at most one interrupt per priority level, skipping the
200        // first (invalid) level.
201        self.clear();
202        let bits = bits.map(|v| [v as u16, (v >> 16) as u16]);
203        for (pri, &v) in bits.iter().flatten().enumerate().skip(1) {
204            if v != 0 {
205                let n = 15 - v.leading_zeros() as u8;
206                self.push(pri as u8 * 16 + n);
207            }
208        }
209    }
210
211    fn is_empty(&self) -> bool {
212        self.0.is_empty()
213    }
214
215    fn top(&self) -> Option<u8> {
216        self.0.last().copied()
217    }
218
219    fn pop(&mut self) -> Option<u8> {
220        self.0.pop()
221    }
222
223    fn clear(&mut self) {
224        self.0.clear();
225    }
226}
227
/// Per-APIC state held behind an `Arc` by both the [`LocalApic`] and its slot
/// in the set. The fields other than `vp_index` and
/// `software_enabled_on_reset` are atomics.
228#[derive(Debug, Inspect)]
229struct SharedState {
    /// Index of the VP this APIC belongs to.
230    vp_index: VpIndex,
231    #[inspect(hex, iter_by_index)]
232    tmr: [AtomicU32; 8],
233    #[inspect(hex, iter_by_index)]
234    new_irr: [AtomicU32; 8],
235    #[inspect(hex, iter_by_index)]
236    auto_eoi: [AtomicU32; 8],
    // NOTE(review): presumably a packed `WorkFlags` value -- confirm against
    // the code that updates it.
237    work: AtomicU32,
    /// The value passed to `LocalApicSet::add_apic`.
238    software_enabled_on_reset: bool,
239}
240
// Bit layout of a 32-bit word of pending work items. The named fields take
// 12 bits (four one-bit flags plus an eight-bit SIPI vector) and the
// remaining 20 bits are reserved.
//
// NOTE(review): presumably the encoding of `SharedState::work` -- confirm.
241#[bitfield(u32)]
242struct WorkFlags {
243    init: bool,
244    sipi: bool,
245    sipi_vector: u8,
246    extint: bool,
247    nmi: bool,
248    #[bits(20)]
249    _rsvd: u32,
250}
251
252/// The interface to the local APIC for all processors.
///
/// Created with [`LocalApicSet::builder`]. Individual APICs are attached
/// with [`LocalApicSet::add_apic`].
253#[derive(Inspect)]
254pub struct LocalApicSet {
    /// State shared with every [`LocalApic`] created from this set.
255    #[inspect(flatten)]
256    global: Arc<GlobalState>,
257}
258
/// State shared by a [`LocalApicSet`] and all of its APICs.
259#[derive(Debug, Inspect)]
260struct GlobalState {
    /// Whether X2APIC mode is allowed, as configured on the builder.
261    x2apic_capable: bool,
    /// Whether the Hyper-V enlightenment MSRs are handled, as configured on
    /// the builder.
262    hyperv_enlightenments: bool,
    /// The part of the state that changes after construction, behind a
    /// reader/writer lock.
263    #[inspect(flatten)]
264    mutable: RwLock<MutableGlobalState>,
265}
266
/// The lock-protected portion of [`GlobalState`].
267#[derive(Debug, Inspect)]
268struct MutableGlobalState {
    // Logical destinations are decoded as X2APIC logical IDs while this is
    // non-zero. NOTE(review): presumably a count of the APICs currently in
    // X2APIC mode -- confirm against the code that updates it.
269    x2apic_enabled: usize,
    // Logical destinations are decoded as xAPIC cluster IDs while this is
    // non-zero and `x2apic_enabled` is zero. NOTE(review): presumably a
    // count of the APICs using the cluster model -- confirm.
270    logical_cluster_mode: usize,
    /// One slot per APIC ID, indexed by APIC ID. Slots for IDs with no APIC
    /// have `shared` set to `None` and are hidden from inspect output.
271    #[inspect(
272        with = "|x| inspect::iter_by_key(x.iter().enumerate().filter(|x| x.1.shared.is_some()))"
273    )]
274    by_apic_id: Vec<ApicSlot>,
    /// Maps a VP index to its APIC ID. Entries for VP indices with no APIC
    /// are `!0`.
275    #[inspect(iter_by_index)]
276    by_index: Vec<u32>,
277}
278
/// The globally visible view of one APIC, used to route interrupts to it
/// without access to the [`LocalApic`] itself.
279#[derive(Debug, Inspect)]
280struct ApicSlot {
    /// Logical ID matched against logical-mode destinations in xAPIC mode.
281    logical_id: u8,
282    hardware_enabled: bool,
    /// Consulted when picking a target for lowest-priority delivery in
    /// X2APIC cluster mode.
283    software_enabled: bool,
284    cluster_mode: bool,
285    x2apic_enabled: bool,
    /// The two LINT LVT entries, read by `LocalApicSet::lint`.
286    #[inspect(skip)]
287    lint: [Lvt; 2],
    /// The APIC's shared state, or `None` if no APIC has been added with
    /// this APIC ID.
288    #[inspect(skip)]
289    shared: Option<Arc<SharedState>>,
290}
291
292/// Builder for [`LocalApicSet`].
293pub struct LocalApicSetBuilder {
294    /// Allow X2APIC mode.
295    x2apic_capable: bool,
296    /// Handle Hyper-V enlightenment MSRs.
297    hyperv_enlightenments: bool,
298}
299
300impl LocalApicSetBuilder {
301    fn new() -> Self {
302        Self {
303            x2apic_capable: false,
304            hyperv_enlightenments: false,
305        }
306    }
307
308    /// Sets whether X2APIC mode is allowed.
309    pub fn x2apic_capable(&mut self, x2apic_capable: bool) -> &mut Self {
310        self.x2apic_capable = x2apic_capable;
311        self
312    }
313
314    /// Sets whether Hyper-V enlightenment MSRs are handled.
315    pub fn hyperv_enlightenments(&mut self, hyperv_enlightenments: bool) -> &mut Self {
316        self.hyperv_enlightenments = hyperv_enlightenments;
317        self
318    }
319
320    /// Builds a new local APIC set.
321    pub fn build(&self) -> LocalApicSet {
322        LocalApicSet {
323            global: Arc::new(GlobalState {
324                x2apic_capable: self.x2apic_capable,
325                hyperv_enlightenments: self.hyperv_enlightenments,
326                mutable: RwLock::new(MutableGlobalState {
327                    x2apic_enabled: 0,
328                    logical_cluster_mode: 0,
329                    by_apic_id: Vec::new(),
330                    by_index: Vec::new(),
331                }),
332            }),
333        }
334    }
335}
336
337impl LocalApicSet {
338    /// Creates a new builder for a local APIC set.
339    pub fn builder() -> LocalApicSetBuilder {
340        LocalApicSetBuilder::new()
341    }
342
343    /// Returns the frequency of the APIC timer clock.
344    pub fn frequency(&self) -> u64 {
345        TIMER_FREQUENCY
346    }
347
348    /// Adds an APIC for the specified VP to the set.
349    pub fn add_apic(&self, vp: &X86VpInfo, software_enabled_on_reset: bool) -> LocalApic {
350        let shared = Arc::new(SharedState {
351            vp_index: vp.base.vp_index,
352            tmr: Default::default(),
353            new_irr: Default::default(),
354            auto_eoi: Default::default(),
355            work: 0.into(),
356            software_enabled_on_reset,
357        });
358
359        {
360            let mut mutable = self.global.mutable.write();
361            if mutable.by_apic_id.len() <= vp.apic_id as usize {
362                mutable
363                    .by_apic_id
364                    .resize_with(vp.apic_id as usize + 1, || ApicSlot {
365                        logical_id: 0,
366                        hardware_enabled: false,
367                        software_enabled: false,
368                        cluster_mode: false,
369                        x2apic_enabled: false,
370                        lint: [Lvt::new(); 2],
371                        shared: None,
372                    });
373            }
374            assert!(mutable.by_apic_id[vp.apic_id as usize].shared.is_none());
375            mutable.by_apic_id[vp.apic_id as usize].shared = Some(shared.clone());
376            if mutable.by_index.len() <= vp.base.vp_index.index() as usize {
377                mutable
378                    .by_index
379                    .resize(vp.base.vp_index.index() as usize + 1, !0);
380            }
381            mutable.by_index[vp.base.vp_index.index() as usize] = vp.apic_id;
382        }
383
384        let mut apic = LocalApic {
385            shared,
386            global: self.global.clone(),
387            apic_base: 0,
388            base_address: None,
389            id: vp.apic_id,
390            version: APIC_VERSION,
391            ldr: 0,
392            cluster_mode: false,
393            svr: 0,
394            isr: IsrStack::new(),
395            next_irr: None,
396            irr: [0; 8],
397            tmr: [0; 8],
398            auto_eoi: [0; 8],
399            esr: 0,
400            icr: 0,
401            lvt_timer: 0,
402            lvt_thermal: 0,
403            lvt_pmc: 0,
404            lvt_lint: [0; 2],
405            lvt_error: 0,
406            timer_icr: 0,
407            timer_ccr: 0,
408            timer_dcr: 0,
409            last_time: VmTime::from_100ns(0),
410            next_timeout: None,
411            active_auto_eoi: false,
412            needs_offload_reeval: false,
413            is_offloaded: false,
414            scan_irr: false,
415            stats: Stats::default(),
416        };
417        apic.reset();
418        apic
419    }
420
421    /// Requests a message-signaled interrupt.
422    ///
423    /// Calls `wake` for each processor that should be woken up for APIC
424    /// handling.
425    pub fn request_interrupt(&self, address: u64, data: u32, wake: impl FnMut(VpIndex)) {
426        let address = MsiAddress::from(address as u32);
427        let data = MsiData::from(data);
428        self.global.request_interrupt(
429            Destination::from_external(
430                address.destination_mode_logical(),
431                address.virt_destination().into(),
432                self.global.x2apic_capable,
433            ),
434            DeliveryMode(data.delivery_mode()),
435            data.vector(),
436            data.trigger_mode_level(),
437            wake,
438        );
439    }
440
441    /// Pulses the specified LINT.
442    ///
443    /// Typically LINT0 is programmed by the guest for EXTINT interrupts and
444    /// LINT1 is programmed for NMIs.
445    pub fn lint(&self, vp_index: VpIndex, lint_index: usize, wake: impl FnOnce(VpIndex)) {
446        let mutable = self.global.mutable.read();
447        if let Some(slot) = mutable
448            .by_index
449            .get(vp_index.index() as usize)
450            .and_then(|&apic_id| mutable.by_apic_id.get(apic_id as usize))
451        {
452            let lvt = slot.lint[lint_index];
453            if !lvt.masked() {
454                if lvt.trigger_mode_level() {
455                    // Don't know how to manage remote IRR.
456                    return;
457                }
458                slot.request_interrupt(
459                    DeliveryMode(lvt.delivery_mode()),
460                    lvt.vector(),
461                    lvt.trigger_mode_level(),
462                    false,
463                    wake,
464                );
465            }
466        }
467    }
468
469    /// Asserts a synic interrupt to the specified virtual processor, optionally
470    /// with auto EOI (meaning the corresponding ISR bit will not be set when
471    /// the interrupt is delivered).
472    pub fn synic_interrupt(
473        &self,
474        vp_index: VpIndex,
475        vector: u8,
476        auto_eoi: bool,
477        wake: impl FnOnce(VpIndex),
478    ) {
479        let mutable = self.global.mutable.read();
480        if let Some(slot) = mutable
481            .by_index
482            .get(vp_index.index() as usize)
483            .and_then(|&apic_id| mutable.by_apic_id.get(apic_id as usize))
484        {
485            slot.request_interrupt(DeliveryMode::FIXED, vector, false, auto_eoi, wake);
486        }
487    }
488}
489
490impl GlobalState {
    /// Routes an interrupt request to every APIC slot selected by
    /// `destination`.
    ///
    /// `wake` is passed down to the slot-level request for each targeted
    /// APIC. The mutable global state is read-locked for the whole call.
491    fn request_interrupt(
492        &self,
493        destination: Destination,
494        delivery_mode: DeliveryMode,
495        vector: u8,
496        level: bool,
497        mut wake: impl FnMut(VpIndex),
498    ) {
499        let mutable = self.mutable.read();
500        match destination {
501            Destination::Physical(id) => {
                // An ID beyond the end of the slot table is ignored.
502                if let Some(slot) = mutable.by_apic_id.get(id as usize) {
503                    slot.request_interrupt(delivery_mode, vector, level, false, &mut wake);
504                }
505            }
506            Destination::Logical(id) => {
                // How a logical ID is decoded depends on the modes recorded
                // in the global state: X2APIC takes precedence, then xAPIC
                // cluster mode, then xAPIC flat mode.
507                if mutable.x2apic_enabled > 0 {
508                    // X2APIC cluster mode.
509                    if id == !0 {
510                        mutable.request_broadcast_interrupt(
511                            delivery_mode,
512                            vector,
513                            level,
514                            &mut wake,
515                        );
516                    } else {
517                        let lowest_priority = delivery_mode == DeliveryMode::LOWEST_PRIORITY;
518                        let id = X2ApicLogicalId::from(id);
                        // Bit `i` of the 16-bit logical ID selects the APIC
                        // whose physical ID is `(cluster_id << 4) | i`.
519                        let base = (id.cluster_id() as u32) << 4;
520                        for i in 0..16 {
521                            if id.logical_id() & (1 << i) == 0 {
522                                continue;
523                            }
524                            let phys_id = base | i;
525                            if let Some(slot) = mutable.by_apic_id.get(phys_id as usize) {
526                                // For now, just pick the first enabled APIC in the set for lowest priority.
527                                if !lowest_priority || slot.software_enabled {
528                                    slot.request_interrupt(
529                                        delivery_mode,
530                                        vector,
531                                        level,
532                                        false,
533                                        &mut wake,
534                                    );
535                                    if lowest_priority {
536                                        break;
537                                    }
538                                }
539                            }
540                        }
541                    }
542                } else if mutable.logical_cluster_mode > 0 {
                    // Only the low eight bits of the destination are used in
                    // xAPIC mode; all ones is a broadcast.
543                    if id as u8 == !0 {
544                        mutable.request_broadcast_interrupt(
545                            delivery_mode,
546                            vector,
547                            level,
548                            &mut wake,
549                        );
550                    } else {
551                        // XAPIC cluster mode. Easy and fast to iterate through the APICs.
552                        let id = XApicClusterLogicalId::from(id as u8);
553                        mutable.request_set_interrupt(
554                            delivery_mode,
555                            vector,
556                            level,
557                            &mut wake,
558                            |_, slot| {
                                // Match on the same cluster and at least one
                                // common logical ID bit.
559                                let ldr = XApicClusterLogicalId::from(slot.logical_id);
560                                ldr.cluster_id() == id.cluster_id()
561                                    && ldr.logical_id() & id.logical_id() != 0
562                            },
563                        );
564                    }
565                } else {
566                    // APIC flat mode. Just iterate through all the VPs.
567                    mutable.request_set_interrupt(
568                        delivery_mode,
569                        vector,
570                        level,
571                        &mut wake,
                        // Parsed as `(slot.logical_id & (id as u8)) != 0`.
572                        |_, slot| slot.logical_id & id as u8 != 0,
573                    );
574                }
575            }
576            Destination::Broadcast => {
577                mutable.request_broadcast_interrupt(delivery_mode, vector, level, &mut wake);
578            }
579            Destination::AllExcept(except) => {
580                mutable.request_set_interrupt(
581                    delivery_mode,
582                    vector,
583                    level,
584                    &mut wake,
585                    |apic_id, _| apic_id != except,
586                );
587            }
588        }
589    }
590}
591
592enum Destination {
593    Physical(u32),
594    Logical(u32),
595    Broadcast,
596    AllExcept(u32),
597}
598
599impl Destination {
600    fn from_icr(icr: Icr, x2apic: bool) -> Self {
601        if x2apic {
602            if icr.destination_mode_logical() {
603                Self::Logical(icr.x2apic_mda())
604            } else if icr.x2apic_mda() == !0u32 {
605                Self::Broadcast
606            } else {
607                Self::Physical(icr.x2apic_mda())
608            }
609        } else {
610            if icr.destination_mode_logical() {
611                Self::Logical(icr.xapic_mda().into())
612            } else if icr.xapic_mda() == !0u8 {
613                Self::Broadcast
614            } else {
615                Self::Physical(icr.xapic_mda().into())
616            }
617        }
618    }
619
620    fn from_external(
621        logical_destination_mode: bool,
622        destination: u32,
623        x2apic_capable: bool,
624    ) -> Self {
625        if logical_destination_mode {
626            Self::Logical(destination)
627        } else if (x2apic_capable && destination == !0u32)
628            || (!x2apic_capable && destination == 0xff)
629        {
630            Self::Broadcast
631        } else {
632            Self::Physical(destination)
633        }
634    }
635}
636
637/// Access to a local APIC.
///
/// Pairs a mutable borrow of a [`LocalApic`] with the [`ApicClient`] used to
/// service requests the APIC cannot handle on its own.
638pub struct LocalApicAccess<'a, T> {
    /// The APIC being accessed.
639    apic: &'a mut LocalApic,
    /// The client supplying CR8, time, wake, and EOI handling.
640    client: &'a mut T,
641}
642
643/// The client to pass to [`LocalApic::access`], to handle requests needed when
644/// accessing the APIC.
645pub trait ApicClient {
646    /// Get the CR8 register.
    ///
    /// The APIC reports this value shifted left by four bits as the TPR.
647    fn cr8(&mut self) -> u32;
648
649    /// Set the CR8 register.
    ///
    /// Called with the written TPR value shifted right by four bits.
650    fn set_cr8(&mut self, value: u32);
651
652    /// Set the APIC base MSR.
653    ///
654    /// This is just to accelerate reads of the MSR. If apic base MSR reads
655    /// always come to `msr_read`, then this can be a no-op.
656    fn set_apic_base(&mut self, value: u64);
657
658    /// Ensure the processor at `vp_index` calls `scan` soon.
659    fn wake(&mut self, vp_index: VpIndex);
660
661    /// Notify the IO-APIC of an EOI.
    ///
    /// Called only when the TMR bit for `vector` was set at the time of the
    /// EOI.
662    fn eoi(&mut self, vector: u8);
663
664    /// Returns the current time.
665    fn now(&mut self) -> VmTime;
666
667    /// Retrieve the offloaded IRR and ISR state, clearing them in the
668    /// offloaded APIC.
    ///
    /// NOTE(review): the tuple is presumably ordered `(irr, isr)` -- confirm
    /// against the caller.
669    fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]);
670}
671
672fn is_valid_apic_access(address: u64) -> bool {
673    // Any aligned access is valid.
674    if address & 0xf == 0 {
675        return true;
676    }
677    // Allow high byte accesses for some registers. This isn't spec compliant
678    // but some guests rely on this.
679    if address & 0xf == 3 {
680        return matches!(
681            ApicRegister((address >> 4) as u8),
682            ApicRegister::ID | ApicRegister::LDR | ApicRegister::DFR
683        );
684    }
685    false
686}
687
688impl<T: ApicClient> LocalApicAccess<'_, T> {
689    /// Performs an EOI that was signaled lazily, out of band from the normal
690    /// APIC interfaces.
    ///
    /// Debug builds assert that a lazy EOI is actually pending.
691    pub fn lazy_eoi(&mut self) {
692        debug_assert!(self.apic.is_lazy_eoi_pending());
        // `true` marks the EOI as lazy so it is counted separately.
693        self.eoi(true);
694    }
695
696    fn eoi(&mut self, lazy: bool) {
697        self.ensure_state_local();
698        if let Some(vector) = self.apic.isr.pop() {
699            tracing::trace!(vector, "eoi");
700            if lazy {
701                self.apic.stats.lazy_eoi.increment();
702            } else {
703                self.apic.stats.eoi.increment();
704            }
705            let (bank, mask) = bank_mask(vector);
706            // If this was a level-triggered interrupt, notify IO-APIC of the EOI.
707            if self.apic.tmr[bank] & mask != 0 {
708                self.client.eoi(vector);
709                self.apic.stats.eoi_level.increment();
710            }
711        } else {
712            tracelimit::warn_ratelimited!(lazy, "eoi when no interrupts pending");
713            self.apic.stats.spurious_eoi.increment();
714        }
715    }
716
717    /// Reads from the legacy APIC MMIO page.
718    pub fn mmio_read(&mut self, address: u64, data: &mut [u8]) {
719        if !self.apic.xapic_enabled() || !is_valid_apic_access(address) {
720            tracelimit::warn_ratelimited!(
721                address,
722                len = data.len(),
723                enabled = self.apic.hardware_enabled(),
724                x2apic = self.apic.x2apic_enabled(),
725                "invalid apic read"
726            );
727            data.fill(!0);
728            return;
729        }
730
731        let value = self
732            .read_register(ApicRegister((address >> 4) as u8))
733            .unwrap_or(0);
734
735        let offset = address as usize & 3;
736        data.fill(0);
737        let len = data.len().min(4 - offset);
738        let data = &mut data[..len];
739        data.copy_from_slice(&value.to_ne_bytes()[offset..offset + data.len()]);
740    }
741
742    /// Writes to the legacy APIC MMIO page.
743    pub fn mmio_write(&mut self, address: u64, data: &[u8]) {
744        if !self.apic.xapic_enabled() || !is_valid_apic_access(address) {
745            tracelimit::warn_ratelimited!(
746                address,
747                len = data.len(),
748                enabled = self.apic.hardware_enabled(),
749                x2apic = self.apic.x2apic_enabled(),
750                "invalid apic write"
751            );
752            return;
753        }
754
755        let mut value = [0; 4];
756        let offset = address as usize & 3;
757        let data = &data[..data.len().min(4 - offset)];
758        value[offset..offset + data.len()].copy_from_slice(data);
759
760        self.write_register(
761            ApicRegister((address >> 4) as u8),
762            u32::from_ne_bytes(value),
763        );
764    }
765
766    /// Reads from the APIC base MSR, X2APIC MSR, or Hyper-V enlightenment MSR.
    ///
    /// X2APIC MSRs are only recognized while the APIC is in X2APIC mode, and
    /// the Hyper-V MSRs only when enlightenments were enabled for the set.
    ///
    /// # Errors
    ///
    /// Returns [`MsrError::Unknown`] for an MSR that is not handled here,
    /// and [`MsrError::InvalidAccess`] for a handled MSR that cannot be read.
767    pub fn msr_read(&mut self, msr: u32) -> Result<u64, MsrError> {
768        let v = match msr {
769            X86X_MSR_APIC_BASE => self.apic.apic_base,
770            X2APIC_MSR_BASE..=X2APIC_MSR_END if self.apic.x2apic_enabled() => {
                // The offset from the base of the MSR range is the register
                // index.
771                let register = ApicRegister((msr - X2APIC_MSR_BASE) as u8);
772                if register == ApicRegister::ICR0 {
773                    // ICR is a 64-bit register in X2APIC.
774                    self.apic.icr
775                } else {
776                    self.read_register(register)
777                        .ok_or(MsrError::InvalidAccess)?
778                        .into()
779                }
780            }
781            hvdef::HV_X64_MSR_APIC_FREQUENCY if self.apic.global.hyperv_enlightenments => {
782                TIMER_FREQUENCY
783            }
            // Reads of the EOI MSR are rejected.
784            hvdef::HV_X64_MSR_EOI if self.apic.global.hyperv_enlightenments => {
785                return Err(MsrError::InvalidAccess);
786            }
787            hvdef::HV_X64_MSR_ICR if self.apic.global.hyperv_enlightenments => {
788                if !self.apic.hardware_enabled() {
789                    return Err(MsrError::InvalidAccess);
790                }
791                self.apic.icr
792            }
793            hvdef::HV_X64_MSR_TPR if self.apic.global.hyperv_enlightenments => {
                // Same encoding as the TPR register: CR8 in bits 4 and up.
794                (self.client.cr8() << 4) as u64
795            }
796            _ => return Err(MsrError::Unknown),
797        };
798        Ok(v)
799    }
800
801    /// Writes to the APIC base MSR, an X2APIC MSR, or a Hyper-V enlightenment MSR.
    ///
    /// X2APIC MSRs are only recognized while the APIC is in X2APIC mode, and
    /// the Hyper-V MSRs only when enlightenments were enabled for the set.
    ///
    /// # Errors
    ///
    /// Returns [`MsrError::Unknown`] for an MSR that is not handled here,
    /// and [`MsrError::InvalidAccess`] for a handled MSR that rejects the
    /// write.
802    pub fn msr_write(&mut self, msr: u32, value: u64) -> Result<(), MsrError> {
803        match msr {
804            X86X_MSR_APIC_BASE => {
805                // The APIC may be disabled by this, so we need IRR/ISR local to
806                // be reset.
807                self.ensure_state_local();
808                self.apic.set_apic_base_inner(value).map_err(|err| {
809                    tracelimit::warn_ratelimited!(
810                        error = &err as &dyn std::error::Error,
811                        "invalid apic base write"
812                    );
813                    MsrError::InvalidAccess
814                })?;
                // Keep the client's copy of the MSR in sync with the value
                // now held by the APIC.
815                self.client.set_apic_base(self.apic.apic_base);
816            }
817            X2APIC_MSR_BASE..=X2APIC_MSR_END if self.apic.x2apic_enabled() => {
818                let register = ApicRegister((msr - X2APIC_MSR_BASE) as u8);
819                if register == ApicRegister::ICR0 {
820                    // ICR is a 64-bit register in X2APIC.
821                    self.apic.icr = value & u64::from(ICR_X2APIC_MASK);
822                    self.handle_ipi(Icr::from(self.apic.icr));
                // Only the low 32 bits of `value` reach other registers.
823                } else if !self.write_register(register, value as u32) {
824                    return Err(MsrError::InvalidAccess);
825                }
826            }
            // Writes to the frequency MSR are rejected.
827            hvdef::HV_X64_MSR_APIC_FREQUENCY if self.apic.global.hyperv_enlightenments => {
828                return Err(MsrError::InvalidAccess);
829            }
830            hvdef::HV_X64_MSR_EOI if self.apic.global.hyperv_enlightenments => {
831                if !self.apic.hardware_enabled() {
832                    return Err(MsrError::InvalidAccess);
833                }
834                self.eoi(false);
835            }
836            hvdef::HV_X64_MSR_ICR if self.apic.global.hyperv_enlightenments => {
                // The writable destination bits depend on the current mode.
                // A write while the APIC is in neither mode is rejected.
837                let mask = if self.apic.x2apic_enabled() {
838                    ICR_X2APIC_MASK
839                } else if self.apic.xapic_enabled() {
840                    ICR_XAPIC_MASK
841                } else {
842                    return Err(MsrError::InvalidAccess);
843                };
844                self.apic.icr = value & u64::from(mask);
845                self.handle_ipi(Icr::from(self.apic.icr));
846            }
847            hvdef::HV_X64_MSR_TPR if self.apic.global.hyperv_enlightenments => {
                // Values wider than eight bits are rejected.
848                if value > 0xff {
849                    return Err(MsrError::InvalidAccess);
850                }
851                self.client.set_cr8((value as u32) >> 4);
852            }
853            _ => return Err(MsrError::Unknown),
854        }
855        Ok(())
856    }
857
    /// Reads a 32-bit APIC register.
    ///
    /// Returns `None`, after logging a rate-limited warning, for registers
    /// that are not implemented or cannot be read this way in the current
    /// mode (DFR, ICR0, and ICR1 while in X2APIC mode).
858    fn read_register(&mut self, register: ApicRegister) -> Option<u32> {
859        let value = match register {
860            ApicRegister::ID => self.apic.id_register(),
861            ApicRegister::VERSION => self.apic.version,
            // TPR is CR8 shifted into bits 4 and up.
862            ApicRegister::TPR => self.client.cr8() << 4,
863            ApicRegister::PPR => self.get_ppr(),
864            ApicRegister::LDR => self.apic.ldr_register(),
865            ApicRegister::DFR if !self.apic.x2apic_enabled() => {
                // Only the model is stored, so the register value is rebuilt
                // from it.
866                if self.apic.cluster_mode {
867                    Dfr::CLUSTERED_MODE.0
868                } else {
869                    Dfr::FLAT_MODE.0
870                }
871            }
872            ApicRegister::SVR => self.apic.svr,
873            reg if (ApicRegister::ISR0..=ApicRegister::ISR7).contains(&reg) => {
874                self.ensure_state_local();
875                let index = reg.0 - ApicRegister::ISR0.0;
876                self.apic.isr.to_bits()[index as usize]
877            }
878            reg if (ApicRegister::TMR0..=ApicRegister::TMR7).contains(&reg) => {
879                self.apic.pull_irr();
880                let index = reg.0 - ApicRegister::TMR0.0;
881                self.apic.tmr[index as usize]
882            }
883            reg if (ApicRegister::IRR0..=ApicRegister::IRR7).contains(&reg) => {
884                self.ensure_state_local();
885                self.apic.pull_irr();
886                let index = reg.0 - ApicRegister::IRR0.0;
887                self.apic.irr[index as usize]
888            }
889            ApicRegister::ESR => self.apic.esr,
            // In xAPIC mode the 64-bit ICR is exposed as two 32-bit halves.
890            ApicRegister::ICR0 if !self.apic.x2apic_enabled() => self.apic.icr as u32,
891            ApicRegister::ICR1 if !self.apic.x2apic_enabled() => (self.apic.icr >> 32) as u32,
892            ApicRegister::LVT_TIMER => self.apic.lvt_timer,
893            ApicRegister::LVT_THERMAL => self.apic.lvt_thermal,
894            ApicRegister::LVT_PMC => self.apic.lvt_pmc,
895            ApicRegister::LVT_LINT0 => self.apic.lvt_lint[0],
896            ApicRegister::LVT_LINT1 => self.apic.lvt_lint[1],
897            ApicRegister::LVT_ERROR => self.apic.lvt_error,
898            ApicRegister::TIMER_ICR => self.apic.timer_icr,
899            ApicRegister::TIMER_CCR => {
                // Bring the timer state up to the current time before
                // reporting the count.
900                self.apic.eval_time(self.client.now());
901                self.apic.timer_ccr
902            }
903            ApicRegister::TIMER_DCR => self.apic.timer_dcr,
904            register => {
905                tracelimit::warn_ratelimited!(?register, "unimplemented apic register read");
906                return None;
907            }
908        };
909        Some(value)
910    }
911
    /// Handles a write of `value` to the APIC register `register`.
    ///
    /// Returns `true` if the write was accepted. Returns `false` if it was
    /// rejected: a non-zero write to EOI or ESR while in x2APIC mode, or a
    /// register that has no arm here. LDR, DFR, ICR0 and ICR1 are only
    /// writable while x2APIC is disabled, and SELF_IPI only while it is
    /// enabled; in the other mode they fall through to the rejecting arm.
    fn write_register(&mut self, register: ApicRegister, value: u32) -> bool {
        match register {
            ApicRegister::TPR => {
                // The client tracks the task priority as CR8, i.e. the written
                // value shifted right by four bits.
                self.client.set_cr8(value >> 4);
            }
            ApicRegister::EOI => {
                // In x2APIC mode only a write of zero is accepted.
                if self.apic.x2apic_enabled() && value != 0 {
                    return false;
                }
                self.eoi(false);
            }
            ApicRegister::LDR if !self.apic.x2apic_enabled() => {
                // Only the top byte of the LDR is retained.
                self.apic.ldr = value & 0xff000000;
                self.apic.update_slot();
            }
            ApicRegister::DFR if !self.apic.x2apic_enabled() => {
                // The DFR is not stored verbatim; only the cluster-vs-flat
                // selection derived from it is kept.
                self.apic.cluster_mode = cluster_mode(value);
                self.apic.update_slot();
            }
            ApicRegister::SVR => {
                // The APIC may be disabled by this, so we need to reevaluate
                // offloading.
                self.ensure_state_local();
                // Accumulate any requested interrupts before changing the
                // enable state. `pull_irr` holds them in the IRR regardless of
                // the enable state, so they survive a software enable/disable.
                self.apic.pull_irr();
                // Only the vector and enable fields are kept.
                self.apic.svr = value & u32::from(Svr::new().with_vector(0xff).with_enable(true));
                if !self.apic.software_enabled() {
                    // Mask all the LVTs.
                    for lvt in [
                        &mut self.apic.lvt_timer,
                        &mut self.apic.lvt_thermal,
                        &mut self.apic.lvt_pmc,
                        &mut self.apic.lvt_error,
                    ]
                    .into_iter()
                    .chain(&mut self.apic.lvt_lint)
                    {
                        *lvt = Lvt::from(*lvt).with_masked(true).into();
                    }
                }
                self.apic.update_slot();
            }
            ApicRegister::ESR => {
                // In x2APIC mode only a write of zero is accepted.
                if self.apic.x2apic_enabled() && value != 0 {
                    return false;
                }
                // This would copy and zero the hidden error register, but we
                // never set that to a non-zero value and don't include it in
                // the saved-state format.
                self.apic.esr = 0;
            }
            ApicRegister::ICR0 if !self.apic.x2apic_enabled() => {
                // Replace the low half of the ICR, keep the high half, and
                // drop any bits outside the xAPIC ICR mask.
                self.apic.icr = (value as u64 | (self.apic.icr & 0xffffffff_00000000))
                    & u64::from(ICR_XAPIC_MASK);

                // Writing the low half is what sends the IPI.
                self.handle_ipi(self.apic.icr.into());
            }
            ApicRegister::ICR1 if !self.apic.x2apic_enabled() => {
                // Replace the high half of the ICR and keep the low half. No
                // IPI is sent by this write.
                self.apic.icr = (((value as u64) << 32) | self.apic.icr & 0xffffffff)
                    & u64::from(ICR_XAPIC_MASK);
            }
            // For each LVT write below, the value is reduced to the fields
            // allowed for that entry and then passed through `effective_lvt`,
            // which forces the mask bit while the APIC is software disabled.
            ApicRegister::LVT_TIMER => {
                self.apic.lvt_timer = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_masked(true)
                                .with_timer_mode(1), // no TSC deadline support
                        ),
                );
            }
            ApicRegister::LVT_THERMAL => {
                self.apic.lvt_thermal = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
            }
            ApicRegister::LVT_PMC => {
                self.apic.lvt_pmc = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
            }
            reg @ (ApicRegister::LVT_LINT0 | ApicRegister::LVT_LINT1) => {
                let index = if reg == ApicRegister::LVT_LINT0 { 0 } else { 1 };
                self.apic.lvt_lint[index] = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_input_pin_polarity(true)
                                .with_trigger_mode_level(true)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
                // Unlike the other LVT entries, a LINT change is followed by a
                // slot update.
                self.apic.update_slot();
            }
            ApicRegister::LVT_ERROR => {
                self.apic.lvt_error = self.apic.effective_lvt(
                    value & u32::from(Lvt::new().with_vector(0xff).with_masked(true)),
                );
            }
            ApicRegister::TIMER_ICR => {
                // Writing the initial count restarts the timer: the current
                // count is reloaded and the reference time is reset to now.
                let now = self.client.now();
                self.apic.timer_icr = value;
                self.apic.timer_ccr = value;
                self.apic.last_time = now;
                self.apic.update_timeout(now);
            }
            ApicRegister::TIMER_DCR => {
                // Account for the time elapsed under the old divider before
                // switching to the new one, then recompute the timeout.
                let now = self.client.now();
                self.apic.eval_time(now);
                self.apic.timer_dcr =
                    value & u32::from(Dcr::new().with_value_low(0b11).with_value_high(0b1));
                self.apic.update_timeout(now);
            }
            ApicRegister::SELF_IPI if self.apic.x2apic_enabled() => {
                // Request a fixed, edge-triggered interrupt on this APIC using
                // the low byte of the written value as the vector.
                self.apic.stats.self_ipi.increment();
                self.apic.scan_irr |= self.apic.shared.request_interrupt(
                    self.apic.software_enabled(),
                    DeliveryMode::FIXED,
                    value as u8,
                    false,
                    false,
                );
            }
            register => {
                tracelimit::warn_ratelimited!(?register, "unimplemented apic register write");
                return false;
            }
        }
        true
    }
1059
1060    /// Computes and returns the current effective PPR value.
1061    pub fn get_ppr(&mut self) -> u32 {
1062        self.ensure_state_local();
1063        let task_pri = self.client.cr8();
1064        let isr_pri = priority(self.apic.isr.top().unwrap_or(0));
1065        task_pri.max(isr_pri.into()) << 4
1066    }
1067
1068    fn ensure_state_local(&mut self) {
1069        if self.apic.is_offloaded {
1070            let (irr, isr) = self.client.pull_offload();
1071            self.apic.accumulate_from_offload(&irr, &isr);
1072            self.apic.stats.offload_pull.increment();
1073
1074            // Make sure that we commit any bits we read from the offloaded apic
1075            // before we dispatch back to the vp, since pull offload clears
1076            // corresponding bits in the offloaded apic state. Otherwise, we
1077            // could commit state bits which would result in the guest being in
1078            // a broken state.
1079            self.apic.needs_offload_reeval = true;
1080        }
1081    }
1082
1083    fn handle_ipi(&mut self, icr: Icr) {
1084        let delivery_mode = DeliveryMode(icr.delivery_mode());
1085        match delivery_mode {
1086            DeliveryMode::FIXED => {}
1087            DeliveryMode::LOWEST_PRIORITY => {
1088                if self.apic.x2apic_enabled() {
1089                    // Don't allow lowest priority IPIs via x2apic.
1090                    return;
1091                }
1092            }
1093            DeliveryMode::NMI => {}
1094            DeliveryMode::INIT => {
1095                // Ignore INIT level deasserts here.
1096                if !icr.level_assert() {
1097                    return;
1098                }
1099            }
1100            DeliveryMode::SIPI => {}
1101            DeliveryMode::EXTINT => {
1102                // Not allowed as an IPI.
1103                return;
1104            }
1105            _ => return,
1106        }
1107
1108        match DestinationShorthand(icr.destination_shorthand()) {
1109            DestinationShorthand::NONE => {
1110                let destination = Destination::from_icr(icr, self.apic.x2apic_enabled());
1111                match destination {
1112                    Destination::Physical(_) | Destination::Logical(_) => {
1113                        self.apic.stats.other_ipi.increment()
1114                    }
1115                    Destination::Broadcast | Destination::AllExcept(_) => {
1116                        self.apic.stats.broadcast_ipi.increment()
1117                    }
1118                }
1119                self.apic.global.request_interrupt(
1120                    destination,
1121                    delivery_mode,
1122                    icr.vector(),
1123                    false,
1124                    |vp| self.client.wake(vp),
1125                );
1126            }
1127            DestinationShorthand::SELF => {
1128                self.apic.stats.self_ipi.increment();
1129                self.apic.scan_irr |= self.apic.shared.request_interrupt(
1130                    self.apic.software_enabled(),
1131                    delivery_mode,
1132                    icr.vector(),
1133                    icr.trigger_mode_level(),
1134                    false,
1135                );
1136            }
1137            DestinationShorthand::ALL_INCLUDING_SELF => {
1138                self.apic.stats.broadcast_ipi.increment();
1139                self.apic.global.request_interrupt(
1140                    Destination::Broadcast,
1141                    delivery_mode,
1142                    icr.vector(),
1143                    false,
1144                    |vp| self.client.wake(vp),
1145                );
1146            }
1147            DestinationShorthand::ALL_EXCLUDING_SELF => {
1148                self.apic.stats.broadcast_ipi.increment();
1149                self.apic.global.request_interrupt(
1150                    Destination::AllExcept(self.apic.id),
1151                    delivery_mode,
1152                    icr.vector(),
1153                    false,
1154                    |vp| self.client.wake(vp),
1155                );
1156            }
1157            _ => unreachable!(),
1158        }
1159    }
1160}
1161
1162impl SharedState {
1163    /// Returns true if the VP should be woken up to scan the APIC.
1164    #[must_use]
1165    fn request_interrupt(
1166        &self,
1167        software_enabled: bool,
1168        delivery_mode: DeliveryMode,
1169        vector: u8,
1170        level_triggered: bool,
1171        auto_eoi: bool,
1172    ) -> bool {
1173        match delivery_mode {
1174            DeliveryMode::FIXED | DeliveryMode::LOWEST_PRIORITY => {
1175                if !software_enabled || !(16..=255).contains(&vector) {
1176                    return false;
1177                }
1178                let (bank, mask) = bank_mask(vector);
1179                if (self.tmr[bank].load(Ordering::Relaxed) & mask != 0) != level_triggered {
1180                    if level_triggered {
1181                        self.tmr[bank].fetch_or(mask, Ordering::Relaxed);
1182                    } else {
1183                        self.tmr[bank].fetch_and(!mask, Ordering::Relaxed);
1184                    }
1185                }
1186                if (self.auto_eoi[bank].load(Ordering::Relaxed) & mask != 0) != auto_eoi {
1187                    if auto_eoi {
1188                        self.auto_eoi[bank].fetch_or(mask, Ordering::Relaxed);
1189                    } else {
1190                        self.auto_eoi[bank].fetch_and(!mask, Ordering::Relaxed);
1191                    }
1192                }
1193                if self.new_irr[bank].fetch_or(mask, Ordering::Release) & mask == 0 {
1194                    return true;
1195                }
1196                false
1197            }
1198            DeliveryMode::NMI => {
1199                let old = self
1200                    .work
1201                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
1202                        Some(WorkFlags::from(w).with_nmi(true).into())
1203                    })
1204                    .unwrap();
1205                old == 0
1206            }
1207            DeliveryMode::INIT => {
1208                let old = self
1209                    .work
1210                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
1211                        Some(WorkFlags::from(w).with_init(true).into())
1212                    })
1213                    .unwrap();
1214                old == 0
1215            }
1216            DeliveryMode::SIPI => {
1217                let old = self
1218                    .work
1219                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
1220                        Some(
1221                            WorkFlags::from(w)
1222                                .with_sipi(true)
1223                                .with_sipi_vector(vector)
1224                                .into(),
1225                        )
1226                    })
1227                    .unwrap();
1228                old == 0
1229            }
1230            DeliveryMode::EXTINT => {
1231                let old = self
1232                    .work
1233                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
1234                        Some(WorkFlags::from(w).with_extint(true).into())
1235                    })
1236                    .unwrap();
1237                old == 0
1238            }
1239            _ => false,
1240        }
1241    }
1242}
1243
1244impl MutableGlobalState {
1245    fn request_broadcast_interrupt(
1246        &self,
1247        delivery_mode: DeliveryMode,
1248        vector: u8,
1249        level_triggered: bool,
1250        wake: impl FnMut(VpIndex),
1251    ) {
1252        self.request_set_interrupt(delivery_mode, vector, level_triggered, wake, |_, _| true);
1253    }
1254
1255    fn request_set_interrupt(
1256        &self,
1257        delivery_mode: DeliveryMode,
1258        vector: u8,
1259        level_triggered: bool,
1260        mut wake: impl FnMut(VpIndex),
1261        mut filter: impl FnMut(u32, &ApicSlot) -> bool,
1262    ) {
1263        let lowest_priority = delivery_mode == DeliveryMode::LOWEST_PRIORITY;
1264        for (apic_id, slot) in self.by_apic_id.iter().enumerate() {
1265            if !filter(apic_id as u32, slot) {
1266                continue;
1267            }
1268            // For now, just pick the first enabled APIC in the set for lowest priority.
1269            if !lowest_priority || slot.software_enabled {
1270                slot.request_interrupt(delivery_mode, vector, level_triggered, false, &mut wake);
1271                if lowest_priority {
1272                    break;
1273                }
1274            }
1275        }
1276    }
1277}
1278
1279impl ApicSlot {
1280    fn request_interrupt(
1281        &self,
1282        delivery_mode: DeliveryMode,
1283        vector: u8,
1284        level_triggered: bool,
1285        auto_eoi: bool,
1286        wake: impl FnOnce(VpIndex),
1287    ) {
1288        if let Some(shared) = &self.shared {
1289            if self.hardware_enabled
1290                && shared.request_interrupt(
1291                    self.software_enabled,
1292                    delivery_mode,
1293                    vector,
1294                    level_triggered,
1295                    auto_eoi,
1296                )
1297            {
1298                wake(shared.vp_index);
1299            }
1300        }
1301    }
1302}
1303
/// Work to do as a result of [`LocalApic::scan`] or [`LocalApic::flush`].
///
/// The default value (all flags clear, no vectors) means there is nothing to
/// do.
#[derive(Debug, Default)]
pub struct ApicWork {
    /// An INIT interrupt was requested.
    ///
    /// Reset register state (including APIC state) as documented in the Intel
    /// manual.
    pub init: bool,
    /// A SIPI interrupt was requested with the given vector.
    ///
    /// Update the cs and rip to the appropriate values and clear the
    /// wait-for-SIPI state.
    pub sipi: Option<u8>,
    /// An extint interrupt was requested.
    ///
    /// When the processor is ready for extint injection, query the PIC for the
    /// vector and inject the interrupt.
    pub extint: bool,
    /// An NMI was requested.
    pub nmi: bool,
    /// A fixed interrupt was requested.
    ///
    /// Call [`LocalApic::acknowledge_interrupt`] after it has been injected.
    /// This is only filled in by [`LocalApic::scan`], and only while the APIC
    /// is not offloaded.
    pub interrupt: Option<u8>,
}
1329
/// An error writing the APIC base MSR.
#[derive(Debug, Error)]
pub enum InvalidApicBase {
    /// Reserved bits set.
    ///
    /// The written value has bits set outside of the enable, x2apic, base
    /// page, and BSP fields.
    #[error("reserved bits set")]
    ReservedBits,
    /// Invalid x2apic state.
    ///
    /// x2apic was requested without the enable bit, or the APIC is not x2apic
    /// capable.
    #[error("invalid x2apic state")]
    InvalidX2Apic,
    /// Can't disable x2apic without reset.
    ///
    /// x2apic is currently enabled and the write keeps the APIC enabled while
    /// clearing the x2apic bit.
    #[error("can't disable x2apic without reset")]
    CantDisableX2Apic,
}
1343
/// APIC offload is not supported with the current request state, likely due to
/// auto EOI. The caller must disable offloads and scan the APIC again.
///
/// This implements `Debug` so that a `Result` carrying it can be unwrapped,
/// expected, or debug-formatted by callers.
#[derive(Debug)]
pub struct OffloadNotSupported;
1347
1348impl LocalApic {
1349    /// Returns an object to access APIC registers.
1350    pub fn access<'a, T: ApicClient>(&'a mut self, client: &'a mut T) -> LocalApicAccess<'a, T> {
1351        LocalApicAccess { apic: self, client }
1352    }
1353
1354    /// Fast path for updating IRR on the local processor.
1355    pub fn request_fixed_interrupts(&mut self, mut irr: [u32; 8]) {
1356        if self.hardware_enabled() && self.software_enabled() {
1357            // Don't allow setting invalid bits.
1358            irr[0] &= !0xffff;
1359            for (bank, &irr) in irr.iter().enumerate() {
1360                self.irr[bank] |= irr;
1361                self.tmr[bank] &= !irr;
1362                self.auto_eoi[bank] &= !irr;
1363            }
1364            self.needs_offload_reeval = true;
1365            self.recompute_next_irr();
1366        }
1367    }
1368
1369    /// Gets the APIC base MSR.
1370    pub fn apic_base(&self) -> u64 {
1371        self.apic_base
1372    }
1373
1374    /// Gets the APIC base address, if the APIC is enabled and in xapic mode.
1375    pub fn base_address(&self) -> Option<u64> {
1376        self.base_address
1377    }
1378
1379    /// Sets the APIC base MSR.
1380    ///
1381    /// Returns false if the value is invalid.
1382    pub fn set_apic_base(&mut self, apic_base: u64) -> Result<(), InvalidApicBase> {
1383        assert!(
1384            !self.is_offloaded,
1385            "failed to onload before setting the APIC base"
1386        );
1387        self.set_apic_base_inner(apic_base)
1388    }
1389
1390    /// The caller must ensure that the offloaded APIC state is local.
1391    fn set_apic_base_inner(&mut self, apic_base: u64) -> Result<(), InvalidApicBase> {
1392        let current = ApicBase::from(self.apic_base);
1393
1394        let requested = ApicBase::from(apic_base);
1395        let allowed = ApicBase::new()
1396            .with_enable(true)
1397            .with_x2apic(true)
1398            .with_base_page(0xffffff)
1399            .with_bsp(true);
1400        if u64::from(requested) & !u64::from(allowed) != 0 {
1401            return Err(InvalidApicBase::ReservedBits);
1402        }
1403
1404        // Ignore writes to the BSP bit.
1405        let new = requested.with_bsp(current.bsp());
1406
1407        tracing::debug!(
1408            ?current,
1409            ?new,
1410            apic_base,
1411            vp = self.shared.vp_index.index(),
1412            "update apic base"
1413        );
1414
1415        if new.x2apic() && (!new.enable() || !self.global.x2apic_capable) {
1416            // Invalid x2apic state.
1417            return Err(InvalidApicBase::InvalidX2Apic);
1418        }
1419
1420        if current.x2apic() && new.enable() && !new.x2apic() {
1421            // Can't disable x2apic once it is enabled without going through a
1422            // reset or disable.
1423            return Err(InvalidApicBase::CantDisableX2Apic);
1424        }
1425
1426        if current.enable() && !new.enable() {
1427            self.reset_registers();
1428        }
1429
1430        self.apic_base = new.into();
1431        self.update_slot();
1432        Ok(())
1433    }
1434
1435    fn hardware_enabled(&self) -> bool {
1436        ApicBase::from(self.apic_base).enable()
1437    }
1438
1439    fn xapic_enabled(&self) -> bool {
1440        self.hardware_enabled() && !self.x2apic_enabled()
1441    }
1442
1443    /// X2APIC is enabled on this local APIC
1444    pub fn x2apic_enabled(&self) -> bool {
1445        ApicBase::from(self.apic_base).x2apic()
1446    }
1447
1448    fn software_enabled(&self) -> bool {
1449        Svr::from(self.svr).enable()
1450    }
1451
1452    /// Sets the masked bit in an LVT if the APIC is software disabled.
1453    fn effective_lvt(&self, lvt: u32) -> u32 {
1454        let mut lvt = Lvt::from(lvt);
1455        if !self.software_enabled() {
1456            lvt.set_masked(true);
1457        }
1458        lvt.into()
1459    }
1460
1461    /// Scans for pending interrupts.
1462    pub fn scan(&mut self, vmtime: &mut VmTimeAccess, scan_irr: bool) -> ApicWork {
1463        if !self.hardware_enabled() {
1464            return Default::default();
1465        }
1466
1467        if let Some(next) = self.next_timeout {
1468            let now = vmtime.now();
1469            if now.is_after(next) {
1470                self.eval_time(now);
1471                self.update_timeout(now);
1472            }
1473            if let Some(next) = self.next_timeout {
1474                vmtime.set_timeout_if_before(next);
1475            }
1476        }
1477
1478        let mut r = self.flush();
1479        if scan_irr || self.scan_irr {
1480            self.pull_irr();
1481        }
1482        if !self.is_offloaded {
1483            r.interrupt = self.next_irr();
1484        }
1485
1486        r
1487    }
1488
1489    /// Returns the next pending interrupt vector, if any.
1490    pub fn next_irr(&self) -> Option<u8> {
1491        if !self.software_enabled() {
1492            return None;
1493        }
1494        let vector = self.next_irr?;
1495        let pri = priority(vector);
1496        if self.isr.top().map_or(0, priority) < pri {
1497            Some(vector)
1498        } else {
1499            None
1500        }
1501    }
1502
1503    /// Handles APIC offload, calling `update` with new bits in IRR, ISR, and
1504    /// the current value of TMR.
1505    ///
1506    /// `update` should accumulate IRR and ISR into the offload APIC page and
1507    /// update the EOI exit bitmap if TMR has changed since the last call.
1508    ///
1509    /// `update` will not be called if there are no changes (i.e. if IRR and ISR
1510    /// are both zero).
1511    pub fn push_to_offload(
1512        &mut self,
1513        update: impl FnOnce(&[u32; 8], &[u32; 8], &[u32; 8]),
1514    ) -> Result<(), OffloadNotSupported> {
1515        if self.needs_offload_reeval && self.is_offloaded && self.software_enabled() {
1516            if self.active_auto_eoi {
1517                return Err(OffloadNotSupported);
1518            }
1519            update(&self.irr, &self.isr.to_bits(), &self.tmr);
1520            self.irr = [0; 8];
1521            self.isr.clear();
1522            self.stats.offload_push.increment();
1523            self.needs_offload_reeval = false;
1524        }
1525        Ok(())
1526    }
1527
1528    /// Returns whether APIC offload is enabled.
1529    pub fn is_offloaded(&self) -> bool {
1530        self.is_offloaded
1531    }
1532
1533    /// Returns true if it is safe to set an IRR bit directly in offloaded APIC
1534    /// state.
1535    pub fn can_offload_irr(&self) -> bool {
1536        self.is_offloaded && self.software_enabled()
1537    }
1538
1539    /// Enables APIC offload.
1540    pub fn enable_offload(&mut self) {
1541        self.is_offloaded = true;
1542        self.needs_offload_reeval = true;
1543    }
1544
1545    /// Disables APIC offload, accumulating IRR and ISR from the offload APIC
1546    /// page.
1547    pub fn disable_offload(&mut self, irr: &[u32; 8], isr: &[u32; 8]) {
1548        self.accumulate_from_offload(irr, isr);
1549        self.is_offloaded = false;
1550    }
1551
1552    fn accumulate_from_offload(&mut self, irr: &[u32; 8], isr: &[u32; 8]) {
1553        let mut local_isr = self.isr.to_bits();
1554
1555        // TODO: We probably should instead not touch ISR at all unless we are
1556        // about to disable offload. Refactor this later.
1557        assert!(self.is_offloaded);
1558
1559        for (((local_irr, &remote_irr), local_isr), &remote_isr) in
1560            self.irr.iter_mut().zip(irr).zip(&mut local_isr).zip(isr)
1561        {
1562            *local_irr |= remote_irr;
1563            *local_isr |= remote_isr;
1564        }
1565        self.isr.load_from_bits(local_isr);
1566        self.recompute_next_irr();
1567        self.needs_offload_reeval = true;
1568    }
1569
1570    /// Flushes work as in [`Self::scan`], but does not poll timers or IRR.
1571    ///
1572    /// This must be called before [`Self::save`] to flush hidden state to
1573    /// registers.
1574    pub fn flush(&mut self) -> ApicWork {
1575        if self.shared.work.load(Ordering::Relaxed) == 0 {
1576            return Default::default();
1577        }
1578
1579        let mut r = ApicWork::default();
1580        let work = WorkFlags::from(self.shared.work.swap(0, Ordering::SeqCst));
1581        if work.init() {
1582            self.stats.init.increment();
1583            r.init = true;
1584        }
1585        if work.sipi() {
1586            self.stats.sipi.increment();
1587            r.sipi = Some(work.sipi_vector());
1588        }
1589        if work.nmi() {
1590            self.stats.nmi.increment();
1591            r.nmi = true;
1592        }
1593        if work.extint() {
1594            self.stats.extint.increment();
1595            r.extint = true;
1596        }
1597
1598        r
1599    }
1600
1601    /// Acknowledges the interrupt returned by `scan`.
1602    pub fn acknowledge_interrupt(&mut self, vector: u8) {
1603        assert!(!self.is_offloaded);
1604        assert_eq!(Some(vector), self.next_irr);
1605        let (bank, mask) = bank_mask(vector);
1606        self.irr[bank] &= !mask;
1607        self.recompute_next_irr();
1608        if self.auto_eoi[bank] & mask == 0 {
1609            self.isr.push(vector);
1610        }
1611        self.stats.interrupt.increment();
1612    }
1613
1614    /// Returns whether an EOI is pending that can be completed lazily, without
1615    /// intercepting the VP.
1616    pub fn is_lazy_eoi_pending(&self) -> bool {
1617        if self.is_offloaded {
1618            return false;
1619        }
1620        let eoi_vector = if let Some(next_irr) = self.next_irr {
1621            // There is at least one pending interrupt. Allow lazy EOI only if
1622            // there are no in-service interrupts:
1623            //
1624            // 1. If there are any in-service interrupts with a higher priority,
1625            //    then we need an EOI intercept to know when to inject the
1626            //    pending interrupt.
1627            //
1628            // 2. If there are any in-service interrupts with a lower priority,
1629            //    then the pending interrupt is pending injection, so it would
1630            //    be ambiguous whether the lazy EOI was for the in-service
1631            //    interrupt or the pending interrupt.
1632            if !self.isr.is_empty() {
1633                return false;
1634            }
1635
1636            // Only allow lazy EOI if next_irr is the only irr
1637            let (bank, mask) = bank_mask(next_irr);
1638            let mut expected = [0; 8];
1639            expected[bank] = mask;
1640            if !expected.iter().eq(self.irr.iter()) {
1641                return false;
1642            }
1643
1644            next_irr
1645        } else if let Some(vector) = self.isr.top() {
1646            // There are no pending interrupts. Allow lazy EOI for the top
1647            // in-service interrupt.
1648            vector
1649        } else {
1650            return false;
1651        };
1652
1653        // Only allow lazy EOI if the interrupt is edge-triggered. Otherwise, we
1654        // need an intercept to check whether to reassert the interrupt.
1655        let (bank, mask) = bank_mask(eoi_vector);
1656        self.tmr[bank] & mask == 0
1657    }
1658
1659    fn eval_time(&mut self, now: VmTime) {
1660        if self.timer_ccr == 0 {
1661            return;
1662        }
1663
1664        let shift = dcr_divider_shift(Dcr::from(self.timer_dcr));
1665
1666        let raw_nanos = now.checked_sub(self.last_time).unwrap().as_nanos() as u64;
1667        let counts = (raw_nanos / NANOS_PER_TICK) >> shift;
1668
1669        let lvt = Lvt::from(self.lvt_timer);
1670        if counts >= self.timer_ccr as u64 {
1671            if !lvt.masked() {
1672                self.scan_irr |= self.shared.request_interrupt(
1673                    self.software_enabled(),
1674                    DeliveryMode::FIXED,
1675                    lvt.vector(),
1676                    false,
1677                    false,
1678                );
1679            }
1680
1681            if TimerMode(lvt.timer_mode()) == TimerMode::ONE_SHOT {
1682                self.timer_ccr = 0;
1683                // Don't bother to update the last eval time.
1684                return;
1685            } else {
1686                let remaining = counts - self.timer_ccr as u64;
1687                // Avoid the divide in the common case.
1688                if remaining < self.timer_icr as u64 {
1689                    self.timer_ccr = self.timer_icr - remaining as u32;
1690                } else {
1691                    self.timer_ccr = self.timer_icr - (remaining % self.timer_icr as u64) as u32;
1692                }
1693            }
1694        } else {
1695            self.timer_ccr -= counts as u32;
1696        }
1697
1698        let elapsed_nanos = (counts << shift) * NANOS_PER_TICK;
1699
1700        self.last_time = self
1701            .last_time
1702            .wrapping_add(Duration::from_nanos(elapsed_nanos));
1703    }
1704
1705    fn update_timeout(&mut self, now: VmTime) {
1706        self.next_timeout = (self.timer_ccr != 0).then(|| {
1707            let counts = self.timer_ccr;
1708            let ticks = (counts as u64) << dcr_divider_shift(Dcr::from(self.timer_dcr));
1709            now.wrapping_add(Duration::from_nanos(ticks * NANOS_PER_TICK))
1710        });
1711    }
1712
1713    /// Resets the APIC state.
1714    pub fn reset(&mut self) {
1715        assert!(!self.is_offloaded);
1716
1717        self.apic_base = ApicBase::new()
1718            .with_base_page(APIC_BASE_PAGE)
1719            .with_bsp(self.shared.vp_index.is_bsp())
1720            .with_enable(true)
1721            .into();
1722
1723        self.reset_registers();
1724        // Drop any pending requests, including interrupts that were staged in
1725        // `new_irr` but not yet pulled into the local register state.
1726        self.shared.work.store(0, Ordering::Relaxed);
1727        for new_irr in &self.shared.new_irr {
1728            new_irr.store(0, Ordering::Relaxed);
1729        }
1730    }
1731
1732    fn reset_registers(&mut self) {
1733        let Self {
1734            shared,
1735            global: _,
1736            apic_base: _,
1737            base_address: _,
1738            id: _,
1739            version: _,
1740            ldr,
1741            cluster_mode,
1742            svr,
1743            isr,
1744            next_irr,
1745            irr,
1746            tmr,
1747            auto_eoi,
1748            esr,
1749            icr,
1750            lvt_timer,
1751            lvt_thermal,
1752            lvt_pmc,
1753            lvt_lint,
1754            lvt_error,
1755            timer_icr,
1756            timer_ccr,
1757            last_time: _,
1758            next_timeout,
1759            timer_dcr,
1760            active_auto_eoi,
1761            needs_offload_reeval,
1762            scan_irr,
1763            is_offloaded: _,
1764            stats: _,
1765        } = self;
1766
1767        *ldr = 0;
1768        *cluster_mode = false;
1769        *svr = u32::from(Svr::from(0xff).with_enable(shared.software_enabled_on_reset));
1770        isr.clear();
1771        *esr = 0;
1772        *icr = 0;
1773        *next_irr = None;
1774        // Leave any interrupts staged in `shared.new_irr` in place. On a
1775        // transient hardware disable they are held: the next `pull_irr`
1776        // accumulates them into the IRR, and they become deliverable once the
1777        // APIC is software-enabled. A full `reset` clears them separately.
1778        *irr = [0; 8];
1779        *needs_offload_reeval = false;
1780        *scan_irr = false;
1781        *tmr = [0; 8];
1782        *auto_eoi = [0; 8];
1783        *active_auto_eoi = false;
1784        for lvt in [lvt_timer, lvt_thermal, lvt_pmc, lvt_error]
1785            .into_iter()
1786            .chain(lvt_lint)
1787        {
1788            *lvt = Lvt::new().with_masked(true).into();
1789        }
1790        *timer_icr = 0;
1791        *timer_ccr = 0;
1792        *timer_dcr = 0;
1793        *next_timeout = None;
1794        self.update_slot();
1795    }
1796
1797    fn update_slot(&mut self) {
1798        // Cache the base address, since `base_address()` is called in the
1799        // instruction emulator hot path.
1800        self.base_address = self
1801            .xapic_enabled()
1802            .then(|| (ApicBase::from(self.apic_base).base_page() as u64) << 12);
1803
1804        let mut mutable = self.global.mutable.write();
1805        let mutable = &mut *mutable;
1806        let slot = &mut mutable.by_apic_id[self.id as usize];
1807        slot.lint = self.lvt_lint.map(Lvt::from);
1808        slot.logical_id = (self.ldr >> 24) as u8;
1809        slot.hardware_enabled = self.hardware_enabled();
1810        slot.software_enabled = self.software_enabled();
1811
1812        mutable.x2apic_enabled -= slot.x2apic_enabled as usize;
1813        let apic_base = ApicBase::from(self.apic_base);
1814        slot.x2apic_enabled = apic_base.enable() && apic_base.x2apic();
1815        mutable.x2apic_enabled += slot.x2apic_enabled as usize;
1816
1817        mutable.logical_cluster_mode -= slot.cluster_mode as usize;
1818        slot.cluster_mode = self.cluster_mode;
1819        mutable.logical_cluster_mode += slot.cluster_mode as usize;
1820    }
1821
1822    /// Returns the APIC register state.
1823    pub fn save(&mut self) -> virt::x86::vp::Apic {
1824        assert!(!self.is_offloaded, "failed to disable offload before save");
1825
1826        // Ensure any pending interrupt requests have been pulled into the local
1827        // state.
1828        self.pull_irr();
1829
1830        let registers = ApicRegisters {
1831            reserved_0: [0; 2],
1832            id: self.id_register(),
1833            version: self.version,
1834            reserved_4: [0; 4],
1835            tpr: 0, // TODO
1836            apr: 0,
1837            ppr: 0,
1838            eoi: 0,
1839            rrd: 0,
1840            ldr: self.ldr_register(),
1841            dfr: if self.x2apic_enabled() {
1842                0
1843            } else if self.cluster_mode {
1844                Dfr::CLUSTERED_MODE.0
1845            } else {
1846                Dfr::FLAT_MODE.0
1847            },
1848            svr: self.svr,
1849            isr: self.isr.to_bits(),
1850            tmr: self.tmr,
1851            irr: self.irr,
1852            esr: self.esr,
1853            reserved_29: [0; 6],
1854            lvt_cmci: 0,
1855            icr: [self.icr as u32, (self.icr >> 32) as u32],
1856            lvt_timer: self.lvt_timer,
1857            lvt_thermal: self.lvt_thermal,
1858            lvt_pmc: self.lvt_pmc,
1859            lvt_lint0: self.lvt_lint[0],
1860            lvt_lint1: self.lvt_lint[1],
1861            lvt_error: self.lvt_error,
1862            timer_icr: self.timer_icr,
1863            timer_ccr: 0,
1864            reserved_3a: [0; 4],
1865            timer_dcr: self.timer_dcr,
1866            reserved_3f: 0,
1867        };
1868        virt::x86::vp::Apic::new(self.apic_base.into(), registers, self.auto_eoi)
1869    }
1870
    /// Restores the APIC register state.
    ///
    /// The saved APIC base is applied first. If that leaves the APIC
    /// hardware-disabled, the saved registers are ignored and the function
    /// returns successfully without touching the register state.
    ///
    /// # Errors
    ///
    /// Propagates the error from `set_apic_base_inner` when the saved APIC
    /// base is rejected; none of the saved register values have been applied
    /// at that point.
    ///
    /// # Panics
    ///
    /// Panics if the APIC is currently offloaded.
    pub fn restore(&mut self, state: &virt::x86::vp::Apic) -> Result<(), InvalidApicBase> {
        assert!(!self.is_offloaded);

        // `registers` is skipped here and read through `state.registers()`
        // further down.
        let virt::x86::vp::Apic {
            apic_base,
            registers: _,
            auto_eoi,
        } = state;

        self.set_apic_base_inner(*apic_base)?;

        // No register modifications allowed if the APIC is disabled.
        if !self.hardware_enabled() {
            return Ok(());
        }

        // Exhaustive destructuring: every saved register is either consumed
        // below or explicitly ignored with `_`.
        let ApicRegisters {
            reserved_0: _,
            id,
            version,
            reserved_4: _,
            tpr: _,
            apr: _,
            ppr: _,
            eoi: _,
            rrd: _,
            ldr,
            dfr,
            svr,
            isr,
            tmr,
            irr,
            esr,
            reserved_29: _,
            lvt_cmci: _,
            icr,
            lvt_timer,
            lvt_thermal,
            lvt_pmc,
            lvt_lint0,
            lvt_lint1,
            lvt_error,
            timer_icr,
            timer_ccr: _,
            reserved_3a: _,
            timer_dcr,
            reserved_3f: _,
        } = *state.registers();

        // Outside x2APIC mode `id_register` reports the ID shifted into the
        // top byte, so undo that shift to recover the stored ID.
        self.id = if self.x2apic_enabled() { id } else { id >> 24 };
        self.version = version;
        // In x2APIC mode `ldr_register` derives the LDR from the ID, so the
        // saved value is not stored. Otherwise only the top byte is kept.
        if !self.x2apic_enabled() {
            self.ldr = ldr & 0xff000000;
        }
        self.cluster_mode = cluster_mode(dfr);
        self.svr = svr;
        // Load the pending-interrupt state, then recompute the cached highest
        // pending vector from the restored IRR.
        self.irr = irr;
        self.tmr = tmr;
        self.auto_eoi = *auto_eoi;
        self.recompute_next_irr();
        self.isr.clear();
        self.isr.load_from_bits(isr);
        self.esr = esr;
        // The 64-bit ICR is saved as two 32-bit halves, low half first.
        self.icr = icr[0] as u64 | ((icr[1] as u64) << 32);
        // NOTE(review): the LVT values pass through `effective_lvt`, which
        // presumably depends on state restored above (e.g. the SVR) — confirm
        // before reordering these assignments.
        self.lvt_timer = self.effective_lvt(lvt_timer);
        self.lvt_thermal = self.effective_lvt(lvt_thermal);
        self.lvt_pmc = self.effective_lvt(lvt_pmc);
        self.lvt_lint = [self.effective_lvt(lvt_lint0), self.effective_lvt(lvt_lint1)];
        self.lvt_error = self.effective_lvt(lvt_error);
        // The saved current count is ignored (and `save` always reports it as
        // zero), so `timer_ccr` and `next_timeout` keep their existing values.
        // NOTE(review): confirm whether a running timer is expected to survive
        // a save/restore cycle.
        self.timer_icr = timer_icr;
        self.timer_dcr = timer_dcr;
        // Publish the restored state to the shared slot and request that
        // offload state be re-evaluated.
        self.update_slot();
        self.needs_offload_reeval = true;
        Ok(())
    }
1947
1948    fn recompute_next_irr(&mut self) {
1949        for (i, &v) in self.irr.iter().enumerate().rev() {
1950            if v != 0 {
1951                let vector = (i as u32) * 32 + (31 - v.leading_zeros());
1952                self.next_irr = Some(vector as u8);
1953                return;
1954            }
1955        }
1956        self.next_irr = None;
1957        self.active_auto_eoi = false;
1958    }
1959
    /// Read all the remote IRR bits into the local IRR array. Having two arrays
    /// like this ensures that we don't miss an interrupt if a second instance
    /// of one arrives while the first instance is being injected into the
    /// processor.
    ///
    /// Hypervisor backends that acknowledge interrupts before running the VP
    /// would never hit this condition, because the VP would not have a chance
    /// to run code to act upon the interrupt, and the two interrupts could be
    /// merged.
    ///
    /// But hypervisor backends that acknowledge interrupts only after running
    /// the VP for some time (such as those backed by AMD SNP) could hit this,
    /// since the VP will act on the interrupt, which might cause a device or
    /// another processor to generate a second interrupt before the first VP
    /// exits and acknowledges the first interrupt.
    ///
    /// After the merge, `next_irr` is recomputed and `scan_irr` is cleared.
    fn pull_irr(&mut self) {
        // Walk the local IRR/TMR/auto-EOI banks in lockstep with the matching
        // shared (remote) banks.
        for (
            ((((local_irr, local_tmr), local_auto_eoi), remote_irr), remote_tmr),
            remote_auto_eoi,
        ) in self
            .irr
            .iter_mut()
            .zip(&mut self.tmr)
            .zip(&mut self.auto_eoi)
            .zip(&self.shared.new_irr)
            .zip(&self.shared.tmr)
            .zip(&self.shared.auto_eoi)
        {
            // Read `irr` first with acquire ordering so that the TMR bit
            // associated with each requested interrupt is correct.
            //
            // The relaxed load is only a pre-check that skips banks with
            // nothing staged; the acquire `swap` below is what actually claims
            // the staged bits.
            if remote_irr.load(Ordering::Relaxed) == 0 {
                continue;
            }
            let irr = remote_irr.swap(0, Ordering::Acquire);
            let tmr = remote_tmr.load(Ordering::Relaxed);
            let auto_eoi = remote_auto_eoi.load(Ordering::Relaxed);
            // Always accumulate staged interrupts into the IRR, even while the
            // APIC is software-disabled. Delivery is gated separately:
            // `next_irr` and `push_to_offload` both check the software-enable
            // state, so held interrupts only become deliverable once the APIC
            // is software-enabled again.
            *local_irr |= irr;
            // For the vectors just pulled, replace the local TMR and auto-EOI
            // bits with the remote values; bits of other vectors are kept.
            *local_tmr &= !irr;
            *local_tmr |= tmr & irr;
            *local_auto_eoi &= !irr;
            *local_auto_eoi |= auto_eoi & irr;
            // Only auto-EOI bits belonging to vectors pulled in this pass
            // count; auto-EOI bits left in the shared state for other vectors
            // are ignored.
            self.active_auto_eoi |= (auto_eoi & irr) != 0;
            self.needs_offload_reeval = true;
        }
        self.recompute_next_irr();
        self.scan_irr = false;
    }
2012
2013    fn id_register(&self) -> u32 {
2014        if self.x2apic_enabled() {
2015            self.id
2016        } else {
2017            self.id << 24
2018        }
2019    }
2020
2021    fn ldr_register(&self) -> u32 {
2022        if self.x2apic_enabled() {
2023            X2ApicLogicalId::new()
2024                .with_cluster_id((self.id >> 4) as u16)
2025                .with_logical_id(1 << (self.id & 0xf))
2026                .into()
2027        } else {
2028            self.ldr
2029        }
2030    }
2031}
2032
#[cfg(test)]
mod tests {
    use super::*;
    use test_with_tracing::test;
    use vm_topology::processor::VpInfo;

    /// Test double for [`ApicClient`]: it only tracks CR8 and ignores every
    /// other notification.
    #[derive(Default)]
    struct TestClient {
        cr8: u32,
    }

    impl ApicClient for TestClient {
        fn cr8(&mut self) -> u32 {
            self.cr8
        }

        fn set_cr8(&mut self, value: u32) {
            self.cr8 = value;
        }

        fn set_apic_base(&mut self, _value: u64) {}

        fn wake(&mut self, _vp_index: VpIndex) {}

        fn eoi(&mut self, _vector: u8) {}

        fn now(&mut self) -> VmTime {
            VmTime::from_100ns(0)
        }

        fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]) {
            unreachable!()
        }
    }

    /// MMIO address of the first byte of the APIC register page.
    const APIC_MMIO_BASE: u64 = (APIC_BASE_PAGE as u64) << 12;
    /// MMIO address of the spurious-interrupt-vector register (offset 0xf0).
    const SVR_ADDR: u64 = APIC_MMIO_BASE | 0xf0;
    /// SVR value with the software-enable bit set and spurious vector 0xff.
    const SVR_ENABLED: u32 = 0x1ff;
    /// Vector used by tests that need a single ordinary interrupt.
    const TEST_VECTOR: u8 = 0xd1;

    /// Builds an APIC set containing one APIC for VP 0 / APIC ID 0.
    fn new_apic() -> (LocalApicSet, LocalApic) {
        let vp = X86VpInfo {
            base: VpInfo {
                vp_index: VpIndex::new(0),
                vnode: 0,
            },
            apic_id: 0,
        };
        let set = LocalApicSet::builder().build();
        let apic = set.add_apic(&vp, false);
        (set, apic)
    }

    /// APIC base MSR value at the default page with the given enable bit.
    fn apic_base(enable: bool) -> u64 {
        let base = ApicBase::new()
            .with_base_page(APIC_BASE_PAGE)
            .with_enable(enable);
        base.into()
    }

    /// Software-enables the APIC by writing the SVR through MMIO.
    fn software_enable(apic: &mut LocalApic, client: &mut TestClient) {
        apic.access(client)
            .mmio_write(SVR_ADDR, &SVR_ENABLED.to_le_bytes());
    }

    /// Common fixture: a freshly reset, software-enabled APIC and its client.
    fn enabled_apic() -> (LocalApicSet, LocalApic, TestClient) {
        let (set, mut apic) = new_apic();
        let mut client = TestClient::default();
        apic.reset();
        software_enable(&mut apic, &mut client);
        (set, apic, client)
    }

    /// Requests `vector` on VP 0 through the synic interrupt path.
    fn request_interrupt(set: &LocalApicSet, vector: u8, auto_eoi: bool) {
        set.synic_interrupt(VpIndex::new(0), vector, auto_eoi, |_| {});
    }

    /// Hardware-disables and then re-enables the APIC via the APIC base MSR.
    fn hardware_disable_then_enable(apic: &mut LocalApic, client: &mut TestClient) {
        for enable in [false, true] {
            apic.access(&mut *client)
                .msr_write(X86X_MSR_APIC_BASE, apic_base(enable))
                .unwrap();
        }
    }

    /// Performs a guest MMIO read of IRR bank `bank` (registers start at
    /// offset 0x200, one every 0x10 bytes). The value read is discarded; the
    /// tests use the read to make the APIC pull staged interrupts.
    fn read_irr_bank(apic: &mut LocalApic, client: &mut TestClient, bank: u64) {
        let mut value = [0u8; 4];
        apic.access(client)
            .mmio_read(APIC_MMIO_BASE | (0x200 + bank * 0x10), &mut value);
    }

    /// An interrupt requested before the guest transiently hardware-disables
    /// and re-enables its APIC must still be pending once the APIC is
    /// software-enabled again.
    ///
    /// Regression test for a lost-interrupt hang: the `new_irr` bit staged for
    /// the interrupt survives the hardware disable, and the software
    /// re-enable has to fold it into the IRR instead of dropping it.
    #[test]
    fn irr_retained_across_transient_disable() {
        let (set, mut apic, mut client) = enabled_apic();

        // Staged while the APIC is fully enabled.
        request_interrupt(&set, TEST_VECTOR, false);

        // The staged `new_irr` bit is preserved across this toggle.
        hardware_disable_then_enable(&mut apic, &mut client);

        // The software re-enable has to accumulate the staged interrupt.
        software_enable(&mut apic, &mut client);

        assert_eq!(apic.next_irr(), Some(TEST_VECTOR));
    }

    /// Interrupts staged before a full reset are thrown away by the reset and
    /// must not show up after the APIC is enabled again.
    #[test]
    fn reset_discards_staged_irr() {
        let (set, mut apic, mut client) = enabled_apic();
        request_interrupt(&set, TEST_VECTOR, false);

        // The full reset drops the staged interrupt.
        apic.reset();
        software_enable(&mut apic, &mut client);

        assert_eq!(apic.next_irr(), None);
    }

    /// `pull_irr` also runs on paths other than the `SVR` write while the APIC
    /// is software-disabled (e.g. a guest IRR read, or `save`). Those paths
    /// have to hold staged interrupts too, not discard them.
    #[test]
    fn staged_irr_survives_pull_while_software_disabled() {
        let (set, mut apic, mut client) = enabled_apic();
        request_interrupt(&set, TEST_VECTOR, false);

        // After this toggle the APIC is software-disabled and the interrupt is
        // still staged in `new_irr`.
        hardware_disable_then_enable(&mut apic, &mut client);

        // Reading the IRR bank that holds the vector pulls staged interrupts
        // while still software-disabled; the interrupt must not be dropped.
        read_irr_bank(&mut apic, &mut client, u64::from(TEST_VECTOR) / 32);

        // The held interrupt is visible once software-enabled.
        software_enable(&mut apic, &mut client);
        assert_eq!(apic.next_irr(), Some(TEST_VECTOR));
    }

    /// `active_auto_eoi` has to track only interrupts that were actually
    /// pulled, not stale auto-EOI configuration left behind in
    /// `shared.auto_eoi`. Otherwise a pending non-auto-EOI interrupt could
    /// spuriously block APIC offload (`push_to_offload` returning
    /// `OffloadNotSupported`).
    #[test]
    fn active_auto_eoi_tracks_only_pulled_interrupts() {
        const VEC_AUTO: u8 = 0xd0; // auto-EOI
        const VEC_NORMAL: u8 = 0xd1; // not auto-EOI, same IRR bank

        let (set, mut apic, mut client) = enabled_apic();

        // Deliver an auto-EOI interrupt. Reading any IRR register pulls all
        // banks. The auto-EOI bit stays behind in `shared.auto_eoi` even
        // though no auto-EOI interrupt remains pending afterwards.
        request_interrupt(&set, VEC_AUTO, true);
        read_irr_bank(&mut apic, &mut client, 0);
        assert_eq!(apic.next_irr(), Some(VEC_AUTO));
        apic.acknowledge_interrupt(VEC_AUTO);
        assert_eq!(apic.next_irr(), None);

        // Now pull an ordinary interrupt that lives in the same bank.
        request_interrupt(&set, VEC_NORMAL, false);
        read_irr_bank(&mut apic, &mut client, 0);

        // With only the ordinary interrupt pending, offload has to remain
        // available; the stale auto-EOI bit must not get in the way.
        apic.enable_offload();
        assert!(apic.push_to_offload(|_, _, _| {}).is_ok());
    }
}