virt_support_apic/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Local APIC emulator.
5//!
6//! This emulates the local APIC, as documented by the Intel SDM. It supports
7//! both legacy (MMIO) and X2APIC (MSR) modes.
8
9#![forbid(unsafe_code)]
10
11use bitfield_struct::bitfield;
12use inspect::Inspect;
13use inspect_counters::Counter;
14use parking_lot::RwLock;
15use std::sync::Arc;
16use std::sync::atomic::AtomicU32;
17use std::sync::atomic::Ordering;
18use std::time::Duration;
19use thiserror::Error;
20use virt::x86::MsrError;
21use virt::x86::vp::ApicRegisters;
22use vm_topology::processor::VpIndex;
23use vm_topology::processor::x86::X86VpInfo;
24use vmcore::vmtime::VmTime;
25use vmcore::vmtime::VmTimeAccess;
26use x86defs::X86X_MSR_APIC_BASE;
27use x86defs::apic::APIC_BASE_PAGE;
28use x86defs::apic::ApicBase;
29use x86defs::apic::ApicRegister;
30use x86defs::apic::Dcr;
31use x86defs::apic::DeliveryMode;
32use x86defs::apic::DestinationShorthand;
33use x86defs::apic::Dfr;
34use x86defs::apic::Icr;
35use x86defs::apic::Lvt;
36use x86defs::apic::Svr;
37use x86defs::apic::TimerMode;
38use x86defs::apic::X2APIC_MSR_BASE;
39use x86defs::apic::X2APIC_MSR_END;
40use x86defs::apic::X2ApicLogicalId;
41use x86defs::apic::XApicClusterLogicalId;
42use x86defs::msi::MsiAddress;
43use x86defs::msi::MsiData;
44
// Emulated timer tick period: 5ns per tick gives a 200MHz timer clock.
const NANOS_PER_TICK: u64 = 5; // 200Mhz
// Timer clock frequency in Hz, derived from the tick period.
const TIMER_FREQUENCY: u64 = 1_000_000_000 / NANOS_PER_TICK;
// APIC version register value: version 0x14 in the low byte, with bits
// 16..24 holding the maximum LVT entry index (5 here, matching the six LVT
// registers implemented: timer, thermal, PMC, LINT0/1, error).
const APIC_VERSION: u32 = 0x50014;

// The writable ICR bits common to XAPIC and X2APIC modes. The destination
// field differs in width between the two modes and is added below.
const ICR_LOW_MASK: Icr = Icr::new()
    .with_vector(!0)
    .with_delivery_mode(0b111)
    .with_destination_mode_logical(true)
    .with_level_assert(true)
    .with_trigger_mode_level(true)
    .with_destination_shorthand(0b11);

// Writable ICR bits in XAPIC mode, including the 8-bit destination field.
const ICR_XAPIC_MASK: Icr = ICR_LOW_MASK.with_xapic_mda(!0);
// Writable ICR bits in X2APIC mode, including the 32-bit destination field.
const ICR_X2APIC_MASK: Icr = ICR_LOW_MASK.with_x2apic_mda(!0);
59
/// An individual local APIC for a processor.
#[derive(Inspect)]
pub struct LocalApic {
    /// Per-processor state that other processors may access (posted
    /// interrupt requests and IPI work flags).
    #[inspect(flatten)]
    shared: Arc<SharedState>,
    /// State common to the whole APIC set.
    #[inspect(skip)]
    global: Arc<GlobalState>,

    /// Current value of the APIC base MSR.
    #[inspect(hex)]
    apic_base: u64,
    /// Cached MMIO base address when MMIO access applies, otherwise `None`
    /// — assumed derived from `apic_base`; confirm against
    /// `set_apic_base_inner` (not visible in this chunk).
    #[inspect(hex)]
    base_address: Option<u64>,
    /// This APIC's ID.
    #[inspect(hex)]
    id: u32,
    /// The version register value.
    #[inspect(hex)]
    version: u32,
    /// Logical destination register (stored in XAPIC layout, high byte).
    #[inspect(hex)]
    ldr: u32,
    /// Whether the DFR selects cluster mode (XAPIC logical addressing).
    cluster_mode: bool,
    /// Spurious interrupt vector register, including the software-enable bit.
    #[inspect(hex)]
    svr: u32,
    /// In-service interrupts, kept as a stack of nested vectors.
    #[inspect(hex, with = "|x| inspect::iter_by_index(x.to_bits())")]
    isr: IsrStack,
    /// Interrupt request register: pending vectors, one bit each.
    #[inspect(hex, iter_by_index)]
    irr: [u32; 8],
    /// Trigger mode register: set bits mark level-triggered vectors, which
    /// notify the IO-APIC on EOI.
    #[inspect(hex, iter_by_index)]
    tmr: [u32; 8],
    /// Vectors to EOI automatically at delivery (used for synic interrupts).
    #[inspect(hex, iter_by_index)]
    auto_eoi: [u32; 8],
    /// Most recently latched pending vector, if any — presumably a
    /// delivery fast path; verify against `pull_irr` (not in this chunk).
    next_irr: Option<u8>,
    /// Error status register.
    #[inspect(hex)]
    esr: u32,
    /// Interrupt command register (full 64-bit value).
    #[inspect(hex)]
    icr: u64,
    /// LVT timer register.
    #[inspect(hex)]
    lvt_timer: u32,
    /// LVT thermal sensor register.
    #[inspect(hex)]
    lvt_thermal: u32,
    /// LVT performance counter register.
    #[inspect(hex)]
    lvt_pmc: u32,
    /// LVT LINT0/LINT1 registers.
    #[inspect(hex, iter_by_index)]
    lvt_lint: [u32; 2],
    /// LVT error register.
    #[inspect(hex)]
    lvt_error: u32,
    /// Timer initial count register.
    #[inspect(hex)]
    timer_icr: u32,
    /// Timer current count register, valid as of `last_time`.
    #[inspect(hex)]
    timer_ccr: u32,
    /// VM time at which the timer state was last evaluated.
    last_time: VmTime,
    /// Next timer expiration, if the timer is armed.
    next_timeout: Option<VmTime>,
    /// Timer divide configuration register.
    #[inspect(hex)]
    timer_dcr: u32,
    /// NOTE(review): appears to track whether an auto-EOI interrupt is
    /// currently active — confirm against the delivery path.
    active_auto_eoi: bool,
    /// Whether IRR/ISR currently live in an offloaded APIC rather than in
    /// this structure.
    is_offloaded: bool,
    /// Whether offload eligibility needs to be reevaluated.
    needs_offload_reeval: bool,
    /// Whether the IRR must be rescanned for pending work.
    scan_irr: bool,

    stats: Stats,
}
119
/// Per-APIC event counters, exposed for diagnostics via `Inspect`.
#[derive(Inspect, Default)]
struct Stats {
    /// EOIs via the normal register/MSR paths.
    eoi: Counter,
    /// EOIs of level-triggered interrupts (forwarded to the IO-APIC).
    eoi_level: Counter,
    /// EOIs received while no interrupt was in service.
    spurious_eoi: Counter,
    /// EOIs signaled via the lazy EOI mechanism.
    lazy_eoi: Counter,
    /// Fixed interrupt events.
    interrupt: Counter,
    /// NMI events.
    nmi: Counter,
    /// EXTINT events.
    extint: Counter,
    /// INIT requests.
    init: Counter,
    /// SIPI requests.
    sipi: Counter,
    /// Self-targeted IPIs.
    self_ipi: Counter,
    /// Broadcast IPIs.
    broadcast_ipi: Counter,
    /// IPIs targeting other processors.
    other_ipi: Counter,
    /// State pushes into the offloaded APIC.
    offload_push: Counter,
    /// State pulls back from the offloaded APIC.
    offload_pull: Counter,
}
137
/// Returns the interrupt priority class of a vector (its upper nibble).
fn priority(v: u8) -> u8 {
    (v & 0xf0) >> 4
}
141
142fn dcr_divider_shift(dcr: Dcr) -> u8 {
143    let value = dcr.value_low() | (dcr.value_high() << 2);
144    value.wrapping_add(1) & 0b111
145}
146
/// Maps a vector to its 32-bit register bank index and the bit mask
/// selecting that vector within the bank.
fn bank_mask(vector: u8) -> (usize, u32) {
    let bank = usize::from(vector) >> 5;
    let mask = 1u32 << (vector & 31);
    (bank, mask)
}
150
151fn cluster_mode(value: u32) -> bool {
152    match Dfr(value | 0x0fff_ffff) {
153        Dfr::CLUSTERED_MODE => true,
154        Dfr::FLAT_MODE => false,
155        _ => unreachable!("Unknown DFR value {value}"),
156    }
157}
158
/// The in-service interrupt vectors, kept as a stack: nested interrupts
/// are always of strictly increasing priority class, so at most 16 can be
/// in service at once (one per class).
#[derive(Debug)]
struct IsrStack(Vec<u8>);
161
162impl IsrStack {
163    fn new() -> Self {
164        Self(Vec::with_capacity(16))
165    }
166
167    fn push(&mut self, v: u8) {
168        assert!(v >= 16);
169        assert!(self.0.len() < 16);
170        assert!(priority(self.top().unwrap_or(0)) < priority(v));
171
172        self.0.push(v);
173    }
174
175    fn to_bits(&self) -> [u32; 8] {
176        let mut bits = [0; 8];
177        for &v in &self.0 {
178            bits[v as usize / 32] |= 1 << (v % 32);
179        }
180        bits
181    }
182
183    fn load_from_bits(&mut self, bits: [u32; 8]) {
184        // Only restore at most one interrupt per priority level, skipping the
185        // first (invalid) level.
186        self.clear();
187        let bits = bits.map(|v| [v as u16, (v >> 16) as u16]);
188        for (pri, &v) in bits.iter().flatten().enumerate().skip(1) {
189            if v != 0 {
190                let n = 15 - v.leading_zeros() as u8;
191                self.push(pri as u8 * 16 + n);
192            }
193        }
194    }
195
196    fn is_empty(&self) -> bool {
197        self.0.is_empty()
198    }
199
200    fn top(&self) -> Option<u8> {
201        self.0.last().copied()
202    }
203
204    fn pop(&mut self) -> Option<u8> {
205        self.0.pop()
206    }
207
208    fn clear(&mut self) {
209        self.0.clear();
210    }
211}
212
/// Per-processor APIC state that other processors (and devices) write to
/// post interrupts and IPI work to the owning processor.
#[derive(Debug, Inspect)]
struct SharedState {
    /// The owning processor's VP index.
    vp_index: VpIndex,
    /// Trigger mode bits for posted interrupts, one bit per vector.
    #[inspect(hex, iter_by_index)]
    tmr: [AtomicU32; 8],
    /// Newly posted interrupt vectors, pulled into the local IRR by the
    /// owning processor (see `pull_irr`).
    #[inspect(hex, iter_by_index)]
    new_irr: [AtomicU32; 8],
    /// Auto-EOI bits for posted interrupts, one bit per vector.
    #[inspect(hex, iter_by_index)]
    auto_eoi: [AtomicU32; 8],
    /// Pending non-fixed work, encoded as [`WorkFlags`].
    work: AtomicU32,
    /// Whether the APIC comes out of reset software enabled (SVR enable
    /// bit set).
    software_enabled_on_reset: bool,
}
225
/// Cross-processor work requests, stored in [`SharedState::work`].
#[bitfield(u32)]
struct WorkFlags {
    /// An INIT request is pending.
    init: bool,
    /// A SIPI request is pending; its vector is in `sipi_vector`.
    sipi: bool,
    /// The vector for a pending SIPI.
    sipi_vector: u8,
    /// An EXTINT request is pending.
    extint: bool,
    /// An NMI request is pending.
    nmi: bool,
    #[bits(20)]
    _rsvd: u32,
}
236
/// The interface to the local APIC for all processors.
#[derive(Inspect)]
pub struct LocalApicSet {
    /// State shared with every [`LocalApic`] created via `add_apic`.
    #[inspect(flatten)]
    global: Arc<GlobalState>,
}
243
/// APIC state shared across the whole set.
#[derive(Debug, Inspect)]
struct GlobalState {
    /// Whether guests may switch APICs into X2APIC mode.
    x2apic_capable: bool,
    /// Whether the Hyper-V enlightenment MSRs (EOI/ICR/TPR, frequency)
    /// are handled.
    hyperv_enlightenments: bool,
    /// Mutable routing state, taken read-mostly on interrupt delivery.
    #[inspect(flatten)]
    mutable: RwLock<MutableGlobalState>,
}
251
/// Mutable state shared across the APIC set, guarded by
/// [`GlobalState::mutable`].
#[derive(Debug, Inspect)]
struct MutableGlobalState {
    /// The number of APICs currently in X2APIC mode.
    x2apic_enabled: usize,
    /// The number of XAPIC-mode APICs whose DFR selects cluster mode.
    logical_cluster_mode: usize,
    /// Per-APIC routing state, indexed by APIC ID. Slots with no
    /// registered APIC have `shared == None`.
    #[inspect(
        with = "|x| inspect::iter_by_key(x.iter().enumerate().filter(|x| x.1.shared.is_some()))"
    )]
    by_apic_id: Vec<ApicSlot>,
    /// Maps VP index to APIC ID; unassigned entries hold `!0`.
    #[inspect(iter_by_index)]
    by_index: Vec<u32>,
}
263
/// Cached per-APIC routing state, consulted when delivering interrupts
/// from other processors or devices without touching the target APIC.
#[derive(Debug, Inspect)]
struct ApicSlot {
    /// The XAPIC-format logical ID (the LDR's high byte).
    logical_id: u8,
    /// Whether the APIC is hardware enabled (APIC base MSR enable bit).
    hardware_enabled: bool,
    /// Whether the APIC is software enabled (SVR enable bit).
    software_enabled: bool,
    /// Whether the APIC's DFR selects cluster mode.
    cluster_mode: bool,
    /// Whether the APIC is in X2APIC mode.
    x2apic_enabled: bool,
    /// Copies of the LINT0/LINT1 LVTs so LINT pulses can be routed
    /// without involving the target processor.
    #[inspect(skip)]
    lint: [Lvt; 2],
    /// The target's shared state; `None` until the APIC is added.
    #[inspect(skip)]
    shared: Option<Arc<SharedState>>,
}
276
/// Builder for [`LocalApicSet`].
///
/// Construct via [`LocalApicSet::builder`].
pub struct LocalApicSetBuilder {
    /// Allow X2APIC mode.
    x2apic_capable: bool,
    /// Handle Hyper-V enlightenment MSRs.
    hyperv_enlightenments: bool,
}
284
285impl LocalApicSetBuilder {
286    fn new() -> Self {
287        Self {
288            x2apic_capable: false,
289            hyperv_enlightenments: false,
290        }
291    }
292
293    /// Sets whether X2APIC mode is allowed.
294    pub fn x2apic_capable(&mut self, x2apic_capable: bool) -> &mut Self {
295        self.x2apic_capable = x2apic_capable;
296        self
297    }
298
299    /// Sets whether Hyper-V enlightenment MSRs are handled.
300    pub fn hyperv_enlightenments(&mut self, hyperv_enlightenments: bool) -> &mut Self {
301        self.hyperv_enlightenments = hyperv_enlightenments;
302        self
303    }
304
305    /// Builds a new local APIC set.
306    pub fn build(&self) -> LocalApicSet {
307        LocalApicSet {
308            global: Arc::new(GlobalState {
309                x2apic_capable: self.x2apic_capable,
310                hyperv_enlightenments: self.hyperv_enlightenments,
311                mutable: RwLock::new(MutableGlobalState {
312                    x2apic_enabled: 0,
313                    logical_cluster_mode: 0,
314                    by_apic_id: Vec::new(),
315                    by_index: Vec::new(),
316                }),
317            }),
318        }
319    }
320}
321
impl LocalApicSet {
    /// Creates a new builder for a local APIC set.
    pub fn builder() -> LocalApicSetBuilder {
        LocalApicSetBuilder::new()
    }

    /// Returns the frequency of the APIC timer clock.
    pub fn frequency(&self) -> u64 {
        TIMER_FREQUENCY
    }

    /// Adds an APIC for the specified VP to the set.
    ///
    /// Panics if an APIC has already been added for this VP's APIC ID.
    pub fn add_apic(&self, vp: &X86VpInfo, software_enabled_on_reset: bool) -> LocalApic {
        // State shared with other processors, used to post interrupts and
        // IPI work to this APIC.
        let shared = Arc::new(SharedState {
            vp_index: vp.base.vp_index,
            tmr: Default::default(),
            new_irr: Default::default(),
            auto_eoi: Default::default(),
            work: 0.into(),
            software_enabled_on_reset,
        });

        // Register the APIC in the global routing tables, growing them as
        // needed. The write lock is scoped to this registration.
        {
            let mut mutable = self.global.mutable.write();
            if mutable.by_apic_id.len() <= vp.apic_id as usize {
                mutable
                    .by_apic_id
                    .resize_with(vp.apic_id as usize + 1, || ApicSlot {
                        logical_id: 0,
                        hardware_enabled: false,
                        software_enabled: false,
                        cluster_mode: false,
                        x2apic_enabled: false,
                        lint: [Lvt::new(); 2],
                        shared: None,
                    });
            }
            // Each APIC ID may only be registered once.
            assert!(mutable.by_apic_id[vp.apic_id as usize].shared.is_none());
            mutable.by_apic_id[vp.apic_id as usize].shared = Some(shared.clone());
            if mutable.by_index.len() <= vp.base.vp_index.index() as usize {
                // Entries without an assigned APIC hold !0.
                mutable
                    .by_index
                    .resize(vp.base.vp_index.index() as usize + 1, !0);
            }
            mutable.by_index[vp.base.vp_index.index() as usize] = vp.apic_id;
        }

        let mut apic = LocalApic {
            shared,
            global: self.global.clone(),
            apic_base: 0,
            base_address: None,
            id: vp.apic_id,
            version: APIC_VERSION,
            ldr: 0,
            cluster_mode: false,
            svr: 0,
            isr: IsrStack::new(),
            next_irr: None,
            irr: [0; 8],
            tmr: [0; 8],
            auto_eoi: [0; 8],
            esr: 0,
            icr: 0,
            lvt_timer: 0,
            lvt_thermal: 0,
            lvt_pmc: 0,
            lvt_lint: [0; 2],
            lvt_error: 0,
            timer_icr: 0,
            timer_ccr: 0,
            timer_dcr: 0,
            last_time: VmTime::from_100ns(0),
            next_timeout: None,
            active_auto_eoi: false,
            needs_offload_reeval: false,
            is_offloaded: false,
            scan_irr: false,
            stats: Stats::default(),
        };
        // Put the new APIC into its reset state before handing it out.
        apic.reset();
        apic
    }

    /// Requests a message-signaled interrupt.
    ///
    /// Calls `wake` for each processor that should be woken up for APIC
    /// handling.
    pub fn request_interrupt(&self, address: u64, data: u32, wake: impl FnMut(VpIndex)) {
        // Only the low 32 bits of the MSI address carry the routing fields
        // decoded here.
        let address = MsiAddress::from(address as u32);
        let data = MsiData::from(data);
        self.global.request_interrupt(
            Destination::from_external(
                address.destination_mode_logical(),
                address.virt_destination().into(),
                self.global.x2apic_capable,
            ),
            DeliveryMode(data.delivery_mode()),
            data.vector(),
            data.trigger_mode_level(),
            wake,
        );
    }

    /// Pulses the specified LINT.
    ///
    /// Typically LINT0 is programmed by the guest for EXTINT interrupts and
    /// LINT1 is programmed for NMIs.
    ///
    /// Panics if `lint_index` is not 0 or 1.
    pub fn lint(&self, vp_index: VpIndex, lint_index: usize, wake: impl FnOnce(VpIndex)) {
        let mutable = self.global.mutable.read();
        // Resolve the VP index to its APIC slot via the APIC ID table.
        if let Some(slot) = mutable
            .by_index
            .get(vp_index.index() as usize)
            .and_then(|&apic_id| mutable.by_apic_id.get(apic_id as usize))
        {
            // Use the cached copy of the LVT so the target processor need
            // not be involved.
            let lvt = slot.lint[lint_index];
            if !lvt.masked() {
                if lvt.trigger_mode_level() {
                    // Don't know how to manage remote IRR.
                    return;
                }
                slot.request_interrupt(
                    DeliveryMode(lvt.delivery_mode()),
                    lvt.vector(),
                    lvt.trigger_mode_level(),
                    false,
                    wake,
                );
            }
        }
    }

    /// Asserts a synic interrupt to the specified virtual processor, optionally
    /// with auto EOI (meaning the corresponding ISR bit will not be set when
    /// the interrupt is delivered).
    pub fn synic_interrupt(
        &self,
        vp_index: VpIndex,
        vector: u8,
        auto_eoi: bool,
        wake: impl FnOnce(VpIndex),
    ) {
        let mutable = self.global.mutable.read();
        if let Some(slot) = mutable
            .by_index
            .get(vp_index.index() as usize)
            .and_then(|&apic_id| mutable.by_apic_id.get(apic_id as usize))
        {
            // Synic interrupts are always fixed and edge-triggered.
            slot.request_interrupt(DeliveryMode::FIXED, vector, false, auto_eoi, wake);
        }
    }
}
474
impl GlobalState {
    /// Routes an interrupt request to the destination APIC(s), calling
    /// `wake` for each processor that must notice the new work.
    fn request_interrupt(
        &self,
        destination: Destination,
        delivery_mode: DeliveryMode,
        vector: u8,
        level: bool,
        mut wake: impl FnMut(VpIndex),
    ) {
        let mutable = self.mutable.read();
        match destination {
            // A single APIC, addressed by physical APIC ID.
            Destination::Physical(id) => {
                if let Some(slot) = mutable.by_apic_id.get(id as usize) {
                    slot.request_interrupt(delivery_mode, vector, level, false, &mut wake);
                }
            }
            Destination::Logical(id) => {
                // If any APIC is in X2APIC mode, interpret the logical ID
                // in X2APIC cluster format for all of them.
                if mutable.x2apic_enabled > 0 {
                    // X2APIC cluster mode.
                    if id == !0 {
                        // All-ones logical ID is a broadcast.
                        mutable.request_broadcast_interrupt(
                            delivery_mode,
                            vector,
                            level,
                            &mut wake,
                        );
                    } else {
                        let lowest_priority = delivery_mode == DeliveryMode::LOWEST_PRIORITY;
                        let id = X2ApicLogicalId::from(id);
                        // In X2APIC mode the physical ID is derived from
                        // the cluster ID and the bit position within the
                        // 16-bit logical mask, so targets can be looked up
                        // directly.
                        let base = (id.cluster_id() as u32) << 4;
                        for i in 0..16 {
                            if id.logical_id() & (1 << i) == 0 {
                                continue;
                            }
                            let phys_id = base | i;
                            if let Some(slot) = mutable.by_apic_id.get(phys_id as usize) {
                                // For now, just pick the first enabled APIC in the set for lowest priority.
                                if !lowest_priority || slot.software_enabled {
                                    slot.request_interrupt(
                                        delivery_mode,
                                        vector,
                                        level,
                                        false,
                                        &mut wake,
                                    );
                                    if lowest_priority {
                                        break;
                                    }
                                }
                            }
                        }
                    }
                } else if mutable.logical_cluster_mode > 0 {
                    if id as u8 == !0 {
                        // All-ones logical ID is a broadcast.
                        mutable.request_broadcast_interrupt(
                            delivery_mode,
                            vector,
                            level,
                            &mut wake,
                        );
                    } else {
                        // XAPIC cluster mode. Easy and fast to iterate through the APICs.
                        let id = XApicClusterLogicalId::from(id as u8);
                        mutable.request_set_interrupt(
                            delivery_mode,
                            vector,
                            level,
                            &mut wake,
                            // Match: same cluster, any overlapping logical bit.
                            |_, slot| {
                                let ldr = XApicClusterLogicalId::from(slot.logical_id);
                                ldr.cluster_id() == id.cluster_id()
                                    && ldr.logical_id() & id.logical_id() != 0
                            },
                        );
                    }
                } else {
                    // APIC flat mode. Just iterate through all the VPs.
                    mutable.request_set_interrupt(
                        delivery_mode,
                        vector,
                        level,
                        &mut wake,
                        |_, slot| slot.logical_id & id as u8 != 0,
                    );
                }
            }
            Destination::Broadcast => {
                mutable.request_broadcast_interrupt(delivery_mode, vector, level, &mut wake);
            }
            // Every APIC except one (by physical APIC ID).
            Destination::AllExcept(except) => {
                mutable.request_set_interrupt(
                    delivery_mode,
                    vector,
                    level,
                    &mut wake,
                    |apic_id, _| apic_id != except,
                );
            }
        }
    }
}
576
/// An interrupt destination, decoded from an ICR value or an external
/// (MSI-style) interrupt request.
enum Destination {
    /// A single APIC, by physical APIC ID.
    Physical(u32),
    /// One or more APICs, by logical destination; the interpretation
    /// depends on the APIC mode (X2APIC vs. XAPIC cluster/flat).
    Logical(u32),
    /// All APICs.
    Broadcast,
    /// All APICs except the one with the given physical APIC ID.
    AllExcept(u32),
}
583
584impl Destination {
585    fn from_icr(icr: Icr, x2apic: bool) -> Self {
586        if x2apic {
587            if icr.destination_mode_logical() {
588                Self::Logical(icr.x2apic_mda())
589            } else if icr.x2apic_mda() == !0u32 {
590                Self::Broadcast
591            } else {
592                Self::Physical(icr.x2apic_mda())
593            }
594        } else {
595            if icr.destination_mode_logical() {
596                Self::Logical(icr.xapic_mda().into())
597            } else if icr.xapic_mda() == !0u8 {
598                Self::Broadcast
599            } else {
600                Self::Physical(icr.xapic_mda().into())
601            }
602        }
603    }
604
605    fn from_external(
606        logical_destination_mode: bool,
607        destination: u32,
608        x2apic_capable: bool,
609    ) -> Self {
610        if logical_destination_mode {
611            Self::Logical(destination)
612        } else if (x2apic_capable && destination == !0u32)
613            || (!x2apic_capable && destination == 0xff)
614        {
615            Self::Broadcast
616        } else {
617            Self::Physical(destination)
618        }
619    }
620}
621
/// Access to a local APIC.
///
/// Pairs the APIC with the [`ApicClient`] that handles the callouts (CR8,
/// EOI notification, current time, offloaded state) needed while emulating
/// an access.
pub struct LocalApicAccess<'a, T> {
    /// The APIC being accessed.
    apic: &'a mut LocalApic,
    /// The client callbacks for this access.
    client: &'a mut T,
}
627
/// The client to pass to [`LocalApic::access`], to handle requests needed when
/// accessing the APIC.
pub trait ApicClient {
    /// Get the CR8 register.
    ///
    /// CR8 backs the APIC's TPR (it holds the TPR's high nibble).
    fn cr8(&mut self) -> u32;

    /// Set the CR8 register.
    fn set_cr8(&mut self, value: u32);

    /// Set the APIC base MSR.
    ///
    /// This is just to accelerate reads of the MSR. If apic base MSR reads
    /// always come to `msr_read`, then this can be a no-op.
    fn set_apic_base(&mut self, value: u64);

    /// Ensure the processor at `vp_index` calls `scan` soon.
    fn wake(&mut self, vp_index: VpIndex);

    /// Notify the IO-APIC of an EOI.
    ///
    /// Called only for level-triggered interrupts (those with their TMR
    /// bit set).
    fn eoi(&mut self, vector: u8);

    /// Returns the current time.
    ///
    /// Used to bring the APIC timer's current count up to date.
    fn now(&mut self) -> VmTime;

    /// Retrieve the offloaded IRR and ISR state, clearing them in the
    /// offloaded APIC.
    fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]);
}
656
657fn is_valid_apic_access(address: u64) -> bool {
658    // Any aligned access is valid.
659    if address & 0xf == 0 {
660        return true;
661    }
662    // Allow high byte accesses for some registers. This isn't spec compliant
663    // but some guests rely on this.
664    if address & 0xf == 3 {
665        return matches!(
666            ApicRegister((address >> 4) as u8),
667            ApicRegister::ID | ApicRegister::LDR | ApicRegister::DFR
668        );
669    }
670    false
671}
672
673impl<T: ApicClient> LocalApicAccess<'_, T> {
    /// Performs an EOI that was signaled lazily, out of band from the normal
    /// APIC interfaces.
    pub fn lazy_eoi(&mut self) {
        // The caller must only invoke this when a lazy EOI is actually
        // pending.
        debug_assert!(self.apic.is_lazy_eoi_pending());
        self.eoi(true);
    }
680
    /// Completes the highest-priority in-service interrupt.
    ///
    /// `lazy` indicates the EOI arrived via the lazy EOI mechanism (used
    /// only for statistics).
    fn eoi(&mut self, lazy: bool) {
        // The ISR must be local (not offloaded) before it can be popped.
        self.ensure_state_local();
        if let Some(vector) = self.apic.isr.pop() {
            tracing::trace!(vector, "eoi");
            if lazy {
                self.apic.stats.lazy_eoi.increment();
            } else {
                self.apic.stats.eoi.increment();
            }
            let (bank, mask) = bank_mask(vector);
            // If this was a level-triggered interrupt, notify IO-APIC of the EOI.
            if self.apic.tmr[bank] & mask != 0 {
                self.client.eoi(vector);
                self.apic.stats.eoi_level.increment();
            }
        } else {
            // EOI with nothing in service: ignored, but logged (rate
            // limited) since it suggests a confused guest.
            tracelimit::warn_ratelimited!(lazy, "eoi when no interrupts pending");
            self.apic.stats.spurious_eoi.increment();
        }
    }
701
    /// Reads from the legacy APIC MMIO page.
    ///
    /// Invalid accesses (APIC not in XAPIC mode, or an offset that is
    /// neither aligned nor a permitted high-byte access) read as all ones.
    pub fn mmio_read(&mut self, address: u64, data: &mut [u8]) {
        if !self.apic.xapic_enabled() || !is_valid_apic_access(address) {
            tracelimit::warn_ratelimited!(
                address,
                len = data.len(),
                enabled = self.apic.hardware_enabled(),
                x2apic = self.apic.x2apic_enabled(),
                "invalid apic read"
            );
            data.fill(!0);
            return;
        }

        // Each register occupies a 16-byte slot; unimplemented registers
        // read as zero.
        let value = self
            .read_register(ApicRegister((address >> 4) as u8))
            .unwrap_or(0);

        // Copy out the requested bytes of the 32-bit register value,
        // zero-filling any part of the buffer past the register's end.
        let offset = address as usize & 3;
        data.fill(0);
        let len = data.len().min(4 - offset);
        let data = &mut data[..len];
        data.copy_from_slice(&value.to_ne_bytes()[offset..offset + data.len()]);
    }
726
    /// Writes to the legacy APIC MMIO page.
    ///
    /// Invalid accesses (APIC not in XAPIC mode, or an offset that is
    /// neither aligned nor a permitted high-byte access) are dropped.
    pub fn mmio_write(&mut self, address: u64, data: &[u8]) {
        if !self.apic.xapic_enabled() || !is_valid_apic_access(address) {
            tracelimit::warn_ratelimited!(
                address,
                len = data.len(),
                enabled = self.apic.hardware_enabled(),
                x2apic = self.apic.x2apic_enabled(),
                "invalid apic write"
            );
            return;
        }

        // Expand a (possibly partial) write into a full 32-bit register
        // value; bytes not covered by the write are zero.
        let mut value = [0; 4];
        let offset = address as usize & 3;
        let data = &data[..data.len().min(4 - offset)];
        value[offset..offset + data.len()].copy_from_slice(data);

        // Each register occupies a 16-byte slot.
        self.write_register(
            ApicRegister((address >> 4) as u8),
            u32::from_ne_bytes(value),
        );
    }
750
    /// Reads from the APIC base MSR, X2APIC MSR, or Hyper-V enlightenment MSR.
    pub fn msr_read(&mut self, msr: u32) -> Result<u64, MsrError> {
        let v = match msr {
            X86X_MSR_APIC_BASE => self.apic.apic_base,
            // X2APIC register MSRs are only accessible in X2APIC mode.
            X2APIC_MSR_BASE..=X2APIC_MSR_END if self.apic.x2apic_enabled() => {
                let register = ApicRegister((msr - X2APIC_MSR_BASE) as u8);
                if register == ApicRegister::ICR0 {
                    // ICR is a 64-bit register in X2APIC.
                    self.apic.icr
                } else {
                    self.read_register(register)
                        .ok_or(MsrError::InvalidAccess)?
                        .into()
                }
            }
            hvdef::HV_X64_MSR_APIC_FREQUENCY if self.apic.global.hyperv_enlightenments => {
                TIMER_FREQUENCY
            }
            // The EOI assist MSR is write-only.
            hvdef::HV_X64_MSR_EOI if self.apic.global.hyperv_enlightenments => {
                return Err(MsrError::InvalidAccess);
            }
            hvdef::HV_X64_MSR_ICR if self.apic.global.hyperv_enlightenments => {
                if !self.apic.hardware_enabled() {
                    return Err(MsrError::InvalidAccess);
                }
                self.apic.icr
            }
            // The TPR is backed by CR8, which holds its high nibble.
            hvdef::HV_X64_MSR_TPR if self.apic.global.hyperv_enlightenments => {
                (self.client.cr8() << 4) as u64
            }
            _ => return Err(MsrError::Unknown),
        };
        Ok(v)
    }
785
    /// Writes to the APIC base MSR or an X2APIC MSR.
    pub fn msr_write(&mut self, msr: u32, value: u64) -> Result<(), MsrError> {
        match msr {
            X86X_MSR_APIC_BASE => {
                // The APIC may be disabled by this, so we need IRR/ISR local to
                // be reset.
                self.ensure_state_local();
                self.apic.set_apic_base_inner(value).map_err(|err| {
                    tracelimit::warn_ratelimited!(
                        error = &err as &dyn std::error::Error,
                        "invalid apic base write"
                    );
                    MsrError::InvalidAccess
                })?;
                // Let the client cache the new value to accelerate reads.
                self.client.set_apic_base(self.apic.apic_base);
            }
            X2APIC_MSR_BASE..=X2APIC_MSR_END if self.apic.x2apic_enabled() => {
                let register = ApicRegister((msr - X2APIC_MSR_BASE) as u8);
                if register == ApicRegister::ICR0 {
                    // ICR is a 64-bit register in X2APIC; writing it
                    // triggers the IPI.
                    self.apic.icr = value & u64::from(ICR_X2APIC_MASK);
                    self.handle_ipi(Icr::from(self.apic.icr));
                } else if !self.write_register(register, value as u32) {
                    return Err(MsrError::InvalidAccess);
                }
            }
            // The frequency MSR is read-only.
            hvdef::HV_X64_MSR_APIC_FREQUENCY if self.apic.global.hyperv_enlightenments => {
                return Err(MsrError::InvalidAccess);
            }
            hvdef::HV_X64_MSR_EOI if self.apic.global.hyperv_enlightenments => {
                if !self.apic.hardware_enabled() {
                    return Err(MsrError::InvalidAccess);
                }
                self.eoi(false);
            }
            hvdef::HV_X64_MSR_ICR if self.apic.global.hyperv_enlightenments => {
                // Mask the value according to the current APIC mode before
                // sending the IPI.
                let mask = if self.apic.x2apic_enabled() {
                    ICR_X2APIC_MASK
                } else if self.apic.xapic_enabled() {
                    ICR_XAPIC_MASK
                } else {
                    return Err(MsrError::InvalidAccess);
                };
                self.apic.icr = value & u64::from(mask);
                self.handle_ipi(Icr::from(self.apic.icr));
            }
            hvdef::HV_X64_MSR_TPR if self.apic.global.hyperv_enlightenments => {
                // Only the low byte is valid; CR8 holds the TPR's high
                // nibble.
                if value > 0xff {
                    return Err(MsrError::InvalidAccess);
                }
                self.client.set_cr8((value as u32) >> 4);
            }
            _ => return Err(MsrError::Unknown),
        }
        Ok(())
    }
842
    /// Reads a 32-bit APIC register, shared by the MMIO and MSR paths.
    ///
    /// Returns `None` for unimplemented registers or registers that do not
    /// exist in the current mode.
    fn read_register(&mut self, register: ApicRegister) -> Option<u32> {
        let value = match register {
            ApicRegister::ID => self.apic.id_register(),
            ApicRegister::VERSION => self.apic.version,
            // The TPR is backed by CR8 (which holds its high nibble).
            ApicRegister::TPR => self.client.cr8() << 4,
            ApicRegister::PPR => self.get_ppr(),
            ApicRegister::LDR => self.apic.ldr_register(),
            // The DFR only exists in XAPIC mode.
            ApicRegister::DFR if !self.apic.x2apic_enabled() => {
                if self.apic.cluster_mode {
                    Dfr::CLUSTERED_MODE.0
                } else {
                    Dfr::FLAT_MODE.0
                }
            }
            ApicRegister::SVR => self.apic.svr,
            reg if (ApicRegister::ISR0..=ApicRegister::ISR7).contains(&reg) => {
                // The ISR may live in an offloaded APIC; localize it first.
                self.ensure_state_local();
                let index = reg.0 - ApicRegister::ISR0.0;
                self.apic.isr.to_bits()[index as usize]
            }
            reg if (ApicRegister::TMR0..=ApicRegister::TMR7).contains(&reg) => {
                // Accumulate remotely posted interrupts so the TMR is
                // current.
                self.apic.pull_irr();
                let index = reg.0 - ApicRegister::TMR0.0;
                self.apic.tmr[index as usize]
            }
            reg if (ApicRegister::IRR0..=ApicRegister::IRR7).contains(&reg) => {
                // Localize offloaded state and fold in posted interrupts
                // before reporting the IRR.
                self.ensure_state_local();
                self.apic.pull_irr();
                let index = reg.0 - ApicRegister::IRR0.0;
                self.apic.irr[index as usize]
            }
            ApicRegister::ESR => self.apic.esr,
            // In XAPIC mode the 64-bit ICR is exposed as two 32-bit halves.
            ApicRegister::ICR0 if !self.apic.x2apic_enabled() => self.apic.icr as u32,
            ApicRegister::ICR1 if !self.apic.x2apic_enabled() => (self.apic.icr >> 32) as u32,
            ApicRegister::LVT_TIMER => self.apic.lvt_timer,
            ApicRegister::LVT_THERMAL => self.apic.lvt_thermal,
            ApicRegister::LVT_PMC => self.apic.lvt_pmc,
            ApicRegister::LVT_LINT0 => self.apic.lvt_lint[0],
            ApicRegister::LVT_LINT1 => self.apic.lvt_lint[1],
            ApicRegister::LVT_ERROR => self.apic.lvt_error,
            ApicRegister::TIMER_ICR => self.apic.timer_icr,
            ApicRegister::TIMER_CCR => {
                // Bring the current count up to date before reading it.
                self.apic.eval_time(self.client.now());
                self.apic.timer_ccr
            }
            ApicRegister::TIMER_DCR => self.apic.timer_dcr,
            register => {
                tracelimit::warn_ratelimited!(?register, "unimplemented apic register read");
                return None;
            }
        };
        Some(value)
    }
896
    /// Handles a write of `value` to APIC register `register`.
    ///
    /// Returns `false` when the write is invalid for the current mode (e.g.
    /// non-zero EOI/ESR writes in x2apic mode) or the register is
    /// unimplemented; the caller decides how to surface that.
    fn write_register(&mut self, register: ApicRegister, value: u32) -> bool {
        match register {
            ApicRegister::TPR => {
                // The TPR is modeled via CR8, which holds bits 7:4 of the TPR.
                self.client.set_cr8(value >> 4);
            }
            ApicRegister::EOI => {
                // In x2apic mode, only writes of zero to the EOI MSR are
                // architecturally valid.
                if self.apic.x2apic_enabled() && value != 0 {
                    return false;
                }
                self.eoi(false);
            }
            // LDR and DFR are read-only in x2apic mode.
            ApicRegister::LDR if !self.apic.x2apic_enabled() => {
                // Only the top byte (the logical APIC ID) is writable.
                self.apic.ldr = value & 0xff000000;
                self.apic.update_slot();
            }
            ApicRegister::DFR if !self.apic.x2apic_enabled() => {
                self.apic.cluster_mode = cluster_mode(value);
                self.apic.update_slot();
            }
            ApicRegister::SVR => {
                // The APIC may be disabled by this, so we need to reevaluate
                // offloading.
                self.ensure_state_local();
                // Accumulate any requested interrupts before changing the
                // enable state.
                self.apic.pull_irr();
                self.apic.svr = value & u32::from(Svr::new().with_vector(0xff).with_enable(true));
                if !self.apic.software_enabled() {
                    // Mask all the LVTs.
                    for lvt in [
                        &mut self.apic.lvt_timer,
                        &mut self.apic.lvt_thermal,
                        &mut self.apic.lvt_pmc,
                        &mut self.apic.lvt_error,
                    ]
                    .into_iter()
                    .chain(&mut self.apic.lvt_lint)
                    {
                        *lvt = Lvt::from(*lvt).with_masked(true).into();
                    }
                }
                self.apic.update_slot();
            }
            ApicRegister::ESR => {
                // In x2apic mode, only writes of zero to the ESR MSR are
                // architecturally valid.
                if self.apic.x2apic_enabled() && value != 0 {
                    return false;
                }
                // This would copy and zero the hidden error register, but we
                // never set that to a non-zero value and don't include it in
                // the saved-state format.
                self.apic.esr = 0;
            }
            ApicRegister::ICR0 if !self.apic.x2apic_enabled() => {
                // Writing the low half latches the full ICR and sends the IPI.
                self.apic.icr = (value as u64 | (self.apic.icr & 0xffffffff_00000000))
                    & u64::from(ICR_XAPIC_MASK);

                self.handle_ipi(self.apic.icr.into());
            }
            ApicRegister::ICR1 if !self.apic.x2apic_enabled() => {
                // The high half (destination) has no side effects on write.
                self.apic.icr = (((value as u64) << 32) | self.apic.icr & 0xffffffff)
                    & u64::from(ICR_XAPIC_MASK);
            }
            // For each LVT, unsupported bits are masked off before storing.
            ApicRegister::LVT_TIMER => {
                self.apic.lvt_timer = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_masked(true)
                                .with_timer_mode(1), // no TSC deadline support
                        ),
                );
            }
            ApicRegister::LVT_THERMAL => {
                self.apic.lvt_thermal = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
            }
            ApicRegister::LVT_PMC => {
                self.apic.lvt_pmc = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
            }
            reg @ (ApicRegister::LVT_LINT0 | ApicRegister::LVT_LINT1) => {
                let index = if reg == ApicRegister::LVT_LINT0 { 0 } else { 1 };
                self.apic.lvt_lint[index] = self.apic.effective_lvt(
                    value
                        & u32::from(
                            Lvt::new()
                                .with_vector(0xff)
                                .with_input_pin_polarity(true)
                                .with_trigger_mode_level(true)
                                .with_delivery_mode(0b111)
                                .with_masked(true),
                        ),
                );
                self.apic.update_slot();
            }
            ApicRegister::LVT_ERROR => {
                self.apic.lvt_error = self.apic.effective_lvt(
                    value & u32::from(Lvt::new().with_vector(0xff).with_masked(true)),
                );
            }
            ApicRegister::TIMER_ICR => {
                // Writing the initial count also resets the current count and
                // restarts timing from now.
                let now = self.client.now();
                self.apic.timer_icr = value;
                self.apic.timer_ccr = value;
                self.apic.last_time = now;
                self.apic.update_timeout(now);
            }
            ApicRegister::TIMER_DCR => {
                // Account for elapsed time at the old divider before changing
                // it.
                let now = self.client.now();
                self.apic.eval_time(now);
                self.apic.timer_dcr =
                    value & u32::from(Dcr::new().with_value_low(0b11).with_value_high(0b1));
                self.apic.update_timeout(now);
            }
            ApicRegister::SELF_IPI if self.apic.x2apic_enabled() => {
                self.apic.stats.self_ipi.increment();
                self.apic.scan_irr |= self.apic.shared.request_interrupt(
                    self.apic.software_enabled(),
                    DeliveryMode::FIXED,
                    value as u8,
                    false,
                    false,
                );
            }
            register => {
                tracelimit::warn_ratelimited!(?register, "unimplemented apic register write");
                return false;
            }
        }
        true
    }
1043
1044    /// Computes and returns the current effective PPR value.
1045    pub fn get_ppr(&mut self) -> u32 {
1046        self.ensure_state_local();
1047        let task_pri = self.client.cr8();
1048        let isr_pri = priority(self.apic.isr.top().unwrap_or(0));
1049        task_pri.max(isr_pri.into()) << 4
1050    }
1051
1052    fn ensure_state_local(&mut self) {
1053        if self.apic.is_offloaded {
1054            let (irr, isr) = self.client.pull_offload();
1055            self.apic.accumulate_from_offload(&irr, &isr);
1056            self.apic.stats.offload_pull.increment();
1057
1058            // Make sure that we commit any bits we read from the offloaded apic
1059            // before we dispatch back to the vp, since pull offload clears
1060            // corresponding bits in the offloaded apic state. Otherwise, we
1061            // could commit state bits which would result in the guest being in
1062            // a broken state.
1063            self.apic.needs_offload_reeval = true;
1064        }
1065    }
1066
    /// Delivers the IPI described by the ICR value `icr`, resolving the
    /// delivery mode and destination shorthand.
    fn handle_ipi(&mut self, icr: Icr) {
        let delivery_mode = DeliveryMode(icr.delivery_mode());
        // Filter out delivery modes that are not valid for IPIs.
        match delivery_mode {
            DeliveryMode::FIXED => {}
            DeliveryMode::LOWEST_PRIORITY => {
                if self.apic.x2apic_enabled() {
                    // Don't allow lowest priority IPIs via x2apic.
                    return;
                }
            }
            DeliveryMode::NMI => {}
            DeliveryMode::INIT => {
                // Ignore INIT level deasserts here.
                if !icr.level_assert() {
                    return;
                }
            }
            DeliveryMode::SIPI => {}
            DeliveryMode::EXTINT => {
                // Not allowed as an IPI.
                return;
            }
            _ => return,
        }

        match DestinationShorthand(icr.destination_shorthand()) {
            DestinationShorthand::NONE => {
                // Route through the global destination map (physical or
                // logical addressing, possibly broadcast).
                let destination = Destination::from_icr(icr, self.apic.x2apic_enabled());
                match destination {
                    Destination::Physical(_) | Destination::Logical(_) => {
                        self.apic.stats.other_ipi.increment()
                    }
                    Destination::Broadcast | Destination::AllExcept(_) => {
                        self.apic.stats.broadcast_ipi.increment()
                    }
                }
                self.apic.global.request_interrupt(
                    destination,
                    delivery_mode,
                    icr.vector(),
                    false,
                    |vp| self.client.wake(vp),
                );
            }
            DestinationShorthand::SELF => {
                // Self-IPIs go straight to the per-VP shared state, bypassing
                // the global destination map.
                self.apic.stats.self_ipi.increment();
                self.apic.scan_irr |= self.apic.shared.request_interrupt(
                    self.apic.software_enabled(),
                    delivery_mode,
                    icr.vector(),
                    icr.trigger_mode_level(),
                    false,
                );
            }
            DestinationShorthand::ALL_INCLUDING_SELF => {
                self.apic.stats.broadcast_ipi.increment();
                self.apic.global.request_interrupt(
                    Destination::Broadcast,
                    delivery_mode,
                    icr.vector(),
                    false,
                    |vp| self.client.wake(vp),
                );
            }
            DestinationShorthand::ALL_EXCLUDING_SELF => {
                self.apic.stats.broadcast_ipi.increment();
                self.apic.global.request_interrupt(
                    Destination::AllExcept(self.apic.id),
                    delivery_mode,
                    icr.vector(),
                    false,
                    |vp| self.client.wake(vp),
                );
            }
            // The shorthand field is two bits wide, so all values are covered
            // above.
            _ => unreachable!(),
        }
    }
1144}
1145
impl SharedState {
    /// Records an interrupt request in the shared (cross-VP accessible)
    /// state for this VP.
    ///
    /// Returns true if the VP should be woken up to scan the APIC.
    #[must_use]
    fn request_interrupt(
        &self,
        software_enabled: bool,
        delivery_mode: DeliveryMode,
        vector: u8,
        level_triggered: bool,
        auto_eoi: bool,
    ) -> bool {
        match delivery_mode {
            DeliveryMode::FIXED | DeliveryMode::LOWEST_PRIORITY => {
                // Fixed interrupts are dropped while the APIC is software
                // disabled, and vectors 0-15 are illegal.
                if !software_enabled || !(16..=255).contains(&vector) {
                    return false;
                }
                let (bank, mask) = bank_mask(vector);
                // Record the trigger mode for this vector; only touch the
                // atomic when the bit actually needs to change.
                if (self.tmr[bank].load(Ordering::Relaxed) & mask != 0) != level_triggered {
                    if level_triggered {
                        self.tmr[bank].fetch_or(mask, Ordering::Relaxed);
                    } else {
                        self.tmr[bank].fetch_and(!mask, Ordering::Relaxed);
                    }
                }
                // Likewise record whether this vector is auto-EOI.
                if (self.auto_eoi[bank].load(Ordering::Relaxed) & mask != 0) != auto_eoi {
                    if auto_eoi {
                        self.auto_eoi[bank].fetch_or(mask, Ordering::Relaxed);
                    } else {
                        self.auto_eoi[bank].fetch_and(!mask, Ordering::Relaxed);
                    }
                }
                // Publish the IRR bit with Release so the tmr/auto_eoi
                // updates above are visible to the VP that consumes it. Only
                // a 0 -> 1 transition requires waking the VP.
                if self.new_irr[bank].fetch_or(mask, Ordering::Release) & mask == 0 {
                    return true;
                }
                false
            }
            // For the message-style requests below, the VP only needs a
            // wakeup when there was no work pending at all (old == 0).
            DeliveryMode::NMI => {
                let old = self
                    .work
                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
                        Some(WorkFlags::from(w).with_nmi(true).into())
                    })
                    .unwrap();
                old == 0
            }
            DeliveryMode::INIT => {
                let old = self
                    .work
                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
                        Some(WorkFlags::from(w).with_init(true).into())
                    })
                    .unwrap();
                old == 0
            }
            DeliveryMode::SIPI => {
                let old = self
                    .work
                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
                        Some(
                            WorkFlags::from(w)
                                .with_sipi(true)
                                .with_sipi_vector(vector)
                                .into(),
                        )
                    })
                    .unwrap();
                old == 0
            }
            DeliveryMode::EXTINT => {
                let old = self
                    .work
                    .fetch_update(Ordering::Release, Ordering::Relaxed, |w| {
                        Some(WorkFlags::from(w).with_extint(true).into())
                    })
                    .unwrap();
                old == 0
            }
            // Other delivery modes are not representable as per-VP requests.
            _ => false,
        }
    }
}
1227
1228impl MutableGlobalState {
1229    fn request_broadcast_interrupt(
1230        &self,
1231        delivery_mode: DeliveryMode,
1232        vector: u8,
1233        level_triggered: bool,
1234        wake: impl FnMut(VpIndex),
1235    ) {
1236        self.request_set_interrupt(delivery_mode, vector, level_triggered, wake, |_, _| true);
1237    }
1238
1239    fn request_set_interrupt(
1240        &self,
1241        delivery_mode: DeliveryMode,
1242        vector: u8,
1243        level_triggered: bool,
1244        mut wake: impl FnMut(VpIndex),
1245        mut filter: impl FnMut(u32, &ApicSlot) -> bool,
1246    ) {
1247        let lowest_priority = delivery_mode == DeliveryMode::LOWEST_PRIORITY;
1248        for (apic_id, slot) in self.by_apic_id.iter().enumerate() {
1249            if !filter(apic_id as u32, slot) {
1250                continue;
1251            }
1252            // For now, just pick the first enabled APIC in the set for lowest priority.
1253            if !lowest_priority || slot.software_enabled {
1254                slot.request_interrupt(delivery_mode, vector, level_triggered, false, &mut wake);
1255                if lowest_priority {
1256                    break;
1257                }
1258            }
1259        }
1260    }
1261}
1262
1263impl ApicSlot {
1264    fn request_interrupt(
1265        &self,
1266        delivery_mode: DeliveryMode,
1267        vector: u8,
1268        level_triggered: bool,
1269        auto_eoi: bool,
1270        wake: impl FnOnce(VpIndex),
1271    ) {
1272        if let Some(shared) = &self.shared {
1273            if self.hardware_enabled
1274                && shared.request_interrupt(
1275                    self.software_enabled,
1276                    delivery_mode,
1277                    vector,
1278                    level_triggered,
1279                    auto_eoi,
1280                )
1281            {
1282                wake(shared.vp_index);
1283            }
1284        }
1285    }
1286}
1287
/// Work to do as a result of [`LocalApic::scan`] or [`LocalApic::flush`].
///
/// Each request is reported once: the underlying request flags are cleared
/// when this work is returned.
#[derive(Debug, Default)]
pub struct ApicWork {
    /// An INIT interrupt was requested.
    ///
    /// Reset register state (including APIC state) as documented in the Intel
    /// manual.
    pub init: bool,
    /// A SIPI interrupt was requested with the given vector.
    ///
    /// Update the cs and rip to the appropriate values and clear the
    /// wait-for-SIPI state.
    pub sipi: Option<u8>,
    /// An extint interrupt was requested.
    ///
    /// When the processor is ready for extint injection, query the PIC for the
    /// vector and inject the interrupt.
    pub extint: bool,
    /// An NMI was requested.
    pub nmi: bool,
    /// A fixed interrupt was requested.
    ///
    /// Call [`LocalApic::acknowledge_interrupt`] after it has been injected.
    pub interrupt: Option<u8>,
}
1313
/// An error writing the APIC base MSR.
///
/// Returned by [`LocalApic::set_apic_base`].
#[derive(Debug, Error)]
pub enum InvalidApicBase {
    /// Reserved bits set.
    #[error("reserved bits set")]
    ReservedBits,
    /// Invalid x2apic state.
    #[error("invalid x2apic state")]
    InvalidX2Apic,
    /// Can't disable x2apic without reset.
    #[error("can't disable x2apic without reset")]
    CantDisableX2Apic,
}
1327
/// APIC offload is not supported with the current request state, likely due to
/// auto EOI. The caller must disable offloads and scan the APIC again.
///
/// Returned by [`LocalApic::push_to_offload`].
pub struct OffloadNotSupported;
1331
1332impl LocalApic {
    /// Returns an object to access APIC registers.
    ///
    /// The accessor borrows both the APIC and `client`; the client is used
    /// for callouts (CR8, current time, VP wakes, offload pulls) made during
    /// register accesses.
    pub fn access<'a, T: ApicClient>(&'a mut self, client: &'a mut T) -> LocalApicAccess<'a, T> {
        LocalApicAccess { apic: self, client }
    }
1337
1338    /// Fast path for updating IRR on the local processor.
1339    pub fn request_fixed_interrupts(&mut self, mut irr: [u32; 8]) {
1340        if self.hardware_enabled() && self.software_enabled() {
1341            // Don't allow setting invalid bits.
1342            irr[0] &= !0xffff;
1343            for (bank, &irr) in irr.iter().enumerate() {
1344                self.irr[bank] |= irr;
1345                self.tmr[bank] &= !irr;
1346                self.auto_eoi[bank] &= !irr;
1347            }
1348            self.needs_offload_reeval = true;
1349            self.recompute_next_irr();
1350        }
1351    }
1352
    /// Gets the APIC base MSR.
    ///
    /// This is the raw IA32_APIC_BASE value, including the enable, x2apic,
    /// and BSP bits as well as the base page.
    pub fn apic_base(&self) -> u64 {
        self.apic_base
    }
1357
    /// Gets the APIC base address, if the APIC is enabled and in xapic mode.
    ///
    /// The value is precomputed in `update_slot` since this is called on the
    /// instruction emulation hot path.
    pub fn base_address(&self) -> Option<u64> {
        self.base_address
    }
1362
    /// Sets the APIC base MSR.
    ///
    /// Returns an [`InvalidApicBase`] error if the value is invalid.
    ///
    /// # Panics
    /// Panics if the APIC is still offloaded; the caller must onload first.
    pub fn set_apic_base(&mut self, apic_base: u64) -> Result<(), InvalidApicBase> {
        assert!(
            !self.is_offloaded,
            "failed to onload before setting the APIC base"
        );
        self.set_apic_base_inner(apic_base)
    }
1373
    /// Validates and applies a new APIC base MSR value, resetting registers
    /// when the APIC transitions from enabled to disabled.
    ///
    /// The caller must ensure that the offloaded APIC state is local.
    fn set_apic_base_inner(&mut self, apic_base: u64) -> Result<(), InvalidApicBase> {
        let current = ApicBase::from(self.apic_base);

        // Only the enable, x2apic, base page, and BSP bits may be set.
        let requested = ApicBase::from(apic_base);
        let allowed = ApicBase::new()
            .with_enable(true)
            .with_x2apic(true)
            .with_base_page(0xffffff)
            .with_bsp(true);
        if u64::from(requested) & !u64::from(allowed) != 0 {
            return Err(InvalidApicBase::ReservedBits);
        }

        // Ignore writes to the BSP bit.
        let new = requested.with_bsp(current.bsp());

        tracing::debug!(
            ?current,
            ?new,
            apic_base,
            vp = self.shared.vp_index.index(),
            "update apic base"
        );

        if new.x2apic() && (!new.enable() || !self.global.x2apic_capable) {
            // Invalid x2apic state.
            return Err(InvalidApicBase::InvalidX2Apic);
        }

        if current.x2apic() && new.enable() && !new.x2apic() {
            // Can't disable x2apic once it is enabled without going through a
            // reset or disable.
            return Err(InvalidApicBase::CantDisableX2Apic);
        }

        if current.enable() && !new.enable() {
            self.reset_registers();
        }

        self.apic_base = new.into();
        self.update_slot();
        Ok(())
    }
1418
    /// Returns whether the APIC is hardware enabled (IA32_APIC_BASE enable
    /// bit).
    fn hardware_enabled(&self) -> bool {
        ApicBase::from(self.apic_base).enable()
    }
1422
    /// Returns whether the APIC is in legacy xapic (MMIO) mode: hardware
    /// enabled but not in x2apic mode.
    fn xapic_enabled(&self) -> bool {
        self.hardware_enabled() && !self.x2apic_enabled()
    }
1426
    /// X2APIC is enabled on this local APIC (the x2apic bit of the APIC base
    /// MSR is set).
    pub fn x2apic_enabled(&self) -> bool {
        ApicBase::from(self.apic_base).x2apic()
    }
1431
    /// Returns whether the APIC is software enabled via the SVR enable bit.
    fn software_enabled(&self) -> bool {
        Svr::from(self.svr).enable()
    }
1435
1436    /// Sets the masked bit in an LVT if the APIC is software disabled.
1437    fn effective_lvt(&self, lvt: u32) -> u32 {
1438        let mut lvt = Lvt::from(lvt);
1439        if !self.software_enabled() {
1440            lvt.set_masked(true);
1441        }
1442        lvt.into()
1443    }
1444
    /// Scans for pending interrupts.
    ///
    /// Evaluates the APIC timer against `vmtime`, drains pending
    /// INIT/SIPI/NMI/extint requests, and (when not offloaded) reports the
    /// next injectable fixed interrupt vector. Pass `scan_irr` to force the
    /// shared IRR banks to be pulled even when no local request is recorded.
    pub fn scan(&mut self, vmtime: &mut VmTimeAccess, scan_irr: bool) -> ApicWork {
        if !self.hardware_enabled() {
            return Default::default();
        }

        if let Some(next) = self.next_timeout {
            let now = vmtime.now();
            if now.is_after(next) {
                // The timer deadline has passed; advance the timer state and
                // compute the next deadline.
                self.eval_time(now);
                self.update_timeout(now);
            }
            // Re-arm the vmtime timeout for the (possibly updated) deadline.
            if let Some(next) = self.next_timeout {
                vmtime.set_timeout_if_before(next);
            }
        }

        let mut r = self.flush();
        if scan_irr || self.scan_irr {
            self.pull_irr();
        }
        if !self.is_offloaded {
            r.interrupt = self.next_irr();
        }

        r
    }
1472
1473    /// Returns the next pending interrupt vector, if any.
1474    pub fn next_irr(&self) -> Option<u8> {
1475        if !self.software_enabled() {
1476            return None;
1477        }
1478        let vector = self.next_irr?;
1479        let pri = priority(vector);
1480        if self.isr.top().map_or(0, priority) < pri {
1481            Some(vector)
1482        } else {
1483            None
1484        }
1485    }
1486
    /// Handles APIC offload, calling `update` with new bits in IRR, ISR, and
    /// the current value of TMR.
    ///
    /// `update` should accumulate IRR and ISR into the offload APIC page and
    /// update the EOI exit bitmap if TMR has changed since the last call.
    ///
    /// `update` will not be called if there are no changes (i.e. if IRR and ISR
    /// are both zero).
    ///
    /// # Errors
    /// Returns [`OffloadNotSupported`] when an auto-EOI interrupt is active,
    /// since that cannot be pushed to the offloaded APIC; the caller must
    /// disable offloads and scan again.
    pub fn push_to_offload(
        &mut self,
        update: impl FnOnce(&[u32; 8], &[u32; 8], &[u32; 8]),
    ) -> Result<(), OffloadNotSupported> {
        if self.needs_offload_reeval && self.is_offloaded && self.software_enabled() {
            if self.active_auto_eoi {
                return Err(OffloadNotSupported);
            }
            update(&self.irr, &self.isr.to_bits(), &self.tmr);
            // The pushed bits now live in the offloaded APIC; clear the local
            // copies so they are not pushed twice.
            self.irr = [0; 8];
            self.isr.clear();
            self.stats.offload_push.increment();
            self.needs_offload_reeval = false;
        }
        Ok(())
    }
1511
    /// Returns whether APIC offload is enabled.
    ///
    /// See [`Self::enable_offload`] and [`Self::disable_offload`].
    pub fn is_offloaded(&self) -> bool {
        self.is_offloaded
    }
1516
    /// Returns true if it is safe to set an IRR bit directly in offloaded APIC
    /// state.
    ///
    /// This requires the APIC to be software enabled as well as offloaded.
    pub fn can_offload_irr(&self) -> bool {
        self.is_offloaded && self.software_enabled()
    }
1522
    /// Enables APIC offload.
    ///
    /// Marks the state for reevaluation so that local IRR/ISR bits are pushed
    /// to the offloaded APIC by the next [`Self::push_to_offload`].
    pub fn enable_offload(&mut self) {
        self.is_offloaded = true;
        self.needs_offload_reeval = true;
    }
1528
    /// Disables APIC offload, accumulating IRR and ISR from the offload APIC
    /// page.
    ///
    /// `irr` and `isr` are the final offloaded register banks; they are merged
    /// into the local state before offload is turned off.
    pub fn disable_offload(&mut self, irr: &[u32; 8], isr: &[u32; 8]) {
        self.accumulate_from_offload(irr, isr);
        self.is_offloaded = false;
    }
1535
1536    fn accumulate_from_offload(&mut self, irr: &[u32; 8], isr: &[u32; 8]) {
1537        let mut local_isr = self.isr.to_bits();
1538
1539        // TODO: We probably should instead not touch ISR at all unless we are
1540        // about to disable offload. Refactor this later.
1541        assert!(self.is_offloaded);
1542
1543        for (((local_irr, &remote_irr), local_isr), &remote_isr) in
1544            self.irr.iter_mut().zip(irr).zip(&mut local_isr).zip(isr)
1545        {
1546            *local_irr |= remote_irr;
1547            *local_isr |= remote_isr;
1548        }
1549        self.isr.load_from_bits(local_isr);
1550        self.recompute_next_irr();
1551        self.needs_offload_reeval = true;
1552    }
1553
1554    /// Flushes work as in [`Self::scan`], but does not poll timers or IRR.
1555    ///
1556    /// This must be called before [`Self::save`] to flush hidden state to
1557    /// registers.
1558    pub fn flush(&mut self) -> ApicWork {
1559        if self.shared.work.load(Ordering::Relaxed) == 0 {
1560            return Default::default();
1561        }
1562
1563        let mut r = ApicWork::default();
1564        let work = WorkFlags::from(self.shared.work.swap(0, Ordering::SeqCst));
1565        if work.init() {
1566            self.stats.init.increment();
1567            r.init = true;
1568        }
1569        if work.sipi() {
1570            self.stats.sipi.increment();
1571            r.sipi = Some(work.sipi_vector());
1572        }
1573        if work.nmi() {
1574            self.stats.nmi.increment();
1575            r.nmi = true;
1576        }
1577        if work.extint() {
1578            self.stats.extint.increment();
1579            r.extint = true;
1580        }
1581
1582        r
1583    }
1584
    /// Acknowledges the interrupt returned by `scan`.
    ///
    /// Clears the vector from IRR and, unless the vector was requested with
    /// auto EOI, pushes it onto the in-service (ISR) stack.
    ///
    /// # Panics
    /// Panics if the APIC is offloaded or if `vector` is not the currently
    /// pending vector.
    pub fn acknowledge_interrupt(&mut self, vector: u8) {
        assert!(!self.is_offloaded);
        assert_eq!(Some(vector), self.next_irr);
        let (bank, mask) = bank_mask(vector);
        self.irr[bank] &= !mask;
        self.recompute_next_irr();
        if self.auto_eoi[bank] & mask == 0 {
            self.isr.push(vector);
        }
        self.stats.interrupt.increment();
    }
1597
    /// Returns whether an EOI is pending that can be completed lazily, without
    /// intercepting the VP.
    ///
    /// Lazy EOI is only possible when the EOI target is unambiguous and
    /// edge-triggered; the conditions are detailed inline below.
    pub fn is_lazy_eoi_pending(&self) -> bool {
        // Offloaded IRR/ISR state is not visible here, so lazy EOI cannot be
        // evaluated.
        if self.is_offloaded {
            return false;
        }
        let eoi_vector = if let Some(next_irr) = self.next_irr {
            // There is at least one pending interrupt. Allow lazy EOI only if
            // there are no in-service interrupts:
            //
            // 1. If there are any in-service interrupts with a higher priority,
            //    then we need an EOI intercept to know when to inject the
            //    pending interrupt.
            //
            // 2. If there are any in-service interrupts with a lower priority,
            //    then the pending interrupt is pending injection, so it would
            //    be ambiguous whether the lazy EOI was for the in-service
            //    interrupt or the pending interrupt.
            if !self.isr.is_empty() {
                return false;
            }

            // Only allow lazy EOI if next_irr is the only irr
            let (bank, mask) = bank_mask(next_irr);
            let mut expected = [0; 8];
            expected[bank] = mask;
            if !expected.iter().eq(self.irr.iter()) {
                return false;
            }

            next_irr
        } else if let Some(vector) = self.isr.top() {
            // There are no pending interrupts. Allow lazy EOI for the top
            // in-service interrupt.
            vector
        } else {
            return false;
        };

        // Only allow lazy EOI if the interrupt is edge-triggered. Otherwise, we
        // need an intercept to check whether to reassert the interrupt.
        let (bank, mask) = bank_mask(eoi_vector);
        self.tmr[bank] & mask == 0
    }
1642
    /// Advances the APIC timer to `now`, decrementing the current count and
    /// delivering a timer interrupt (and, in periodic mode, reloading the
    /// count) on expiry.
    fn eval_time(&mut self, now: VmTime) {
        // A zero current count means the timer is stopped.
        if self.timer_ccr == 0 {
            return;
        }

        let shift = dcr_divider_shift(Dcr::from(self.timer_dcr));

        // Convert elapsed wall time into timer ticks at the divided frequency.
        let raw_nanos = now.checked_sub(self.last_time).unwrap().as_nanos() as u64;
        let counts = (raw_nanos / NANOS_PER_TICK) >> shift;

        let lvt = Lvt::from(self.lvt_timer);
        if counts >= self.timer_ccr as u64 {
            // The timer has expired at least once since the last evaluation.
            if !lvt.masked() {
                self.scan_irr |= self.shared.request_interrupt(
                    self.software_enabled(),
                    DeliveryMode::FIXED,
                    lvt.vector(),
                    false,
                    false,
                );
            }

            if TimerMode(lvt.timer_mode()) == TimerMode::ONE_SHOT {
                self.timer_ccr = 0;
                // Don't bother to update the last eval time.
                return;
            } else {
                // Periodic mode: reload from the initial count, accounting
                // for full periods that may have elapsed in the meantime.
                let remaining = counts - self.timer_ccr as u64;
                // Avoid the divide in the common case.
                if remaining < self.timer_icr as u64 {
                    self.timer_ccr = self.timer_icr - remaining as u32;
                } else {
                    self.timer_ccr = self.timer_icr - (remaining % self.timer_icr as u64) as u32;
                }
            }
        } else {
            self.timer_ccr -= counts as u32;
        }

        // Advance last_time by exactly the whole ticks consumed, preserving
        // any sub-tick remainder for the next evaluation.
        let elapsed_nanos = (counts << shift) * NANOS_PER_TICK;

        self.last_time = self
            .last_time
            .wrapping_add(Duration::from_nanos(elapsed_nanos));
    }
1688
1689    fn update_timeout(&mut self, now: VmTime) {
1690        self.next_timeout = (self.timer_ccr != 0).then(|| {
1691            let counts = self.timer_ccr;
1692            let ticks = (counts as u64) << dcr_divider_shift(Dcr::from(self.timer_dcr));
1693            now.wrapping_add(Duration::from_nanos(ticks * NANOS_PER_TICK))
1694        });
1695    }
1696
    /// Resets the APIC state.
    ///
    /// Restores the power-on APIC base MSR value (hardware enabled, xapic
    /// mode, BSP bit from the topology), resets all registers, and drops any
    /// pending INIT/SIPI/NMI/extint requests.
    ///
    /// # Panics
    /// Panics if the APIC is still offloaded.
    pub fn reset(&mut self) {
        assert!(!self.is_offloaded);

        self.apic_base = ApicBase::new()
            .with_base_page(APIC_BASE_PAGE)
            .with_bsp(self.shared.vp_index.is_bsp())
            .with_enable(true)
            .into();

        self.reset_registers();
        // Drop any pending requests.
        self.shared.work.store(0, Ordering::Relaxed);
    }
1711
    /// Resets all architectural APIC registers to their power-on values.
    ///
    /// The exhaustive destructuring ensures that adding a field to `Self`
    /// forces this function to be revisited.
    fn reset_registers(&mut self) {
        let Self {
            shared,
            global: _,
            apic_base: _,
            base_address: _,
            id: _,
            version: _,
            ldr,
            cluster_mode,
            svr,
            isr,
            next_irr,
            irr,
            tmr,
            auto_eoi,
            esr,
            icr,
            lvt_timer,
            lvt_thermal,
            lvt_pmc,
            lvt_lint,
            lvt_error,
            timer_icr,
            timer_ccr,
            last_time: _,
            next_timeout,
            timer_dcr,
            active_auto_eoi,
            needs_offload_reeval,
            scan_irr,
            is_offloaded: _,
            stats: _,
        } = self;

        *ldr = 0;
        *cluster_mode = false;
        *svr = u32::from(Svr::from(0xff).with_enable(shared.software_enabled_on_reset));
        isr.clear();
        *esr = 0;
        *icr = 0;
        *next_irr = None;
        // Note that any bits in `shared.new_irr` will be cleared and ignored by
        // the next call to `pull_irr` since the APIC is now in a software
        // disabled state.
        *irr = [0; 8];
        *needs_offload_reeval = false;
        *scan_irr = false;
        *tmr = [0; 8];
        *auto_eoi = [0; 8];
        *active_auto_eoi = false;
        // All LVTs reset to masked.
        for lvt in [lvt_timer, lvt_thermal, lvt_pmc, lvt_error]
            .into_iter()
            .chain(lvt_lint)
        {
            *lvt = Lvt::new().with_masked(true).into();
        }
        *timer_icr = 0;
        *timer_ccr = 0;
        *timer_dcr = 0;
        *next_timeout = None;
        self.update_slot();
    }
1775
1776    fn update_slot(&mut self) {
1777        // Cache the base address, since `base_address()` is called in the
1778        // instruction emulator hot path.
1779        self.base_address = self
1780            .xapic_enabled()
1781            .then(|| (ApicBase::from(self.apic_base).base_page() as u64) << 12);
1782
1783        let mut mutable = self.global.mutable.write();
1784        let mutable = &mut *mutable;
1785        let slot = &mut mutable.by_apic_id[self.id as usize];
1786        slot.lint = self.lvt_lint.map(Lvt::from);
1787        slot.logical_id = (self.ldr >> 24) as u8;
1788        slot.hardware_enabled = self.hardware_enabled();
1789        slot.software_enabled = self.software_enabled();
1790
1791        mutable.x2apic_enabled -= slot.x2apic_enabled as usize;
1792        let apic_base = ApicBase::from(self.apic_base);
1793        slot.x2apic_enabled = apic_base.enable() && apic_base.x2apic();
1794        mutable.x2apic_enabled += slot.x2apic_enabled as usize;
1795
1796        mutable.logical_cluster_mode -= slot.cluster_mode as usize;
1797        slot.cluster_mode = self.cluster_mode;
1798        mutable.logical_cluster_mode += slot.cluster_mode as usize;
1799    }
1800
1801    /// Returns the APIC register state.
1802    pub fn save(&mut self) -> virt::x86::vp::Apic {
1803        assert!(!self.is_offloaded, "failed to disable offload before save");
1804
1805        // Ensure any pending interrupt requests have been pulled into the local
1806        // state.
1807        self.pull_irr();
1808
1809        let registers = ApicRegisters {
1810            reserved_0: [0; 2],
1811            id: self.id_register(),
1812            version: self.version,
1813            reserved_4: [0; 4],
1814            tpr: 0, // TODO
1815            apr: 0,
1816            ppr: 0,
1817            eoi: 0,
1818            rrd: 0,
1819            ldr: self.ldr_register(),
1820            dfr: if self.x2apic_enabled() {
1821                0
1822            } else if self.cluster_mode {
1823                Dfr::CLUSTERED_MODE.0
1824            } else {
1825                Dfr::FLAT_MODE.0
1826            },
1827            svr: self.svr,
1828            isr: self.isr.to_bits(),
1829            tmr: self.tmr,
1830            irr: self.irr,
1831            esr: self.esr,
1832            reserved_29: [0; 6],
1833            lvt_cmci: 0,
1834            icr: [self.icr as u32, (self.icr >> 32) as u32],
1835            lvt_timer: self.lvt_timer,
1836            lvt_thermal: self.lvt_thermal,
1837            lvt_pmc: self.lvt_pmc,
1838            lvt_lint0: self.lvt_lint[0],
1839            lvt_lint1: self.lvt_lint[1],
1840            lvt_error: self.lvt_error,
1841            timer_icr: self.timer_icr,
1842            timer_ccr: 0,
1843            reserved_3a: [0; 4],
1844            timer_dcr: self.timer_dcr,
1845            reserved_3f: 0,
1846        };
1847        virt::x86::vp::Apic {
1848            apic_base: self.apic_base,
1849            registers: registers.into(),
1850            auto_eoi: self.auto_eoi,
1851        }
1852    }
1853
    /// Restores the APIC register state.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidApicBase`] if `state.apic_base` is rejected by
    /// `set_apic_base_inner`.
    pub fn restore(&mut self, state: &virt::x86::vp::Apic) -> Result<(), InvalidApicBase> {
        // Offload must be disabled so that the local state is authoritative.
        assert!(!self.is_offloaded);

        let virt::x86::vp::Apic {
            apic_base,
            registers,
            auto_eoi,
        } = state;

        // Restore the base MSR first: it determines hardware/x2apic
        // enablement, which gates how the remaining registers are decoded.
        self.set_apic_base_inner(*apic_base)?;

        // No register modifications allowed if the APIC is disabled.
        if !self.hardware_enabled() {
            return Ok(());
        }

        let ApicRegisters {
            reserved_0: _,
            id,
            version,
            reserved_4: _,
            tpr: _,
            apr: _,
            ppr: _,
            eoi: _,
            rrd: _,
            ldr,
            dfr,
            svr,
            isr,
            tmr,
            irr,
            esr,
            reserved_29: _,
            lvt_cmci: _,
            icr,
            lvt_timer,
            lvt_thermal,
            lvt_pmc,
            lvt_lint0,
            lvt_lint1,
            lvt_error,
            timer_icr,
            timer_ccr: _,
            reserved_3a: _,
            timer_dcr,
            reserved_3f: _,
        } = registers.into();

        // In xapic mode the 8-bit ID is stored in the top byte of the
        // register; in x2apic mode the full 32 bits are the ID.
        self.id = if self.x2apic_enabled() { id } else { id >> 24 };
        self.version = version;
        // In x2apic mode the LDR is derived from the APIC ID (see
        // `ldr_register`), so only restore it for xapic; only the top byte is
        // architecturally defined there.
        if !self.x2apic_enabled() {
            self.ldr = ldr & 0xff000000;
        }
        self.cluster_mode = cluster_mode(dfr);
        self.svr = svr;
        self.irr = irr;
        self.tmr = tmr;
        self.auto_eoi = *auto_eoi;
        // Refresh the cached highest-pending-vector after replacing `irr`.
        self.recompute_next_irr();
        self.isr.clear();
        self.isr.load_from_bits(isr);
        self.esr = esr;
        // Reassemble the 64-bit ICR from its two 32-bit halves.
        self.icr = icr[0] as u64 | ((icr[1] as u64) << 32);
        // Each LVT is normalized through `effective_lvt` rather than taken
        // verbatim (presumably to sanitize unsupported bits — see that
        // helper).
        self.lvt_timer = self.effective_lvt(lvt_timer);
        self.lvt_thermal = self.effective_lvt(lvt_thermal);
        self.lvt_pmc = self.effective_lvt(lvt_pmc);
        self.lvt_lint = [self.effective_lvt(lvt_lint0), self.effective_lvt(lvt_lint1)];
        self.lvt_error = self.effective_lvt(lvt_error);
        self.timer_icr = timer_icr;
        self.timer_dcr = timer_dcr;
        // Republish routing state and force the offload state to be
        // reevaluated against the restored IRR.
        self.update_slot();
        self.needs_offload_reeval = true;
        Ok(())
    }
1930
1931    fn recompute_next_irr(&mut self) {
1932        for (i, &v) in self.irr.iter().enumerate().rev() {
1933            if v != 0 {
1934                let vector = (i as u32) * 32 + (31 - v.leading_zeros());
1935                self.next_irr = Some(vector as u8);
1936                return;
1937            }
1938        }
1939        self.next_irr = None;
1940        self.active_auto_eoi = false;
1941    }
1942
    /// Read all the remote IRR bits into the local IRR array. Having two arrays
    /// like this ensures that we don't miss an interrupt if a second instance
    /// of one arrives while the first instance is being injected into the
    /// processor.
    ///
    /// Hypervisor backends that acknowledge interrupts before running the VP
    /// would never hit this condition, because the VP would not have a chance
    /// to run code to act upon the interrupt, and the two interrupts could be
    /// merged.
    ///
    /// But hypervisor backends that acknowledge interrupts only after running
    /// the VP for some time (such as those backed by AMD SNP) could hit this,
    /// since the VP will act on the interrupt, which might cause a device or
    /// another processor to generate a second interrupt before the first VP
    /// exits and acknowledges the first interrupt.
    fn pull_irr(&mut self) {
        // Walk the 8 x 32-bit chunks of local irr/tmr/auto_eoi in lockstep
        // with the corresponding shared (remotely-written) atomic chunks.
        for (
            ((((local_irr, local_tmr), local_auto_eoi), remote_irr), remote_tmr),
            remote_auto_eoi,
        ) in self
            .irr
            .iter_mut()
            .zip(&mut self.tmr)
            .zip(&mut self.auto_eoi)
            .zip(&self.shared.new_irr)
            .zip(&self.shared.tmr)
            .zip(&self.shared.auto_eoi)
        {
            // Read `irr` first with acquire ordering so that the TMR bit
            // associated with each requested interrupt is correct.
            //
            // The relaxed probe skips the swap in the common case where no
            // requests are pending in this chunk.
            if remote_irr.load(Ordering::Relaxed) == 0 {
                continue;
            }
            let irr = remote_irr.swap(0, Ordering::Acquire);
            let tmr = remote_tmr.load(Ordering::Relaxed);
            let auto_eoi = remote_auto_eoi.load(Ordering::Relaxed);
            // Only accept the requests while software enabled; otherwise they
            // are dropped (the swap above already cleared the shared bits).
            if Svr::from(self.svr).enable() {
                *local_irr |= irr;
                // For each newly requested vector, take the trigger-mode and
                // auto-EOI bits from the remote state, preserving the bits of
                // vectors that were not requested this time.
                *local_tmr &= !irr;
                *local_tmr |= tmr & irr;
                *local_auto_eoi &= !irr;
                *local_auto_eoi |= auto_eoi & irr;
                self.active_auto_eoi |= auto_eoi != 0;
                self.needs_offload_reeval = true;
            }
        }
        // `irr` may have changed; refresh the cached highest pending vector.
        self.recompute_next_irr();
        self.scan_irr = false;
    }
1992
1993    fn id_register(&self) -> u32 {
1994        if self.x2apic_enabled() {
1995            self.id
1996        } else {
1997            self.id << 24
1998        }
1999    }
2000
2001    fn ldr_register(&self) -> u32 {
2002        if self.x2apic_enabled() {
2003            X2ApicLogicalId::new()
2004                .with_cluster_id((self.id >> 4) as u16)
2005                .with_logical_id(1 << (self.id & 0xf))
2006                .into()
2007        } else {
2008            self.ldr
2009        }
2010    }
2011}