firmware_pcat/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! PCAT BIOS helper device.
5//!
6//! A bespoke virtual device that works in-tandem with the custom Hyper-V PCAT
7//! BIOS running within the guest.
8//!
9//! Provides interfaces to fetch various bits of VM machine topology and
10//! configuration, along with hooks into various VMM runtime services (e.g:
11//! event logging, efficient busy-waiting, generation ID, etc...).
12
13#![forbid(unsafe_code)]
14
15mod bios_boot_order;
16mod default_cmos_values;
17mod root_cpu_data;
18
19pub use default_cmos_values::default_cmos_values;
20
21use self::bios_boot_order::bios_boot_order;
22use chipset_device::ChipsetDevice;
23use chipset_device::io::IoError;
24use chipset_device::io::IoResult;
25use chipset_device::io::deferred::DeferredToken;
26use chipset_device::io::deferred::DeferredWrite;
27use chipset_device::io::deferred::defer_write;
28use chipset_device::mmio::MmioIntercept;
29use chipset_device::pio::ControlPortIoIntercept;
30use chipset_device::pio::PortIoIntercept;
31use chipset_device::pio::RegisterPortIoIntercept;
32use chipset_device::poll_device::PollDevice;
33use guestmem::GuestMemory;
34use guestmem::MapRom;
35use guestmem::UnmapRom;
36use inspect::Inspect;
37use inspect::InspectMut;
38use std::fmt::Debug;
39use std::ops::RangeInclusive;
40use std::task::Context;
41use std::time::Duration;
42use thiserror::Error;
43use vm_topology::memory::MemoryLayout;
44use vm_topology::processor::VpIndex;
45use vmcore::device_state::ChangeDeviceState;
46use vmcore::vmtime::VmTimeAccess;
47use vmcore::vmtime::VmTimeSource;
48use zerocopy::IntoBytes;
49
/// Static config info which gets queried by the PCAT BIOS.
///
/// Everything in this module is plain data handed to the device at
/// construction time; the device only ever reads it back out in response to
/// BIOS queries.
pub mod config {
    use guid::Guid;
    use inspect::Inspect;
    use vm_topology::memory::MemoryLayout;
    use vm_topology::processor::ProcessorTopology;
    use vm_topology::processor::x86::X86Topology;

    /// Subset of SMBIOS v2.4 CPU Information structure.
    ///
    /// When present in [`SmbiosConstants::cpu_info_bundle`], it is forwarded
    /// to the processor DMI table builder on a `PROCESSOR_DMTF_TABLE` write.
    #[derive(Debug, Inspect)]
    #[expect(missing_docs)] // self-explanatory fields
    pub struct SmbiosProcessorInfoBundle {
        pub processor_family: u8,
        pub voltage: u8,
        pub external_clock: u16,
        pub max_speed: u16,
        pub current_speed: u16,
    }

    /// A collection of SMBIOS constants that get reflected into the guest.
    ///
    /// There is a lot of info here, but empirically, it's not _super_ important
    /// to make these values 100% accurate...
    ///
    /// The serial number / asset tag / lock string byte strings are served to
    /// the BIOS four bytes per data-register read, padded with ASCII spaces
    /// once the string runs out.
    #[expect(missing_docs)] // self-explanatory fields
    #[derive(Debug, Inspect)]
    pub struct SmbiosConstants {
        pub bios_guid: Guid,
        #[inspect(with = "String::from_utf8_lossy")]
        pub system_serial_number: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub base_board_serial_number: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub chassis_serial_number: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub chassis_asset_tag: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub bios_lock_string: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub processor_manufacturer: Vec<u8>,
        #[inspect(with = "String::from_utf8_lossy")]
        pub processor_version: Vec<u8>,
        /// If set to `None`, default UNKNOWN values are used
        pub cpu_info_bundle: Option<SmbiosProcessorInfoBundle>,
    }

    /// A particular kind of boot device PCAT understands.
    #[derive(Debug, Clone, Copy, Inspect)]
    #[expect(missing_docs)] // self-explanatory variants
    pub enum BootDevice {
        Floppy = 0,
        Optical = 1,
        HardDrive = 2,
        Network = 3,
    }

    /// Determines if a boot device is connected or not.
    #[derive(Debug, Clone, Copy, Inspect)]
    pub struct BootDeviceStatus {
        /// Boot device
        pub kind: BootDevice,
        /// Whether it is physically attached to the system
        pub attached: bool,
    }

    /// PCAT device static configuration data.
    #[derive(Debug, Inspect)]
    pub struct PcatBiosConfig {
        /// The VM's processor topology. The device reports the VP count and
        /// each VP's APIC ID to the BIOS from this.
        pub processor_topology: ProcessorTopology<X86Topology>,
        /// The VM's memory layout.
        ///
        /// Device construction fails unless this contains exactly two MMIO
        /// holes.
        pub mem_layout: MemoryLayout,
        /// The SRAT ACPI table reflected into the guest
        pub srat: Vec<u8>,
        /// Initial [Generation Id](generation_id) value
        pub initial_generation_id: [u8; 16],
        /// Hibernation support (reported to the BIOS as the S4 sleep-state
        /// flag)
        pub hibernation_enabled: bool,
        /// Boot device order
        #[inspect(iter_by_index)]
        pub boot_order: [BootDeviceStatus; 4],
        /// If num-lock is enabled at boot
        pub num_lock_enabled: bool,
        /// Bundle of SMBIOS constants
        pub smbios: SmbiosConstants,
    }
}
136
/// PCAT event
///
/// Emitted through [`PcatLogger::log_event`] when the BIOS writes one of the
/// boot-reporting commands to the address register.
#[derive(Debug)]
pub enum PcatEvent {
    /// Failed to boot via any boot medium
    BootFailure,
    /// Attempted to boot (INT19) via BIOS
    BootAttempt,
}
145
/// Platform interface to emit PCAT events.
///
/// The device owns its logger as a `Box<dyn PcatLogger>`, hence the `Send`
/// bound and the `&self` receiver.
pub trait PcatLogger: Send {
    /// Emit a log corresponding to the provided event.
    fn log_event(&self, event: PcatEvent);
}
151
/// Volatile, guest-driven register state of the PCAT BIOS helper device.
///
/// Rebuilt from scratch (including fresh entropy) on device reset.
#[derive(Debug, Inspect)]
struct PcatBiosState {
    /// Last value written to the address register; selects which command the
    /// data register refers to.
    #[inspect(hex)]
    address: u32,
    /// Index used by multi-part data reads. Reset to zero whenever the
    /// address register is written; also overwritten by a data write to
    /// `PROCESSOR_STA_ENABLE`.
    #[inspect(hex)]
    read_count: u32,
    /// E820 entry index last written by the BIOS (low byte of the data
    /// written to `E820_ENTRY`).
    #[inspect(hex)]
    e820_entry: u8,
    /// Byte offset into the SRAT, as last written by the BIOS.
    #[inspect(hex)]
    srat_offset: u32,
    /// SRAT size as last written by the BIOS (not necessarily the real size).
    #[inspect(hex)]
    srat_size: u32,
    /// Value returned by reads of the POST status port (0x80).
    #[inspect(hex)]
    port80: u32,
    /// Random bytes copied into guest memory on the first `ENTROPY_TABLE`
    /// write. Hidden from inspect output.
    #[inspect(skip)]
    entropy: [u8; 64],
    /// Set once the entropy has been written to the guest, so it is only
    /// handed out once per boot.
    entropy_placed: bool,
}
170
171impl PcatBiosState {
172    fn new() -> Self {
173        let mut entropy = [0; 64];
174        getrandom::fill(&mut entropy).expect("rng failure");
175        Self {
176            address: 0,
177            read_count: 0,
178            e820_entry: 0,
179            srat_offset: 0,
180            srat_size: 0,
181            port80: 0,
182            entropy,
183            entropy_placed: false,
184        }
185    }
186}
187
/// PCAT device runtime dependencies.
///
/// Consumed by [`PcatBiosDevice::new`]. The borrowed members (`vmtime`,
/// `register_pio`) are only used during construction.
// NOTE: `expect(missing_docs)` needs at least one undocumented field to stay
// fulfilled, so the self-explanatory fields deliberately carry no `///` docs.
#[expect(missing_docs)] // self-explanatory fields
pub struct PcatBiosRuntimeDeps<'a> {
    pub gm: GuestMemory,
    pub logger: Box<dyn PcatLogger>,
    pub generation_id_deps: generation_id::GenerationIdRuntimeDeps,
    pub vmtime: &'a VmTimeSource,
    /// The BIOS ROM.
    ///
    /// If missing, then assume the ROM is already in memory.
    pub rom: Option<Box<dyn MapRom>>,
    pub register_pio: &'a mut dyn RegisterPortIoIntercept,
    /// Replays the initial MTRRs on all VPs.
    pub replay_mtrrs: Box<dyn Send + FnMut()>,
}
203
/// PCAT BIOS helper device.
///
/// Exposes an address/data register pair over port I/O through which the BIOS
/// queries configuration and invokes VMM services, and keeps the BIOS ROM
/// mappings alive for the lifetime of the device.
#[derive(InspectMut)]
pub struct PcatBiosDevice {
    // Fixed configuration
    config: config::PcatBiosConfig,

    // Runtime glue
    // Timer used to complete deferred "wait" commands.
    vmtime_wait: VmTimeAccess,
    // Guest memory, used to copy tables (SRAT, entropy, DMI) into the guest.
    gm: GuestMemory,
    #[inspect(skip)]
    logger: Box<dyn PcatLogger>,
    // ROM mapping handles returned by `map_rom`. Never read; held only so the
    // handles stay alive as long as the device does.
    #[inspect(skip)]
    _rom_mems: Vec<Box<dyn UnmapRom>>,
    // Stubbed legacy ports that are active until the BIOS reports a boot
    // attempt/failure.
    pre_boot_pio: PreBootStubbedPio,
    // Invoked on BOOT_FINALIZE to restore the initial MTRRs.
    #[inspect(skip)]
    replay_mtrrs: Box<dyn Send + FnMut()>,

    // Sub-emulators
    #[inspect(mut)]
    generation_id: generation_id::GenerationId,

    // Runtime book-keeping
    // The in-flight deferred write for a pending wait command, if any.
    #[inspect(skip)]
    deferred_wait: Option<DeferredWrite>,

    // Volatile state
    state: PcatBiosState,
}
232
// I/O port window claimed by the device. Begin and end range are inclusive.
const IO_PORT_RANGE_BEGIN: u16 = 0x28;
const IO_PORT_RANGE_END: u16 = 0x2f;
// NOTE(review): presumably offsets from IO_PORT_RANGE_BEGIN of the 32-bit
// address and data registers -- confirm against the port I/O handlers.
const IO_PORT_ADDR_OFFSET: u16 = 0x0;
const IO_PORT_DATA_OFFSET: u16 = 0x4;

// Reports BIOS POST status.
const POST_IO_PORT: u16 = 0x80;
241
/// Errors which may occur during PCAT BIOS helper device initialization.
///
/// Returned by [`PcatBiosDevice::new`]: the memory layout must have exactly
/// two MMIO holes, and a supplied ROM must be exactly 0x40000 bytes and
/// mappable.
#[derive(Debug, Error)]
#[expect(missing_docs)] // self-explanatory variants
pub enum PcatBiosDeviceInitError {
    #[error("expected exactly 2 mmio holes, found {0}")]
    IncorrectMmioHoles(usize),
    #[error("invalid ROM size {0:x} bytes, expected 256KB")]
    InvalidRomSize(u64),
    #[error("error mapping ROM")]
    Rom(#[source] std::io::Error),
}
253
254impl PcatBiosDevice {
255    /// Create a new instance of the PCAT BIOS helper device.
256    pub fn new(
257        runtime_deps: PcatBiosRuntimeDeps<'_>,
258        config: config::PcatBiosConfig,
259    ) -> Result<PcatBiosDevice, PcatBiosDeviceInitError> {
260        let PcatBiosRuntimeDeps {
261            gm,
262            logger,
263            generation_id_deps,
264            vmtime,
265            rom,
266            register_pio,
267            replay_mtrrs,
268        } = runtime_deps;
269
270        let initial_generation_id = config.initial_generation_id;
271
272        if config.mem_layout.mmio().len() != 2 {
273            return Err(PcatBiosDeviceInitError::IncorrectMmioHoles(
274                config.mem_layout.mmio().len(),
275            ));
276        }
277
278        let mut rom_mems = Vec::new();
279        if let Some(rom) = rom {
280            let rom_size = rom.len();
281            if rom_size != 0x40000 {
282                return Err(PcatBiosDeviceInitError::InvalidRomSize(rom_size));
283            }
284
285            // Map the ROM at both high and low memory.
286            for gpa in [0xfffc0000, 0xf0000] {
287                let rom_offset = (gpa + rom_size) & 0xfffff;
288                let len = rom_size - rom_offset;
289                let mem = rom
290                    .map_rom(gpa, rom_offset, len)
291                    .map_err(PcatBiosDeviceInitError::Rom)?;
292                rom_mems.push(mem);
293            }
294        }
295
296        Ok(PcatBiosDevice {
297            gm,
298            logger,
299            config,
300            state: PcatBiosState::new(),
301            generation_id: generation_id::GenerationId::new(
302                initial_generation_id,
303                generation_id_deps,
304            ),
305            vmtime_wait: vmtime.access("pcat-wait"),
306            deferred_wait: None,
307            _rom_mems: rom_mems,
308            pre_boot_pio: PreBootStubbedPio::new(register_pio),
309            replay_mtrrs,
310        })
311    }
312
313    fn index_using_read_count(&self, data: &[u8]) -> u32 {
314        let index = (self.state.read_count % 8) as usize * 4;
315        let mut buffer = [0u8; 4];
316        for i in 0..4_usize {
317            if index + i < data.len() {
318                buffer[i] = data[index + i];
319            } else {
320                buffer[i] = b' ';
321            }
322        }
323        u32::from_ne_bytes(buffer)
324    }
325
326    fn read_data(&mut self, addr: u32) -> u32 {
327        let mut buffer = [0u8; 4];
328        match PcatAddress(addr) {
329            PcatAddress::FIRST_MEMORY_BLOCK_SIZE => {
330                // Consumers: PCAT BIOS in source/bsp/OEM.ASM
331                //
332                // Report only the first memory block here as the BIOS really
333                // isn't structured to deal with gaps between memory blocks.
334                // This will bound where the BIOS puts things, including the
335                // ACPI tables, answers to INT 15 E820, etc.
336                self.config.mem_layout.ram()[0].range.len().to_kb()
337            }
338            PcatAddress::NUM_LOCK_ENABLED => self.config.num_lock_enabled as u32,
339            PcatAddress::BIOS_GUID => {
340                let index = (self.state.read_count % 4) as usize;
341                buffer.copy_from_slice(&self.config.smbios.bios_guid.as_bytes()[index * 4..][..4]);
342                u32::from_ne_bytes(buffer)
343            }
344            PcatAddress::BIOS_SYSTEM_SERIAL_NUMBER => {
345                self.index_using_read_count(self.config.smbios.system_serial_number.as_bytes())
346            }
347            PcatAddress::BIOS_BASE_SERIAL_NUMBER => {
348                self.index_using_read_count(self.config.smbios.base_board_serial_number.as_bytes())
349            }
350            PcatAddress::BIOS_CHASSIS_SERIAL_NUMBER => {
351                self.index_using_read_count(self.config.smbios.chassis_serial_number.as_bytes())
352            }
353            PcatAddress::BIOS_CHASSIS_ASSET_TAG => {
354                self.index_using_read_count(self.config.smbios.chassis_asset_tag.as_bytes())
355            }
356            PcatAddress::BOOT_DEVICE_ORDER => bios_boot_order(&self.config.boot_order),
357            PcatAddress::BIOS_PROCESSOR_COUNT => self.config.processor_topology.vp_count(),
358            PcatAddress::PROCESSOR_LOCAL_APIC_ID => {
359                if self.state.read_count < self.config.processor_topology.vp_count() {
360                    self.config
361                        .processor_topology
362                        .vp_arch(VpIndex::new(self.state.read_count))
363                        .apic_id
364                } else {
365                    !0
366                }
367            }
368            PcatAddress::SRAT_SIZE => self.config.srat.len() as u32,
369            PcatAddress::SRAT_DATA => {
370                let srat_chunk = (self.state.srat_offset + self.state.read_count * 4) as usize;
371                if let Some(data) = self.config.srat.get(srat_chunk..).and_then(|c| c.get(..4)) {
372                    u32::from_ne_bytes(data.try_into().unwrap())
373                } else {
374                    tracelimit::warn_ratelimited!(
375                        "invalid SRAT offset: {} + {} * 4 < {} - 4",
376                        self.state.srat_offset,
377                        self.state.read_count,
378                        self.config.srat.len()
379                    );
380                    0
381                }
382            }
383            PcatAddress::MEMORY_AMOUNT_ABOVE_4GB => {
384                // Consumers:
385                // - vmbios/source/bsp/em/smbios/Smbport.asm,
386                // - core/src/MEM.ASM.
387                self.config.mem_layout.ram_above_4gb().to_mb()
388            }
389            PcatAddress::SLEEP_STATES => {
390                // The AMI BIOS wants to read a byte value of flags to determine
391                // what sleep states (S1...S4) are supported. In the original
392                // AMI BIOS code, S4 was enabled as:
393                //
394                //              or      aml_buff.AMLDATA.dSx, 8
395                //
396                // Our data register is 4-bytes wide, we just fill in the low
397                // byte (al) here with the S4 flag if it should be set
398                if self.config.hibernation_enabled {
399                    8
400                } else {
401                    0
402                }
403            }
404            PcatAddress::PCI_IO_GAP_START => {
405                self.config.mem_layout.mmio()[0].start().try_into().unwrap()
406            }
407            PcatAddress::PROCESSOR_STA_ENABLE => {
408                // Read by the ACPI _STA (status) method in the Processor
409                // objects in the PCAT BIOS DSDT. Return zero (not active) for
410                // any processor whose index exceeds the current active
411                // processor count.
412                if self.state.read_count < self.config.processor_topology.vp_count() {
413                    1
414                } else {
415                    0
416                }
417            }
418            PcatAddress::BIOS_LOCK_STRING => {
419                self.index_using_read_count(self.config.smbios.bios_lock_string.as_bytes())
420            }
421            PcatAddress::MEMORY_ABOVE_HIGH_MMIO => {
422                // Consumers:
423                // - vmbios/source/bsp/em/smbios/Smbport.asm,
424                // - core/src/MEM.ASM.
425                self.config
426                    .mem_layout
427                    .ram_above_high_mmio()
428                    .expect("validated exactly 2 mmio ranges")
429                    .to_mb()
430            }
431            PcatAddress::HIGH_MMIO_GAP_BASE_IN_MB => {
432                // Consumers:
433                // - vmbios/source/bsp/em/smbios/Smbport.asm,
434                // - core/src/MEM.ASM.
435                self.config.mem_layout.mmio()[1].start().to_mb()
436            }
437            PcatAddress::HIGH_MMIO_GAP_LENGTH_IN_MB => {
438                // Consumers:
439                // - vmbios/source/bsp/em/smbios/Smbport.asm,
440                // - core/src/MEM.ASM.
441                //
442                // In a classic case of "two wrongs make a right", PCAT expects
443                // to get _one less_ than the true MMIO region length , as when
444                // this code was written in Hyper-V, the `end - start`
445                // calculation used an _inclusive_ `start..=end` range from the
446                // MMIO gaps API, which wasn't properly compensated for here.
447                self.config.mem_layout.mmio()[1].len().to_mb() - 1
448            }
449            PcatAddress::E820_ENTRY => handle_int15_e820_query(
450                &self.config.mem_layout,
451                self.state.e820_entry,
452                self.state.read_count,
453            ),
454            PcatAddress::INITIAL_MEGABYTES_BELOW_GAP => {
455                // Consumers: vmbios/source/bsp/em/smbios/smbios/Smbport.asm
456                self.config.mem_layout.ram_below_4gb().to_mb()
457            }
458            _ => {
459                tracelimit::warn_ratelimited!(?addr, "unknown bios read");
460                0xffffffff
461            }
462        }
463    }
464
465    fn write_data(
466        &mut self,
467        addr: u32,
468        data: u32,
469    ) -> Result<Option<DeferredToken>, guestmem::GuestMemoryError> {
470        match PcatAddress(addr) {
471            PcatAddress::BIOS_PROCESSOR_COUNT => {
472                // gets poked by the bios for some reason...
473            }
474            PcatAddress::SRAT_SIZE => {
475                if self.config.srat.len() > (data as usize) {
476                    tracelimit::warn_ratelimited!(
477                        data,
478                        len = self.config.srat.len(),
479                        "improper SRAT_SIZE write",
480                    );
481                }
482
483                self.state.srat_size = data;
484            }
485            PcatAddress::SRAT_OFFSET => {
486                if (data as usize) >= self.config.srat.len() || data >= self.state.srat_size {
487                    tracelimit::warn_ratelimited!(
488                        data,
489                        len = self.config.srat.len(),
490                        "improper SRAT_OFFSET write",
491                    );
492                }
493
494                self.state.srat_offset = data;
495            }
496            PcatAddress::SRAT_DATA => {
497                if data == 0 || data == 0xffffffff {
498                    tracelimit::warn_ratelimited!(data, "improper SRAT_DATA write");
499                }
500
501                self.gm.write_at(data as u64, &self.config.srat)?;
502            }
503            PcatAddress::BOOT_FINALIZE => {
504                // The BIOS trashes the originally set MTRRs. Reset them.
505                (self.replay_mtrrs)();
506            }
507            PcatAddress::ENTROPY_TABLE => {
508                if data == 0 || data == 0xffffffff {
509                    tracelimit::warn_ratelimited!(data, "improper ENTROPY_TABLE write");
510                }
511
512                if !self.state.entropy_placed {
513                    self.gm.write_plain(data as u64, &self.state.entropy)?;
514                    self.state.entropy_placed = true;
515                }
516            }
517            PcatAddress::PROCESSOR_DMTF_TABLE => {
518                if data == 0 || data == 0xffffffff {
519                    tracelimit::warn_ratelimited!(data, "improper PROCESSOR_DMTF_TABLE write");
520                }
521
522                let cpu_info_legacy = root_cpu_data::get_vp_dmi_info(
523                    self.config.smbios.cpu_info_bundle.as_ref(),
524                    &self.config.smbios.processor_manufacturer,
525                    &self.config.smbios.processor_version,
526                );
527
528                self.gm.write_plain(data as u64, &cpu_info_legacy)?;
529            }
530            PcatAddress::PROCESSOR_STA_ENABLE => {
531                // NOTE: doesn't make a whole lot of sense, but that's what our
532                // old impl did, so better safe than sorry...
533                self.state.read_count = data;
534            }
535            PcatAddress::WAIT_NANO100 => {
536                return Ok(Some(
537                    self.defer_wait(Duration::from_nanos(data as u64 * 100)),
538                ));
539            }
540            PcatAddress::GENERATION_ID_PTR_LOW => self.generation_id.write_generation_id_low(data),
541            PcatAddress::GENERATION_ID_PTR_HIGH => {
542                self.generation_id.write_generation_id_high(data)
543            }
544            PcatAddress::E820_ENTRY => {
545                self.state.e820_entry = data as u8;
546            }
547            _ => tracelimit::warn_ratelimited!(addr, data, "unknown bios write"),
548        }
549
550        Ok(None)
551    }
552
553    fn write_address(&mut self, addr: u32) -> Option<DeferredToken> {
554        // As a side effect of setting the address register, we also reset the
555        // data register read counter.
556        self.state.address = addr;
557        self.state.read_count = 0;
558
559        // Some commands do not write to the data register, only the address
560        // register (so as to save an additional VMEXIT).
561        match PcatAddress(addr) {
562            PcatAddress::WAIT1_MILLISECOND => {
563                return Some(self.defer_wait(Duration::from_millis(1)));
564            }
565            PcatAddress::WAIT10_MILLISECONDS => {
566                return Some(self.defer_wait(Duration::from_millis(10)));
567            }
568            PcatAddress::WAIT2_MILLISECOND => {
569                return Some(self.defer_wait(Duration::from_millis(2)));
570            }
571            PcatAddress::REPORT_BOOT_FAILURE => {
572                tracelimit::info_ratelimited!("pcat boot: failure");
573                self.stop_pre_boot_pio();
574                self.logger.log_event(PcatEvent::BootFailure)
575            }
576            PcatAddress::REPORT_BOOT_ATTEMPT => {
577                tracelimit::info_ratelimited!("pcat boot: attempt");
578                self.stop_pre_boot_pio();
579                self.logger.log_event(PcatEvent::BootAttempt)
580            }
581            _ => {}
582        }
583        None
584    }
585
586    fn defer_wait(&mut self, duration: Duration) -> DeferredToken {
587        tracing::trace!(?duration, "deferring wait request");
588        self.vmtime_wait
589            .set_timeout(self.vmtime_wait.now().wrapping_add(duration));
590        let (write, token) = defer_write();
591        self.deferred_wait = Some(write);
592        token
593    }
594
595    /// Unmap the pre-boot PIO stubs if they are active.
596    /// This should be called before booting into an OS, since
597    /// the BIOS should no longer try to access these ports.
598    fn stop_pre_boot_pio(&mut self) {
599        if self.pre_boot_pio.is_active() {
600            tracing::info!("disabling pre-boot legacy port-io stubs");
601            self.pre_boot_pio.unmap();
602        }
603    }
604}
605
open_enum::open_enum! {
    /// Command codes written to the address register.
    ///
    /// This is an open enum: values not listed here are still representable,
    /// and the device handles them by logging an "unknown" read/write.
    ///
    /// Must match constants in VMCONFIG.EQU
    enum PcatAddress: u32 {
        // Configuration queries, plus the SRAT / generation ID commands.
        FIRST_MEMORY_BLOCK_SIZE      = 0x00,
        NUM_LOCK_ENABLED             = 0x01,
        BIOS_GUID                    = 0x02,
        BIOS_SYSTEM_SERIAL_NUMBER    = 0x03,
        BIOS_BASE_SERIAL_NUMBER      = 0x04,
        BIOS_CHASSIS_SERIAL_NUMBER   = 0x05,
        BIOS_CHASSIS_ASSET_TAG       = 0x06,
        BOOT_DEVICE_ORDER            = 0x07,
        BIOS_PROCESSOR_COUNT         = 0x08,
        PROCESSOR_LOCAL_APIC_ID      = 0x09,
        SRAT_SIZE                    = 0x0A,
        SRAT_OFFSET                  = 0x0B,
        SRAT_DATA                    = 0x0C,
        MEMORY_AMOUNT_ABOVE_4GB      = 0x0D,
        GENERATION_ID_PTR_LOW        = 0x0E,
        GENERATION_ID_PTR_HIGH       = 0x0F,
        SLEEP_STATES                 = 0x10,

        // 0x11 is not defined here.
        PCI_IO_GAP_START             = 0x12,

        // 0x13..=0x15 are not defined here.
        PROCESSOR_STA_ENABLE         = 0x16,
        // Wait commands: WAIT_NANO100 takes its duration from the data
        // register; the fixed-length waits trigger on the address write alone.
        WAIT_NANO100                 = 0x17,
        WAIT1_MILLISECOND            = 0x18,
        WAIT10_MILLISECONDS          = 0x19,
        BOOT_FINALIZE                = 0x1A,
        WAIT2_MILLISECOND            = 0x1B,
        BIOS_LOCK_STRING             = 0x1C,
        PROCESSOR_DMTF_TABLE         = 0x1D,
        ENTROPY_TABLE                = 0x1E,
        MEMORY_ABOVE_HIGH_MMIO       = 0x1F,
        HIGH_MMIO_GAP_BASE_IN_MB     = 0x20,
        HIGH_MMIO_GAP_LENGTH_IN_MB   = 0x21,
        E820_ENTRY                   = 0x22,
        INITIAL_MEGABYTES_BELOW_GAP  = 0x23,

        // Boot reporting: triggered by the address write alone.
        REPORT_BOOT_FAILURE          = 0x3A,
        REPORT_BOOT_ATTEMPT          = 0x3B,
    }
}
648
649/// Handler for PCAT BIOS e820 Enlightenment
650///
651/// The following documentation is copied wholesale from the OS repo.
652///
653/// * * *
654///
655/// The guest OS will discover the parts of GPA space that are populated with
656/// usable RAM by using the INT 15 E820 interface. This interface returns one
657/// entry of the table per invocation, with an iterator value passed back and
658/// forth through EBX.
659///
660/// Our virtual AMI BIOS is constructed in a way that's difficult to change
661/// without odd side effects, as many things look at the E820 table entries
662/// internally, and it's not always clear which parts are switched on or off,
663/// making changes hard to validate.
664///
665/// Extending the AMI BIOS to understand an unbounded number of memory blocks,
666/// each with a small gap between them is more difficult than just calling out
/// to the worker process and handling it here. On the other hand, some
668/// parameters, such as the location of the Extended BIOS Data Area (EBDA) are
669/// really BIOS-internal things and moving them to the worker process would be
670/// fragile. So the algorithm here is that the BIOS responds to queries about
671/// everything involving the first memory block. The BIOS sets itself up within
672/// that. Any subsequent memory block is handled here within the worker process.
673///
674/// From the ACPI spec:
675///
676/// ```text
677/// Input:
678///
679///     Register    |   Parameter   |   Description
680///                 |               |
681///       EAX       | Function Code |   E820
682///                 |               |
683///       EBX       | Continuation  |   Contains the loop counter.
684///                 |               |
685///       ES:DI     | Buffer Ptr    |   Pointer to a buffer with the table entry.
686///                 |               |
687///       ECX       | Buffer Size   |   Size of passed in struct.
688///                 |               |
689///       EDX       | Signature     |   'SMAP'
690///
691/// Output:
692///
693///       EAX       | Signature     |   'SMAP'
694///                 |               |
695///       ES:DI     | Buffer Ptr    |   same as input
696///                 |               |
697///       ECX       | Size          |   20 bytes
698///                 |               |
699///       EBX       | Continuation  |   Value that the caller should use to get
700///                 |               |   the next entry.
/// ```
702///
703/// In order to avoid opening an aperture to the guest here, the BIOS takes
704/// register contents modified by this function and unpacks them into the
705/// caller's buffer.
706///
707/// The AMI BIOS will subtract the number of entries that it wants to handle
708/// internally from EBX before writing it to the BIOS port, so that this
709/// function will see indices starting with 0.
710///
711/// So we return to the guest using this port as a FIFO. Each successive read
712/// returns a different part of the data:
713///
714/// ```text
715///       0 (b:0)       | 1 == "entry exists"
716///       0 (b:1)       | 0 == "memory",      1 == "reserved"
717///       0 (b:2)       | 0 == "last entry",    1 == "there's more data"
718///       0 (31:3)      | Length in megabytes low (48:20)
719///       1             | Base Address Low
720///       2             | Base Address High
721/// ```
722fn handle_int15_e820_query(mem_layout: &MemoryLayout, e820_entry: u8, read_count: u32) -> u32 {
723    // The first memory range is the one that the BIOS itself knows about, and
724    // the one for which the BIOS will answer the guest OS's questions. This is
725    // done because the BIOS places various tables (EBDA, ACPI "reclaim", ACPI
726    // NVS, etc.) in this memory block, carving things out of it.
727    //
728    // The BIOS, on the other hand, has no idea, at least in the core BIOS code,
729    // that the other memory blocks exist. This is necessary because there can
730    // be a series of gaps between memory blocks that are hard to accommodate
731    // within the BIOS. For reporting things above the gaps, this function looks
732    // at the upper memory blocks.
733    let index = (e820_entry + 1) as usize;
734
735    // Special case: if there is only a single RAM range, no error should be
736    // logged + zero should be returned, indicating that there are no further
737    // RAM regions.
738    if e820_entry == 0 && mem_layout.ram().len() == 1 {
739        return 0;
740    }
741
742    let Some(ram) = mem_layout.ram().get(index) else {
743        tracelimit::warn_ratelimited!(?e820_entry, "unexpected e820 entry");
744        return 0;
745    };
746
747    match read_count {
748        0 => {
749            let mut data = 1; // entry exists
750            data |= if index + 1 != mem_layout.ram().len() {
751                0b100 // more data
752            } else {
753                0 // last entry
754            };
755            data |= ram.range.len().to_mb() << 3; // clamp reported RAM to the nearest megabyte
756            data
757        }
758        1 => ram.range.start() as u32,
759        2 => (ram.range.start() >> 32) as u32,
760        _ => {
761            tracelimit::warn_ratelimited!(?read_count, "invalid E820 read count");
762            0
763        }
764    }
765}
766
// Device lifecycle hooks. Starting and stopping need no work here; only reset
// has anything to do.
impl ChangeDeviceState for PcatBiosDevice {
    fn start(&mut self) {}

    async fn stop(&mut self) {}

    // Resets the generation ID sub-emulator and replaces the volatile register
    // state with a fresh one (which also draws new entropy). The static
    // configuration and the other runtime members are left untouched.
    async fn reset(&mut self) {
        self.generation_id.reset();
        self.state = PcatBiosState::new();
    }
}
777
// Advertises the chipset capabilities this device implements: port I/O,
// MMIO (for the ROM regions), and polling (for timers / generation ID).
impl ChipsetDevice for PcatBiosDevice {
    fn supports_pio(&mut self) -> Option<&mut dyn PortIoIntercept> {
        Some(self)
    }

    fn supports_mmio(&mut self) -> Option<&mut dyn MmioIntercept> {
        Some(self)
    }

    fn supports_poll_device(&mut self) -> Option<&mut dyn PollDevice> {
        Some(self)
    }
}
791
792impl PollDevice for PcatBiosDevice {
793    fn poll_device(&mut self, cx: &mut Context<'_>) {
794        self.generation_id.poll(cx);
795        while self.vmtime_wait.poll_timeout(cx).is_ready() {
796            if let Some(deferred) = self.deferred_wait.take() {
797                tracing::trace!("releasing deferred wait");
798                deferred.complete();
799            }
800        }
801    }
802}
803
804impl MmioIntercept for PcatBiosDevice {
805    fn mmio_read(&mut self, _addr: u64, _data: &mut [u8]) -> IoResult {
806        tracelimit::error_ratelimited!("firmware should be mapped, should not be visible as MMIO");
807        IoResult::Ok
808    }
809
810    fn mmio_write(&mut self, addr: u64, _data: &[u8]) -> IoResult {
811        match addr {
812            0xf5bea | 0xf5bfa => {
813                // There is a bug in the firmware's throttle_getchar_FAR
814                // enlightenment: it expects to write to a value in the ROM
815                // segment, but this is not writable after POST. Just ignore
816                // this, it means that getchar is not actually throttled after
817                // POST (e.g. in DOS).
818            }
819            _ => tracelimit::warn_ratelimited!(addr, "unexpected firmware write"),
820        }
821        IoResult::Ok
822    }
823
824    fn get_static_regions(&mut self) -> &[(&str, RangeInclusive<u64>)] {
825        &[
826            ("rom-low", 0xf0000..=0xfffff),
827            ("rom-high", 0xfffc_0000..=0xffff_ffff),
828        ]
829    }
830}
831
832impl PortIoIntercept for PcatBiosDevice {
833    fn io_read(&mut self, io_port: u16, data: &mut [u8]) -> IoResult {
834        if io_port == POST_IO_PORT {
835            data.copy_from_slice(&self.state.port80.to_ne_bytes()[..data.len()]);
836            return IoResult::Ok;
837        }
838
839        if self.pre_boot_pio.contains_port(io_port) {
840            tracing::trace!(?io_port, "stubbed pre-boot pio read");
841            data.fill(!0);
842            return IoResult::Ok;
843        }
844
845        // Some OSes probe for an 8-bit superio device at this location,
846        // silence the logs generated by this.
847        if io_port == 0x2f && data.len() == 1 {
848            tracing::trace!(?io_port, "stubbed superio pio read");
849            data.fill(!0);
850            return IoResult::Ok;
851        }
852
853        if data.len() != 4 {
854            return IoResult::Err(IoError::InvalidAccessSize);
855        }
856
857        let offset = io_port - IO_PORT_RANGE_BEGIN;
858        let v = match offset {
859            IO_PORT_ADDR_OFFSET => self.state.address,
860            IO_PORT_DATA_OFFSET => self.read_data(self.state.address),
861            _ => return IoResult::Err(IoError::InvalidRegister),
862        };
863        data.copy_from_slice(&v.to_ne_bytes());
864
865        tracing::trace!(
866            offset,
867            address = self.state.address,
868            read_count = self.state.read_count,
869            value = v,
870            "bios read",
871        );
872
873        if offset == IO_PORT_DATA_OFFSET {
874            self.state.read_count += 1;
875        }
876
877        IoResult::Ok
878    }
879
880    fn io_write(&mut self, io_port: u16, data: &[u8]) -> IoResult {
881        if io_port == POST_IO_PORT {
882            let mut v = [0; 4];
883            v[..data.len()].copy_from_slice(data);
884            let data = u32::from_ne_bytes(v);
885
886            tracing::debug!(data, "pcat boot: checkpoint");
887
888            // magic number specific to PCAT BIOS
889            const AT_END_POST_CHECKPOINT: u32 = 0x50ac;
890            if data == AT_END_POST_CHECKPOINT {
891                self.stop_pre_boot_pio();
892            }
893
894            // Store the port 80 data. Consider keeping a ring of
895            // these for inspect in the future.
896            self.state.port80 = data;
897            return IoResult::Ok;
898        }
899
900        if self.pre_boot_pio.contains_port(io_port) {
901            tracing::trace!(?io_port, ?data, "stubbed pre-boot pio write");
902            return IoResult::Ok;
903        }
904
905        // Some OSes probe for an 8-bit superio device at this location,
906        // silence the logs generated by this.
907        if io_port == 0x2e && data.len() == 1 {
908            tracing::trace!(?io_port, ?data, "stubbed superio pio write");
909            return IoResult::Ok;
910        }
911
912        if data.len() != 4 {
913            return IoResult::Err(IoError::InvalidAccessSize);
914        }
915
916        let offset = io_port - IO_PORT_RANGE_BEGIN;
917        let v = u32::from_ne_bytes(data.try_into().unwrap());
918        let r = match offset {
919            IO_PORT_ADDR_OFFSET => Ok(self.write_address(v)),
920            IO_PORT_DATA_OFFSET => self.write_data(self.state.address, v),
921            _ => return IoResult::Err(IoError::InvalidRegister),
922        };
923
924        match r {
925            Ok(Some(token)) => return IoResult::Defer(token),
926            Ok(None) => {}
927            Err(err) => {
928                tracelimit::warn_ratelimited!(
929                    error = &err as &dyn std::error::Error,
930                    "bios command error"
931                );
932            }
933        }
934
935        tracing::trace!(
936            offset,
937            address = self.state.address,
938            read_count = self.state.read_count,
939            data = v,
940            "bios write",
941        );
942
943        IoResult::Ok
944    }
945
946    fn get_static_regions(&mut self) -> &[(&str, RangeInclusive<u16>)] {
947        &[
948            ("pcat_bios", IO_PORT_RANGE_BEGIN..=IO_PORT_RANGE_END),
949            // NOTE: POST port 0x80 might overlap with a an ISA DMA page register.
950            ("post", POST_IO_PORT..=POST_IO_PORT),
951        ]
952    }
953}
954
/// Helper trait to convert a byte count into coarser units.
trait ConvertBytes {
    /// Convert from bytes to whole megabytes, rounding down.
    ///
    /// Panics if the result does not fit in a `u32`.
    fn to_mb(self) -> u32;
    /// Convert from bytes to whole kilobytes, rounding down.
    ///
    /// Panics if the result does not fit in a `u32`.
    fn to_kb(self) -> u32;
}

impl ConvertBytes for u64 {
    fn to_mb(self) -> u32 {
        const BYTES_PER_MB: u64 = 1 << 20;
        u32::try_from(self / BYTES_PER_MB).unwrap()
    }

    fn to_kb(self) -> u32 {
        const BYTES_PER_KB: u64 = 1 << 10;
        u32::try_from(self / BYTES_PER_KB).unwrap()
    }
}
972
/// Encapsulates ownership over various legacy port io locations that the PCAT
/// BIOS attempts to access during init.
///
/// We don't implement any of the devices backing these ports, so in order to
/// cut down on the large amount of "unknown device" logging, we claim these
/// ports for the PCAT BIOS helper device during pre-boot, and then release
/// ownership post-boot.
#[derive(Inspect)]
struct PreBootStubbedPio {
    // One control handle per stubbed port range. The list is drained when
    // the ranges are unmapped, so an empty list means the stubs are no
    // longer active.
    #[inspect(iter_by_index)]
    ranges: Vec<Box<dyn ControlPortIoIntercept>>,
}
985
986impl PreBootStubbedPio {
987    const LEN_PORT: &'static [(u16, u16)] = &[
988        // ISA PnP
989        (1, 0x279), // index
990        (1, 0xa79), // write data port
991        (1, 0x20b), // initial value for read data port
992        (1, 0x20f), // ...which PCAT will increment by 4
993        (1, 0x213),
994        (1, 0x217),
995        (1, 0x21b),
996        (1, 0x21f),
997        (1, 0x223),
998        (1, 0x227), // ...until it gives up (after 8x tries)
999        // something to do with archaic dual VGA init?
1000        (2, 0x102),
1001        (2, 0x46e8),
1002        // something to do with piix4 "routing ports"?
1003        (1, 0xeb),
1004        // (1, 0xed), // gets claimed as part of the 0xED IO port delay device
1005        (1, 0xee),
1006        // no idea ¯\_(ツ)_/¯
1007        (1, 0x6f0),
1008    ];
1009
1010    fn new(register_pio: &mut dyn RegisterPortIoIntercept) -> PreBootStubbedPio {
1011        let mut ranges = Vec::new();
1012        for &(len, port) in Self::LEN_PORT {
1013            let mut control = register_pio.new_io_region("legacy-port-stub", len);
1014            control.map(port);
1015            ranges.push(control)
1016        }
1017        PreBootStubbedPio { ranges }
1018    }
1019
1020    fn is_active(&self) -> bool {
1021        !self.ranges.is_empty()
1022    }
1023
1024    fn unmap(&mut self) {
1025        for mut range in self.ranges.drain(..) {
1026            range.unmap()
1027        }
1028    }
1029
1030    fn contains_port(&self, port: u16) -> bool {
1031        if !self.is_active() {
1032            return false;
1033        }
1034
1035        Self::LEN_PORT
1036            .iter()
1037            .any(|&(len, p)| (p..p + len).contains(&port))
1038    }
1039}
1040
1041mod save_restore {
1042    use super::*;
1043    use vmcore::save_restore::RestoreError;
1044    use vmcore::save_restore::SaveError;
1045    use vmcore::save_restore::SaveRestore;
1046
1047    mod state {
1048        use generation_id::GenerationId;
1049        use mesh::payload::Protobuf;
1050        use vmcore::save_restore::SaveRestore;
1051        use vmcore::save_restore::SavedStateRoot;
1052
1053        #[derive(Protobuf, SavedStateRoot)]
1054        #[mesh(package = "firmware.pcat")]
1055        pub struct SavedState {
1056            #[mesh(1)]
1057            pub address: u32,
1058            #[mesh(2)]
1059            pub read_count: u32,
1060            #[mesh(3)]
1061            pub e820_entry: u8,
1062            #[mesh(4)]
1063            pub srat_offset: u32,
1064            #[mesh(5)]
1065            pub srat_size: u32,
1066            #[mesh(6)]
1067            pub port80: u32,
1068            #[mesh(7)]
1069            pub entropy: [u8; 64],
1070            #[mesh(8)]
1071            pub entropy_placed: bool,
1072
1073            #[mesh(9)]
1074            pub genid: <GenerationId as SaveRestore>::SavedState,
1075        }
1076    }
1077
1078    impl SaveRestore for PcatBiosDevice {
1079        type SavedState = state::SavedState;
1080
1081        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
1082            let PcatBiosState {
1083                address,
1084                read_count,
1085                e820_entry,
1086                srat_offset,
1087                srat_size,
1088                port80,
1089                entropy,
1090                entropy_placed,
1091            } = self.state;
1092
1093            let saved_state = state::SavedState {
1094                address,
1095                read_count,
1096                e820_entry,
1097                srat_offset,
1098                srat_size,
1099                port80,
1100                entropy,
1101                entropy_placed,
1102                genid: self.generation_id.save()?,
1103            };
1104
1105            // sanity check that there aren't any outstanding deferred IOs
1106            assert!(self.deferred_wait.is_none());
1107
1108            Ok(saved_state)
1109        }
1110
1111        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
1112            let state::SavedState {
1113                address,
1114                read_count,
1115                e820_entry,
1116                srat_offset,
1117                srat_size,
1118                port80,
1119                entropy,
1120                entropy_placed,
1121                genid,
1122            } = state;
1123
1124            self.state = PcatBiosState {
1125                address,
1126                read_count,
1127                e820_entry,
1128                srat_offset,
1129                srat_size,
1130                port80,
1131                entropy,
1132                entropy_placed,
1133            };
1134
1135            self.generation_id.restore(genid)?;
1136
1137            Ok(())
1138        }
1139    }
1140}