Skip to main content

loader/
common.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Common helper routines for all loaders.
5
6use crate::importer::BootPageAcceptance;
7use crate::importer::GuestArch;
8use crate::importer::ImageLoad;
9use crate::importer::SegmentRegister;
10use crate::importer::TableRegister;
11use crate::importer::X86Register;
12use hvdef::HV_PAGE_SIZE;
13use memory_range::MemoryRange;
14use std::io::Read;
15use std::io::Seek;
16use thiserror::Error;
17use vm_topology::memory::MemoryLayout;
18use x86defs::GdtEntry;
19use x86defs::X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES;
20use x86defs::X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES;
21use zerocopy::FromZeros;
22use zerocopy::IntoBytes;
23
/// Trait alias for `Read + Seek`.
///
/// Rust trait objects cannot name two non-auto traits at once, so this
/// single trait lets callers in this module take `&mut dyn ReadSeek`.
pub trait ReadSeek: Read + Seek {}
// Blanket impl: every type that is both `Read` and `Seek` is a `ReadSeek`.
impl<T: Read + Seek> ReadSeek for T {}
27
// Number of descriptor slots in the table built by `import_default_gdt`.
const DEFAULT_GDT_COUNT: usize = 4;
/// The size of the default GDT table, in bytes.
///
/// This is exactly one page (`HV_PAGE_SIZE`).
pub const DEFAULT_GDT_SIZE: u64 = HV_PAGE_SIZE;
31
32/// Import a default GDT at the given address, with one page imported.
33/// The GDT is used with cs as entry 1, and data segments (ds, es, fs, gs, ss) as entry 2.
34/// Registers using the GDT are imported with vtl 0 only.
35pub fn import_default_gdt(
36    importer: &mut dyn ImageLoad<X86Register>,
37    gdt_page_base: u64,
38) -> anyhow::Result<()> {
39    // Create a default GDT consisting of two entries.
40    // ds, es, fs, gs, ss are entry 2 (linear_selector)
41    // cs is entry 1 (linear_code64_selector)
42    let default_data_attributes: u16 = X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES.into();
43    let default_code_attributes: u16 = X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES.into();
44    let gdt: [GdtEntry; DEFAULT_GDT_COUNT] = [
45        GdtEntry::new_zeroed(),
46        GdtEntry {
47            limit_low: 0xffff,
48            attr_low: default_code_attributes as u8,
49            attr_high: (default_code_attributes >> 8) as u8,
50            ..GdtEntry::new_zeroed()
51        },
52        GdtEntry {
53            limit_low: 0xffff,
54            attr_low: default_data_attributes as u8,
55            attr_high: (default_data_attributes >> 8) as u8,
56            ..GdtEntry::new_zeroed()
57        },
58        GdtEntry::new_zeroed(),
59    ];
60    let gdt_entry_size = size_of::<GdtEntry>();
61    let linear_selector_offset = 2 * gdt_entry_size;
62    let linear_code64_selector_offset = gdt_entry_size;
63
64    // Import the GDT into the specified base page.
65    importer.import_pages(
66        gdt_page_base,
67        DEFAULT_GDT_SIZE / HV_PAGE_SIZE,
68        "default-gdt",
69        BootPageAcceptance::Exclusive,
70        gdt.as_bytes(),
71    )?;
72
73    // Import GDTR and selectors.
74    let mut import_reg = |register| importer.import_vp_register(register);
75    import_reg(X86Register::Gdtr(TableRegister {
76        base: gdt_page_base * HV_PAGE_SIZE,
77        limit: (size_of::<GdtEntry>() * DEFAULT_GDT_COUNT - 1) as u16,
78    }))?;
79
80    let ds = SegmentRegister {
81        selector: linear_selector_offset as u16,
82        base: 0,
83        limit: 0xffffffff,
84        attributes: default_data_attributes,
85    };
86    import_reg(X86Register::Ds(ds))?;
87    import_reg(X86Register::Es(ds))?;
88    import_reg(X86Register::Fs(ds))?;
89    import_reg(X86Register::Gs(ds))?;
90    import_reg(X86Register::Ss(ds))?;
91
92    let cs = SegmentRegister {
93        selector: linear_code64_selector_offset as u16,
94        base: 0,
95        limit: 0xffffffff,
96        attributes: default_code_attributes,
97    };
98    import_reg(X86Register::Cs(cs))?;
99
100    Ok(())
101}
102
103/// Computes the x86 variable MTRRs that describe the given memory layout. This
104/// is intended to be used to setup MTRRs for booting a guest with two mmio
105/// gaps, such as booting Linux, UEFI, or PCAT.
106pub fn compute_variable_mtrrs(
107    memory: &MemoryLayout,
108    physical_address_width: u8,
109    chipset_low_mmio: MemoryRange,
110    chipset_high_mmio: MemoryRange,
111) -> Vec<X86Register> {
112    const WRITEBACK: u64 = 0x6;
113
114    // Clamp the width to something reasonable.
115    let gpa_space_size = physical_address_width.clamp(36, 52);
116
117    // The MMIO limits will be the basis of the MTRR calculations
118    // as page count doesn't work when there may be gaps between memory blocks.
119
120    let mut result = Vec::with_capacity(8);
121
122    // Our PCAT firmware sets MTRR 200 and MTRR Mask 201 to 128 MB during boot, so we
123    // mimic that here.
124    let pcat_mtrr_size = 128 * 1024 * 1024;
125
126    result.push(X86Register::MtrrPhysBase0(WRITEBACK));
127    result.push(X86Register::MtrrPhysMask0(mtrr_mask(
128        gpa_space_size,
129        pcat_mtrr_size - 1,
130    )));
131
132    // If there is more than 128 MB, use MTRR 202 and MTRR Mask 203 to cover the
133    // amount of memory below the 3.8GB memory gap.
134    if memory.end_of_ram() > pcat_mtrr_size {
135        result.push(X86Register::MtrrPhysBase1(pcat_mtrr_size | WRITEBACK));
136        result.push(X86Register::MtrrPhysMask1(mtrr_mask(
137            gpa_space_size,
138            chipset_low_mmio.start() - 1,
139        )));
140    }
141
142    // If there is more than ~3.8GB of memory, use MTRR 204 and MTRR Mask 205 to cover
143    // the amount of memory above 4GB.
144    if memory.end_of_ram() > chipset_low_mmio.end() {
145        result.push(X86Register::MtrrPhysBase2(
146            chipset_low_mmio.end() | WRITEBACK,
147        ));
148        if chipset_high_mmio.is_empty() {
149            // No high MMIO gap — RAM above the low gap is contiguous.
150            // Only cover up to the end of RAM so that regions above RAM
151            // (e.g. PCIe MMIO64 windows) remain UC by default.
152            // Use the same 8TB split as the high-MMIO path to work around
153            // a bug in older Linux kernels (e.g. RHEL 6.x).
154            result.push(X86Register::MtrrPhysMask2(mtrr_mask(
155                gpa_space_size,
156                (1 << std::cmp::min(gpa_space_size, 43)).min(memory.end_of_ram()) - 1,
157            )));
158            if memory.end_of_ram() > (1 << 43) {
159                result.push(X86Register::MtrrPhysBase3((1 << 43) | WRITEBACK));
160                result.push(X86Register::MtrrPhysMask3(mtrr_mask(
161                    gpa_space_size,
162                    memory.end_of_ram() - 1,
163                )));
164            }
165        } else {
166            result.push(X86Register::MtrrPhysMask2(mtrr_mask(
167                gpa_space_size,
168                chipset_high_mmio.start() - 1,
169            )));
170        }
171    }
172
173    // If there is more memory than 64GB then use MTRR 206 and MTRR Mask 207 and possibly
174    // MTRR 208 and MTRR Mask 209 depending on maximum address width. Both MTRR pairs are
175    // used with the magic 8TB boundary to work around a bug in older Linux kernels
176    // (e.g. RHEL 6.x, etc.)
177    if !chipset_high_mmio.is_empty() && memory.end_of_ram() > chipset_high_mmio.end() {
178        result.push(X86Register::MtrrPhysBase3(
179            chipset_high_mmio.end() | WRITEBACK,
180        ));
181        result.push(X86Register::MtrrPhysMask3(mtrr_mask(
182            gpa_space_size,
183            (1 << std::cmp::min(gpa_space_size, 43)) - 1,
184        )));
185        if gpa_space_size > 43 {
186            result.push(X86Register::MtrrPhysBase4((1 << 43) | WRITEBACK));
187            result.push(X86Register::MtrrPhysMask4(mtrr_mask(
188                gpa_space_size,
189                (1 << gpa_space_size) - 1,
190            )));
191        }
192    }
193
194    result
195}
196
/// Builds an MTRR mask register value for a range ending at
/// `maximum_address`.
///
/// Bit 11 is always set. Every bit from 12 up to (but excluding)
/// `gpa_space_size` starts out set; then, walking upward from bit 12, each
/// bit whose value is less than or equal to `maximum_address` is cleared,
/// stopping at the first bit whose value exceeds it.
fn mtrr_mask(gpa_space_size: u8, maximum_address: u64) -> u64 {
    const ENABLED: u64 = 1 << 11;

    // Start with the enable bit plus every address bit in 12..gpa_space_size.
    let all_address_bits = (12..gpa_space_size).fold(ENABLED, |mask, bit| mask | (1u64 << bit));

    // Clear the low run of bits that `maximum_address` reaches.
    (12..gpa_space_size)
        .take_while(|&bit| maximum_address >= (1u64 << bit))
        .fold(all_address_bits, |mask, bit| mask & !(1u64 << bit))
}
222
/// Error returned by [`ChunkBuf::import_file_region`].
#[derive(Debug, Error)]
pub enum ImportFileRegionError {
    /// The file length exceeds the memory length.
    ///
    /// Returned before any I/O when the requested `file_length` is greater
    /// than `memory_length`.
    #[error("file length {file_length} exceeds memory length {memory_length}")]
    FileLengthExceedsMemoryLength {
        /// The file length.
        file_length: u64,
        /// The memory length.
        memory_length: u64,
    },
    /// Failed to seek the file to the requested file offset.
    #[error("failed to seek file")]
    Seek(#[source] std::io::Error),
    /// Failed to read the file; this includes the file ending before the
    /// requested number of bytes could be read.
    #[error("failed to read file")]
    Read(#[source] std::io::Error),
    /// Failed to import pages; wraps the error returned by the importer.
    #[error("failed to import pages")]
    ImportPages(#[source] anyhow::Error),
    /// Address computation overflowed while computing the number of pages
    /// spanned by the region.
    #[error("address computation overflowed")]
    Overflow,
}
247
/// Parameters for [`ChunkBuf::import_file_region`].
///
/// `F` may be unsized so that a trait object can be passed as the file.
pub struct ImportFileRegion<'a, F: ?Sized> {
    /// The file to read from.
    pub file: &'a mut F,
    /// The offset within the file to start reading, in bytes from the start
    /// of the file.
    pub file_offset: u64,
    /// The number of bytes to read from the file. Must not exceed
    /// `memory_length`.
    pub file_length: u64,
    /// The guest physical address to import into. It does not have to be
    /// page-aligned.
    pub gpa: u64,
    /// The total memory region length (file data + zero fill). A value of
    /// zero makes the import a no-op.
    pub memory_length: u64,
    /// The page acceptance type.
    pub acceptance: BootPageAcceptance,
    /// A debug tag for tracing.
    pub tag: &'a str,
}
265
266/// A page-aligned chunk buffer for streaming file data into guest memory.
267///
268/// The buffer is guaranteed to hold at least one page (`HV_PAGE_SIZE`) and its
269/// length is always a whole number of pages. Reuse the same `ChunkBuf` across
270/// multiple imports to avoid repeated allocations.
271pub struct ChunkBuf(Vec<u8>);
272
273impl ChunkBuf {
274    /// Default chunk size (64 KiB).
275    const DEFAULT_SIZE: usize = 64 * 1024;
276
277    /// Create a new chunk buffer with the default size.
278    pub fn new() -> Self {
279        Self::with_size(Self::DEFAULT_SIZE)
280    }
281
282    /// Create a new chunk buffer with the given byte size, rounded down to a
283    /// whole number of pages.
284    ///
285    /// Panics if `size` is less than `HV_PAGE_SIZE`.
286    pub fn with_size(size: usize) -> Self {
287        let page_count = size as u64 / HV_PAGE_SIZE;
288        assert!(page_count > 0, "ChunkBuf must be at least one page");
289        Self(vec![0u8; (page_count * HV_PAGE_SIZE) as usize])
290    }
291
292    /// Import a region from a file into guest memory.
293    ///
294    /// Reads `file_length` bytes from `file` at `file_offset`, importing them
295    /// at guest physical address `gpa`. If `gpa` is not page-aligned, the
296    /// leading bytes of that page are zeroed. If `memory_length` exceeds
297    /// `file_length`, the remaining bytes are zeroed. Zeroing extends to the
298    /// end of the last target page.
299    pub fn import_file_region<F, R: GuestArch>(
300        &mut self,
301        importer: &mut dyn ImageLoad<R>,
302        params: ImportFileRegion<'_, F>,
303    ) -> Result<(), ImportFileRegionError>
304    where
305        F: ReadSeek + ?Sized,
306    {
307        let ImportFileRegion {
308            file,
309            file_offset,
310            file_length,
311            gpa,
312            memory_length,
313            acceptance,
314            tag,
315        } = params;
316
317        if file_length > memory_length {
318            return Err(ImportFileRegionError::FileLengthExceedsMemoryLength {
319                file_length,
320                memory_length,
321            });
322        }
323
324        if memory_length == 0 {
325            return Ok(());
326        }
327
328        let buf = &mut self.0[..];
329        let buf_pages = buf.len() as u64 / HV_PAGE_SIZE;
330
331        let page_mask = HV_PAGE_SIZE - 1;
332        let leading_zero = gpa & page_mask;
333        let page_base = gpa / HV_PAGE_SIZE;
334        let total_page_count = leading_zero
335            .checked_add(memory_length)
336            .and_then(|v| v.checked_add(page_mask))
337            .ok_or(ImportFileRegionError::Overflow)?
338            / HV_PAGE_SIZE;
339
340        file.seek(std::io::SeekFrom::Start(file_offset))
341            .map_err(ImportFileRegionError::Seek)?;
342
343        let mut pages_done: u64 = 0;
344        let mut file_remaining = file_length;
345
346        while file_remaining > 0 {
347            let chunk_pages = (total_page_count - pages_done).min(buf_pages);
348            let chunk_bytes = (chunk_pages * HV_PAGE_SIZE) as usize;
349            let chunk_buf = &mut buf[..chunk_bytes];
350
351            let data_start = if pages_done == 0 {
352                leading_zero as usize
353            } else {
354                0
355            };
356            let data_len = file_remaining.min((chunk_bytes - data_start) as u64) as usize;
357
358            // Zero leading padding on the first chunk.
359            chunk_buf[..data_start].fill(0);
360
361            // Read file data.
362            file.read_exact(&mut chunk_buf[data_start..data_start + data_len])
363                .map_err(ImportFileRegionError::Read)?;
364
365            file_remaining -= data_len as u64;
366
367            // On the last chunk with file data, extend page_count to cover all
368            // remaining pages. import_pages will zero beyond the data.
369            let import_page_count = if file_remaining == 0 {
370                total_page_count - pages_done
371            } else {
372                chunk_pages
373            };
374
375            importer
376                .import_pages(
377                    page_base + pages_done,
378                    import_page_count,
379                    tag,
380                    acceptance,
381                    &chunk_buf[..data_start + data_len],
382                )
383                .map_err(ImportFileRegionError::ImportPages)?;
384
385            pages_done += import_page_count;
386        }
387
388        // No file data at all — just import zero pages.
389        if file_length == 0 {
390            importer
391                .import_pages(page_base, total_page_count, tag, acceptance, &[])
392                .map_err(ImportFileRegionError::ImportPages)?;
393        }
394
395        Ok(())
396    }
397
398    /// Read a file in chunks and compute its CRC32, rewinding it afterward.
399    pub fn crc32(&mut self, file: &mut dyn ReadSeek, len: u64) -> Result<u32, std::io::Error> {
400        file.seek(std::io::SeekFrom::Start(0))?;
401        let mut hasher = crc32fast::Hasher::new();
402        let mut remaining = len;
403        while remaining > 0 {
404            let to_read = remaining.min(self.0.len() as u64) as usize;
405            file.read_exact(&mut self.0[..to_read])?;
406            hasher.update(&self.0[..to_read]);
407            remaining -= to_read as u64;
408        }
409        file.rewind()?;
410        Ok(hasher.finalize())
411    }
412}
413
#[cfg(test)]
mod tests {
    use super::*;

    const MB: u64 = 1024 * 1024;
    const GB: u64 = 1024 * MB;
    /// Value `compute_variable_mtrrs` ORs into every base register.
    const WRITEBACK: u64 = 0x6;
    /// Address width used by most tests; wide enough to need the 8 TB split.
    const WIDE: u8 = 46;

    /// 128 MB gap ending at 4 GiB, matching the typical x86_64 config.
    fn standard_low_mmio() -> MemoryRange {
        let top = 4 * GB;
        MemoryRange::new(top - 128 * MB..top)
    }

    /// 512 MB gap starting at 4 GiB.
    fn standard_high_mmio() -> MemoryRange {
        let bottom = 4 * GB;
        MemoryRange::new(bottom..bottom + 512 * MB)
    }

    /// Layout with `ram_size` bytes of RAM around both standard gaps.
    fn make_layout(ram_size: u64) -> MemoryLayout {
        let gaps = [standard_low_mmio(), standard_high_mmio()];
        MemoryLayout::new(ram_size, &gaps, &[], &[], None).unwrap()
    }

    /// MTRRs for `ram_size` bytes of RAM with both standard gaps present.
    fn standard_mtrrs(ram_size: u64, width: u8) -> Vec<X86Register> {
        compute_variable_mtrrs(
            &make_layout(ram_size),
            width,
            standard_low_mmio(),
            standard_high_mmio(),
        )
    }

    /// Count MTRR base/mask pairs in the register list.
    fn pair_count(regs: &[X86Register]) -> usize {
        assert_eq!(regs.len() % 2, 0, "MTRR registers come in pairs");
        regs.len() / 2
    }

    #[test]
    fn mtrr_128mb_exactly() {
        // RAM ends exactly at the 128 MB boundary, so only pair 0 is needed.
        let regs = standard_mtrrs(128 * MB, WIDE);
        assert_eq!(pair_count(&regs), 1);
        assert_eq!(regs[0], X86Register::MtrrPhysBase0(WRITEBACK));
    }

    #[test]
    fn mtrr_256mb() {
        // Pair 0 covers the first 128 MB, pair 1 starts at 128 MB.
        let regs = standard_mtrrs(256 * MB, WIDE);
        assert_eq!(pair_count(&regs), 2);
        assert_eq!(regs[0], X86Register::MtrrPhysBase0(WRITEBACK));
        assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | WRITEBACK));
    }

    #[test]
    fn mtrr_2gb() {
        // All RAM is still below 4 GiB, so the shape matches the 256 MB case.
        let regs = standard_mtrrs(2 * GB, WIDE);
        assert_eq!(pair_count(&regs), 2);
        assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | WRITEBACK));
    }

    #[test]
    fn mtrr_8gb() {
        // RAM reaches above both gaps, so all five pairs are emitted:
        //   0: 0..128 MB
        //   1: 128 MB..low gap start
        //   2: low gap end (4 GiB)..high gap start (4 GiB); empty but emitted
        //   3: high gap end..8 TB boundary
        //   4: 8 TB.. (only because the width exceeds 43 bits)
        let regs = standard_mtrrs(8 * GB, WIDE);
        assert_eq!(pair_count(&regs), 5);

        let low_end = standard_low_mmio().end();
        let high_end = standard_high_mmio().end();
        assert_eq!(regs[0], X86Register::MtrrPhysBase0(WRITEBACK));
        assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | WRITEBACK));
        assert_eq!(regs[4], X86Register::MtrrPhysBase2(low_end | WRITEBACK));
        assert_eq!(regs[6], X86Register::MtrrPhysBase3(high_end | WRITEBACK));
        assert_eq!(
            regs[8],
            X86Register::MtrrPhysBase4((1u64 << 43) | WRITEBACK)
        );
    }

    #[test]
    fn mtrr_8gb_no_high_mmio() {
        // Without a high MMIO gap, RAM above 4 GiB is contiguous. The WB
        // MTRRs should stop at the end of RAM so that everything above it
        // (ECAM, MMIO64 windows) keeps the default UC type:
        //   0: 0..128 MB
        //   1: 128 MB..low gap start
        //   2: low gap end..end of RAM
        let low = standard_low_mmio();
        let layout = MemoryLayout::new(8 * GB, &[low], &[], &[], None).unwrap();
        let regs = compute_variable_mtrrs(&layout, WIDE, low, MemoryRange::EMPTY);
        assert_eq!(pair_count(&regs), 3);
        assert_eq!(regs[4], X86Register::MtrrPhysBase2(low.end() | WRITEBACK));
    }

    #[test]
    fn mtrr_narrow_address_width() {
        // A 40-bit width never reaches the 8 TB boundary, so pair 4 is absent.
        let regs = standard_mtrrs(8 * GB, 40);
        assert_eq!(pair_count(&regs), 4);
        assert_eq!(
            regs[6],
            X86Register::MtrrPhysBase3(standard_high_mmio().end() | WRITEBACK)
        );
    }

    #[test]
    fn mtrr_masks_have_enabled_bit() {
        // Every mask register should have the ENABLED bit (bit 11) set.
        let regs = standard_mtrrs(8 * GB, WIDE);
        for (i, reg) in regs.iter().enumerate() {
            // Base registers do not match and are skipped.
            if let X86Register::MtrrPhysMask0(v)
            | X86Register::MtrrPhysMask1(v)
            | X86Register::MtrrPhysMask2(v)
            | X86Register::MtrrPhysMask3(v)
            | X86Register::MtrrPhysMask4(v) = reg
            {
                assert!(v & (1 << 11) != 0, "mask at index {i} missing ENABLED bit");
            }
        }
    }
}