loader/
paravisor.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Paravisor specific loader definitions and implementation.
5
6use crate::cpuid::HV_PSP_CPUID_PAGE;
7use crate::importer::Aarch64Register;
8use crate::importer::BootPageAcceptance;
9use crate::importer::IgvmParameterType;
10use crate::importer::ImageLoad;
11use crate::importer::IsolationConfig;
12use crate::importer::IsolationType;
13use crate::importer::SegmentRegister;
14use crate::importer::StartupMemoryType;
15use crate::importer::TableRegister;
16use crate::importer::X86Register;
17use crate::linux::InitrdAddressType;
18use crate::linux::InitrdConfig;
19use crate::linux::InitrdInfo;
20use crate::linux::KernelInfo;
21use crate::linux::load_kernel_and_initrd_arm64;
22use aarch64defs::Cpsr64;
23use aarch64defs::IntermPhysAddrSize;
24use aarch64defs::SctlrEl1;
25use aarch64defs::TranslationBaseEl1;
26use aarch64defs::TranslationControlEl1;
27use aarch64defs::TranslationGranule0;
28use aarch64defs::TranslationGranule1;
29use hvdef::HV_PAGE_SIZE;
30use hvdef::Vtl;
31use igvm::registers::AArch64Register;
32use loader_defs::paravisor::*;
33use loader_defs::shim::ShimParamsRaw;
34use memory_range::MemoryRange;
35use page_table::aarch64::Arm64PageSize;
36use page_table::aarch64::MemoryAttributeEl1;
37use page_table::aarch64::MemoryAttributeIndirectionEl1;
38use page_table::x64::MappedRange;
39use page_table::x64::PAGE_TABLE_MAX_BYTES;
40use page_table::x64::PAGE_TABLE_MAX_COUNT;
41use page_table::x64::PageTable;
42use page_table::x64::PageTableBuilder;
43use page_table::x64::X64_LARGE_PAGE_SIZE;
44use page_table::x64::align_up_to_large_page_size;
45use page_table::x64::align_up_to_page_size;
46use page_table::x64::calculate_pde_table_count;
47use thiserror::Error;
48use x86defs::GdtEntry;
49use x86defs::SegmentSelector;
50use x86defs::X64_BUSY_TSS_SEGMENT_ATTRIBUTES;
51use x86defs::X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES;
52use x86defs::X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES;
53use x86defs::cpuid::CpuidFunction;
54use zerocopy::FromZeros;
55use zerocopy::IntoBytes;
56
/// VTL0 Linux direct-boot information: the kernel command line together with
/// the load info produced by the Linux loader.
#[derive(Debug)]
pub struct Vtl0Linux<'a> {
    /// Command line for the VTL0 Linux kernel, as a NUL-terminated C string.
    pub command_line: &'a std::ffi::CString,
    /// Load information returned by the VTL0 Linux kernel loader.
    pub load_info: crate::linux::LoadInfo,
}
62
/// Which VTL0 boot environments this image supports, along with the data
/// needed to describe each one in the measured configuration.
#[derive(Debug)]
pub struct Vtl0Config<'a> {
    /// Whether PCAT (legacy BIOS) boot is supported.
    pub supports_pcat: bool,
    /// UEFI support: the load info and the VP context page.
    pub supports_uefi: Option<(crate::uefi::LoadInfo, Vec<u8>)>,
    /// Linux direct-boot support, if any.
    pub supports_linux: Option<Vtl0Linux<'a>>,
}
70
// See HclDefs.h
/// The VTL that the paravisor (HCL) itself runs at.
pub const HCL_SECURE_VTL: Vtl = Vtl::Vtl2;

/// Size of the persisted region (2MB). Reserved at the very start of the
/// paravisor memory range; see the layout comment in [`load_openhcl_x64`].
const PERSISTED_REGION_SIZE: u64 = 2 * 1024 * 1024;
76
/// Errors that can occur while loading the paravisor image.
#[derive(Debug, Error)]
pub enum Error {
    /// A memory address or size was not aligned as required (2MB large-page
    /// alignment for the paravisor range).
    #[error("memory is unaligned: {0}")]
    MemoryUnaligned(u64),
    /// The kernel command line exceeded `COMMAND_LINE_SIZE`.
    #[error("command line too large: {0}")]
    CommandLineSize(usize),
    /// The underhill kernel ELF failed to load.
    #[error("kernel load error")]
    Kernel(#[source] crate::linux::Error),
    /// The boot shim ELF failed to load.
    #[error("shim load error")]
    Shim(#[source] crate::elf::Error),
    /// The initrd size was invalid.
    #[error("invalid initrd size: {0}")]
    InvalidInitrdSize(u64),
    /// The image layout did not fit in the available memory range.
    #[error("memory used: {0} is greater than available")]
    NotEnoughMemory(u64),
    /// An error reported by the [`ImageLoad`] importer.
    #[error("importer error")]
    Importer(#[from] anyhow::Error),
    /// Page table construction failed.
    #[error("PageTableBuilder: {0}")]
    PageTableBuilder(#[from] page_table::Error),
}
96
/// Kernel command line type: how the command line may be modified at runtime.
pub enum CommandLineType<'a> {
    /// The command line is a static string and cannot be changed by the host.
    Static(&'a str),
    /// The command line is dynamic and host appendable via the chosen node in
    /// device tree, with initial data specified by the provided CStr. An empty
    /// base_string may be provided to allow the host to specify the full kernel
    /// command line.
    HostAppendable(&'a str),
}
107
/// Load the underhill kernel on x64.
///
/// Lays out the kernel, optional sidecar AP kernel, boot shim, optional
/// initrd, parameter regions, and page tables in the paravisor (VTL2) memory
/// range, then imports the initial VP register state needed to enter the boot
/// shim in 64-bit mode with paging enabled.
///
/// An optional initrd may be specified.
///
/// An optional `memory_page_base` may be specified. This will disable
/// relocation support for underhill.
pub fn load_openhcl_x64<F>(
    importer: &mut dyn ImageLoad<X86Register>,
    kernel_image: &mut F,
    shim: &mut F,
    sidecar: Option<&mut F>,
    command_line: CommandLineType<'_>,
    initrd: Option<&[u8]>,
    memory_page_base: Option<u64>,
    memory_page_count: u64,
    vtl0_config: Vtl0Config<'_>,
) -> Result<(), Error>
where
    F: std::io::Read + std::io::Seek,
{
    let IsolationConfig {
        isolation_type,
        paravisor_present,
        shared_gpa_boundary_bits,
    } = importer.isolation_config();

    // If no explicit memory base is specified, load with relocation support.
    // Isolated guests never relocate.
    let with_relocation = memory_page_base.is_none() && isolation_type == IsolationType::None;

    let memory_start_address = memory_page_base
        .map(|page_number| page_number * HV_PAGE_SIZE)
        .unwrap_or(PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS);

    let memory_size = memory_page_count * HV_PAGE_SIZE;

    // OpenHCL is laid out as the following:
    // --- High Memory, 2MB aligned ---
    // free space
    //
    // page tables
    // 16 pages reserved for bootshim heap
    // 8K bootshim logs
    // IGVM parameters
    // reserved vtl2 ranges
    // initrd
    // openhcl_boot
    // sidecar, if configured
    // - pad to next 2MB -
    // kernel
    // optional 2mb bounce buf for CVM
    // persisted state region
    // --- Low memory, 2MB aligned ---

    // Paravisor memory ranges must be 2MB (large page) aligned.
    if !memory_start_address.is_multiple_of(X64_LARGE_PAGE_SIZE) {
        return Err(Error::MemoryUnaligned(memory_start_address));
    }

    if !memory_size.is_multiple_of(X64_LARGE_PAGE_SIZE) {
        return Err(Error::MemoryUnaligned(memory_size));
    }

    // The whole memory range must be present and VTL2 protectable for the
    // underhill kernel to work.
    importer.verify_startup_memory_available(
        memory_start_address / HV_PAGE_SIZE,
        memory_page_count,
        if paravisor_present {
            StartupMemoryType::Vtl2ProtectableRam
        } else {
            StartupMemoryType::Ram
        },
    )?;

    // NOTE(review): on hardware-isolated platforms (SNP/TDX) the kernel and
    // initrd pages are imported as Shared rather than Exclusive — presumably
    // because the bootshim accepts/validates them itself; confirm against the
    // bootshim's acceptance logic.
    let kernel_acceptance = match isolation_type {
        IsolationType::Snp | IsolationType::Tdx => BootPageAcceptance::Shared,
        _ => BootPageAcceptance::Exclusive,
    };

    // Running cursor for the next free GPA; every region below is carved out
    // by bumping this.
    let mut offset = memory_start_address;

    // Reserve the first 2MB for a potential persisted state region. The first
    // 4K page is always the persisted state header, and the bootshim may decide
    // to use the remaining pages for the protobuf payload.
    let persisted_region_base = offset;
    let persisted_region_size = PERSISTED_REGION_SIZE;
    offset += persisted_region_size;

    // If hardware isolated, reserve a 2MB range for bounce buffering shared
    // pages. This is done first because we know the start address is 2MB
    // aligned, with the next consumers wanting 2MB aligned ranges. This is
    // reserved at load time in order to guarantee the pagetables have entries
    // for this identity mapping.
    //
    // Leave this as a gap, as there's no need to accept or describe this range
    // in the IGVM file.
    let bounce_buffer = if matches!(isolation_type, IsolationType::Snp | IsolationType::Tdx) {
        let bounce_buffer_gpa = offset;
        assert_eq!(bounce_buffer_gpa % X64_LARGE_PAGE_SIZE, 0);
        let range = MemoryRange::new(bounce_buffer_gpa..bounce_buffer_gpa + X64_LARGE_PAGE_SIZE);

        offset += range.len();
        Some(range)
    } else {
        None
    };

    tracing::trace!(offset, "loading the kernel");

    // The x86_64 uncompressed kernel we use doesn't show any difference
    // in the code sections upon flipping CONFIG_RELOCATABLE. In total,
    // there are 6 places where a difference is found: dates in the Linux
    // banner, GNU build ID, and metadata entries in the empty initrd image
    // (it always is embedded into the kernel). No sections with relocations
    // appear if CONFIG_RELOCATABLE is set.
    // Assume that at least the kernel entry contains PIC and no loader
    // assistance with the relocations records (if any) is required.
    let load_info = crate::elf::load_static_elf(
        importer,
        kernel_image,
        offset,
        0,
        true,
        kernel_acceptance,
        "underhill-kernel",
    )
    .map_err(|e| Error::Kernel(crate::linux::Error::ElfLoader(e)))?;
    tracing::trace!("Kernel loaded at {load_info:x?}");
    let crate::elf::LoadInfo {
        minimum_address_used: _min_addr,
        next_available_address: mut offset,
        entrypoint: kernel_entrypoint,
    } = load_info;

    // The kernel loader must leave the cursor page aligned.
    assert_eq!(offset & (HV_PAGE_SIZE - 1), 0);

    // If an AP kernel was provided, load it next.
    let (sidecar_size, sidecar_entrypoint) = if let Some(sidecar) = sidecar {
        // Sidecar load addr must be 2MB aligned
        offset = align_up_to_large_page_size(offset);

        let load_info = crate::elf::load_static_elf(
            importer,
            sidecar,
            0,
            offset,
            false,
            BootPageAcceptance::Exclusive,
            "sidecar-kernel",
        )
        .map_err(|e| Error::Kernel(crate::linux::Error::ElfLoader(e)))?;

        (
            load_info.next_available_address - offset,
            load_info.entrypoint,
        )
    } else {
        (0, 0)
    };

    let sidecar_base = offset;
    offset += sidecar_size;

    // Load the boot shim immediately after the sidecar (or kernel).
    let load_info = crate::elf::load_static_elf(
        importer,
        shim,
        0,
        offset,
        false,
        BootPageAcceptance::Exclusive,
        "underhill-boot-shim",
    )
    .map_err(Error::Shim)?;
    tracing::trace!("The boot shim loaded at {load_info:x?}");
    let crate::elf::LoadInfo {
        minimum_address_used: shim_base_addr,
        next_available_address: mut offset,
        entrypoint: shim_entry_address,
    } = load_info;

    // Optionally import initrd if specified.
    let ramdisk = if let Some(initrd) = initrd {
        let initrd_base = offset;
        let initrd_size = align_up_to_page_size(initrd.len() as u64);

        importer.import_pages(
            initrd_base / HV_PAGE_SIZE,
            initrd_size / HV_PAGE_SIZE,
            "underhill-initrd",
            kernel_acceptance,
            initrd,
        )?;

        offset += initrd_size;
        // Record the unpadded length; the shim params carry the exact size.
        Some((initrd_base, initrd.len() as u64))
    } else {
        None
    };

    // One page for the GDT.
    let gdt_base_address = offset;
    let gdt_size = HV_PAGE_SIZE;
    offset += gdt_size;

    // One page for the shim parameters (`ShimParamsRaw`), passed via %rsi.
    let boot_params_base = offset;
    let boot_params_size = HV_PAGE_SIZE;

    offset += boot_params_size;

    // One page for the paravisor command line.
    let cmdline_base = offset;
    let (cmdline, policy) = match command_line {
        CommandLineType::Static(val) => (val, CommandLinePolicy::STATIC),
        CommandLineType::HostAppendable(val) => (val, CommandLinePolicy::APPEND_CHOSEN),
    };

    if cmdline.len() > COMMAND_LINE_SIZE {
        return Err(Error::CommandLineSize(cmdline.len()));
    }

    let mut static_command_line = [0; COMMAND_LINE_SIZE];
    static_command_line[..cmdline.len()].copy_from_slice(cmdline.as_bytes());
    let paravisor_command_line = ParavisorCommandLine {
        policy,
        static_command_line_len: cmdline.len() as u16,
        static_command_line,
    };

    importer.import_pages(
        cmdline_base / HV_PAGE_SIZE,
        1,
        "underhill-command-line",
        BootPageAcceptance::Exclusive,
        paravisor_command_line.as_bytes(),
    )?;

    offset += HV_PAGE_SIZE;

    // Reserve space for the VTL2 reserved region.
    let reserved_region_size = PARAVISOR_RESERVED_VTL2_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let reserved_region_start = offset;
    offset += reserved_region_size;

    tracing::debug!(reserved_region_start);

    // Reserve space for the IGVM parameter region (SLIT, PPTT, device tree,
    // measured VTL2 config); individual areas are created near the end.
    let parameter_region_size = PARAVISOR_VTL2_CONFIG_REGION_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let parameter_region_start = offset;
    offset += parameter_region_size;

    tracing::debug!(parameter_region_start);

    // Reserve 8K for the bootshim log buffer. Import these pages so they are
    // available early without extra acceptance calls.
    let bootshim_log_size = HV_PAGE_SIZE * 2;
    let bootshim_log_start = offset;
    offset += bootshim_log_size;

    importer.import_pages(
        bootshim_log_start / HV_PAGE_SIZE,
        bootshim_log_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-log-buffer",
        BootPageAcceptance::Exclusive,
        &[],
    )?;

    // Reserve 16 pages for a bootshim heap. This is only used to parse the
    // protobuf payload from the previous instance in a servicing boot.
    //
    // Import these pages as it greatly simplifies the early startup code in the
    // bootshim for isolated guests. This allows the bootshim to use these pages
    // early on without extra acceptance calls.
    let heap_start = offset;
    let heap_size = 16 * HV_PAGE_SIZE;
    importer.import_pages(
        heap_start / HV_PAGE_SIZE,
        heap_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-heap",
        BootPageAcceptance::Exclusive,
        &[],
    )?;
    offset += heap_size;

    // The end of memory used by the loader, excluding pagetables.
    let end_of_underhill_mem = offset;

    // Page tables live at the end of VTL2 ram used by the bootshim.
    //
    // Size the available page table memory as 5 pages + 2 * 1GB of memory. This
    // allows underhill to be mapped across a 512 GB boundary when using more
    // than 1 GB, as the PDPTE will span 2 PML4E entries. Each GB of memory
    // mapped requires 1 page for 2MB pages. Give 2 extra base pages and 1
    // additional page per GB of mapped memory to allow the page table
    // relocation code to be simpler, and not need to reclaim free pages from
    // tables that have no valid entries.
    //
    // FUTURE: It would be better to change it so the shim only needs to map
    //         itself, kernel, initrd and IGVM parameters. This requires
    //         changing how the e820 map is constructed for the kernel along
    //         with changing the contract on where the IGVM parameters live
    //         within VTL2's memory.
    let local_map = match isolation_type {
        IsolationType::Snp | IsolationType::Tdx => {
            Some((PARAVISOR_LOCAL_MAP_VA, PARAVISOR_LOCAL_MAP_SIZE))
        }
        _ => None,
    };

    let page_table_base_page_count = 5;
    let page_table_dynamic_page_count = {
        // Double the count to allow for simpler reconstruction.
        calculate_pde_table_count(memory_start_address, memory_size) * 2
            + local_map.map_or(0, |v| calculate_pde_table_count(v.0, v.1))
    };
    let page_table_isolation_page_count = match isolation_type {
        IsolationType::Tdx => {
            // TDX requires up to an extra 3 pages to map the reset vector as a
            // 4K page.
            3
        }
        _ => 0,
    };
    let page_table_page_count = page_table_base_page_count
        + page_table_dynamic_page_count
        + page_table_isolation_page_count;
    let page_table_region_size = HV_PAGE_SIZE * page_table_page_count;
    let page_table_region_start = offset;
    offset += page_table_region_size;

    tracing::debug!(page_table_region_start, page_table_region_size);

    // Construct the memory ranges that will be identity mapped
    let mut ranges: Vec<MappedRange> = Vec::new();

    ranges.push(MappedRange::new(
        memory_start_address,
        memory_start_address + memory_size,
    ));

    if let Some((local_map_start, size)) = local_map {
        ranges.push(MappedRange::new(local_map_start, local_map_start + size));
    }

    if isolation_type == IsolationType::Tdx {
        // Map the architectural x86 reset vector page for TDX.
        const RESET_VECTOR_ADDR: u64 = 0xffff_f000;
        ranges.push(MappedRange::new(
            RESET_VECTOR_ADDR,
            RESET_VECTOR_ADDR + page_table::x64::X64_PAGE_SIZE,
        ));
    }

    // The builder expects the ranges sorted by start address.
    ranges.sort_by_key(|r| r.start());

    // Initialize the page table builder, and build the page table
    let mut page_table_work_buffer: Vec<PageTable> =
        vec![PageTable::new_zeroed(); PAGE_TABLE_MAX_COUNT];
    let mut page_table: Vec<u8> = vec![0; PAGE_TABLE_MAX_BYTES];
    let mut page_table_builder = PageTableBuilder::new(
        page_table_region_start,
        page_table_work_buffer.as_mut_slice(),
        page_table.as_mut_slice(),
        ranges.as_slice(),
    )?;

    if isolation_type == IsolationType::Snp {
        // NOTE(review): bit 51 is used as the SNP confidential (C-bit)
        // position here — confirm this matches the platform's reported C-bit
        // location.
        page_table_builder = page_table_builder.with_confidential_bit(51);
    }

    let page_table = page_table_builder.build()?;

    assert!((page_table.len() as u64).is_multiple_of(HV_PAGE_SIZE));
    let page_table_page_base = page_table_region_start / HV_PAGE_SIZE;
    assert!(page_table.len() as u64 <= page_table_region_size);
    // Rebind immutably: the layout is final, no further regions are carved.
    let offset = offset;

    if with_relocation {
        // Indicate relocation information. Don't include page table region.
        importer.relocation_region(
            memory_start_address,
            end_of_underhill_mem - memory_start_address,
            X64_LARGE_PAGE_SIZE,
            PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS,
            1 << 48,
            true,
            true,
            0, // BSP
        )?;

        // Tell the loader page table relocation information.
        importer.page_table_relocation(
            page_table_region_start,
            page_table_region_size / HV_PAGE_SIZE,
            page_table.len() as u64 / HV_PAGE_SIZE,
            0,
        )?;
    }

    // The memory used by the loader must be smaller than the memory available.
    if offset > memory_start_address + memory_size {
        return Err(Error::NotEnoughMemory(offset - memory_start_address));
    }

    let (initrd_base, initrd_size) = ramdisk.unwrap_or((0, 0));
    // Shim parameters for locations are relative to the base of where the shim is loaded.
    let calculate_shim_offset = |addr: u64| addr.wrapping_sub(shim_base_addr) as i64;
    let initrd_crc = crc32fast::hash(initrd.unwrap_or(&[]));
    let shim_params = ShimParamsRaw {
        kernel_entry_offset: calculate_shim_offset(kernel_entrypoint),
        cmdline_offset: calculate_shim_offset(cmdline_base),
        initrd_offset: calculate_shim_offset(initrd_base),
        initrd_size,
        initrd_crc,
        supported_isolation_type: match isolation_type {
            // To the shim, None and VBS isolation are the same. The shim
            // queries CPUID when running to determine if page acceptance needs
            // to be done.
            IsolationType::None | IsolationType::Vbs => {
                loader_defs::shim::SupportedIsolationType::VBS
            }
            IsolationType::Snp => loader_defs::shim::SupportedIsolationType::SNP,
            IsolationType::Tdx => loader_defs::shim::SupportedIsolationType::TDX,
        },
        memory_start_offset: calculate_shim_offset(memory_start_address),
        memory_size,
        parameter_region_offset: calculate_shim_offset(parameter_region_start),
        parameter_region_size,
        vtl2_reserved_region_offset: calculate_shim_offset(reserved_region_start),
        vtl2_reserved_region_size: reserved_region_size,
        sidecar_offset: calculate_shim_offset(sidecar_base),
        sidecar_size,
        sidecar_entry_offset: calculate_shim_offset(sidecar_entrypoint),
        used_start: calculate_shim_offset(memory_start_address),
        used_end: calculate_shim_offset(offset),
        bounce_buffer_start: bounce_buffer.map_or(0, |r| calculate_shim_offset(r.start())),
        bounce_buffer_size: bounce_buffer.map_or(0, |r| r.len()),
        log_buffer_start: calculate_shim_offset(bootshim_log_start),
        log_buffer_size: bootshim_log_size,
        heap_start_offset: calculate_shim_offset(heap_start),
        heap_size,
        persisted_state_region_offset: calculate_shim_offset(persisted_region_base),
        persisted_state_region_size: persisted_region_size,
    };

    tracing::debug!(boot_params_base, "shim gpa");

    importer
        .import_pages(
            boot_params_base / HV_PAGE_SIZE,
            boot_params_size / HV_PAGE_SIZE,
            "underhill-shim-params",
            BootPageAcceptance::Exclusive,
            shim_params.as_bytes(),
        )
        .map_err(Error::Importer)?;

    importer.import_pages(
        page_table_page_base,
        page_table_page_count,
        "underhill-page-tables",
        BootPageAcceptance::Exclusive,
        page_table,
    )?;

    // Set selectors and control registers
    // Setup two selectors and segment registers.
    // ds, es, fs, gs, ss are linearSelector
    // cs is linearCode64Selector

    // GDT is laid out as (counting by the small entries):
    //  0: null descriptor,
    //  1: null descriptor,
    //  2: linear code64 descriptor,
    //  3. linear descriptor for data
    //  4: here you can add more descriptors.

    let default_data_attributes: u16 = X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES.into();
    let default_code64_attributes: u16 = X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES.into();
    let gdt = [
        // A large null descriptor.
        GdtEntry::new_zeroed(),
        GdtEntry::new_zeroed(),
        // Code descriptor for the long mode.
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_code64_attributes as u8,
            attr_high: (default_code64_attributes >> 8) as u8,
            ..GdtEntry::new_zeroed()
        },
        // Data descriptor.
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_data_attributes as u8,
            attr_high: (default_data_attributes >> 8) as u8,
            ..GdtEntry::new_zeroed()
        },
    ];

    const LINEAR_CODE64_DESCRIPTOR_INDEX: usize = 2;
    const LINEAR_DATA_DESCRIPTOR_INDEX: usize = 3;
    const RPL: u8 = 0x00; // requested privilege level: the highest

    let linear_code64_descriptor_selector =
        SegmentSelector::from_gdt_index(LINEAR_CODE64_DESCRIPTOR_INDEX as u16, RPL);
    let linear_data_descriptor_selector =
        SegmentSelector::from_gdt_index(LINEAR_DATA_DESCRIPTOR_INDEX as u16, RPL);

    importer.import_pages(
        gdt_base_address / HV_PAGE_SIZE,
        gdt_size / HV_PAGE_SIZE,
        "underhill-gdt",
        BootPageAcceptance::Exclusive,
        gdt.as_bytes(),
    )?;

    // Helper: import a single VP register, mapping importer errors.
    let mut import_reg = |register| {
        importer
            .import_vp_register(register)
            .map_err(Error::Importer)
    };

    // Import GDTR and selectors.
    import_reg(X86Register::Gdtr(TableRegister {
        base: gdt_base_address,
        limit: (size_of_val(&gdt) - 1) as u16,
    }))?;

    let ds = SegmentRegister {
        selector: linear_data_descriptor_selector.into_bits(),
        base: 0,
        limit: 0xffffffff,
        attributes: default_data_attributes,
    };
    import_reg(X86Register::Ds(ds))?;
    import_reg(X86Register::Es(ds))?;
    import_reg(X86Register::Fs(ds))?;
    import_reg(X86Register::Gs(ds))?;
    import_reg(X86Register::Ss(ds))?;

    let cs = SegmentRegister {
        selector: linear_code64_descriptor_selector.into_bits(),
        base: 0,
        limit: 0xffffffff,
        attributes: default_code64_attributes,
    };
    import_reg(X86Register::Cs(cs))?;

    // TODO: Workaround an OS repo bug where enabling a higher VTL zeros TR
    //       instead of setting it to the reset default state. Manually set it
    //       to the reset default state until the OS repo is fixed.
    //
    //       In the future, we should just not set this at all.
    import_reg(X86Register::Tr(SegmentRegister {
        selector: 0x0000,
        base: 0x00000000,
        limit: 0x0000FFFF,
        attributes: X64_BUSY_TSS_SEGMENT_ATTRIBUTES.into(),
    }))?;

    // Set system registers to state expected by the boot shim, 64 bit mode with
    // paging enabled.

    // Set CR0: paging, protected mode, numeric error reporting.
    import_reg(X86Register::Cr0(
        x86defs::X64_CR0_PG | x86defs::X64_CR0_PE | x86defs::X64_CR0_NE,
    ))?;

    // Set CR3 to point to page table
    import_reg(X86Register::Cr3(page_table_region_start))?;

    // Set CR4: PAE (required for long mode), MCE, OSXSAVE.
    import_reg(X86Register::Cr4(
        x86defs::X64_CR4_PAE | x86defs::X64_CR4_MCE | x86defs::X64_CR4_OSXSAVE,
    ))?;

    // Set EFER to LMA, LME, and NXE for 64 bit mode.
    import_reg(X86Register::Efer(
        x86defs::X64_EFER_LMA | x86defs::X64_EFER_LME | x86defs::X64_EFER_NXE,
    ))?;

    // Set PAT
    import_reg(X86Register::Pat(x86defs::X86X_MSR_DEFAULT_PAT))?;

    // Setup remaining registers
    // Set %rsi to relative location of boot_params_base
    let relative_boot_params_base = boot_params_base - shim_base_addr;
    import_reg(X86Register::Rsi(relative_boot_params_base))?;

    // Set %rip to the shim entry point.
    import_reg(X86Register::Rip(shim_entry_address))?;

    // Load parameter regions.
    let config_region_page_base = parameter_region_start / HV_PAGE_SIZE;

    // Slit
    let slit_page_base = config_region_page_base + PARAVISOR_CONFIG_SLIT_PAGE_INDEX;
    let slit_parameter_area = importer.create_parameter_area(
        slit_page_base,
        PARAVISOR_CONFIG_SLIT_SIZE_PAGES as u32,
        "underhill-slit",
    )?;
    importer.import_parameter(slit_parameter_area, 0, IgvmParameterType::Slit)?;

    // Pptt
    let pptt_page_base = config_region_page_base + PARAVISOR_CONFIG_PPTT_PAGE_INDEX;
    let pptt_parameter_area = importer.create_parameter_area(
        pptt_page_base,
        PARAVISOR_CONFIG_PPTT_SIZE_PAGES as u32,
        "underhill-pptt",
    )?;
    importer.import_parameter(pptt_parameter_area, 0, IgvmParameterType::Pptt)?;

    // device tree
    let dt_page_base = config_region_page_base + PARAVISOR_CONFIG_DEVICE_TREE_PAGE_INDEX;
    let dt_parameter_area = importer.create_parameter_area(
        dt_page_base,
        PARAVISOR_CONFIG_DEVICE_TREE_SIZE_PAGES as u32,
        "underhill-device-tree",
    )?;
    importer.import_parameter(dt_parameter_area, 0, IgvmParameterType::DeviceTree)?;

    // SNP-only pages in the reserved region: secrets page, CPUID pages, and
    // the VTL2 VMSA page.
    if isolation_type == IsolationType::Snp {
        let reserved_region_page_base = reserved_region_start / HV_PAGE_SIZE;
        let secrets_page_base: u64 =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_SECRETS_PAGE_INDEX;
        importer.import_pages(
            secrets_page_base,
            PARAVISOR_RESERVED_VTL2_SNP_SECRETS_SIZE_PAGES,
            "underhill-snp-secrets-page",
            BootPageAcceptance::SecretsPage,
            &[],
        )?;

        let cpuid_page = create_snp_cpuid_page();
        let cpuid_page_base =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_CPUID_PAGE_INDEX;
        importer.import_pages(
            cpuid_page_base,
            1,
            "underhill-snp-cpuid-page",
            BootPageAcceptance::CpuidPage,
            cpuid_page.as_bytes(),
        )?;

        importer.import_pages(
            cpuid_page_base + 1,
            1,
            "underhill-snp-cpuid-extended-state-page",
            BootPageAcceptance::CpuidExtendedStatePage,
            &[],
        )?;

        let vmsa_page_base =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_VMSA_PAGE_INDEX;
        importer.set_vp_context_page(vmsa_page_base)?;
    }

    // Load measured config.
    // The measured config is at page 0. Free pages start at page 1.
    let mut free_page = 1;
    let mut measured_config = ParavisorMeasuredVtl0Config {
        magic: ParavisorMeasuredVtl0Config::MAGIC,
        ..FromZeros::new_zeroed()
    };

    let Vtl0Config {
        supports_pcat,
        supports_uefi,
        supports_linux,
    } = vtl0_config;

    if supports_pcat {
        measured_config.supported_vtl0.set_pcat_supported(true);
    }

    if let Some((uefi, vp_context)) = &supports_uefi {
        measured_config.supported_vtl0.set_uefi_supported(true);
        let vp_context_page = free_page;
        free_page += 1;
        measured_config.uefi_info = UefiInfo {
            firmware: PageRegionDescriptor {
                base_page_number: uefi.firmware_base / HV_PAGE_SIZE,
                page_count: uefi.total_size / HV_PAGE_SIZE,
            },
            vtl0_vp_context: PageRegionDescriptor {
                base_page_number: vp_context_page,
                page_count: 1,
            },
        };

        // Deposit the UEFI vp context.
        importer.import_pages(
            vp_context_page,
            1,
            "openhcl-uefi-vp-context",
            BootPageAcceptance::Exclusive,
            vp_context,
        )?;
    }

    if let Some(linux) = supports_linux {
        measured_config
            .supported_vtl0
            .set_linux_direct_supported(true);

        let kernel_region = PageRegionDescriptor::new(
            linux.load_info.kernel.gpa / HV_PAGE_SIZE,
            align_up_to_page_size(linux.load_info.kernel.size) / HV_PAGE_SIZE,
        );

        let (initrd_region, initrd_size) = match linux.load_info.initrd {
            Some(info) => {
                if info.gpa % HV_PAGE_SIZE != 0 {
                    return Err(Error::MemoryUnaligned(info.gpa));
                }
                (
                    // initrd info is aligned up to the next page.
                    PageRegionDescriptor::new(
                        info.gpa / HV_PAGE_SIZE,
                        align_up_to_page_size(info.size) / HV_PAGE_SIZE,
                    ),
                    info.size,
                )
            }
            None => (PageRegionDescriptor::EMPTY, 0),
        };

        let command_line_page = free_page;
        // free_page += 1; // (deliberately omitted: nothing below allocates
        // from free_page, but bump it here if more low pages are ever used)

        // Import the command line as a C string.
        importer
            .import_pages(
                command_line_page,
                1,
                "underhill-vtl0-linux-command-line",
                BootPageAcceptance::Exclusive,
                linux.command_line.as_bytes_with_nul(),
            )
            .map_err(Error::Importer)?;
        let command_line = PageRegionDescriptor::new(command_line_page, 1);

        measured_config.linux_info = LinuxInfo {
            kernel_region,
            kernel_entrypoint: linux.load_info.kernel.entrypoint,
            initrd_region,
            initrd_size,
            command_line,
        };
    }

    importer
        .import_pages(
            PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_X64,
            1,
            "underhill-measured-config",
            BootPageAcceptance::Exclusive,
            measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let vtl2_measured_config = ParavisorMeasuredVtl2Config {
        magic: ParavisorMeasuredVtl2Config::MAGIC,
        vtom_offset_bit: shared_gpa_boundary_bits.unwrap_or(0),
        padding: [0; 7],
    };

    importer
        .import_pages(
            config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_PAGE_INDEX,
            PARAVISOR_MEASURED_VTL2_CONFIG_SIZE_PAGES,
            "underhill-vtl2-measured-config",
            BootPageAcceptance::Exclusive,
            vtl2_measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let imported_region_base =
        config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_ACCEPTED_MEMORY_PAGE_INDEX;

    importer.set_imported_regions_config_page(imported_region_base);
    Ok(())
}
887
888/// Create a hypervisor SNP CPUID page with the default values.
889fn create_snp_cpuid_page() -> HV_PSP_CPUID_PAGE {
890    let mut cpuid_page = HV_PSP_CPUID_PAGE::default();
891
892    // TODO SNP: The list used here is based earlier Microsoft projects.
893    // 1. ExtendedStateEnumeration should be part of BootPageAcceptance::CpuidExtendedStatePage,
894    // but it is unclear whether Linux supports a second page. The need for the second page is that
895    // the entries in it are actually based on supported features on a specific host.
896    // 2. ExtendedStateEnumeration should specify Xfem = 3
897    for (i, required_leaf) in crate::cpuid::SNP_REQUIRED_CPUID_LEAF_LIST_PARAVISOR
898        .iter()
899        .enumerate()
900    {
901        let entry = &mut cpuid_page.cpuid_leaf_info[i];
902        entry.eax_in = required_leaf.eax;
903        entry.ecx_in = required_leaf.ecx;
904        if required_leaf.eax == CpuidFunction::ExtendedStateEnumeration.0 {
905            entry.xfem_in = 1;
906        }
907        cpuid_page.count += 1;
908    }
909
910    cpuid_page
911}
912
/// Load the underhill kernel on arm64.
///
/// An optional initrd may be specified.
///
/// An optional `memory_page_base` may be specified. This will disable
/// relocation support for underhill.
///
/// # Errors
///
/// Returns an error if the memory range is not 2MB aligned, if the total
/// memory is too small for the laid-out image, or if the importer rejects any
/// page, parameter area, or register import.
///
/// # Panics
///
/// Panics if `vtl0_config` requests PCAT, if neither UEFI nor Linux VTL0
/// support is requested, or if the isolation type is anything other than
/// `None` or VBS (only those are supported on ARM64).
pub fn load_openhcl_arm64<F>(
    importer: &mut dyn ImageLoad<Aarch64Register>,
    kernel_image: &mut F,
    shim: &mut F,
    command_line: CommandLineType<'_>,
    initrd: Option<&[u8]>,
    memory_page_base: Option<u64>,
    memory_page_count: u64,
    vtl0_config: Vtl0Config<'_>,
) -> Result<(), Error>
where
    F: std::io::Read + std::io::Seek,
{
    let Vtl0Config {
        supports_pcat,
        supports_uefi,
        supports_linux,
    } = vtl0_config;

    // PCAT is x86-only; at least one VTL0 boot mode must be requested.
    assert!(!supports_pcat);
    assert!(supports_uefi.is_some() || supports_linux.is_some());

    let paravisor_present = importer.isolation_config().paravisor_present;

    // If no explicit memory base is specified, load with relocation support.
    let with_relocation = memory_page_base.is_none();

    let memory_start_address = memory_page_base
        .map(|page_number| page_number * HV_PAGE_SIZE)
        .unwrap_or(PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS);

    let memory_size = memory_page_count * HV_PAGE_SIZE;

    // Paravisor memory ranges must be 2MB (large page) aligned.
    if !memory_start_address.is_multiple_of(u64::from(Arm64PageSize::Large)) {
        return Err(Error::MemoryUnaligned(memory_start_address));
    }

    if !memory_size.is_multiple_of(u64::from(Arm64PageSize::Large)) {
        return Err(Error::MemoryUnaligned(memory_size));
    }

    // The whole memory range must be present and VTL2 protectable for the
    // underhill kernel to work.
    importer.verify_startup_memory_available(
        memory_start_address / HV_PAGE_SIZE,
        memory_page_count,
        if paravisor_present {
            StartupMemoryType::Vtl2ProtectableRam
        } else {
            StartupMemoryType::Ram
        },
    )?;

    // Lay out the image linearly from the base of the paravisor range,
    // advancing `next_addr` as each region is reserved or imported.
    let mut next_addr = memory_start_address;

    // Reserve the first 2MB for a potential persisted state region. The first
    // 4K page is always the persisted state header, and the bootshim may decide
    // to use the remaining pages for the protobuf payload.
    let persisted_region_base = next_addr;
    let persisted_region_size = PERSISTED_REGION_SIZE;
    next_addr += persisted_region_size;

    tracing::trace!(next_addr, "loading the kernel");

    // The aarch64 Linux kernel image is most commonly found as a flat binary with a
    // header rather than an ELF.
    // DeviceTree is generated dynamically by the boot shim.
    let initrd_address_type = InitrdAddressType::AfterKernel;
    let initrd_config = InitrdConfig {
        initrd_address: initrd_address_type,
        initrd: initrd.unwrap_or_default(),
    };
    let device_tree_blob = None;
    let crate::linux::LoadInfo {
        kernel:
            KernelInfo {
                gpa: kernel_base,
                size: kernel_size,
                entrypoint: kernel_entry_point,
            },
        initrd: initrd_info,
        dtb,
    } = load_kernel_and_initrd_arm64(
        importer,
        kernel_image,
        next_addr,
        Some(initrd_config),
        device_tree_blob,
    )
    .map_err(Error::Kernel)?;

    assert!(
        dtb.is_none(),
        "DeviceTree is generated dynamically by the boot shim."
    );

    tracing::trace!(kernel_base, "kernel loaded");

    // Continue the layout after the initrd if one was loaded, otherwise
    // directly after the kernel image.
    let InitrdInfo {
        gpa: initrd_gpa,
        size: initrd_size,
    } = if let Some(initrd_info) = initrd_info {
        assert!(initrd_address_type == InitrdAddressType::AfterKernel);
        next_addr = initrd_info.gpa + initrd_info.size;
        initrd_info
    } else {
        next_addr = kernel_base + kernel_size;
        InitrdInfo { gpa: 0, size: 0 }
    };

    next_addr = align_up_to_page_size(next_addr);

    tracing::trace!(next_addr, "loading the boot shim");

    let crate::elf::LoadInfo {
        minimum_address_used: shim_base_addr,
        next_available_address: mut next_addr,
        entrypoint: shim_entry_point,
    } = crate::elf::load_static_elf(
        importer,
        shim,
        0,
        next_addr,
        false,
        BootPageAcceptance::Exclusive,
        "underhill-boot-shim",
    )
    .map_err(Error::Shim)?;

    tracing::trace!(shim_base_addr, "boot shim loaded");

    tracing::trace!(next_addr, "loading the command line");

    let cmdline_base = next_addr;
    let (cmdline, policy) = match command_line {
        CommandLineType::Static(val) => (val, CommandLinePolicy::STATIC),
        CommandLineType::HostAppendable(val) => (val, CommandLinePolicy::APPEND_CHOSEN),
    };

    if cmdline.len() > COMMAND_LINE_SIZE {
        return Err(Error::CommandLineSize(cmdline.len()));
    }

    // The command line is stored in a fixed-size buffer alongside its policy
    // and actual length.
    let mut static_command_line = [0; COMMAND_LINE_SIZE];
    static_command_line[..cmdline.len()].copy_from_slice(cmdline.as_bytes());
    let paravisor_command_line = ParavisorCommandLine {
        policy,
        static_command_line_len: cmdline.len() as u16,
        static_command_line,
    };

    importer.import_pages(
        cmdline_base / HV_PAGE_SIZE,
        1,
        "underhill-command-line",
        BootPageAcceptance::Exclusive,
        paravisor_command_line.as_bytes(),
    )?;

    // Account for the single command line page imported above.
    next_addr += HV_PAGE_SIZE;

    tracing::trace!(next_addr, "loading the boot shim parameters");

    let shim_params_base = next_addr;
    let shim_params_size = HV_PAGE_SIZE;

    next_addr += shim_params_size;

    // Reserve the parameter region; it is filled in via the IGVM parameter
    // areas created at the end of this function.
    let parameter_region_size = PARAVISOR_VTL2_CONFIG_REGION_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let parameter_region_start = next_addr;
    next_addr += parameter_region_size;

    tracing::debug!(parameter_region_start);

    // Reserve 8K for the bootshim log buffer.
    let bootshim_log_size = HV_PAGE_SIZE * 2;
    let bootshim_log_start = next_addr;
    next_addr += bootshim_log_size;

    importer.import_pages(
        bootshim_log_start / HV_PAGE_SIZE,
        bootshim_log_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-log-buffer",
        BootPageAcceptance::Exclusive,
        &[],
    )?;

    // Reserve 16 pages for a bootshim heap. This is only used to parse the
    // protobuf payload from the previous instance in a servicing boot.
    //
    // Import these pages as it greatly simplifies the early startup code in the
    // bootshim for isolated guests. This allows the bootshim to use these pages
    // early on without extra acceptance calls.
    let heap_start = next_addr;
    let heap_size = 16 * HV_PAGE_SIZE;
    importer.import_pages(
        heap_start / HV_PAGE_SIZE,
        heap_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-heap",
        BootPageAcceptance::Exclusive,
        &[],
    )?;
    next_addr += heap_size;

    // The end of memory used by the loader, excluding pagetables.
    let end_of_underhill_mem = next_addr;

    // Page tables live at the end of the VTL2 imported region, which allows it
    // to be relocated separately.
    //
    // NOTE(review): the dynamic pages appear to be slack beyond the base
    // tables built below (see the page_table_relocation call) — confirm the
    // 5/2x sizing against the boot shim's relocation logic.
    let page_table_base_page_count = 5;
    let page_table_dynamic_page_count = 2 * page_table_base_page_count;
    let page_table_page_count = page_table_base_page_count + page_table_dynamic_page_count;
    let page_table_region_size = HV_PAGE_SIZE * page_table_page_count;
    let page_table_region_start = next_addr;
    next_addr += page_table_region_size;

    tracing::debug!(page_table_region_start, page_table_region_size);

    // Freeze `next_addr`: the full layout is now determined.
    let next_addr = next_addr;

    // The memory used by the loader must be smaller than the memory available.
    if next_addr > memory_start_address + memory_size {
        return Err(Error::NotEnoughMemory(next_addr - memory_start_address));
    }

    // Shim parameters for locations are relative to the base of where the shim is loaded.
    let calculate_shim_offset = |addr: u64| -> i64 { addr.wrapping_sub(shim_base_addr) as i64 };
    let initrd_crc = crc32fast::hash(initrd.unwrap_or(&[]));
    let shim_params = ShimParamsRaw {
        kernel_entry_offset: calculate_shim_offset(kernel_entry_point),
        cmdline_offset: calculate_shim_offset(cmdline_base),
        initrd_offset: calculate_shim_offset(initrd_gpa),
        initrd_size,
        initrd_crc,
        supported_isolation_type: match importer.isolation_config().isolation_type {
            IsolationType::None | IsolationType::Vbs => {
                loader_defs::shim::SupportedIsolationType::VBS
            }
            _ => panic!("only None and VBS are supported for ARM64"),
        },
        memory_start_offset: calculate_shim_offset(memory_start_address),
        memory_size,
        parameter_region_offset: calculate_shim_offset(parameter_region_start),
        parameter_region_size,
        vtl2_reserved_region_offset: 0,
        vtl2_reserved_region_size: 0,
        sidecar_offset: 0,
        sidecar_size: 0,
        sidecar_entry_offset: 0,
        used_start: calculate_shim_offset(memory_start_address),
        used_end: calculate_shim_offset(next_addr),
        bounce_buffer_start: 0,
        bounce_buffer_size: 0,
        log_buffer_start: calculate_shim_offset(bootshim_log_start),
        log_buffer_size: bootshim_log_size,
        heap_start_offset: calculate_shim_offset(heap_start),
        heap_size,
        persisted_state_region_offset: calculate_shim_offset(persisted_region_base),
        persisted_state_region_size: persisted_region_size,
    };

    importer
        .import_pages(
            shim_params_base / HV_PAGE_SIZE,
            shim_params_size / HV_PAGE_SIZE,
            "underhill-shim-params",
            BootPageAcceptance::Exclusive,
            shim_params.as_bytes(),
        )
        .map_err(Error::Importer)?;

    // Build the measured VTL0 config describing which VTL0 boot modes are
    // supported and where their artifacts live.
    let mut measured_config = ParavisorMeasuredVtl0Config {
        magic: ParavisorMeasuredVtl0Config::MAGIC,
        ..FromZeros::new_zeroed()
    };

    if let Some((uefi, vp_context)) = &supports_uefi {
        measured_config.supported_vtl0.set_uefi_supported(true);
        // The VP context page sits immediately after the measured config page.
        let vp_context_page = PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_AARCH64 + 1;
        measured_config.uefi_info = UefiInfo {
            firmware: PageRegionDescriptor {
                base_page_number: uefi.firmware_base / HV_PAGE_SIZE,
                page_count: uefi.total_size / HV_PAGE_SIZE,
            },
            vtl0_vp_context: PageRegionDescriptor {
                base_page_number: vp_context_page,
                page_count: 1,
            },
        };

        // Deposit the UEFI vp context.
        importer.import_pages(
            vp_context_page,
            1,
            "openhcl-uefi-vp-context",
            BootPageAcceptance::Exclusive,
            vp_context,
        )?;
    }

    importer
        .import_pages(
            PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_AARCH64,
            1,
            "underhill-measured-config",
            BootPageAcceptance::Exclusive,
            measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    tracing::trace!(page_table_region_start, "loading the page tables");

    // MAIR_EL1 layout: index 0 is device memory, indices 1-3 are normal memory
    // with increasing cacheability, and the remaining indices are unused
    // (set to device).
    let memory_attribute_indirection = MemoryAttributeIndirectionEl1([
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Normal_NonCacheable,
        MemoryAttributeEl1::Normal_WriteThrough,
        MemoryAttributeEl1::Normal_WriteBack,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
    ]);
    let mut page_tables: Vec<u8> = vec![0; page_table_region_size as usize];
    let page_tables = page_table::aarch64::build_identity_page_tables_aarch64(
        page_table_region_start,
        memory_start_address,
        memory_size,
        memory_attribute_indirection,
        page_tables.as_mut_slice(),
    );
    assert!((page_tables.len() as u64).is_multiple_of(HV_PAGE_SIZE));
    let page_table_page_base = page_table_region_start / HV_PAGE_SIZE;
    assert!(page_tables.len() as u64 <= page_table_region_size);
    // NOTE(review): this strict `>` subsumes the `<=` check above; it appears
    // intended to guarantee at least one spare page remains in the region
    // (the "dynamic" pages) — confirm and consider collapsing the two asserts.
    assert!(page_table_region_size as usize > page_tables.len());

    if with_relocation {
        // Indicate relocation information. Don't include page table region.
        importer.relocation_region(
            memory_start_address,
            end_of_underhill_mem - memory_start_address,
            Arm64PageSize::Large.into(),
            PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS,
            1 << 48,
            true,
            false,
            0, // BSP
        )?;

        // Tell the loader page table relocation information.
        importer.page_table_relocation(
            page_table_region_start,
            page_table_region_size / HV_PAGE_SIZE,
            page_tables.len() as u64 / HV_PAGE_SIZE,
            0,
        )?;
    }

    importer.import_pages(
        page_table_page_base,
        page_table_page_count,
        "underhill-page-tables",
        BootPageAcceptance::Exclusive,
        page_tables,
    )?;

    tracing::trace!("Importing register state");

    let mut import_reg = |register| {
        importer
            .import_vp_register(register)
            .map_err(Error::Importer)
    };

    // Set %X0 to relative location of boot_params_base
    let relative_boot_params_base = shim_params_base - shim_base_addr;
    import_reg(AArch64Register::X0(relative_boot_params_base).into())?;

    // Set %pc to the shim entry point.
    import_reg(AArch64Register::Pc(shim_entry_point).into())?;

    // System registers

    // EL1, using the EL's own stack pointer (sp = true; presumably SPSel = 1
    // — confirm against the Cpsr64 definition).
    import_reg(AArch64Register::Cpsr(Cpsr64::new().with_sp(true).with_el(1).into()).into())?;

    // This is what Hyper-V uses. qemu/KVM, and qemu/max use slightly
    // different flags.
    // KVM sets these in addition to what the Hyper-V uses:
    //
    // .with_sa(true)
    // .with_itd(true)
    // .with_sed(true)
    //
    // Windows sets:
    //
    // .with_sa(true)
    // .with_sa0(true)
    // .with_n_aa(true)
    // .with_sed(true)
    // .with_dze(true)
    // .with_en_ib(true)
    // .with_dssbs(true)
    //
    // Maybe could enforce the `s`tack `a`lignment, here, too. Depends on
    // the compiler generating code aligned accesses for the stack.
    //
    // Hyper-V sets:
    import_reg(
        AArch64Register::SctlrEl1(
            SctlrEl1::new()
                // MMU enable for EL1&0 stage 1 address translation.
                // It can be turned off in VTL2 for debugging.
                // The family of the `at` instructions and the `PAR_EL1` register are
                // useful for debugging MMU issues.
                .with_m(true)
                // Stage 1 Cacheability control, for data accesses.
                .with_c(true)
                // Stage 1 Cacheability control, for code.
                .with_i(true)
                // Reserved flags, must be set
                .with_eos(true)
                .with_tscxt(true)
                .with_eis(true)
                .with_span(true)
                .with_n_tlsmd(true)
                .with_lsmaoe(true)
                .into(),
        )
        .into(),
    )?;

    // Hyper-V UEFI and qemu/KVM use the same value for TCR_EL1.
    // They set `t0sz` to `28` as they map memory pretty low.
    // In the paravisor case, need more flexibility.
    // For the details, refer to the "Learning the architecture" series
    // on the ARM website.
    import_reg(
        AArch64Register::TcrEl1(
            TranslationControlEl1::new()
                .with_t0sz(0x11)
                .with_irgn0(1)
                .with_orgn0(1)
                .with_sh0(3)
                .with_tg0(TranslationGranule0::TG_4KB)
                // Disable TTBR1_EL1 walks (i.e. the upper half).
                .with_epd1(1)
                // Due to erratum #822227, need to set a valid TG1 regardless of EPD1.
                .with_tg1(TranslationGranule1::TG_4KB)
                .with_ips(IntermPhysAddrSize::IPA_48_BITS_256_TB)
                .into(),
        )
        .into(),
    )?;

    // The Memory Attribute Indirection
    import_reg(AArch64Register::MairEl1(memory_attribute_indirection.into()).into())?;
    import_reg(
        AArch64Register::Ttbr0El1(
            TranslationBaseEl1::new()
                .with_baddr(page_table_region_start)
                .into(),
        )
        .into(),
    )?;

    // VBAR is in the undefined state, setting it to 0 albeit
    // without the vector exception table. The shim can configure that on its own
    // if need be.
    import_reg(AArch64Register::VbarEl1(0).into())?;

    // Load parameter regions.
    let config_region_page_base = parameter_region_start / HV_PAGE_SIZE;

    // Slit
    let slit_page_base = config_region_page_base + PARAVISOR_CONFIG_SLIT_PAGE_INDEX;
    let slit_parameter_area = importer.create_parameter_area(
        slit_page_base,
        PARAVISOR_CONFIG_SLIT_SIZE_PAGES as u32,
        "underhill-slit",
    )?;
    importer.import_parameter(slit_parameter_area, 0, IgvmParameterType::Slit)?;

    // Pptt
    let pptt_page_base = config_region_page_base + PARAVISOR_CONFIG_PPTT_PAGE_INDEX;
    let pptt_parameter_area = importer.create_parameter_area(
        pptt_page_base,
        PARAVISOR_CONFIG_PPTT_SIZE_PAGES as u32,
        "underhill-pptt",
    )?;
    importer.import_parameter(pptt_parameter_area, 0, IgvmParameterType::Pptt)?;

    // device tree
    let dt_page_base = config_region_page_base + PARAVISOR_CONFIG_DEVICE_TREE_PAGE_INDEX;
    let dt_parameter_area = importer.create_parameter_area(
        dt_page_base,
        PARAVISOR_CONFIG_DEVICE_TREE_SIZE_PAGES as u32,
        "underhill-device-tree",
    )?;
    importer.import_parameter(dt_parameter_area, 0, IgvmParameterType::DeviceTree)?;

    // No VTOM on ARM64 (no hardware isolation support here), so the offset
    // bit is 0.
    let vtl2_measured_config = ParavisorMeasuredVtl2Config {
        magic: ParavisorMeasuredVtl2Config::MAGIC,
        vtom_offset_bit: 0,
        padding: [0; 7],
    };

    importer
        .import_pages(
            config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_PAGE_INDEX,
            PARAVISOR_MEASURED_VTL2_CONFIG_SIZE_PAGES,
            "underhill-vtl2-measured-config",
            BootPageAcceptance::Exclusive,
            vtl2_measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let imported_region_base =
        config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_ACCEPTED_MEMORY_PAGE_INDEX;

    importer.set_imported_regions_config_page(imported_region_base);

    Ok(())
}