loader/paravisor.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Paravisor specific loader definitions and implementation.

use crate::cpuid::HV_PSP_CPUID_PAGE;
use crate::importer::Aarch64Register;
use crate::importer::BootPageAcceptance;
use crate::importer::IgvmParameterType;
use crate::importer::ImageLoad;
use crate::importer::IsolationConfig;
use crate::importer::IsolationType;
use crate::importer::SegmentRegister;
use crate::importer::StartupMemoryType;
use crate::importer::TableRegister;
use crate::importer::X86Register;
use crate::linux::InitrdAddressType;
use crate::linux::InitrdConfig;
use crate::linux::InitrdInfo;
use crate::linux::KernelInfo;
use crate::linux::load_kernel_and_initrd_arm64;
use aarch64defs::Cpsr64;
use aarch64defs::IntermPhysAddrSize;
use aarch64defs::SctlrEl1;
use aarch64defs::TranslationBaseEl1;
use aarch64defs::TranslationControlEl1;
use aarch64defs::TranslationGranule0;
use aarch64defs::TranslationGranule1;
use hvdef::HV_PAGE_SIZE;
use hvdef::Vtl;
use igvm::registers::AArch64Register;
use loader_defs::paravisor::*;
use loader_defs::shim::ShimParamsRaw;
use memory_range::MemoryRange;
use page_table::aarch64::Arm64PageSize;
use page_table::aarch64::MemoryAttributeEl1;
use page_table::aarch64::MemoryAttributeIndirectionEl1;
use page_table::x64::PageTableBuilder;
use page_table::x64::X64_LARGE_PAGE_SIZE;
use page_table::x64::align_up_to_large_page_size;
use page_table::x64::align_up_to_page_size;
use page_table::x64::calculate_pde_table_count;
use thiserror::Error;
use x86defs::GdtEntry;
use x86defs::SegmentSelector;
use x86defs::X64_BUSY_TSS_SEGMENT_ATTRIBUTES;
use x86defs::X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES;
use x86defs::X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES;
use x86defs::cpuid::CpuidFunction;
use zerocopy::FromZeros;
use zerocopy::IntoBytes;

#[derive(Debug)]
pub struct Vtl0Linux<'a> {
    pub command_line: &'a std::ffi::CString,
    pub load_info: crate::linux::LoadInfo,
}

#[derive(Debug)]
pub struct Vtl0Config<'a> {
    pub supports_pcat: bool,
    /// The load info and the VP context page.
    pub supports_uefi: Option<(crate::uefi::LoadInfo, Vec<u8>)>,
    pub supports_linux: Option<Vtl0Linux<'a>>,
}

// See HclDefs.h
pub const HCL_SECURE_VTL: Vtl = Vtl::Vtl2;

/// Size of the persisted region (2MB).
const PERSISTED_REGION_SIZE: u64 = 2 * 1024 * 1024;

#[derive(Debug, Error)]
pub enum Error {
    #[error("memory is unaligned: {0}")]
    MemoryUnaligned(u64),
    #[error("command line too large: {0}")]
    CommandLineSize(usize),
    #[error("kernel load error")]
    Kernel(#[source] crate::linux::Error),
    #[error("shim load error")]
    Shim(#[source] crate::elf::Error),
    #[error("invalid initrd size: {0}")]
    InvalidInitrdSize(u64),
    #[error("memory used: {0} is greater than available")]
    NotEnoughMemory(u64),
    #[error("importer error")]
    Importer(#[from] anyhow::Error),
}

/// Kernel command line type.
pub enum CommandLineType<'a> {
    /// The command line is a static string.
    Static(&'a str),
    /// The command line is dynamic and host appendable via the chosen node in
    /// the device tree, with initial data specified by the provided CStr. An
    /// empty base_string may be provided to allow the host to specify the full
    /// kernel command line.
    HostAppendable(&'a str),
}

/// Load the underhill kernel on x64.
///
/// An optional initrd may be specified.
///
/// An optional `memory_page_base` may be specified. This will disable
/// relocation support for underhill.
pub fn load_openhcl_x64<F>(
    importer: &mut dyn ImageLoad<X86Register>,
    kernel_image: &mut F,
    shim: &mut F,
    sidecar: Option<&mut F>,
    command_line: CommandLineType<'_>,
    initrd: Option<&[u8]>,
    memory_page_base: Option<u64>,
    memory_page_count: u64,
    vtl0_config: Vtl0Config<'_>,
) -> Result<(), Error>
where
    F: std::io::Read + std::io::Seek,
{
    let IsolationConfig {
        isolation_type,
        paravisor_present,
        shared_gpa_boundary_bits,
    } = importer.isolation_config();

    // If no explicit memory base is specified, load with relocation support.
    let with_relocation = memory_page_base.is_none() && isolation_type == IsolationType::None;

    let memory_start_address = memory_page_base
        .map(|page_number| page_number * HV_PAGE_SIZE)
        .unwrap_or(PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS);

    let memory_size = memory_page_count * HV_PAGE_SIZE;

    // OpenHCL is laid out as follows:
    // --- High Memory, 2MB aligned ---
    // free space
    //
    // page tables
    // 16 pages reserved for bootshim heap
    // 8K bootshim logs
    // IGVM parameters
    // reserved vtl2 ranges
    // initrd
    // openhcl_boot
    // sidecar, if configured
    // - pad to next 2MB -
    // kernel
    // optional 2mb bounce buf for CVM
    // persisted state region
    // --- Low memory, 2MB aligned ---

    // Paravisor memory ranges must be 2MB (large page) aligned.
    if !memory_start_address.is_multiple_of(X64_LARGE_PAGE_SIZE) {
        return Err(Error::MemoryUnaligned(memory_start_address));
    }

    if !memory_size.is_multiple_of(X64_LARGE_PAGE_SIZE) {
        return Err(Error::MemoryUnaligned(memory_size));
    }

    // The whole memory range must be present and VTL2 protectable for the
    // underhill kernel to work.
    importer.verify_startup_memory_available(
        memory_start_address / HV_PAGE_SIZE,
        memory_page_count,
        if paravisor_present {
            StartupMemoryType::Vtl2ProtectableRam
        } else {
            StartupMemoryType::Ram
        },
    )?;

    let kernel_acceptance = match isolation_type {
        IsolationType::Snp | IsolationType::Tdx => BootPageAcceptance::Shared,
        _ => BootPageAcceptance::Exclusive,
    };

    let mut offset = memory_start_address;

    // Reserve the first 2MB for a potential persisted state region. The first
    // 4K page is always the persisted state header, and the bootshim may decide
    // to use the remaining pages for the protobuf payload.
    let persisted_region_base = offset;
    let persisted_region_size = PERSISTED_REGION_SIZE;
    offset += persisted_region_size;

    // If hardware isolated, reserve a 2MB range for bounce buffering shared
    // pages. This is done first because we know the start address is 2MB
    // aligned, with the next consumers wanting 2MB aligned ranges. This is
    // reserved at load time in order to guarantee the pagetables have entries
    // for this identity mapping.
    //
    // Leave this as a gap, as there's no need to accept or describe this range
    // in the IGVM file.
    let bounce_buffer = if matches!(isolation_type, IsolationType::Snp | IsolationType::Tdx) {
        let bounce_buffer_gpa = offset;
        assert_eq!(bounce_buffer_gpa % X64_LARGE_PAGE_SIZE, 0);
        let range = MemoryRange::new(bounce_buffer_gpa..bounce_buffer_gpa + X64_LARGE_PAGE_SIZE);

        offset += range.len();
        Some(range)
    } else {
        None
    };

    tracing::trace!(offset, "loading the kernel");

    // The x86_64 uncompressed kernel we use shows no difference in its code
    // sections when CONFIG_RELOCATABLE is toggled. In total, there are 6
    // places where a difference is found: dates in the Linux banner, the GNU
    // build ID, and metadata entries in the empty initrd image (which is
    // always embedded into the kernel). No sections with relocations appear
    // when CONFIG_RELOCATABLE is set.
    // Assume that at least the kernel entry point is position-independent and
    // that no loader assistance with relocation records (if any) is required.
    let load_info = crate::elf::load_static_elf(
        importer,
        kernel_image,
        offset,
        0,
        true,
        kernel_acceptance,
        "underhill-kernel",
    )
    .map_err(|e| Error::Kernel(crate::linux::Error::ElfLoader(e)))?;
    tracing::trace!("Kernel loaded at {load_info:x?}");
    let crate::elf::LoadInfo {
        minimum_address_used: _min_addr,
        next_available_address: mut offset,
        entrypoint: kernel_entrypoint,
    } = load_info;

    assert_eq!(offset & (HV_PAGE_SIZE - 1), 0);

    // If an AP kernel was provided, load it next.
    let (sidecar_size, sidecar_entrypoint) = if let Some(sidecar) = sidecar {
        // Sidecar load addr must be 2MB aligned
        offset = align_up_to_large_page_size(offset);

        let load_info = crate::elf::load_static_elf(
            importer,
            sidecar,
            0,
            offset,
            false,
            BootPageAcceptance::Exclusive,
            "sidecar-kernel",
        )
        .map_err(|e| Error::Kernel(crate::linux::Error::ElfLoader(e)))?;

        (
            load_info.next_available_address - offset,
            load_info.entrypoint,
        )
    } else {
        (0, 0)
    };

    let sidecar_base = offset;
    offset += sidecar_size;

    let load_info = crate::elf::load_static_elf(
        importer,
        shim,
        0,
        offset,
        false,
        BootPageAcceptance::Exclusive,
        "underhill-boot-shim",
    )
    .map_err(Error::Shim)?;
    tracing::trace!("The boot shim loaded at {load_info:x?}");
    let crate::elf::LoadInfo {
        minimum_address_used: shim_base_addr,
        next_available_address: mut offset,
        entrypoint: shim_entry_address,
    } = load_info;

    // Optionally import initrd if specified.
    let ramdisk = if let Some(initrd) = initrd {
        let initrd_base = offset;
        let initrd_size = align_up_to_page_size(initrd.len() as u64);

        importer.import_pages(
            initrd_base / HV_PAGE_SIZE,
            initrd_size / HV_PAGE_SIZE,
            "underhill-initrd",
            kernel_acceptance,
            initrd,
        )?;

        offset += initrd_size;
        Some((initrd_base, initrd.len() as u64))
    } else {
        None
    };

    let gdt_base_address = offset;
    let gdt_size = HV_PAGE_SIZE;
    offset += gdt_size;

    let boot_params_base = offset;
    let boot_params_size = HV_PAGE_SIZE;

    offset += boot_params_size;

    let cmdline_base = offset;
    let (cmdline, policy) = match command_line {
        CommandLineType::Static(val) => (val, CommandLinePolicy::STATIC),
        CommandLineType::HostAppendable(val) => (val, CommandLinePolicy::APPEND_CHOSEN),
    };

    if cmdline.len() > COMMAND_LINE_SIZE {
        return Err(Error::CommandLineSize(cmdline.len()));
    }

    let mut static_command_line = [0; COMMAND_LINE_SIZE];
    static_command_line[..cmdline.len()].copy_from_slice(cmdline.as_bytes());
    let paravisor_command_line = ParavisorCommandLine {
        policy,
        static_command_line_len: cmdline.len() as u16,
        static_command_line,
    };

    importer.import_pages(
        cmdline_base / HV_PAGE_SIZE,
        1,
        "underhill-command-line",
        BootPageAcceptance::Exclusive,
        paravisor_command_line.as_bytes(),
    )?;

    offset += HV_PAGE_SIZE;

    // Reserve space for the VTL2 reserved region.
    let reserved_region_size = PARAVISOR_RESERVED_VTL2_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let reserved_region_start = offset;
    offset += reserved_region_size;
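    // On SNP, this reserved range is later populated with the secrets page,
    // the CPUID pages, and the VTL2 VMSA page (see the SNP-specific imports
    // below).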

    tracing::debug!(reserved_region_start);

    let parameter_region_size = PARAVISOR_VTL2_CONFIG_REGION_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let parameter_region_start = offset;
    offset += parameter_region_size;

    tracing::debug!(parameter_region_start);

    // Reserve 8K for the bootshim log buffer. Import these pages so they are
    // available early without extra acceptance calls.
    let bootshim_log_size = HV_PAGE_SIZE * 2;
    let bootshim_log_start = offset;
    offset += bootshim_log_size;

    importer.import_pages(
        bootshim_log_start / HV_PAGE_SIZE,
        bootshim_log_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-log-buffer",
        BootPageAcceptance::Exclusive,
        &[],
    )?;

    // Reserve 16 pages for a bootshim heap. This is only used to parse the
    // protobuf payload from the previous instance in a servicing boot.
    //
    // Import these pages as it greatly simplifies the early startup code in the
    // bootshim for isolated guests. This allows the bootshim to use these pages
    // early on without extra acceptance calls.
    let heap_start = offset;
    let heap_size = 16 * HV_PAGE_SIZE;
    importer.import_pages(
        heap_start / HV_PAGE_SIZE,
        heap_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-heap",
        BootPageAcceptance::Exclusive,
        &[],
    )?;
    offset += heap_size;

    // The end of memory used by the loader, excluding pagetables.
    let end_of_underhill_mem = offset;

    // Page tables live at the end of VTL2 ram used by the bootshim.
    //
    // Size the available page table memory as 5 base pages plus 2 pages per GB
    // of mapped memory. This allows underhill to be mapped across a 512 GB
    // boundary when using more than 1 GB, as the PDPTE will span 2 PML4E
    // entries. Each GB of memory mapped requires 1 page of 2MB PDEs. Giving 2
    // extra base pages and 1 additional page per GB of mapped memory keeps the
    // page table relocation code simpler, as it does not need to reclaim free
    // pages from tables that have no valid entries.
    //
    // FUTURE: It would be better to change this so the shim only needs to map
    //         itself, the kernel, the initrd, and the IGVM parameters. This
    //         requires changing how the e820 map is constructed for the kernel
    //         along with changing the contract on where the IGVM parameters
    //         live within VTL2's memory.
    let local_map = match isolation_type {
        IsolationType::Snp | IsolationType::Tdx => {
            Some((PARAVISOR_LOCAL_MAP_VA, PARAVISOR_LOCAL_MAP_SIZE))
        }
        _ => None,
    };

    // HACK: On TDX, the kernel uses the ACPI AP Mailbox protocol to start APs.
    // However, the kernel assumes that all kernel ram is identity mapped, as
    // the kernel will jump to a startup routine in any arbitrary kernel ram
    // range.
    //
    // For now, describe 3GB of memory identity mapped in the page table used by
    // the mailbox assembly stub, so the kernel can start APs regardless of how
    // large the initial memory size was. An upcoming change will instead have
    // the bootshim modify the pagetable at runtime to guarantee all ranges
    // reported to the kernel as ram in the E820 map are mapped.
    //
    // FUTURE: A kernel change could remove this requirement entirely by making
    // the kernel spec compliant, requiring only that the reset vector page is
    // identity mapped.

    let page_table_mapping_size = if isolation_type == IsolationType::Tdx {
        3 * 1024 * 1024 * 1024
    } else {
        memory_size
    };

    let page_table_base_page_count = 5;
    let page_table_dynamic_page_count = {
        // Double the count to allow for simpler reconstruction.
        calculate_pde_table_count(memory_start_address, page_table_mapping_size) * 2
            + local_map.map_or(0, |v| calculate_pde_table_count(v.0, v.1))
    };
    let page_table_isolation_page_count = match isolation_type {
        IsolationType::Tdx => {
            // TDX requires up to an extra 3 pages to map the reset vector as a
            // 4K page.
            3
        }
        _ => 0,
    };
    let page_table_page_count = page_table_base_page_count
        + page_table_dynamic_page_count
        + page_table_isolation_page_count;
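    // Illustrative arithmetic only: mapping 3GB from a GB-aligned base needs 3
    // PDE tables, doubled to 6 here, plus any local-map PDE tables, the 5 base
    // pages, and up to 3 TDX reset-vector pages.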
    let page_table_region_size = HV_PAGE_SIZE * page_table_page_count;
    let page_table_region_start = offset;
    offset += page_table_region_size;

    tracing::debug!(page_table_region_start, page_table_region_size);

    let mut page_table_builder = PageTableBuilder::new(page_table_region_start)
        .with_mapped_region(memory_start_address, page_table_mapping_size);

    if let Some((local_map_start, size)) = local_map {
        page_table_builder = page_table_builder.with_local_map(local_map_start, size);
    }

    match isolation_type {
        IsolationType::Snp => {
            page_table_builder = page_table_builder.with_confidential_bit(51);
        }
        IsolationType::Tdx => {
            page_table_builder = page_table_builder.with_reset_vector(true);
        }
        _ => {}
    }

    let page_table = page_table_builder.build();

    assert!((page_table.len() as u64).is_multiple_of(HV_PAGE_SIZE));
    let page_table_page_base = page_table_region_start / HV_PAGE_SIZE;
    assert!(page_table.len() as u64 <= page_table_region_size);

    let offset = offset;

    if with_relocation {
        // Indicate relocation information. Don't include page table region.
        importer.relocation_region(
            memory_start_address,
            end_of_underhill_mem - memory_start_address,
            X64_LARGE_PAGE_SIZE,
            PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS,
            1 << 48,
            true,
            true,
            0, // BSP
        )?;

        // Tell the loader the page table relocation information.
        importer.page_table_relocation(
            page_table_region_start,
            page_table_region_size / HV_PAGE_SIZE,
            page_table.len() as u64 / HV_PAGE_SIZE,
            0,
        )?;
    }

    // The memory used by the loader must be smaller than the memory available.
    if offset > memory_start_address + memory_size {
        return Err(Error::NotEnoughMemory(offset - memory_start_address));
    }

    let (initrd_base, initrd_size) = ramdisk.unwrap_or((0, 0));
    // Shim parameters for locations are relative to the shim's load base.
    let calculate_shim_offset = |addr: u64| addr.wrapping_sub(shim_base_addr) as i64;
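    // Note that offsets are signed: regions placed below the shim, such as the
    // kernel and the persisted state region, produce negative offsets here.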
    let initrd_crc = crc32fast::hash(initrd.unwrap_or(&[]));
    let shim_params = ShimParamsRaw {
        kernel_entry_offset: calculate_shim_offset(kernel_entrypoint),
        cmdline_offset: calculate_shim_offset(cmdline_base),
        initrd_offset: calculate_shim_offset(initrd_base),
        initrd_size,
        initrd_crc,
        supported_isolation_type: match isolation_type {
            // To the shim, None and VBS isolation are the same. The shim
            // queries CPUID when running to determine if page acceptance needs
            // to be done.
            IsolationType::None | IsolationType::Vbs => {
                loader_defs::shim::SupportedIsolationType::VBS
            }
            IsolationType::Snp => loader_defs::shim::SupportedIsolationType::SNP,
            IsolationType::Tdx => loader_defs::shim::SupportedIsolationType::TDX,
        },
        memory_start_offset: calculate_shim_offset(memory_start_address),
        memory_size,
        parameter_region_offset: calculate_shim_offset(parameter_region_start),
        parameter_region_size,
        vtl2_reserved_region_offset: calculate_shim_offset(reserved_region_start),
        vtl2_reserved_region_size: reserved_region_size,
        sidecar_offset: calculate_shim_offset(sidecar_base),
        sidecar_size,
        sidecar_entry_offset: calculate_shim_offset(sidecar_entrypoint),
        used_start: calculate_shim_offset(memory_start_address),
        used_end: calculate_shim_offset(offset),
        bounce_buffer_start: bounce_buffer.map_or(0, |r| calculate_shim_offset(r.start())),
        bounce_buffer_size: bounce_buffer.map_or(0, |r| r.len()),
        page_tables_start: calculate_shim_offset(page_table_region_start),
        page_tables_size: page_table_region_size,
        log_buffer_start: calculate_shim_offset(bootshim_log_start),
        log_buffer_size: bootshim_log_size,
        heap_start_offset: calculate_shim_offset(heap_start),
        heap_size,
        persisted_state_region_offset: calculate_shim_offset(persisted_region_base),
        persisted_state_region_size: persisted_region_size,
    };

    tracing::debug!(boot_params_base, "shim gpa");

    importer
        .import_pages(
            boot_params_base / HV_PAGE_SIZE,
            boot_params_size / HV_PAGE_SIZE,
            "underhill-shim-params",
            BootPageAcceptance::Exclusive,
            shim_params.as_bytes(),
        )
        .map_err(Error::Importer)?;

    importer.import_pages(
        page_table_page_base,
        page_table_page_count,
        "underhill-page-tables",
        BootPageAcceptance::Exclusive,
        &page_table,
    )?;

    // Set selectors and control registers.
    // Set up two selectors and segment registers.
    // ds, es, fs, gs, ss are linearSelector
    // cs is linearCode64Selector

    // GDT is laid out as (counting by the small entries):
    //  0: null descriptor,
    //  1: null descriptor,
    //  2: linear code64 descriptor,
    //  3: linear descriptor for data,
    //  4: here you can add more descriptors.

    let default_data_attributes: u16 = X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES.into();
    let default_code64_attributes: u16 = X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES.into();
    let gdt = [
        // A large null descriptor.
        GdtEntry::new_zeroed(),
        GdtEntry::new_zeroed(),
        // Code descriptor for the long mode.
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_code64_attributes as u8,
            attr_high: (default_code64_attributes >> 8) as u8,
            ..GdtEntry::new_zeroed()
        },
        // Data descriptor.
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_data_attributes as u8,
            attr_high: (default_data_attributes >> 8) as u8,
            ..GdtEntry::new_zeroed()
        },
    ];

    const LINEAR_CODE64_DESCRIPTOR_INDEX: usize = 2;
    const LINEAR_DATA_DESCRIPTOR_INDEX: usize = 3;
    const RPL: u8 = 0x00; // requested privilege level: the highest

    let linear_code64_descriptor_selector =
        SegmentSelector::from_gdt_index(LINEAR_CODE64_DESCRIPTOR_INDEX as u16, RPL);
    let linear_data_descriptor_selector =
        SegmentSelector::from_gdt_index(LINEAR_DATA_DESCRIPTOR_INDEX as u16, RPL);
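    // With RPL 0 these selectors are just the GDT index shifted left by 3:
    // 0x10 for the code descriptor and 0x18 for the data descriptor.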

    importer.import_pages(
        gdt_base_address / HV_PAGE_SIZE,
        gdt_size / HV_PAGE_SIZE,
        "underhill-gdt",
        BootPageAcceptance::Exclusive,
        gdt.as_bytes(),
    )?;

    let mut import_reg = |register| {
        importer
            .import_vp_register(register)
            .map_err(Error::Importer)
    };

    // Import GDTR and selectors.
    import_reg(X86Register::Gdtr(TableRegister {
        base: gdt_base_address,
        limit: (size_of_val(&gdt) - 1) as u16,
    }))?;

    let ds = SegmentRegister {
        selector: linear_data_descriptor_selector.into_bits(),
        base: 0,
        limit: 0xffffffff,
        attributes: default_data_attributes,
    };
    import_reg(X86Register::Ds(ds))?;
    import_reg(X86Register::Es(ds))?;
    import_reg(X86Register::Fs(ds))?;
    import_reg(X86Register::Gs(ds))?;
    import_reg(X86Register::Ss(ds))?;

    let cs = SegmentRegister {
        selector: linear_code64_descriptor_selector.into_bits(),
        base: 0,
        limit: 0xffffffff,
        attributes: default_code64_attributes,
    };
    import_reg(X86Register::Cs(cs))?;

    // TODO: Work around an OS repo bug where enabling a higher VTL zeros TR
    //       instead of setting it to the reset default state. Manually set it
    //       to the reset default state until the OS repo is fixed.
    //
    //       In the future, we should just not set this at all.
    import_reg(X86Register::Tr(SegmentRegister {
        selector: 0x0000,
        base: 0x00000000,
        limit: 0x0000FFFF,
        attributes: X64_BUSY_TSS_SEGMENT_ATTRIBUTES.into(),
    }))?;

    // Set system registers to state expected by the boot shim, 64 bit mode with
    // paging enabled.

    // Set CR0
    import_reg(X86Register::Cr0(
        x86defs::X64_CR0_PG | x86defs::X64_CR0_PE | x86defs::X64_CR0_NE,
    ))?;

    // Set CR3 to point to page table
    import_reg(X86Register::Cr3(page_table_region_start))?;

    // Set CR4
    import_reg(X86Register::Cr4(
        x86defs::X64_CR4_PAE | x86defs::X64_CR4_MCE | x86defs::X64_CR4_OSXSAVE,
    ))?;

    // Set EFER to LMA, LME, and NXE for 64 bit mode.
    import_reg(X86Register::Efer(
        x86defs::X64_EFER_LMA | x86defs::X64_EFER_LME | x86defs::X64_EFER_NXE,
    ))?;

    // Set PAT
    import_reg(X86Register::Pat(x86defs::X86X_MSR_DEFAULT_PAT))?;

    // Set up remaining registers.
    // Set %rsi to the relative location of boot_params_base.
    let relative_boot_params_base = boot_params_base - shim_base_addr;
    import_reg(X86Register::Rsi(relative_boot_params_base))?;

    // Set %rip to the shim entry point.
    import_reg(X86Register::Rip(shim_entry_address))?;

    // Load parameter regions.
    let config_region_page_base = parameter_region_start / HV_PAGE_SIZE;

    // Slit
    let slit_page_base = config_region_page_base + PARAVISOR_CONFIG_SLIT_PAGE_INDEX;
    let slit_parameter_area = importer.create_parameter_area(
        slit_page_base,
        PARAVISOR_CONFIG_SLIT_SIZE_PAGES as u32,
        "underhill-slit",
    )?;
    importer.import_parameter(slit_parameter_area, 0, IgvmParameterType::Slit)?;

    // Pptt
    let pptt_page_base = config_region_page_base + PARAVISOR_CONFIG_PPTT_PAGE_INDEX;
    let pptt_parameter_area = importer.create_parameter_area(
        pptt_page_base,
        PARAVISOR_CONFIG_PPTT_SIZE_PAGES as u32,
        "underhill-pptt",
    )?;
    importer.import_parameter(pptt_parameter_area, 0, IgvmParameterType::Pptt)?;

    // device tree
    let dt_page_base = config_region_page_base + PARAVISOR_CONFIG_DEVICE_TREE_PAGE_INDEX;
    let dt_parameter_area = importer.create_parameter_area(
        dt_page_base,
        PARAVISOR_CONFIG_DEVICE_TREE_SIZE_PAGES as u32,
        "underhill-device-tree",
    )?;
    importer.import_parameter(dt_parameter_area, 0, IgvmParameterType::DeviceTree)?;

    if isolation_type == IsolationType::Snp {
        let reserved_region_page_base = reserved_region_start / HV_PAGE_SIZE;
        let secrets_page_base: u64 =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_SECRETS_PAGE_INDEX;
        importer.import_pages(
            secrets_page_base,
            PARAVISOR_RESERVED_VTL2_SNP_SECRETS_SIZE_PAGES,
            "underhill-snp-secrets-page",
            BootPageAcceptance::SecretsPage,
            &[],
        )?;

        let cpuid_page = create_snp_cpuid_page();
        let cpuid_page_base =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_CPUID_PAGE_INDEX;
        importer.import_pages(
            cpuid_page_base,
            1,
            "underhill-snp-cpuid-page",
            BootPageAcceptance::CpuidPage,
            cpuid_page.as_bytes(),
        )?;

        importer.import_pages(
            cpuid_page_base + 1,
            1,
            "underhill-snp-cpuid-extended-state-page",
            BootPageAcceptance::CpuidExtendedStatePage,
            &[],
        )?;

        let vmsa_page_base =
            reserved_region_page_base + PARAVISOR_RESERVED_VTL2_SNP_VMSA_PAGE_INDEX;
        importer.set_vp_context_page(vmsa_page_base)?;
    }

    // Load measured config.
    // The measured config is at page 0. Free pages start at page 1.
    let mut free_page = 1;
    let mut measured_config = ParavisorMeasuredVtl0Config {
        magic: ParavisorMeasuredVtl0Config::MAGIC,
        ..FromZeros::new_zeroed()
    };

    let Vtl0Config {
        supports_pcat,
        supports_uefi,
        supports_linux,
    } = vtl0_config;

    if supports_pcat {
        measured_config.supported_vtl0.set_pcat_supported(true);
    }

    if let Some((uefi, vp_context)) = &supports_uefi {
        measured_config.supported_vtl0.set_uefi_supported(true);
        let vp_context_page = free_page;
        free_page += 1;
        measured_config.uefi_info = UefiInfo {
            firmware: PageRegionDescriptor {
                base_page_number: uefi.firmware_base / HV_PAGE_SIZE,
                page_count: uefi.total_size / HV_PAGE_SIZE,
            },
            vtl0_vp_context: PageRegionDescriptor {
                base_page_number: vp_context_page,
                page_count: 1,
            },
        };

        // Deposit the UEFI vp context.
        importer.import_pages(
            vp_context_page,
            1,
            "openhcl-uefi-vp-context",
            BootPageAcceptance::Exclusive,
            vp_context,
        )?;
    }

    if let Some(linux) = supports_linux {
        measured_config
            .supported_vtl0
            .set_linux_direct_supported(true);

        let kernel_region = PageRegionDescriptor::new(
            linux.load_info.kernel.gpa / HV_PAGE_SIZE,
            align_up_to_page_size(linux.load_info.kernel.size) / HV_PAGE_SIZE,
        );

        let (initrd_region, initrd_size) = match linux.load_info.initrd {
            Some(info) => {
                if info.gpa % HV_PAGE_SIZE != 0 {
                    return Err(Error::MemoryUnaligned(info.gpa));
                }
                (
                    // initrd info is aligned up to the next page.
                    PageRegionDescriptor::new(
                        info.gpa / HV_PAGE_SIZE,
                        align_up_to_page_size(info.size) / HV_PAGE_SIZE,
                    ),
                    info.size,
                )
            }
            None => (PageRegionDescriptor::EMPTY, 0),
        };

        let command_line_page = free_page;
        // free_page += 1;

        // Import the command line as a C string.
        importer
            .import_pages(
                command_line_page,
                1,
                "underhill-vtl0-linux-command-line",
                BootPageAcceptance::Exclusive,
                linux.command_line.as_bytes_with_nul(),
            )
            .map_err(Error::Importer)?;
        let command_line = PageRegionDescriptor::new(command_line_page, 1);

        measured_config.linux_info = LinuxInfo {
            kernel_region,
            kernel_entrypoint: linux.load_info.kernel.entrypoint,
            initrd_region,
            initrd_size,
            command_line,
        };
    }

    importer
        .import_pages(
            PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_X64,
            1,
            "underhill-measured-config",
            BootPageAcceptance::Exclusive,
            measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let vtl2_measured_config = ParavisorMeasuredVtl2Config {
        magic: ParavisorMeasuredVtl2Config::MAGIC,
        vtom_offset_bit: shared_gpa_boundary_bits.unwrap_or(0),
        padding: [0; 7],
    };

    importer
        .import_pages(
            config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_PAGE_INDEX,
            PARAVISOR_MEASURED_VTL2_CONFIG_SIZE_PAGES,
            "underhill-vtl2-measured-config",
            BootPageAcceptance::Exclusive,
            vtl2_measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let imported_region_base =
        config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_ACCEPTED_MEMORY_PAGE_INDEX;

    importer.set_imported_regions_config_page(imported_region_base);
    Ok(())
}

/// Create a hypervisor SNP CPUID page with the default values.
fn create_snp_cpuid_page() -> HV_PSP_CPUID_PAGE {
    let mut cpuid_page = HV_PSP_CPUID_PAGE::default();

    // TODO SNP: The list used here is based on earlier Microsoft projects.
    // 1. ExtendedStateEnumeration should be part of BootPageAcceptance::CpuidExtendedStatePage,
    // but it is unclear whether Linux supports a second page. The need for the second page is that
    // the entries in it are actually based on supported features on a specific host.
    // 2. ExtendedStateEnumeration should specify Xfem = 3
    for (i, required_leaf) in crate::cpuid::SNP_REQUIRED_CPUID_LEAF_LIST_PARAVISOR
        .iter()
        .enumerate()
    {
        let entry = &mut cpuid_page.cpuid_leaf_info[i];
        entry.eax_in = required_leaf.eax;
        entry.ecx_in = required_leaf.ecx;
        if required_leaf.eax == CpuidFunction::ExtendedStateEnumeration.0 {
            entry.xfem_in = 1;
        }
        cpuid_page.count += 1;
    }

    cpuid_page
}

/// Load the underhill kernel on arm64.
///
/// An optional initrd may be specified.
///
/// An optional `memory_page_base` may be specified. This will disable
/// relocation support for underhill.
pub fn load_openhcl_arm64<F>(
    importer: &mut dyn ImageLoad<Aarch64Register>,
    kernel_image: &mut F,
    shim: &mut F,
    command_line: CommandLineType<'_>,
    initrd: Option<&[u8]>,
    memory_page_base: Option<u64>,
    memory_page_count: u64,
    vtl0_config: Vtl0Config<'_>,
) -> Result<(), Error>
where
    F: std::io::Read + std::io::Seek,
{
    let Vtl0Config {
        supports_pcat,
        supports_uefi,
        supports_linux,
    } = vtl0_config;

    assert!(!supports_pcat);
    assert!(supports_uefi.is_some() || supports_linux.is_some());

    let paravisor_present = importer.isolation_config().paravisor_present;

    // If no explicit memory base is specified, load with relocation support.
    let with_relocation = memory_page_base.is_none();

    let memory_start_address = memory_page_base
        .map(|page_number| page_number * HV_PAGE_SIZE)
        .unwrap_or(PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS);

    let memory_size = memory_page_count * HV_PAGE_SIZE;

    // Paravisor memory ranges must be 2MB (large page) aligned.
    if !memory_start_address.is_multiple_of(u64::from(Arm64PageSize::Large)) {
        return Err(Error::MemoryUnaligned(memory_start_address));
    }

    if !memory_size.is_multiple_of(u64::from(Arm64PageSize::Large)) {
        return Err(Error::MemoryUnaligned(memory_size));
    }

    // The whole memory range must be present and VTL2 protectable for the
    // underhill kernel to work.
    importer.verify_startup_memory_available(
        memory_start_address / HV_PAGE_SIZE,
        memory_page_count,
        if paravisor_present {
            StartupMemoryType::Vtl2ProtectableRam
        } else {
            StartupMemoryType::Ram
        },
    )?;

    let mut next_addr = memory_start_address;

    // Reserve the first 2MB for a potential persisted state region. The first
    // 4K page is always the persisted state header, and the bootshim may decide
    // to use the remaining pages for the protobuf payload.
    let persisted_region_base = next_addr;
    let persisted_region_size = PERSISTED_REGION_SIZE;
    next_addr += persisted_region_size;

    tracing::trace!(next_addr, "loading the kernel");

    // The aarch64 Linux kernel image is most commonly found as a flat binary with a
    // header rather than an ELF.
    // DeviceTree is generated dynamically by the boot shim.
    let initrd_address_type = InitrdAddressType::AfterKernel;
    let initrd_config = InitrdConfig {
        initrd_address: initrd_address_type,
        initrd: initrd.unwrap_or_default(),
    };
    let device_tree_blob = None;
    let crate::linux::LoadInfo {
        kernel:
            KernelInfo {
                gpa: kernel_base,
                size: kernel_size,
                entrypoint: kernel_entry_point,
            },
        initrd: initrd_info,
        dtb,
    } = load_kernel_and_initrd_arm64(
        importer,
        kernel_image,
        next_addr,
        Some(initrd_config),
        device_tree_blob,
    )
    .map_err(Error::Kernel)?;

    assert!(
        dtb.is_none(),
        "DeviceTree is generated dynamically by the boot shim."
    );

    tracing::trace!(kernel_base, "kernel loaded");

    let InitrdInfo {
        gpa: initrd_gpa,
        size: initrd_size,
    } = if let Some(initrd_info) = initrd_info {
        assert!(initrd_address_type == InitrdAddressType::AfterKernel);
        next_addr = initrd_info.gpa + initrd_info.size;
        initrd_info
    } else {
        next_addr = kernel_base + kernel_size;
        InitrdInfo { gpa: 0, size: 0 }
    };

    next_addr = align_up_to_page_size(next_addr);

    tracing::trace!(next_addr, "loading the boot shim");

    let crate::elf::LoadInfo {
        minimum_address_used: shim_base_addr,
        next_available_address: mut next_addr,
        entrypoint: shim_entry_point,
    } = crate::elf::load_static_elf(
        importer,
        shim,
        0,
        next_addr,
        false,
        BootPageAcceptance::Exclusive,
        "underhill-boot-shim",
    )
    .map_err(Error::Shim)?;

    tracing::trace!(shim_base_addr, "boot shim loaded");

    tracing::trace!(next_addr, "loading the command line");

    let cmdline_base = next_addr;
    let (cmdline, policy) = match command_line {
        CommandLineType::Static(val) => (val, CommandLinePolicy::STATIC),
        CommandLineType::HostAppendable(val) => (val, CommandLinePolicy::APPEND_CHOSEN),
    };

    if cmdline.len() > COMMAND_LINE_SIZE {
        return Err(Error::CommandLineSize(cmdline.len()));
    }

    let mut static_command_line = [0; COMMAND_LINE_SIZE];
    static_command_line[..cmdline.len()].copy_from_slice(cmdline.as_bytes());
    let paravisor_command_line = ParavisorCommandLine {
        policy,
        static_command_line_len: cmdline.len() as u16,
        static_command_line,
    };

    importer.import_pages(
        cmdline_base / HV_PAGE_SIZE,
        1,
        "underhill-command-line",
        BootPageAcceptance::Exclusive,
        paravisor_command_line.as_bytes(),
    )?;

    next_addr += HV_PAGE_SIZE;

    tracing::trace!(next_addr, "loading the boot shim parameters");

    let shim_params_base = next_addr;
    let shim_params_size = HV_PAGE_SIZE;

    next_addr += shim_params_size;

    let parameter_region_size = PARAVISOR_VTL2_CONFIG_REGION_PAGE_COUNT_MAX * HV_PAGE_SIZE;
    let parameter_region_start = next_addr;
    next_addr += parameter_region_size;

    tracing::debug!(parameter_region_start);

    // Reserve 8K for the bootshim log buffer.
    let bootshim_log_size = HV_PAGE_SIZE * 2;
    let bootshim_log_start = next_addr;
    next_addr += bootshim_log_size;

    importer.import_pages(
        bootshim_log_start / HV_PAGE_SIZE,
        bootshim_log_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-log-buffer",
        BootPageAcceptance::Exclusive,
        &[],
    )?;

    // Reserve 16 pages for a bootshim heap. This is only used to parse the
    // protobuf payload from the previous instance in a servicing boot.
    //
    // Import these pages as it greatly simplifies the early startup code in the
    // bootshim for isolated guests. This allows the bootshim to use these pages
    // early on without extra acceptance calls.
    let heap_start = next_addr;
    let heap_size = 16 * HV_PAGE_SIZE;
    importer.import_pages(
        heap_start / HV_PAGE_SIZE,
        heap_size / HV_PAGE_SIZE,
        "ohcl-boot-shim-heap",
        BootPageAcceptance::Exclusive,
        &[],
    )?;
    next_addr += heap_size;

    // The end of memory used by the loader, excluding pagetables.
    let end_of_underhill_mem = next_addr;

    // Page tables live at the end of the VTL2 imported region, which allows
    // them to be relocated separately.
    let page_table_base_page_count = 5;
    let page_table_dynamic_page_count = 2 * page_table_base_page_count;
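    // 5 base pages plus 10 dynamic pages gives a fixed 15-page page table
    // region; the asserts below verify that the built tables actually fit.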
    let page_table_page_count = page_table_base_page_count + page_table_dynamic_page_count;
    let page_table_region_size = HV_PAGE_SIZE * page_table_page_count;
    let page_table_region_start = next_addr;
    next_addr += page_table_region_size;

    tracing::debug!(page_table_region_start, page_table_region_size);

    let next_addr = next_addr;

    // The memory used by the loader must be smaller than the memory available.
    if next_addr > memory_start_address + memory_size {
        return Err(Error::NotEnoughMemory(next_addr - memory_start_address));
    }

    // Shim parameters for locations are relative to the shim's load base.
    let calculate_shim_offset = |addr: u64| -> i64 { addr.wrapping_sub(shim_base_addr) as i64 };
    let initrd_crc = crc32fast::hash(initrd.unwrap_or(&[]));
    let shim_params = ShimParamsRaw {
        kernel_entry_offset: calculate_shim_offset(kernel_entry_point),
        cmdline_offset: calculate_shim_offset(cmdline_base),
        initrd_offset: calculate_shim_offset(initrd_gpa),
        initrd_size,
        initrd_crc,
        supported_isolation_type: match importer.isolation_config().isolation_type {
            IsolationType::None | IsolationType::Vbs => {
                loader_defs::shim::SupportedIsolationType::VBS
            }
            _ => panic!("only None and VBS are supported for ARM64"),
        },
        memory_start_offset: calculate_shim_offset(memory_start_address),
        memory_size,
        parameter_region_offset: calculate_shim_offset(parameter_region_start),
        parameter_region_size,
        vtl2_reserved_region_offset: 0,
        vtl2_reserved_region_size: 0,
        sidecar_offset: 0,
        sidecar_size: 0,
        sidecar_entry_offset: 0,
        used_start: calculate_shim_offset(memory_start_address),
        used_end: calculate_shim_offset(next_addr),
        bounce_buffer_start: 0,
        bounce_buffer_size: 0,
        page_tables_start: 0,
        page_tables_size: 0,
        log_buffer_start: calculate_shim_offset(bootshim_log_start),
        log_buffer_size: bootshim_log_size,
        heap_start_offset: calculate_shim_offset(heap_start),
        heap_size,
        persisted_state_region_offset: calculate_shim_offset(persisted_region_base),
        persisted_state_region_size: persisted_region_size,
    };

    importer
        .import_pages(
            shim_params_base / HV_PAGE_SIZE,
            shim_params_size / HV_PAGE_SIZE,
            "underhill-shim-params",
            BootPageAcceptance::Exclusive,
            shim_params.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let mut measured_config = ParavisorMeasuredVtl0Config {
        magic: ParavisorMeasuredVtl0Config::MAGIC,
        ..FromZeros::new_zeroed()
    };

    if let Some((uefi, vp_context)) = &supports_uefi {
        measured_config.supported_vtl0.set_uefi_supported(true);
        let vp_context_page = PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_AARCH64 + 1;
        measured_config.uefi_info = UefiInfo {
            firmware: PageRegionDescriptor {
                base_page_number: uefi.firmware_base / HV_PAGE_SIZE,
                page_count: uefi.total_size / HV_PAGE_SIZE,
            },
            vtl0_vp_context: PageRegionDescriptor {
                base_page_number: vp_context_page,
                page_count: 1,
            },
        };

        // Deposit the UEFI vp context.
        importer.import_pages(
            vp_context_page,
            1,
            "openhcl-uefi-vp-context",
            BootPageAcceptance::Exclusive,
            vp_context,
        )?;
    }

    importer
        .import_pages(
            PARAVISOR_VTL0_MEASURED_CONFIG_BASE_PAGE_AARCH64,
            1,
            "underhill-measured-config",
            BootPageAcceptance::Exclusive,
            measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    tracing::trace!(page_table_region_start, "loading the page tables");

    let memory_attribute_indirection = MemoryAttributeIndirectionEl1([
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Normal_NonCacheable,
        MemoryAttributeEl1::Normal_WriteThrough,
        MemoryAttributeEl1::Normal_WriteBack,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
        MemoryAttributeEl1::Device_nGnRnE,
    ]);
    let page_tables = page_table::aarch64::build_identity_page_tables_aarch64(
        page_table_region_start,
        memory_start_address,
        memory_size,
        memory_attribute_indirection,
        page_table_region_size as usize,
    );
    assert!((page_tables.len() as u64).is_multiple_of(HV_PAGE_SIZE));
    let page_table_page_base = page_table_region_start / HV_PAGE_SIZE;
    assert!(page_tables.len() as u64 <= page_table_region_size);
    assert!(page_table_region_size as usize > page_tables.len());

    if with_relocation {
        // Indicate relocation information. Don't include page table region.
        importer.relocation_region(
            memory_start_address,
            end_of_underhill_mem - memory_start_address,
            Arm64PageSize::Large.into(),
            PARAVISOR_DEFAULT_MEMORY_BASE_ADDRESS,
            1 << 48,
            true,
            false,
            0, // BSP
        )?;

        // Tell the loader the page table relocation information.
        importer.page_table_relocation(
            page_table_region_start,
            page_table_region_size / HV_PAGE_SIZE,
            page_tables.len() as u64 / HV_PAGE_SIZE,
            0,
        )?;
    }

    importer.import_pages(
        page_table_page_base,
        page_table_page_count,
        "underhill-page-tables",
        BootPageAcceptance::Exclusive,
        &page_tables,
    )?;

    tracing::trace!("Importing register state");

    let mut import_reg = |register| {
        importer
            .import_vp_register(register)
            .map_err(Error::Importer)
    };

    // Set %X0 to the relative location of shim_params_base.
    let relative_boot_params_base = shim_params_base - shim_base_addr;
    import_reg(AArch64Register::X0(relative_boot_params_base).into())?;

    // Set %pc to the shim entry point.
    import_reg(AArch64Register::Pc(shim_entry_point).into())?;

    // System registers

    import_reg(AArch64Register::Cpsr(Cpsr64::new().with_sp(true).with_el(1).into()).into())?;

    // This is what Hyper-V uses. qemu/KVM and qemu/max use slightly
    // different flags.
    // KVM sets these in addition to what Hyper-V uses:
    //
    // .with_sa(true)
    // .with_itd(true)
    // .with_sed(true)
    //
    // Windows sets:
    //
    // .with_sa(true)
    // .with_sa0(true)
    // .with_n_aa(true)
    // .with_sed(true)
    // .with_dze(true)
    // .with_en_ib(true)
    // .with_dssbs(true)
    //
    // Maybe stack alignment (`sa`) could be enforced here, too; that depends
    // on the compiler generating aligned accesses for the stack.
    //
    // Hyper-V sets:
    import_reg(
        AArch64Register::SctlrEl1(
            SctlrEl1::new()
                // MMU enable for EL1&0 stage 1 address translation.
                // It can be turned off in VTL2 for debugging.
                // The family of the `at` instructions and the `PAR_EL1` register are
                // useful for debugging MMU issues.
                .with_m(true)
                // Stage 1 Cacheability control, for data accesses.
                .with_c(true)
                // Stage 1 Cacheability control, for code.
                .with_i(true)
                // Reserved flags, must be set
                .with_eos(true)
                .with_tscxt(true)
                .with_eis(true)
                .with_span(true)
                .with_n_tlsmd(true)
                .with_lsmaoe(true)
                .into(),
        )
        .into(),
    )?;

    // Hyper-V UEFI and qemu/KVM use the same value for TCR_EL1.
    // They set `t0sz` to `28` as they map memory pretty low.
    // In the paravisor case, more flexibility is needed.
    // For the details, refer to the "Learning the architecture" series
    // on the ARM website.
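    // Below, `t0sz` is set to 0x11 (17), i.e. a 2^(64 - 17) = 128 TiB VA range
    // for TTBR0_EL1, which covers the identity-mapped paravisor region.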
    import_reg(
        AArch64Register::TcrEl1(
            TranslationControlEl1::new()
                .with_t0sz(0x11)
                .with_irgn0(1)
                .with_orgn0(1)
                .with_sh0(3)
                .with_tg0(TranslationGranule0::TG_4KB)
                // Disable TTBR1_EL1 walks (i.e. the upper half).
                .with_epd1(1)
                // Due to erratum #822227, need to set a valid TG1 regardless of EPD1.
                .with_tg1(TranslationGranule1::TG_4KB)
                .with_ips(IntermPhysAddrSize::IPA_48_BITS_256_TB)
                .into(),
        )
        .into(),
    )?;

    // The Memory Attribute Indirection register.
    import_reg(AArch64Register::MairEl1(memory_attribute_indirection.into()).into())?;
    import_reg(
        AArch64Register::Ttbr0El1(
            TranslationBaseEl1::new()
                .with_baddr(page_table_region_start)
                .into(),
        )
        .into(),
    )?;

    // VBAR resets to an architecturally undefined value; set it to 0 even
    // though no exception vector table is installed there. The shim can
    // configure one on its own if need be.
    import_reg(AArch64Register::VbarEl1(0).into())?;

    // Load parameter regions.
    let config_region_page_base = parameter_region_start / HV_PAGE_SIZE;

    // Slit
    let slit_page_base = config_region_page_base + PARAVISOR_CONFIG_SLIT_PAGE_INDEX;
    let slit_parameter_area = importer.create_parameter_area(
        slit_page_base,
        PARAVISOR_CONFIG_SLIT_SIZE_PAGES as u32,
        "underhill-slit",
    )?;
    importer.import_parameter(slit_parameter_area, 0, IgvmParameterType::Slit)?;

    // Pptt
    let pptt_page_base = config_region_page_base + PARAVISOR_CONFIG_PPTT_PAGE_INDEX;
    let pptt_parameter_area = importer.create_parameter_area(
        pptt_page_base,
        PARAVISOR_CONFIG_PPTT_SIZE_PAGES as u32,
        "underhill-pptt",
    )?;
    importer.import_parameter(pptt_parameter_area, 0, IgvmParameterType::Pptt)?;

    // device tree
    let dt_page_base = config_region_page_base + PARAVISOR_CONFIG_DEVICE_TREE_PAGE_INDEX;
    let dt_parameter_area = importer.create_parameter_area(
        dt_page_base,
        PARAVISOR_CONFIG_DEVICE_TREE_SIZE_PAGES as u32,
        "underhill-device-tree",
    )?;
    importer.import_parameter(dt_parameter_area, 0, IgvmParameterType::DeviceTree)?;

    let vtl2_measured_config = ParavisorMeasuredVtl2Config {
        magic: ParavisorMeasuredVtl2Config::MAGIC,
        vtom_offset_bit: 0,
        padding: [0; 7],
    };

    importer
        .import_pages(
            config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_PAGE_INDEX,
            PARAVISOR_MEASURED_VTL2_CONFIG_SIZE_PAGES,
            "underhill-vtl2-measured-config",
            BootPageAcceptance::Exclusive,
            vtl2_measured_config.as_bytes(),
        )
        .map_err(Error::Importer)?;

    let imported_region_base =
        config_region_page_base + PARAVISOR_MEASURED_VTL2_CONFIG_ACCEPTED_MEMORY_PAGE_INDEX;

    importer.set_imported_regions_config_page(imported_region_base);

    Ok(())
}