1#![cfg_attr(minimal_rt, no_std, no_main)]
9#![expect(unsafe_code)]
11#![cfg_attr(nightly, feature(allocator_api))]
18
19mod arch;
20mod boot_logger;
21mod cmdline;
22mod dt;
23mod host_params;
24mod hypercall;
25mod memory;
26mod rt;
27mod sidecar;
28mod single_threaded;
29
30use crate::arch::setup_vtl2_memory;
31use crate::arch::setup_vtl2_vp;
32#[cfg(target_arch = "x86_64")]
33use crate::arch::tdx::get_tdx_tsc_reftime;
34use crate::arch::verify_imported_regions_hash;
35use crate::boot_logger::boot_logger_memory_init;
36use crate::boot_logger::boot_logger_runtime_init;
37use crate::hypercall::hvcall;
38use crate::memory::AddressSpaceManager;
39use crate::single_threaded::OffStackRef;
40use crate::single_threaded::off_stack;
41use arrayvec::ArrayString;
42use arrayvec::ArrayVec;
43use cmdline::BootCommandLineOptions;
44use core::fmt::Write;
45use dt::BootTimes;
46use dt::write_dt;
47use host_params::COMMAND_LINE_SIZE;
48use host_params::PartitionInfo;
49use host_params::shim_params::IsolationType;
50use host_params::shim_params::ShimParams;
51use hvdef::Vtl;
52use loader_defs::linux::SETUP_DTB;
53use loader_defs::linux::setup_data;
54use loader_defs::shim::ShimParamsRaw;
55use memory_range::RangeWalkResult;
56use memory_range::walk_ranges;
57use minimal_rt::enlightened_panic::enable_enlightened_panic;
58use sidecar::SidecarConfig;
59use sidecar_defs::SidecarOutput;
60use sidecar_defs::SidecarParams;
61use zerocopy::FromBytes;
62use zerocopy::FromZeros;
63use zerocopy::Immutable;
64use zerocopy::IntoBytes;
65use zerocopy::KnownLayout;
66
/// Error produced when a write into the kernel command line buffer fails.
#[derive(Debug)]
struct CommandLineTooLong;

/// Allows `?` on `write!` results to yield [`CommandLineTooLong`].
impl From<core::fmt::Error> for CommandLineTooLong {
    fn from(_fmt_err: core::fmt::Error) -> Self {
        CommandLineTooLong
    }
}
75
/// Inputs consumed by [`build_kernel_command_line`], bundled into one struct.
struct BuildKernelCommandLineParams<'a> {
    /// Shim parameters; the isolation type is read from here.
    params: &'a ShimParams,
    /// Output buffer that the kernel command line is appended to.
    cmdline: &'a mut ArrayString<COMMAND_LINE_SIZE>,
    /// Partition information: COM3 availability, boot options, NVMe keepalive
    /// flag, VTL2 vmbus connection id, and the command line that is appended
    /// last.
    partition_info: &'a PartitionInfo,
    /// Whether the host is trusted; gates use of the COM3 serial console.
    can_trust_host: bool,
    /// When set, the confidential-debug environment variable is emitted.
    is_confidential_debug: bool,
    /// Sidecar configuration, if any; contributes its own command line
    /// fragment.
    sidecar: Option<&'a SidecarConfig<'a>>,
    /// Selects `privatepool` vs `noprivatepool` in `OPENHCL_NVME_KEEP_ALIVE`.
    vtl2_pool_supported: bool,
}
85
86fn build_kernel_command_line(
88 fn_params: BuildKernelCommandLineParams<'_>,
89) -> Result<(), CommandLineTooLong> {
90 let BuildKernelCommandLineParams {
91 params,
92 cmdline,
93 partition_info,
94 can_trust_host,
95 is_confidential_debug,
96 sidecar,
97 vtl2_pool_supported,
98 } = fn_params;
99
100 const KERNEL_PARAMETERS: &[&str] = &[
103 "loglevel=8",
105 "log_buf_len=128K",
107 "printk.time=1",
109 "console_msg_format=syslog",
111 "uio_hv_generic.no_mask=1",
113 "coredump_filter=0x33",
116 "cpufreq.off=1",
118 "cpuidle.off=1",
122 "cryptomgr.notests",
126 "idle=halt",
130 "initcall_blacklist=init_real_mode,sbf_init",
133 "lpj=3000000",
135 "no_timer_check",
137 "noxsave",
143 "oops=panic",
145 "panic_on_warn=0",
147 "panic_print=0",
150 "panic=-1",
152 "printk.devkmsg=on",
160 "reboot=t",
164 "rootfstype=tmpfs",
166 "sysctl.vm.compaction_proactiveness=0",
169 "tsc=reliable",
172 "unknown_nmi_panic=1",
174 "vfio_pci.ids=1414:00ba",
176 "vfio.enable_unsafe_noiommu_mode=1",
179 "rdinit=/underhill-init",
181 "OPENHCL_NVME_VFIO=1",
183 "hv_storvsc.storvsc_vcpus_per_sub_channel=2048",
186 "hv_storvsc.storvsc_max_hw_queues=2",
188 "hv_storvsc.storvsc_ringbuffer_size=0x8000",
190 "MIMALLOC_ARENA_EAGER_COMMIT=0",
192 "acpi=off",
195 ];
196
197 const X86_KERNEL_PARAMETERS: &[&str] = &[
198 "clearcpuid=pcid",
207 "iommu=off",
209 "pci=off",
212 ];
213
214 const AARCH64_KERNEL_PARAMETERS: &[&str] = &[];
215
216 for p in KERNEL_PARAMETERS {
217 write!(cmdline, "{p} ")?;
218 }
219
220 let arch_parameters = if cfg!(target_arch = "x86_64") {
221 X86_KERNEL_PARAMETERS
222 } else {
223 AARCH64_KERNEL_PARAMETERS
224 };
225 for p in arch_parameters {
226 write!(cmdline, "{p} ")?;
227 }
228
229 const HARDWARE_ISOLATED_KERNEL_PARAMETERS: &[&str] = &[
230 "swiotlb=4096,1",
240 ];
241
242 const NON_HARDWARE_ISOLATED_KERNEL_PARAMETERS: &[&str] = &[
243 "swiotlb=1,1",
248 ];
249
250 if params.isolation_type.is_hardware_isolated() {
251 for p in HARDWARE_ISOLATED_KERNEL_PARAMETERS {
252 write!(cmdline, "{p} ")?;
253 }
254 } else {
255 for p in NON_HARDWARE_ISOLATED_KERNEL_PARAMETERS {
256 write!(cmdline, "{p} ")?;
257 }
258 }
259
260 let console = if partition_info.com3_serial_available && can_trust_host {
268 "ttyS2,115200"
269 } else {
270 "ttynull"
271 };
272 write!(cmdline, "console={console} ")?;
273
274 if params.isolation_type != IsolationType::None {
275 write!(
276 cmdline,
277 "{}=1 ",
278 underhill_confidentiality::OPENHCL_CONFIDENTIAL_ENV_VAR_NAME
279 )?;
280 }
281
282 if is_confidential_debug {
283 write!(
284 cmdline,
285 "{}=1 ",
286 underhill_confidentiality::OPENHCL_CONFIDENTIAL_DEBUG_ENV_VAR_NAME
287 )?;
288 }
289
290 write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=")?;
294
295 if partition_info.boot_options.disable_nvme_keep_alive {
296 write!(cmdline, "disabled,")?;
297 }
298
299 if partition_info.nvme_keepalive {
300 write!(cmdline, "host,")?;
301 } else {
302 write!(cmdline, "nohost,")?;
303 }
304
305 if vtl2_pool_supported {
306 write!(cmdline, "privatepool ")?;
307 } else {
308 write!(cmdline, "noprivatepool ")?;
309 }
310
311 if let Some(sidecar) = sidecar {
312 write!(cmdline, "{} ", sidecar.kernel_command_line())?;
313 }
314
315 if !cmdline.contains("hv_vmbus.message_connection_id") {
316 write!(
322 cmdline,
323 "hv_vmbus.message_connection_id=0x{:x} ",
324 partition_info.vmbus_vtl2.connection_id
325 )?;
326 }
327
328 cmdline.write_str(&partition_info.cmdline)?;
330
331 Ok(())
332}
333
/// Combined size in bytes of the `header` and `data` fields of [`Fdt`].
const FDT_SIZE: usize = 256 * 1024;

/// Page-aligned `setup_data` header followed by the buffer the device tree is
/// written into.
#[repr(C, align(4096))]
#[derive(FromBytes, IntoBytes, Immutable, KnownLayout)]
struct Fdt {
    /// `setup_data` header describing the `data` payload.
    header: setup_data,
    /// Device tree bytes; sized so that header plus data is `FDT_SIZE`.
    data: [u8; FDT_SIZE - size_of::<setup_data>()],
}
348
349fn shim_parameters(shim_params_raw_offset: isize) -> ShimParams {
353 unsafe extern "C" {
354 static __ehdr_start: u8;
355 }
356
357 let shim_base = core::ptr::addr_of!(__ehdr_start) as usize;
358
359 let raw_shim_params = unsafe {
363 &*(shim_base.wrapping_add_signed(shim_params_raw_offset) as *const ShimParamsRaw)
364 };
365
366 ShimParams::new(shim_base as u64, raw_shim_params)
367}
368
369#[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))]
370mod x86_boot {
371 use crate::PageAlign;
372 use crate::memory::AddressSpaceManager;
373 use crate::single_threaded::OffStackRef;
374 use crate::single_threaded::off_stack;
375 use crate::zeroed;
376 use core::mem::size_of;
377 use core::ops::Range;
378 use core::ptr;
379 use loader_defs::linux::E820_RAM;
380 use loader_defs::linux::E820_RESERVED;
381 use loader_defs::linux::SETUP_E820_EXT;
382 use loader_defs::linux::boot_params;
383 use loader_defs::linux::e820entry;
384 use loader_defs::linux::setup_data;
385 use loader_defs::shim::MemoryVtlType;
386 use memory_range::MemoryRange;
387 use zerocopy::FromZeros;
388 use zerocopy::Immutable;
389 use zerocopy::KnownLayout;
390
391 #[repr(C)]
392 #[derive(FromZeros, Immutable, KnownLayout)]
393 pub struct E820Ext {
394 pub header: setup_data,
395 pub entries: [e820entry; 512],
396 }
397
398 fn add_e820_entry(
399 entry: Option<&mut e820entry>,
400 range: MemoryRange,
401 typ: u32,
402 ) -> Result<(), BuildE820MapError> {
403 *entry.ok_or(BuildE820MapError::OutOfE820Entries)? = e820entry {
404 addr: range.start().into(),
405 size: range.len().into(),
406 typ: typ.into(),
407 };
408 Ok(())
409 }
410
411 #[derive(Debug)]
412 pub enum BuildE820MapError {
413 OutOfE820Entries,
415 }
416
417 pub fn build_e820_map(
419 boot_params: &mut boot_params,
420 ext: &mut E820Ext,
421 address_space: &AddressSpaceManager,
422 ) -> Result<bool, BuildE820MapError> {
423 boot_params.e820_entries = 0;
424 let mut entries = boot_params
425 .e820_map
426 .iter_mut()
427 .chain(ext.entries.iter_mut());
428
429 let mut n = 0;
430 for (range, typ) in address_space.vtl2_ranges() {
431 match typ {
432 MemoryVtlType::VTL2_RAM => {
433 add_e820_entry(entries.next(), range, E820_RAM)?;
434 n += 1;
435 }
436 MemoryVtlType::VTL2_CONFIG
437 | MemoryVtlType::VTL2_SIDECAR_IMAGE
438 | MemoryVtlType::VTL2_SIDECAR_NODE
439 | MemoryVtlType::VTL2_RESERVED
440 | MemoryVtlType::VTL2_GPA_POOL
441 | MemoryVtlType::VTL2_TDX_PAGE_TABLES
442 | MemoryVtlType::VTL2_BOOTSHIM_LOG_BUFFER
443 | MemoryVtlType::VTL2_PERSISTED_STATE_HEADER
444 | MemoryVtlType::VTL2_PERSISTED_STATE_PROTOBUF => {
445 add_e820_entry(entries.next(), range, E820_RESERVED)?;
446 n += 1;
447 }
448
449 _ => {
450 panic!("unexpected vtl2 ram type {typ:?} for range {range:#?}");
451 }
452 }
453 }
454
455 let base = n.min(boot_params.e820_map.len());
456 boot_params.e820_entries = base as u8;
457
458 if base < n {
459 ext.header.len = ((n - base) * size_of::<e820entry>()) as u32;
460 Ok(true)
461 } else {
462 Ok(false)
463 }
464 }
465
466 pub fn build_boot_params(
467 address_space: &AddressSpaceManager,
468 initrd: Range<u64>,
469 cmdline: &str,
470 setup_data_head: *const setup_data,
471 setup_data_tail: &mut &mut setup_data,
472 ) -> OffStackRef<'static, PageAlign<boot_params>> {
473 let mut boot_params_storage = off_stack!(PageAlign<boot_params>, zeroed());
474 let boot_params = &mut boot_params_storage.0;
475 boot_params.hdr.type_of_loader = 0xff; boot_params.hdr.hardware_subarch = 1.into();
487
488 boot_params.hdr.ramdisk_image = (initrd.start as u32).into();
489 boot_params.ext_ramdisk_image = (initrd.start >> 32) as u32;
490 let initrd_len = initrd.end - initrd.start;
491 boot_params.hdr.ramdisk_size = (initrd_len as u32).into();
492 boot_params.ext_ramdisk_size = (initrd_len >> 32) as u32;
493
494 let e820_ext = OffStackRef::leak(off_stack!(E820Ext, zeroed()));
495
496 let used_ext = build_e820_map(boot_params, e820_ext, address_space)
497 .expect("building e820 map must succeed");
498
499 if used_ext {
500 e820_ext.header.ty = SETUP_E820_EXT;
501 setup_data_tail.next = ptr::from_ref(&e820_ext.header) as u64;
502 *setup_data_tail = &mut e820_ext.header;
503 }
504
505 let cmd_line_addr = cmdline.as_ptr() as u64;
506 boot_params.hdr.cmd_line_ptr = (cmd_line_addr as u32).into();
507 boot_params.ext_cmd_line_ptr = (cmd_line_addr >> 32) as u32;
508
509 boot_params.hdr.setup_data = (setup_data_head as u64).into();
510
511 boot_params_storage
512 }
513}
514
515#[cfg(target_arch = "x86_64")]
517fn build_cc_blob_sev_info(
518 cc_blob: &mut loader_defs::linux::cc_blob_sev_info,
519 shim_params: &ShimParams,
520) {
521 cc_blob.magic = loader_defs::linux::CC_BLOB_SEV_INFO_MAGIC;
524 cc_blob.version = 0;
525 cc_blob._reserved = 0;
526 cc_blob.secrets_phys = shim_params.secrets_start();
527 cc_blob.secrets_len = hvdef::HV_PAGE_SIZE as u32;
528 cc_blob._rsvd1 = 0;
529 cc_blob.cpuid_phys = shim_params.cpuid_start();
530 cc_blob.cpuid_len = hvdef::HV_PAGE_SIZE as u32;
531 cc_blob._rsvd2 = 0;
532}
533
/// Wrapper that forces 4096-byte alignment on the contained value.
#[repr(C, align(4096))]
#[derive(FromZeros, Immutable, KnownLayout)]
struct PageAlign<T>(T);
537
538const fn zeroed<T: FromZeros>() -> T {
539 unsafe { core::mem::MaybeUninit::<T>::zeroed().assume_init() }
541}
542
543fn get_ref_time(isolation: IsolationType) -> Option<u64> {
544 match isolation {
545 #[cfg(target_arch = "x86_64")]
546 IsolationType::Tdx => get_tdx_tsc_reftime(),
547 #[cfg(target_arch = "x86_64")]
548 IsolationType::Snp => None,
549 _ => Some(minimal_rt::reftime::reference_time()),
550 }
551}
552
/// Main body of the boot shim. Never returns.
///
/// Reads the shim parameters and the host device tree, sets up VTL2 memory and
/// VPs, optionally starts the sidecar, builds the kernel command line and the
/// device tree handed to the kernel, then jumps to the kernel entry point.
///
/// `shim_params_raw_offset` is forwarded to [`shim_parameters`].
///
/// # Panics
///
/// Panics on any failure along the way: unreadable device tree, no CPUs, an
/// oversized command line, an initrd CRC mismatch, or a device tree write
/// error.
fn shim_main(shim_params_raw_offset: isize) -> ! {
    let p = shim_parameters(shim_params_raw_offset);
    // Enlightened panic is only enabled when the partition is not isolated.
    if p.isolation_type == IsolationType::None {
        enable_enlightened_panic();
    }

    // Set up the in-memory log buffer before installing the logger.
    boot_logger_memory_init(p.log_buffer);

    log::set_logger(&boot_logger::BOOT_LOGGER).unwrap();
    log::set_max_level(log::LevelFilter::Info);

    // Captured early; reported later as the start of the boot time span.
    let boot_reftime = get_ref_time(p.isolation_type);

    // Hypercall support is only initialized here when the partition is not
    // hardware isolated.
    if !p.isolation_type.is_hardware_isolated() {
        hvcall().initialize();
    }

    // Options parsed from the command line carried in the shim parameters.
    let mut static_options = BootCommandLineOptions::new();
    if let Some(cmdline) = p.command_line().command_line() {
        static_options.parse(cmdline);
    }

    // The host is trusted when there is no isolation, or when confidential
    // debug was requested on the static command line.
    let static_confidential_debug = static_options.confidential_debug;
    let can_trust_host = p.isolation_type == IsolationType::None || static_confidential_debug;

    let mut dt_storage = off_stack!(PartitionInfo, PartitionInfo::new());
    let address_space = OffStackRef::leak(off_stack!(
        AddressSpaceManager,
        AddressSpaceManager::new_const()
    ));
    let partition_info = match PartitionInfo::read_from_dt(
        &p,
        &mut dt_storage,
        address_space,
        static_options,
        can_trust_host,
    ) {
        Ok(val) => val,
        Err(e) => panic!("unable to read device tree params {:?}", e),
    };

    // Serial availability is only known once the partition info has been read.
    boot_logger_runtime_init(p.isolation_type, partition_info.com3_serial_available);
    log::info!("openhcl_boot: logging enabled");

    // Confidential debug can come from the static command line or from the
    // boot options in the partition info.
    let is_confidential_debug =
        static_confidential_debug || partition_info.boot_options.confidential_debug;

    // The VTL0 alias map is kept only when not hardware isolated, running in
    // VTL2, and the VSM capabilities register reports it as available.
    if !p.isolation_type.is_hardware_isolated()
        && hvcall().vtl() == Vtl::Vtl2
        && hvdef::HvRegisterVsmCapabilities::from(
            hvcall()
                .get_register(hvdef::HvAllArchRegisterName::VsmCapabilities.into())
                .expect("failed to query vsm capabilities")
                .as_u64(),
        )
        .vtl0_alias_map_available()
    {
        // Not provided by the device tree: default to the top physical
        // address bit.
        if partition_info.vtl0_alias_map.is_none() {
            partition_info.vtl0_alias_map =
                Some(1 << (arch::physical_address_bits(p.isolation_type) - 1));
        }
    } else {
        partition_info.vtl0_alias_map = None;
    }

    // Rebind as a shared reference; partition info is not mutated below.
    let partition_info: &PartitionInfo = partition_info;

    if partition_info.cpus.is_empty() {
        panic!("no cpus");
    }

    validate_vp_hw_ids(partition_info);

    setup_vtl2_memory(&p, partition_info, address_space);
    setup_vtl2_vp(partition_info);

    verify_imported_regions_hash(&p);

    let mut sidecar_params = off_stack!(PageAlign<SidecarParams>, zeroed());
    let mut sidecar_output = off_stack!(PageAlign<SidecarOutput>, zeroed());
    let sidecar = sidecar::start_sidecar(
        &p,
        partition_info,
        address_space,
        &mut sidecar_params.0,
        &mut sidecar_output.0,
    );

    // Rebind as a shared reference; the address space is only read below.
    let address_space: &AddressSpaceManager = address_space;

    let mut cmdline = off_stack!(ArrayString<COMMAND_LINE_SIZE>, ArrayString::new_const());
    build_kernel_command_line(BuildKernelCommandLineParams {
        params: &p,
        cmdline: &mut cmdline,
        partition_info,
        can_trust_host,
        is_confidential_debug,
        sidecar: sidecar.as_ref(),
        vtl2_pool_supported: address_space.has_vtl2_pool(),
    })
    .unwrap();

    // The FDT buffer is prefixed by a `setup_data` header of type SETUP_DTB
    // whose length covers the data area.
    let mut fdt = off_stack!(Fdt, zeroed());
    fdt.header.len = fdt.data.len() as u32;
    fdt.header.ty = SETUP_DTB;

    // On x86_64 the FDT header is the head of the `setup_data` list; the tail
    // is advanced as more entries are linked in.
    #[cfg(target_arch = "x86_64")]
    let mut setup_data_tail = &mut fdt.header;
    #[cfg(target_arch = "x86_64")]
    let setup_data_head = core::ptr::from_ref(setup_data_tail);

    // For SNP, link a CC blob entry after the FDT.
    #[cfg(target_arch = "x86_64")]
    if p.isolation_type == IsolationType::Snp {
        let cc_blob = OffStackRef::leak(off_stack!(loader_defs::linux::cc_blob_sev_info, zeroed()));
        build_cc_blob_sev_info(cc_blob, &p);

        let cc_data = OffStackRef::leak(off_stack!(loader_defs::linux::cc_setup_data, zeroed()));
        cc_data.header.len = size_of::<loader_defs::linux::cc_setup_data>() as u32;
        cc_data.header.ty = loader_defs::linux::SETUP_CC_BLOB;
        // NOTE(review): the blob address is truncated to 32 bits here; this
        // assumes the blob lives below 4 GiB. TODO confirm.
        cc_data.cc_blob_address = core::ptr::from_ref(&*cc_blob) as u32;

        setup_data_tail.next = core::ptr::from_ref(&*cc_data) as u64;
        setup_data_tail = &mut cc_data.header;
    }

    let initrd = p.initrd_base..p.initrd_base + p.initrd_size;

    // Check the initrd contents against the CRC in the shim parameters.
    let computed_crc = crc32fast::hash(p.initrd());
    assert_eq!(
        computed_crc, p.initrd_crc,
        "computed initrd crc does not match build time calculated crc"
    );

    #[cfg(target_arch = "x86_64")]
    let boot_params = x86_boot::build_boot_params(
        address_space,
        initrd.clone(),
        &cmdline,
        setup_data_head,
        &mut setup_data_tail,
    );

    // Only reported when a start time was available; a missing end time is
    // recorded as 0.
    let boot_times = boot_reftime.map(|start| BootTimes {
        start,
        end: get_ref_time(p.isolation_type).unwrap_or(0),
    });

    // Imported regions that lie outside VTL2 RAM (`Right`) must carry a true
    // flag; every other overlap case is ignored.
    for (range, result) in walk_ranges(
        partition_info.vtl2_ram.iter().map(|r| (r.range, ())),
        p.imported_regions(),
    ) {
        match result {
            RangeWalkResult::Neither | RangeWalkResult::Left(_) | RangeWalkResult::Both(_, _) => {}
            RangeWalkResult::Right(accepted) => {
                assert!(
                    accepted,
                    "range {:#x?} not in vtl2 ram was not preaccepted at launch",
                    range
                );
            }
        }
    }

    write_dt(
        &mut fdt.data,
        partition_info,
        address_space,
        // Only the range of each imported region is passed on.
        p.imported_regions().map(|r| {
            r.0
        }),
        initrd,
        &cmdline,
        sidecar.as_ref(),
        boot_times,
        p.isolation_type,
    )
    .unwrap();

    rt::verify_stack_cookie();

    log::info!("uninitializing hypercalls, about to jump to kernel");
    hvcall().uninitialize();

    cfg_if::cfg_if! {
        if #[cfg(target_arch = "x86_64")] {
            // SAFETY: NOTE(review): relies on `kernel_entry_address` being the
            // address of a kernel entry point with this signature; confirm
            // against the loader contract.
            let kernel_entry: extern "C" fn(u64, &loader_defs::linux::boot_params) -> ! =
                unsafe { core::mem::transmute(p.kernel_entry_address) };
            kernel_entry(0, &boot_params.0)
        } else if #[cfg(target_arch = "aarch64")] {
            // SAFETY: NOTE(review): same assumption as the x86_64 branch, with
            // the aarch64 entry signature.
            let kernel_entry: extern "C" fn(fdt_data: *const u8, mbz0: u64, mbz1: u64, mbz2: u64) -> ! =
                unsafe { core::mem::transmute(p.kernel_entry_address) };
            // Clear bit 0 of SCTLR_EL1, then invalidate the TLB and issue
            // barriers before entering the kernel.
            unsafe {
                core::arch::asm!(
                    "
                    mrs {0}, sctlr_el1
                    bic {0}, {0}, #0x1
                    msr sctlr_el1, {0}
                    tlbi vmalle1
                    dsb sy
                    isb sy",
                    lateout(reg) _,
                );
            }
            kernel_entry(fdt.data.as_ptr(), 0, 0, 0)
        } else {
            panic!("unsupported arch")
        }
    }
}
810
811fn validate_vp_hw_ids(partition_info: &PartitionInfo) {
815 use host_params::MAX_CPU_COUNT;
816 use hypercall::HwId;
817
818 if partition_info.isolation.is_hardware_isolated() {
819 return;
828 }
829
830 if hvcall().vtl() != Vtl::Vtl2 {
831 return;
835 }
836
837 let mut hw_ids = off_stack!(ArrayVec<HwId, MAX_CPU_COUNT>, ArrayVec::new_const());
840 hw_ids.clear();
841 hw_ids.extend(partition_info.cpus.iter().map(|c| c.reg as _));
842 let mut vp_indexes = off_stack!(ArrayVec<u32, MAX_CPU_COUNT>, ArrayVec::new_const());
843 vp_indexes.clear();
844 if let Err(err) = hvcall().get_vp_index_from_hw_id(&hw_ids, &mut vp_indexes) {
845 panic!(
846 "failed to get VP index for hardware ID {:#x}: {}",
847 hw_ids[vp_indexes.len().min(hw_ids.len() - 1)],
848 err
849 );
850 }
851 if let Some((i, &vp_index)) = vp_indexes
852 .iter()
853 .enumerate()
854 .find(|&(i, vp_index)| i as u32 != *vp_index)
855 {
856 panic!(
857 "CPU hardware ID {:#x} does not correspond to VP index {}",
858 hw_ids[i], vp_index
859 );
860 }
861}
862
/// Placeholder entry point for builds without the `minimal_rt` cfg; it always
/// panics via `unimplemented!`.
#[cfg(not(minimal_rt))]
fn main() {
    unimplemented!("build with MINIMAL_RT_BUILD to produce a working boot loader");
}
869
870#[cfg(test)]
871mod test {
872 use super::x86_boot::E820Ext;
873 use super::x86_boot::build_e820_map;
874 use crate::cmdline::BootCommandLineOptions;
875 use crate::dt::write_dt;
876 use crate::host_params::MAX_CPU_COUNT;
877 use crate::host_params::PartitionInfo;
878 use crate::host_params::shim_params::IsolationType;
879 use crate::memory::AddressSpaceManager;
880 use crate::memory::AddressSpaceManagerBuilder;
881 use arrayvec::ArrayString;
882 use arrayvec::ArrayVec;
883 use core::ops::Range;
884 use host_fdt_parser::CpuEntry;
885 use host_fdt_parser::MemoryEntry;
886 use host_fdt_parser::VmbusInfo;
887 use igvm_defs::MemoryMapEntryType;
888 use loader_defs::linux::E820_RAM;
889 use loader_defs::linux::E820_RESERVED;
890 use loader_defs::linux::boot_params;
891 use loader_defs::linux::e820entry;
892 use memory_range::MemoryRange;
893 use memory_range::subtract_ranges;
894 use sidecar_defs::PerCpuState;
895 use zerocopy::FromZeros;
896
    /// Exclusive end address of the high MMIO gap used in the test partition info.
    const HIGH_MMIO_GAP_END: u64 = 0x1000000000;
    /// Size in bytes of the vmbus MMIO gap.
    const VMBUS_MMIO_GAP_SIZE: u64 = 0x10000000;
    /// Start address of the high MMIO gap: the end minus the gap size.
    const HIGH_MMIO_GAP_START: u64 = HIGH_MMIO_GAP_END - VMBUS_MMIO_GAP_SIZE;
900
901 fn new_partition_info(cpu_count: usize) -> PartitionInfo {
904 let mut cpus: ArrayVec<CpuEntry, MAX_CPU_COUNT> = ArrayVec::new();
905
906 for id in 0..(cpu_count as u64) {
907 cpus.push(CpuEntry { reg: id, vnode: 0 });
908 }
909
910 let mut mmio = ArrayVec::new();
911 mmio.push(
912 MemoryRange::try_new(HIGH_MMIO_GAP_START..HIGH_MMIO_GAP_END).expect("valid range"),
913 );
914
915 PartitionInfo {
916 vtl2_ram: ArrayVec::new(),
917 partition_ram: ArrayVec::new(),
918 isolation: IsolationType::None,
919 bsp_reg: cpus[0].reg as u32,
920 cpus,
921 sidecar_cpu_overrides: PerCpuState {
922 per_cpu_state_specified: false,
923 sidecar_starts_cpu: [true; sidecar_defs::NUM_CPUS_SUPPORTED_FOR_PER_CPU_STATE],
924 },
925 cmdline: ArrayString::new(),
926 vmbus_vtl2: VmbusInfo {
927 mmio,
928 connection_id: 0,
929 },
930 vmbus_vtl0: VmbusInfo {
931 mmio: ArrayVec::new(),
932 connection_id: 0,
933 },
934 com3_serial_available: false,
935 gic: None,
936 pmu_gsiv: None,
937 memory_allocation_mode: host_fdt_parser::MemoryAllocationMode::Host,
938 entropy: None,
939 vtl0_alias_map: None,
940 nvme_keepalive: false,
941 boot_options: BootCommandLineOptions::new(),
942 }
943 }
944
945 #[test]
947 #[cfg_attr(
948 target_arch = "aarch64",
949 ignore = "TODO: investigate why this doesn't always work on ARM"
950 )]
951 fn fdt_cpu_scaling() {
952 const MAX_CPUS: usize = 2048;
953
954 let mut buf = [0; 0x40000];
955 write_dt(
956 &mut buf,
957 &new_partition_info(MAX_CPUS),
958 &AddressSpaceManager::new_const(),
959 [],
960 0..0,
961 &ArrayString::from("test").unwrap_or_default(),
962 None,
963 None,
964 IsolationType::None,
965 )
966 .unwrap();
967 }
968
    /// Compares the device tree written for a two-CPU partition against a
    /// reference image, byte for byte.
    ///
    /// The reference is given as `(offset, bytes)` spans copied into an
    /// otherwise zero-filled buffer of `BUF_SIZE` bytes.
    #[test]
    #[ignore = "TODO: temporarily broken"]
    fn fdt_dtc_check_content() {
        const MAX_CPUS: usize = 2;
        const BUF_SIZE: usize = 0x1000;

        // Non-zero regions of the expected image, keyed by starting offset.
        let dtb_data_spans: [(usize, &[u8]); 2] = [
            (
                0,
                b"\xd0\x0d\xfe\xed\x00\x00\x10\x00\x00\x00\x04\x38\x00\x00\x00\x38\
                \x00\x00\x00\x28\x00\x00\x00\x11\x00\x00\x00\x10\x00\x00\x00\x00\
                \x00\x00\x00\x4a\x00\x00\x01\x6c\x00\x00\x00\x00\x00\x00\x00\x00\
                \x00\x00\x00\x00\x00\x00\x00\x00\x23\x61\x64\x64\x72\x65\x73\x73\
                \x2d\x63\x65\x6c\x6c\x73\x00\x23\x73\x69\x7a\x65\x2d\x63\x65\x6c\
                \x6c\x73\x00\x6d\x6f\x64\x65\x6c\x00\x72\x65\x67\x00\x64\x65\x76\
                \x69\x63\x65\x5f\x74\x79\x70\x65\x00\x73\x74\x61\x74\x75\x73\x00\
                \x63\x6f\x6d\x70\x61\x74\x69\x62\x6c\x65\x00\x72\x61\x6e\x67\x65\
                \x73",
            ),
            (
                0x430,
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
                \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\
                \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x0f\x00\x00\x00\x00\
                \x00\x00\x00\x03\x00\x00\x00\x0f\x00\x00\x00\x1b\x6d\x73\x66\x74\
                \x2c\x75\x6e\x64\x65\x72\x68\x69\x6c\x6c\x00\x00\x00\x00\x00\x01\
                \x63\x70\x75\x73\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\
                \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x04\
                \x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x01\x63\x70\x75\x40\
                \x30\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x25\
                \x63\x70\x75\x00\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x21\
                \x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x05\x00\x00\x00\x31\
                \x6f\x6b\x61\x79\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x01\
                \x63\x70\x75\x40\x31\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\
                \x00\x00\x00\x25\x63\x70\x75\x00\x00\x00\x00\x03\x00\x00\x00\x04\
                \x00\x00\x00\x21\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x05\
                \x00\x00\x00\x31\x6f\x6b\x61\x79\x00\x00\x00\x00\x00\x00\x00\x02\
                \x00\x00\x00\x02\x00\x00\x00\x01\x76\x6d\x62\x75\x73\x00\x00\x00\
                \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\
                \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x0f\x00\x00\x00\x01\
                \x00\x00\x00\x03\x00\x00\x00\x0b\x00\x00\x00\x38\x6d\x73\x66\x74\
                \x2c\x76\x6d\x62\x75\x73\x00\x00\x00\x00\x00\x03\x00\x00\x00\x14\
                \x00\x00\x00\x43\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\
                \xf0\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x02\
                \x00\x00\x00\x09",
            ),
        ];

        // Assemble the expected image from the spans.
        let mut sample_buf = [0u8; BUF_SIZE];
        for (span_start, bytes) in dtb_data_spans {
            sample_buf[span_start..span_start + bytes.len()].copy_from_slice(bytes);
        }

        // Generate the actual image into a buffer of the same size.
        let mut buf = [0u8; BUF_SIZE];
        write_dt(
            &mut buf,
            &new_partition_info(MAX_CPUS),
            &AddressSpaceManager::new_const(),
            [],
            0..0,
            &ArrayString::from("test").unwrap_or_default(),
            None,
            None,
            IsolationType::None,
        )
        .unwrap();

        assert!(sample_buf == buf);
    }
1044
1045 #[test]
1052 #[ignore = "enabling the test requires installing additional software, \
1053 and developers will experience a break."]
1054 fn fdt_dtc_decompile() {
1055 const MAX_CPUS: usize = 2048;
1056
1057 let mut buf = [0; 0x40000];
1058 write_dt(
1059 &mut buf,
1060 &new_partition_info(MAX_CPUS),
1061 &AddressSpaceManager::new_const(),
1062 [],
1063 0..0,
1064 &ArrayString::from("test").unwrap_or_default(),
1065 None,
1066 None,
1067 IsolationType::None,
1068 )
1069 .unwrap();
1070
1071 let input_dtb_file_name = "openhcl_boot.dtb";
1072 let output_dts_file_name = "openhcl_boot.dts";
1073 std::fs::write(input_dtb_file_name, buf).unwrap();
1074 let success = std::process::Command::new("dtc")
1075 .args([input_dtb_file_name, "-I", "dtb", "-o", output_dts_file_name])
1076 .status()
1077 .unwrap()
1078 .success();
1079 assert!(success);
1080 }
1081
1082 fn new_address_space_manager(
1083 ram: &[MemoryRange],
1084 bootshim_used: MemoryRange,
1085 persisted_range: MemoryRange,
1086 parameter_range: MemoryRange,
1087 reclaim: Option<MemoryRange>,
1088 ) -> AddressSpaceManager {
1089 let ram = ram
1090 .iter()
1091 .cloned()
1092 .map(|range| MemoryEntry {
1093 range,
1094 mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
1095 vnode: 0,
1096 })
1097 .collect::<Vec<_>>();
1098 let mut address_space = AddressSpaceManager::new_const();
1099 AddressSpaceManagerBuilder::new(
1100 &mut address_space,
1101 &ram,
1102 bootshim_used,
1103 persisted_range,
1104 subtract_ranges([parameter_range], reclaim),
1105 )
1106 .init()
1107 .unwrap();
1108 address_space
1109 }
1110
1111 fn check_e820(boot_params: &boot_params, ext: &E820Ext, expected: &[(Range<u64>, u32)]) {
1112 let actual = boot_params.e820_map[..boot_params.e820_entries as usize]
1113 .iter()
1114 .chain(
1115 ext.entries
1116 .iter()
1117 .take((ext.header.len as usize) / size_of::<e820entry>()),
1118 );
1119
1120 assert_eq!(actual.clone().count(), expected.len());
1121
1122 for (actual, (expected_range, expected_type)) in actual.zip(expected.iter()) {
1123 let addr: u64 = actual.addr.into();
1124 let size: u64 = actual.size.into();
1125 let typ: u32 = actual.typ.into();
1126 assert_eq!(addr, expected_range.start);
1127 assert_eq!(size, expected_range.end - expected_range.start);
1128 assert_eq!(typ, *expected_type);
1129 }
1130 }
1131
    /// Page size in bytes (4 KiB) used by the E820 tests.
    const PAGE_SIZE: u64 = 0x1000;
    /// One mebibyte in bytes.
    const ONE_MB: u64 = 0x10_0000;
1134
1135 #[test]
1136 fn test_e820_basic() {
1137 let mut boot_params: boot_params = FromZeros::new_zeroed();
1139 let mut ext = FromZeros::new_zeroed();
1140 let bootshim_used = MemoryRange::try_new(ONE_MB..3 * ONE_MB).unwrap();
1141 let persisted_header_end = ONE_MB + PAGE_SIZE;
1142 let persisted_end = ONE_MB + 4 * PAGE_SIZE;
1143 let persisted_state = MemoryRange::try_new(ONE_MB..persisted_end).unwrap();
1144 let parameter_range = MemoryRange::try_new(2 * ONE_MB..3 * ONE_MB).unwrap();
1145 let address_space = new_address_space_manager(
1146 &[MemoryRange::new(ONE_MB..4 * ONE_MB)],
1147 bootshim_used,
1148 persisted_state,
1149 parameter_range,
1150 None,
1151 );
1152
1153 assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
1154
1155 check_e820(
1156 &boot_params,
1157 &ext,
1158 &[
1159 (ONE_MB..(persisted_header_end), E820_RESERVED),
1160 (persisted_header_end..persisted_end, E820_RESERVED),
1161 (persisted_end..2 * ONE_MB, E820_RAM),
1162 (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
1163 (3 * ONE_MB..4 * ONE_MB, E820_RAM),
1164 ],
1165 );
1166
1167 let mut boot_params: boot_params = FromZeros::new_zeroed();
1169 let mut ext = FromZeros::new_zeroed();
1170 let bootshim_used = MemoryRange::try_new(ONE_MB..5 * ONE_MB).unwrap();
1171 let persisted_header_end = ONE_MB + PAGE_SIZE;
1172 let persisted_end = ONE_MB + 4 * PAGE_SIZE;
1173 let persisted_state = MemoryRange::try_new(ONE_MB..persisted_end).unwrap();
1174 let parameter_range = MemoryRange::try_new(2 * ONE_MB..5 * ONE_MB).unwrap();
1175 let reclaim = MemoryRange::try_new(3 * ONE_MB..4 * ONE_MB).unwrap();
1176 let address_space = new_address_space_manager(
1177 &[MemoryRange::new(ONE_MB..6 * ONE_MB)],
1178 bootshim_used,
1179 persisted_state,
1180 parameter_range,
1181 Some(reclaim),
1182 );
1183
1184 assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
1185
1186 check_e820(
1187 &boot_params,
1188 &ext,
1189 &[
1190 (ONE_MB..(persisted_header_end), E820_RESERVED),
1191 (persisted_header_end..persisted_end, E820_RESERVED),
1192 (persisted_end..2 * ONE_MB, E820_RAM),
1193 (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
1194 (3 * ONE_MB..4 * ONE_MB, E820_RAM),
1195 (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
1196 (5 * ONE_MB..6 * ONE_MB, E820_RAM),
1197 ],
1198 );
1199
1200 let mut boot_params: boot_params = FromZeros::new_zeroed();
1202 let mut ext = FromZeros::new_zeroed();
1203 let bootshim_used = MemoryRange::try_new(ONE_MB..5 * ONE_MB).unwrap();
1204 let persisted_header_end = ONE_MB + PAGE_SIZE;
1205 let persisted_end = ONE_MB + 4 * PAGE_SIZE;
1206 let persisted_state = MemoryRange::try_new(ONE_MB..persisted_end).unwrap();
1207 let parameter_range = MemoryRange::try_new(2 * ONE_MB..5 * ONE_MB).unwrap();
1208 let reclaim = MemoryRange::try_new(3 * ONE_MB..4 * ONE_MB).unwrap();
1209 let address_space = new_address_space_manager(
1210 &[
1211 MemoryRange::new(ONE_MB..4 * ONE_MB),
1212 MemoryRange::new(4 * ONE_MB..10 * ONE_MB),
1213 ],
1214 bootshim_used,
1215 persisted_state,
1216 parameter_range,
1217 Some(reclaim),
1218 );
1219
1220 assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
1221
1222 check_e820(
1223 &boot_params,
1224 &ext,
1225 &[
1226 (ONE_MB..(persisted_header_end), E820_RESERVED),
1227 (persisted_header_end..persisted_end, E820_RESERVED),
1228 (persisted_end..2 * ONE_MB, E820_RAM),
1229 (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
1230 (3 * ONE_MB..4 * ONE_MB, E820_RAM),
1231 (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
1232 (5 * ONE_MB..10 * ONE_MB, E820_RAM),
1233 ],
1234 );
1235
1236 let mut boot_params: boot_params = FromZeros::new_zeroed();
1238 let mut ext = FromZeros::new_zeroed();
1239 let bootshim_used = MemoryRange::try_new(ONE_MB..5 * ONE_MB).unwrap();
1240 let persisted_header_end = ONE_MB + PAGE_SIZE;
1241 let persisted_end = ONE_MB + 4 * PAGE_SIZE;
1242 let persisted_state = MemoryRange::try_new(ONE_MB..persisted_end).unwrap();
1243 let parameter_range = MemoryRange::try_new(2 * ONE_MB..5 * ONE_MB).unwrap();
1244 let reclaim = MemoryRange::try_new(3 * ONE_MB..4 * ONE_MB).unwrap();
1245 let address_space = new_address_space_manager(
1246 &[
1247 MemoryRange::new(ONE_MB..2 * ONE_MB),
1248 MemoryRange::new(2 * ONE_MB..3 * ONE_MB),
1249 MemoryRange::new(3 * ONE_MB..4 * ONE_MB),
1250 MemoryRange::new(4 * ONE_MB..5 * ONE_MB),
1251 MemoryRange::new(5 * ONE_MB..6 * ONE_MB),
1252 MemoryRange::new(6 * ONE_MB..7 * ONE_MB),
1253 MemoryRange::new(7 * ONE_MB..8 * ONE_MB),
1254 ],
1255 bootshim_used,
1256 persisted_state,
1257 parameter_range,
1258 Some(reclaim),
1259 );
1260
1261 assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
1262
1263 check_e820(
1264 &boot_params,
1265 &ext,
1266 &[
1267 (ONE_MB..(persisted_header_end), E820_RESERVED),
1268 (persisted_header_end..persisted_end, E820_RESERVED),
1269 (persisted_end..2 * ONE_MB, E820_RAM),
1270 (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
1271 (3 * ONE_MB..4 * ONE_MB, E820_RAM),
1272 (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
1273 (5 * ONE_MB..8 * ONE_MB, E820_RAM),
1274 ],
1275 );
1276 }
1277
1278 #[test]
1280 fn test_e820_huge() {
1281 use crate::memory::AllocationPolicy;
1282 use crate::memory::AllocationType;
1283
1284 const E820_MAX_ENTRIES_ZEROPAGE: usize = 128;
1287 const RAM_RANGES: usize = 64;
1288 const TOTAL_ALLOCATIONS: usize = 256;
1289
1290 let mut ranges = Vec::new();
1292 for i in 0..RAM_RANGES {
1293 let start = (i as u64) * 64 * ONE_MB;
1294 let end = start + 64 * ONE_MB;
1295 ranges.push(MemoryRange::new(start..end));
1296 }
1297
1298 let bootshim_used = MemoryRange::try_new(0..ONE_MB * 2).unwrap();
1299 let persisted_range = MemoryRange::try_new(0..ONE_MB).unwrap();
1300 let parameter_range = MemoryRange::try_new(ONE_MB..2 * ONE_MB).unwrap();
1301
1302 let mut address_space = {
1303 let ram = ranges
1304 .iter()
1305 .cloned()
1306 .map(|range| MemoryEntry {
1307 range,
1308 mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
1309 vnode: 0,
1310 })
1311 .collect::<Vec<_>>();
1312 let mut address_space = AddressSpaceManager::new_const();
1313 AddressSpaceManagerBuilder::new(
1314 &mut address_space,
1315 &ram,
1316 bootshim_used,
1317 persisted_range,
1318 core::iter::once(parameter_range),
1319 )
1320 .init()
1321 .unwrap();
1322 address_space
1323 };
1324
1325 for i in 0..TOTAL_ALLOCATIONS {
1326 let _allocated = address_space
1330 .allocate(
1331 None,
1332 ONE_MB,
1333 if i % 2 == 0 {
1334 AllocationType::GpaPool
1335 } else {
1336 AllocationType::SidecarNode
1337 },
1338 AllocationPolicy::LowMemory,
1339 )
1340 .expect("should be able to allocate sidecar node");
1341 }
1342
1343 let mut boot_params: boot_params = FromZeros::new_zeroed();
1344 let mut ext = FromZeros::new_zeroed();
1345 let total_ranges = address_space.vtl2_ranges().count();
1346
1347 let used_ext = build_e820_map(&mut boot_params, &mut ext, &address_space).unwrap();
1348
1349 assert!(used_ext, "should use extension when there are many ranges");
1351
1352 assert_eq!(boot_params.e820_entries, E820_MAX_ENTRIES_ZEROPAGE as u8);
1354
1355 let ext_entries = (ext.header.len as usize) / size_of::<e820entry>();
1357 assert_eq!(ext_entries, total_ranges - E820_MAX_ENTRIES_ZEROPAGE);
1358
1359 let total_e820_entries = boot_params.e820_entries as usize + ext_entries;
1361 assert_eq!(total_e820_entries, total_ranges);
1362 }
1363}