1#![cfg_attr(minimal_rt, no_std, no_main)]
9#![expect(unsafe_code)]
11
12mod arch;
13mod boot_logger;
14mod cmdline;
15mod dt;
16mod host_params;
17mod hypercall;
18mod rt;
19mod sidecar;
20mod single_threaded;
21
22use crate::arch::setup_vtl2_memory;
23use crate::arch::setup_vtl2_vp;
24#[cfg(target_arch = "x86_64")]
25use crate::arch::tdx::get_tdx_tsc_reftime;
26use crate::arch::verify_imported_regions_hash;
27use crate::boot_logger::boot_logger_init;
28use crate::boot_logger::log;
29use crate::hypercall::hvcall;
30use crate::single_threaded::off_stack;
31use arrayvec::ArrayString;
32use arrayvec::ArrayVec;
33use boot_logger::LoggerType;
34use cmdline::BootCommandLineOptions;
35use core::fmt::Write;
36use dt::BootTimes;
37use dt::write_dt;
38use host_params::COMMAND_LINE_SIZE;
39use host_params::PartitionInfo;
40use host_params::shim_params::IsolationType;
41use host_params::shim_params::ShimParams;
42use hvdef::Vtl;
43use loader_defs::linux::SETUP_DTB;
44use loader_defs::linux::setup_data;
45use loader_defs::shim::ShimParamsRaw;
46use memory_range::MemoryRange;
47use memory_range::RangeWalkResult;
48use memory_range::merge_adjacent_ranges;
49use memory_range::walk_ranges;
50use minimal_rt::enlightened_panic::enable_enlightened_panic;
51use sidecar::SidecarConfig;
52use sidecar_defs::SidecarOutput;
53use sidecar_defs::SidecarParams;
54use single_threaded::OffStackRef;
55use zerocopy::FromBytes;
56use zerocopy::FromZeros;
57use zerocopy::Immutable;
58use zerocopy::IntoBytes;
59use zerocopy::KnownLayout;
60
/// Error reported by `build_kernel_command_line` when a write into the
/// fixed-capacity command line buffer fails.
#[derive(Debug)]
struct CommandLineTooLong;

/// Allows `?` to convert a failed formatted write into [`CommandLineTooLong`].
impl From<core::fmt::Error> for CommandLineTooLong {
    fn from(_fmt_err: core::fmt::Error) -> Self {
        CommandLineTooLong
    }
}
69
70fn build_kernel_command_line(
72 params: &ShimParams,
73 cmdline: &mut ArrayString<COMMAND_LINE_SIZE>,
74 partition_info: &PartitionInfo,
75 can_trust_host: bool,
76 is_confidential_debug: bool,
77 sidecar: Option<&SidecarConfig<'_>>,
78) -> Result<(), CommandLineTooLong> {
79 const KERNEL_PARAMETERS: &[&str] = &[
82 "loglevel=8",
84 "log_buf_len=128K",
86 "printk.time=1",
88 "console_msg_format=syslog",
90 "uio_hv_generic.no_mask=1",
92 "coredump_filter=0x33",
95 "cpufreq.off=1",
97 "cpuidle.off=1",
101 "cryptomgr.notests",
105 "idle=halt",
109 "initcall_blacklist=init_real_mode,sbf_init",
112 "lpj=3000000",
114 "no_timer_check",
116 "noxsave",
122 "oops=panic",
124 "panic_on_warn=0",
126 "panic_print=0",
129 "panic=-1",
131 "printk.devkmsg=on",
139 "reboot=t",
143 "rootfstype=tmpfs",
145 "sysctl.vm.compaction_proactiveness=0",
148 "tsc=reliable",
151 "unknown_nmi_panic=1",
153 "vfio_pci.ids=1414:00ba",
155 "vfio.enable_unsafe_noiommu_mode=1",
158 "rdinit=/underhill-init",
160 "OPENHCL_NVME_VFIO=1",
162 "hv_storvsc.storvsc_vcpus_per_sub_channel=2048",
165 "hv_storvsc.storvsc_max_hw_queues=2",
167 "hv_storvsc.storvsc_ringbuffer_size=0x8000",
169 "MIMALLOC_ARENA_EAGER_COMMIT=0",
171 ];
172
173 const X86_KERNEL_PARAMETERS: &[&str] = &[
174 "clearcpuid=pcid",
183 "iommu=off",
185 "pci=off",
188 ];
189
190 const AARCH64_KERNEL_PARAMETERS: &[&str] = &[];
191
192 for p in KERNEL_PARAMETERS {
193 write!(cmdline, "{p} ")?;
194 }
195
196 let arch_parameters = if cfg!(target_arch = "x86_64") {
197 X86_KERNEL_PARAMETERS
198 } else {
199 AARCH64_KERNEL_PARAMETERS
200 };
201 for p in arch_parameters {
202 write!(cmdline, "{p} ")?;
203 }
204
205 const HARDWARE_ISOLATED_KERNEL_PARAMETERS: &[&str] = &[
206 "swiotlb=4096,1",
216 ];
217
218 const NON_HARDWARE_ISOLATED_KERNEL_PARAMETERS: &[&str] = &[
219 "swiotlb=1,1",
224 ];
225
226 if params.isolation_type.is_hardware_isolated() {
227 for p in HARDWARE_ISOLATED_KERNEL_PARAMETERS {
228 write!(cmdline, "{p} ")?;
229 }
230 } else {
231 for p in NON_HARDWARE_ISOLATED_KERNEL_PARAMETERS {
232 write!(cmdline, "{p} ")?;
233 }
234 }
235
236 let console = if partition_info.com3_serial_available && can_trust_host {
244 "ttyS2,115200"
245 } else {
246 "ttynull"
247 };
248 write!(cmdline, "console={console} ")?;
249
250 if params.isolation_type != IsolationType::None {
251 write!(
252 cmdline,
253 "{}=1 ",
254 underhill_confidentiality::OPENHCL_CONFIDENTIAL_ENV_VAR_NAME
255 )?;
256 }
257
258 if is_confidential_debug {
259 write!(
260 cmdline,
261 "{}=1 ",
262 underhill_confidentiality::OPENHCL_CONFIDENTIAL_DEBUG_ENV_VAR_NAME
263 )?;
264 }
265
266 if partition_info.nvme_keepalive && !partition_info.vtl2_pool_memory.is_empty() {
269 write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=1 ")?;
270 }
271
272 if let Some(sidecar) = sidecar {
273 write!(cmdline, "{} ", sidecar.kernel_command_line())?;
274 }
275
276 if can_trust_host {
278 let old_cmdline = &partition_info.cmdline;
279
280 if !old_cmdline.contains("hv_vmbus.message_connection_id=") {
286 write!(
287 cmdline,
288 "hv_vmbus.message_connection_id=0x{:x} ",
289 partition_info.vmbus_vtl2.connection_id
290 )?;
291 }
292
293 cmdline.write_str(old_cmdline)?;
295 }
296
297 Ok(())
298}
299
/// Combined size in bytes (256 KiB) of the [`Fdt`] header and data fields.
const FDT_SIZE: usize = 256 * 1024;

/// A 4096-byte-aligned `setup_data` header immediately followed by a byte
/// buffer for the device tree.
///
/// `data` is sized so that `header` plus `data` add up to [`FDT_SIZE`] bytes.
#[repr(C, align(4096))]
#[derive(FromBytes, IntoBytes, Immutable, KnownLayout)]
struct Fdt {
    /// `setup_data` header; `shim_main` fills in its length and sets its type
    /// to `SETUP_DTB`.
    header: setup_data,
    /// Buffer that `shim_main` hands to `write_dt`.
    data: [u8; FDT_SIZE - size_of::<setup_data>()],
}
314
315fn shim_parameters(shim_params_raw_offset: isize) -> ShimParams {
319 unsafe extern "C" {
320 static __ehdr_start: u8;
321 }
322
323 let shim_base = core::ptr::addr_of!(__ehdr_start) as usize;
324
325 let raw_shim_params = unsafe {
329 &*(shim_base.wrapping_add_signed(shim_params_raw_offset) as *const ShimParamsRaw)
330 };
331
332 ShimParams::new(shim_base as u64, raw_shim_params)
333}
334
/// Capacity of the fixed-size lists built by `reserved_memory_regions`:
/// five entries plus one per possible sidecar node.
pub const MAX_RESERVED_MEM_RANGES: usize = 5 + sidecar_defs::MAX_NODES;

/// Tag attached to each range returned by `reserved_memory_regions`,
/// recording why the range is reserved.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReservedMemoryType {
    /// A range yielded by `PartitionInfo::vtl2_config_regions()`.
    Vtl2Config,
    /// The partition's `vtl2_reserved_region`.
    Vtl2Reserved,
    /// The sidecar image range (`SidecarConfig::image`).
    SidecarImage,
    /// Memory described by one of the sidecar's per-node parameters.
    SidecarNode,
    /// The partition's `vtl2_pool_memory` range.
    Vtl2GpaPool,
}
355
356fn reserved_memory_regions(
359 partition_info: &PartitionInfo,
360 sidecar: Option<&SidecarConfig<'_>>,
361) -> OffStackRef<'static, impl AsRef<[(MemoryRange, ReservedMemoryType)]> + use<>> {
362 let mut reserved = off_stack!(ArrayVec<(MemoryRange, ReservedMemoryType), MAX_RESERVED_MEM_RANGES>, ArrayVec::new_const());
363 reserved.clear();
364 reserved.extend(
365 partition_info
366 .vtl2_config_regions()
367 .map(|r| (r, ReservedMemoryType::Vtl2Config)),
368 );
369 if let Some(sidecar) = sidecar {
370 reserved.push((sidecar.image, ReservedMemoryType::SidecarImage));
371 reserved.extend(sidecar.node_params.iter().map(|x| {
372 (
373 MemoryRange::new(x.memory_base..x.memory_base + x.memory_size),
374 ReservedMemoryType::SidecarNode,
375 )
376 }));
377 }
378
379 if !partition_info.vtl2_reserved_region.is_empty() {
381 reserved.push((
382 partition_info.vtl2_reserved_region,
383 ReservedMemoryType::Vtl2Reserved,
384 ));
385 }
386
387 if partition_info.vtl2_pool_memory != MemoryRange::EMPTY {
389 reserved.push((
390 partition_info.vtl2_pool_memory,
391 ReservedMemoryType::Vtl2GpaPool,
392 ));
393 }
394
395 reserved
396 .as_mut()
397 .sort_unstable_by_key(|(r, _typ)| r.start());
398
399 let mut flattened = off_stack!(ArrayVec<(MemoryRange, ReservedMemoryType), MAX_RESERVED_MEM_RANGES>, ArrayVec::new_const());
405 flattened.clear();
406 flattened.extend(merge_adjacent_ranges(reserved.iter().copied()));
407 flattened
408}
409
410#[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))]
411mod x86_boot {
412 use crate::PageAlign;
413 use crate::ReservedMemoryType;
414 use crate::host_params::PartitionInfo;
415 use crate::host_params::shim_params::IsolationType;
416 use crate::single_threaded::OffStackRef;
417 use crate::single_threaded::off_stack;
418 use crate::zeroed;
419 use core::mem::size_of;
420 use core::ops::Range;
421 use core::ptr;
422 use loader_defs::linux::E820_RAM;
423 use loader_defs::linux::E820_RESERVED;
424 use loader_defs::linux::SETUP_E820_EXT;
425 use loader_defs::linux::boot_params;
426 use loader_defs::linux::e820entry;
427 use loader_defs::linux::setup_data;
428 use memory_range::MemoryRange;
429 use memory_range::RangeWalkResult;
430 use memory_range::walk_ranges;
431 use zerocopy::FromZeros;
432 use zerocopy::Immutable;
433 use zerocopy::KnownLayout;
434
435 #[repr(C)]
436 #[derive(FromZeros, Immutable, KnownLayout)]
437 pub struct E820Ext {
438 pub header: setup_data,
439 pub entries: [e820entry; 512],
440 }
441
442 fn add_e820_entry(
443 entry: Option<&mut e820entry>,
444 range: MemoryRange,
445 typ: u32,
446 ) -> Result<(), BuildE820MapError> {
447 *entry.ok_or(BuildE820MapError::OutOfE820Entries)? = e820entry {
448 addr: range.start().into(),
449 size: range.len().into(),
450 typ: typ.into(),
451 };
452 Ok(())
453 }
454
455 #[derive(Debug)]
456 pub enum BuildE820MapError {
457 ReservedRegionNotCovered,
459 OutOfE820Entries,
461 }
462
463 pub fn build_e820_map(
465 boot_params: &mut boot_params,
466 ext: &mut E820Ext,
467 partition_info: &PartitionInfo,
468 reserved: &[(MemoryRange, ReservedMemoryType)],
469 #[cfg_attr(target_arch = "aarch64", expect(unused_variables))]
471 isolation_type: IsolationType,
472 #[cfg_attr(target_arch = "aarch64", expect(unused_variables))] page_tables: Option<MemoryRange>,
474 ) -> Result<bool, BuildE820MapError> {
475 boot_params.e820_entries = 0;
476 let mut entries = boot_params
477 .e820_map
478 .iter_mut()
479 .chain(ext.entries.iter_mut());
480
481 let mut n = 0;
482 for (range, r) in walk_ranges(
483 partition_info.vtl2_ram.iter().map(|e| (e.range, ())),
484 reserved.iter().map(|&(r, _)| (r, ())),
485 ) {
486 match r {
487 RangeWalkResult::Neither => {}
488 RangeWalkResult::Left(_) => {
489 add_e820_entry(entries.next(), range, E820_RAM)?;
490 n += 1;
491 }
492 RangeWalkResult::Right(_) => {
493 return Err(BuildE820MapError::ReservedRegionNotCovered);
494 }
495 RangeWalkResult::Both(_, _) => {
496 add_e820_entry(entries.next(), range, E820_RESERVED)?;
497 n += 1;
498 }
499 }
500 }
501
502 #[cfg(target_arch = "x86_64")]
510 if IsolationType::Tdx == isolation_type {
511 add_e820_entry(entries.next(), page_tables.unwrap(), E820_RESERVED)?;
512 n += 1;
513 add_e820_entry(
514 entries.next(),
515 MemoryRange::new(
516 x86defs::tdx::RESET_VECTOR_PAGE..x86defs::tdx::RESET_VECTOR_PAGE + 0x1000,
517 ),
518 E820_RESERVED,
519 )?;
520 n += 1;
521 }
522
523 let base = n.min(boot_params.e820_map.len());
524 boot_params.e820_entries = base as u8;
525
526 if base < n {
527 ext.header.len = ((n - base) * size_of::<e820entry>()) as u32;
528 Ok(true)
529 } else {
530 Ok(false)
531 }
532 }
533
534 pub fn build_boot_params(
535 partition_info: &PartitionInfo,
536 reserved_memory: &[(MemoryRange, ReservedMemoryType)],
537 initrd: Range<u64>,
538 cmdline: &str,
539 setup_data_head: *const setup_data,
540 setup_data_tail: &mut &mut setup_data,
541 isolation_type: IsolationType,
542 page_tables: Option<MemoryRange>,
543 ) -> OffStackRef<'static, PageAlign<boot_params>> {
544 let mut boot_params_storage = off_stack!(PageAlign<boot_params>, zeroed());
545 let boot_params = &mut boot_params_storage.0;
546 boot_params.hdr.type_of_loader = 0xff; boot_params.hdr.hardware_subarch = 1.into();
558
559 boot_params.hdr.ramdisk_image = (initrd.start as u32).into();
560 boot_params.ext_ramdisk_image = (initrd.start >> 32) as u32;
561 let initrd_len = initrd.end - initrd.start;
562 boot_params.hdr.ramdisk_size = (initrd_len as u32).into();
563 boot_params.ext_ramdisk_size = (initrd_len >> 32) as u32;
564
565 let e820_ext = OffStackRef::leak(off_stack!(E820Ext, zeroed()));
566
567 let used_ext = build_e820_map(
568 boot_params,
569 e820_ext,
570 partition_info,
571 reserved_memory,
572 isolation_type,
573 page_tables,
574 )
575 .expect("building e820 map must succeed");
576
577 if used_ext {
578 e820_ext.header.ty = SETUP_E820_EXT;
579 setup_data_tail.next = ptr::from_ref(&e820_ext.header) as u64;
580 *setup_data_tail = &mut e820_ext.header;
581 }
582
583 let cmd_line_addr = cmdline.as_ptr() as u64;
584 boot_params.hdr.cmd_line_ptr = (cmd_line_addr as u32).into();
585 boot_params.ext_cmd_line_ptr = (cmd_line_addr >> 32) as u32;
586
587 boot_params.hdr.setup_data = (setup_data_head as u64).into();
588
589 boot_params_storage
590 }
591}
592
593#[cfg(target_arch = "x86_64")]
595fn build_cc_blob_sev_info(
596 cc_blob: &mut loader_defs::linux::cc_blob_sev_info,
597 shim_params: &ShimParams,
598) {
599 cc_blob.magic = loader_defs::linux::CC_BLOB_SEV_INFO_MAGIC;
602 cc_blob.version = 0;
603 cc_blob._reserved = 0;
604 cc_blob.secrets_phys = shim_params.secrets_start();
605 cc_blob.secrets_len = hvdef::HV_PAGE_SIZE as u32;
606 cc_blob._rsvd1 = 0;
607 cc_blob.cpuid_phys = shim_params.cpuid_start();
608 cc_blob.cpuid_len = hvdef::HV_PAGE_SIZE as u32;
609 cc_blob._rsvd2 = 0;
610}
611
/// Wrapper that gives the contained value 4096-byte alignment.
#[repr(C, align(4096))]
#[derive(FromZeros, Immutable, KnownLayout)]
struct PageAlign<T>(T);
615
616const fn zeroed<T: FromZeros>() -> T {
617 unsafe { core::mem::MaybeUninit::<T>::zeroed().assume_init() }
619}
620
/// Returns the current reference time for the given isolation type, if one
/// is available.
///
/// On x86_64, TDX uses `get_tdx_tsc_reftime()` and SNP always yields `None`.
/// Every other case (including all isolation types on other architectures)
/// uses `minimal_rt::reftime::reference_time()`.
fn get_ref_time(isolation: IsolationType) -> Option<u64> {
    match isolation {
        #[cfg(target_arch = "x86_64")]
        IsolationType::Tdx => get_tdx_tsc_reftime(),
        #[cfg(target_arch = "x86_64")]
        IsolationType::Snp => None,
        _ => Some(minimal_rt::reftime::reference_time()),
    }
}
630
631fn get_hw_debug_bit(isolation: IsolationType) -> bool {
632 match isolation {
633 #[cfg(target_arch = "x86_64")]
634 IsolationType::Tdx => {
635 use tdx_guest_device::protocol::TdReport;
636
637 use crate::arch::tdx::get_tdreport;
638
639 let mut report = off_stack!(PageAlign<TdReport>, zeroed());
640 match get_tdreport(&mut report.0) {
641 Ok(()) => report.0.td_info.td_info_base.attributes.debug(),
642 Err(_) => false,
643 }
644 }
645 #[cfg(target_arch = "x86_64")]
646 IsolationType::Snp => {
647 false
649 }
650 _ => false,
651 }
652}
653
/// Entry point of the boot shim: gathers the boot configuration, prepares
/// VTL2 memory and processors, builds the kernel command line, device tree
/// and (on x86_64) `boot_params`, then jumps to the kernel. Never returns.
///
/// `shim_params_raw_offset` is forwarded to `shim_parameters` to locate the
/// raw shim parameters.
///
/// # Panics
///
/// Panics if the host provides no device tree or it cannot be read, if no
/// CPUs are described, if the command line or device tree cannot be built,
/// if the initrd CRC does not match, or if an imported region outside VTL2
/// RAM was not marked as accepted.
fn shim_main(shim_params_raw_offset: isize) -> ! {
    let p = shim_parameters(shim_params_raw_offset);
    // Enlightened panic is only enabled when there is no isolation at all.
    if p.isolation_type == IsolationType::None {
        enable_enlightened_panic();
    }

    // Hypercalls are only initialized here when not hardware isolated.
    if !p.isolation_type.is_hardware_isolated() {
        hvcall().initialize();
    }

    // Parse the command line carried in the shim parameters; it may request
    // a boot logger before the device tree has been read.
    let mut static_options = BootCommandLineOptions::new();
    if let Some(cmdline) = p.command_line().command_line() {
        static_options.parse(cmdline);
    }
    if let Some(typ) = static_options.logger {
        boot_logger_init(p.isolation_type, typ);
        log!("openhcl_boot: early debugging enabled");
    }

    // The host is trusted when there is no isolation, when confidential
    // debug was requested on the static command line, or when the hardware
    // debug bit is set.
    let hw_debug_bit = get_hw_debug_bit(p.isolation_type);
    let can_trust_host = p.isolation_type == IsolationType::None
        || static_options.confidential_debug
        || hw_debug_bit;

    // Captured now; paired with a second reading later to form `BootTimes`.
    let boot_reftime = get_ref_time(p.isolation_type);

    let mut dt_storage = off_stack!(PartitionInfo, PartitionInfo::new());
    let partition_info =
        match PartitionInfo::read_from_dt(&p, &mut dt_storage, static_options, can_trust_host) {
            Ok(Some(val)) => val,
            Ok(None) => panic!("host did not provide a device tree"),
            Err(e) => panic!("unable to read device tree params {}", e),
        };

    // Confidential debug is on when an isolated partition trusts the host,
    // or when the boot options read from the device tree request it.
    let is_confidential_debug = (can_trust_host && p.isolation_type != IsolationType::None)
        || partition_info.boot_options.confidential_debug;

    // The VTL0 alias map is only kept when not hardware isolated, running in
    // VTL2, and the VSM capabilities register reports it as available.
    if !p.isolation_type.is_hardware_isolated()
        && hvcall().vtl() == Vtl::Vtl2
        && hvdef::HvRegisterVsmCapabilities::from(
            hvcall()
                .get_register(hvdef::HvAllArchRegisterName::VsmCapabilities.into())
                .expect("failed to query vsm capabilities")
                .as_u64(),
        )
        .vtl0_alias_map_available()
    {
        // No value from the device tree: derive one from the top physical
        // address bit.
        if partition_info.vtl0_alias_map.is_none() {
            partition_info.vtl0_alias_map =
                Some(1 << (arch::physical_address_bits(p.isolation_type) - 1));
        }
    } else {
        // Discard any value that was read when the conditions are not met.
        partition_info.vtl0_alias_map = None;
    }

    // With a trusted host, honor the logger from the device tree boot
    // options, or fall back to serial logging on x86_64 when COM3 exists.
    if can_trust_host {
        if let Some(typ) = partition_info.boot_options.logger {
            boot_logger_init(p.isolation_type, typ);
        } else if partition_info.com3_serial_available && cfg!(target_arch = "x86_64") {
            boot_logger_init(p.isolation_type, LoggerType::Serial);
        }
    }

    log!("openhcl_boot: entered shim_main");

    if partition_info.cpus.is_empty() {
        panic!("no cpus");
    }

    validate_vp_hw_ids(partition_info);

    setup_vtl2_memory(&p, partition_info);
    setup_vtl2_vp(partition_info);

    verify_imported_regions_hash(&p);

    // Page-aligned parameter and output buffers for the sidecar.
    let mut sidecar_params = off_stack!(PageAlign<SidecarParams>, zeroed());
    let mut sidecar_output = off_stack!(PageAlign<SidecarOutput>, zeroed());
    let sidecar = sidecar::start_sidecar(
        &p,
        partition_info,
        &mut sidecar_params.0,
        &mut sidecar_output.0,
    );

    let mut cmdline = off_stack!(ArrayString<COMMAND_LINE_SIZE>, ArrayString::new_const());
    build_kernel_command_line(
        &p,
        &mut cmdline,
        partition_info,
        can_trust_host,
        is_confidential_debug,
        sidecar.as_ref(),
    )
    .unwrap();

    // The device tree buffer doubles as a `SETUP_DTB` setup_data entry.
    let mut fdt = off_stack!(Fdt, zeroed());
    fdt.header.len = fdt.data.len() as u32;
    fdt.header.ty = SETUP_DTB;

    // On x86_64 the FDT header is the head of the setup_data list; the tail
    // is advanced as further entries are linked in.
    #[cfg(target_arch = "x86_64")]
    let mut setup_data_tail = &mut fdt.header;
    #[cfg(target_arch = "x86_64")]
    let setup_data_head = core::ptr::from_ref(setup_data_tail);

    // SNP: link a CC blob entry into the setup_data list.
    #[cfg(target_arch = "x86_64")]
    if p.isolation_type == IsolationType::Snp {
        let cc_blob = OffStackRef::leak(off_stack!(loader_defs::linux::cc_blob_sev_info, zeroed()));
        build_cc_blob_sev_info(cc_blob, &p);

        let cc_data = OffStackRef::leak(off_stack!(loader_defs::linux::cc_setup_data, zeroed()));
        cc_data.header.len = size_of::<loader_defs::linux::cc_setup_data>() as u32;
        cc_data.header.ty = loader_defs::linux::SETUP_CC_BLOB;
        // NOTE(review): the blob address is truncated to 32 bits here -
        // presumably the blob is known to live below 4 GiB; confirm.
        cc_data.cc_blob_address = core::ptr::from_ref(&*cc_blob) as u32;

        setup_data_tail.next = core::ptr::from_ref(&*cc_data) as u64;
        setup_data_tail = &mut cc_data.header;
    }

    let reserved_memory = reserved_memory_regions(partition_info, sidecar.as_ref());
    let initrd = p.initrd_base..p.initrd_base + p.initrd_size;

    // Check the initrd contents against the CRC carried in the shim params.
    let computed_crc = crc32fast::hash(p.initrd());
    assert_eq!(
        computed_crc, p.initrd_crc,
        "computed initrd crc does not match build time calculated crc"
    );

    #[cfg(target_arch = "x86_64")]
    let boot_params = x86_boot::build_boot_params(
        partition_info,
        reserved_memory.as_ref(),
        initrd.clone(),
        &cmdline,
        setup_data_head,
        &mut setup_data_tail,
        p.isolation_type,
        p.page_tables,
    );

    // Only reported when a start time was available; a missing end time is
    // recorded as 0.
    let boot_times = boot_reftime.map(|start| BootTimes {
        start,
        end: get_ref_time(p.isolation_type).unwrap_or(0),
    });

    // Any imported region that lies outside VTL2 RAM must carry a `true`
    // flag (reported as "preaccepted at launch" in the assertion message).
    for (range, result) in walk_ranges(
        partition_info.vtl2_ram.iter().map(|r| (r.range, ())),
        p.imported_regions(),
    ) {
        match result {
            RangeWalkResult::Neither | RangeWalkResult::Left(_) | RangeWalkResult::Both(_, _) => {}
            RangeWalkResult::Right(accepted) => {
                assert!(
                    accepted,
                    "range {:#x?} not in vtl2 ram was not preaccepted at launch",
                    range
                );
            }
        }
    }

    write_dt(
        &mut fdt.data,
        partition_info,
        reserved_memory.as_ref(),
        // Only the range of each imported region is passed on; the second
        // tuple element is dropped.
        p.imported_regions().map(|r| {
            r.0
        }),
        initrd,
        &cmdline,
        sidecar.as_ref(),
        boot_times,
        p.isolation_type,
    )
    .unwrap();

    rt::verify_stack_cookie();

    log!("uninitializing hypercalls, about to jump to kernel");
    hvcall().uninitialize();

    cfg_if::cfg_if! {
        if #[cfg(target_arch = "x86_64")] {
            // NOTE(review): this relies on `p.kernel_entry_address` being the
            // kernel entry point with this exact signature - confirm against
            // the loader that fills in the shim parameters.
            let kernel_entry: extern "C" fn(u64, &loader_defs::linux::boot_params) -> ! =
                unsafe { core::mem::transmute(p.kernel_entry_address) };
            kernel_entry(0, &boot_params.0)
        } else if #[cfg(target_arch = "aarch64")] {
            // NOTE(review): same assumption about `p.kernel_entry_address` as
            // on x86_64; the entry point takes the FDT pointer plus three
            // arguments that are passed as zero.
            let kernel_entry: extern "C" fn(fdt_data: *const u8, mbz0: u64, mbz1: u64, mbz2: u64) -> ! =
                unsafe { core::mem::transmute(p.kernel_entry_address) };
            // Clear bit 0 of SCTLR_EL1 (the MMU enable bit per the Arm
            // architecture), then invalidate the TLB and synchronize before
            // entering the kernel.
            unsafe {
                core::arch::asm!(
                    "
                    mrs {0}, sctlr_el1
                    bic {0}, {0}, #0x1
                    msr sctlr_el1, {0}
                    tlbi vmalle1
                    dsb sy
                    isb sy",
                    lateout(reg) _,
                );
            }
            kernel_entry(fdt.data.as_ptr(), 0, 0, 0)
        } else {
            panic!("unsupported arch")
        }
    }
}
908
909fn validate_vp_hw_ids(partition_info: &PartitionInfo) {
913 use host_params::MAX_CPU_COUNT;
914 use hypercall::HwId;
915
916 if partition_info.isolation.is_hardware_isolated() {
917 return;
926 }
927
928 if hvcall().vtl() != Vtl::Vtl2 {
929 return;
933 }
934
935 let mut hw_ids = off_stack!(ArrayVec<HwId, MAX_CPU_COUNT>, ArrayVec::new_const());
938 hw_ids.clear();
939 hw_ids.extend(partition_info.cpus.iter().map(|c| c.reg as _));
940 let mut vp_indexes = off_stack!(ArrayVec<u32, MAX_CPU_COUNT>, ArrayVec::new_const());
941 vp_indexes.clear();
942 if let Err(err) = hvcall().get_vp_index_from_hw_id(&hw_ids, &mut vp_indexes) {
943 panic!(
944 "failed to get VP index for hardware ID {:#x}: {}",
945 hw_ids[vp_indexes.len().min(hw_ids.len() - 1)],
946 err
947 );
948 }
949 if let Some((i, &vp_index)) = vp_indexes
950 .iter()
951 .enumerate()
952 .find(|&(i, vp_index)| i as u32 != *vp_index)
953 {
954 panic!(
955 "CPU hardware ID {:#x} does not correspond to VP index {}",
956 hw_ids[i], vp_index
957 );
958 }
959}
960
/// Placeholder entry point compiled when the `minimal_rt` cfg is not set; it
/// only panics with a hint about the required build configuration.
#[cfg(not(minimal_rt))]
fn main() {
    unimplemented!("build with MINIMAL_RT_BUILD to produce a working boot loader");
}
967
968#[cfg(test)]
969mod test {
970 use super::x86_boot::E820Ext;
971 use super::x86_boot::build_e820_map;
972 use crate::ReservedMemoryType;
973 use crate::cmdline::BootCommandLineOptions;
974 use crate::dt::write_dt;
975 use crate::host_params::MAX_CPU_COUNT;
976 use crate::host_params::PartitionInfo;
977 use crate::host_params::shim_params::IsolationType;
978 use crate::reserved_memory_regions;
979 use arrayvec::ArrayString;
980 use arrayvec::ArrayVec;
981 use core::ops::Range;
982 use host_fdt_parser::CpuEntry;
983 use host_fdt_parser::MemoryEntry;
984 use host_fdt_parser::VmbusInfo;
985 use igvm_defs::MemoryMapEntryType;
986 use loader_defs::linux::E820_RAM;
987 use loader_defs::linux::E820_RESERVED;
988 use loader_defs::linux::boot_params;
989 use loader_defs::linux::e820entry;
990 use memory_range::MemoryRange;
991 use memory_range::RangeWalkResult;
992 use memory_range::walk_ranges;
993 use zerocopy::FromZeros;
994
/// End (exclusive) of the MMIO range given to the test partition's VTL2 vmbus.
const HIGH_MMIO_GAP_END: u64 = 0x10_0000_0000;
/// Size of that MMIO range.
const VMBUS_MMIO_GAP_SIZE: u64 = 0x1000_0000;
/// Start of that MMIO range, derived from its end and size.
const HIGH_MMIO_GAP_START: u64 = HIGH_MMIO_GAP_END - VMBUS_MMIO_GAP_SIZE;
998
999 fn new_partition_info(cpu_count: usize) -> PartitionInfo {
1002 let mut cpus: ArrayVec<CpuEntry, MAX_CPU_COUNT> = ArrayVec::new();
1003
1004 for id in 0..(cpu_count as u64) {
1005 cpus.push(CpuEntry { reg: id, vnode: 0 });
1006 }
1007
1008 let mut mmio = ArrayVec::new();
1009 mmio.push(
1010 MemoryRange::try_new(HIGH_MMIO_GAP_START..HIGH_MMIO_GAP_END).expect("valid range"),
1011 );
1012
1013 PartitionInfo {
1014 vtl2_ram: ArrayVec::new(),
1015 vtl2_full_config_region: MemoryRange::EMPTY,
1016 vtl2_config_region_reclaim: MemoryRange::EMPTY,
1017 vtl2_reserved_region: MemoryRange::EMPTY,
1018 vtl2_pool_memory: MemoryRange::EMPTY,
1019 vtl2_used_ranges: ArrayVec::new(),
1020 partition_ram: ArrayVec::new(),
1021 isolation: IsolationType::None,
1022 bsp_reg: cpus[0].reg as u32,
1023 cpus,
1024 cmdline: ArrayString::new(),
1025 vmbus_vtl2: VmbusInfo {
1026 mmio,
1027 connection_id: 0,
1028 },
1029 vmbus_vtl0: VmbusInfo {
1030 mmio: ArrayVec::new(),
1031 connection_id: 0,
1032 },
1033 com3_serial_available: false,
1034 gic: None,
1035 memory_allocation_mode: host_fdt_parser::MemoryAllocationMode::Host,
1036 entropy: None,
1037 vtl0_alias_map: None,
1038 nvme_keepalive: false,
1039 boot_options: BootCommandLineOptions::new(),
1040 }
1041 }
1042
/// Writing a device tree for 2048 CPUs into a 0x40000-byte buffer succeeds.
#[test]
#[cfg_attr(
    target_arch = "aarch64",
    ignore = "TODO: investigate why this doesn't always work on ARM"
)]
fn fdt_cpu_scaling() {
    const MAX_CPUS: usize = 2048;

    let partition_info = new_partition_info(MAX_CPUS);
    let cmdline = ArrayString::from("test").unwrap_or_default();
    let mut dt_buf = [0; 0x40000];

    let outcome = write_dt(
        &mut dt_buf,
        &partition_info,
        &[],
        [],
        0..0,
        &cmdline,
        None,
        None,
        IsolationType::None,
    );
    outcome.unwrap();
}
1066
/// Compares the device tree written for a two-CPU partition, byte for byte,
/// against a stored sample image. Currently ignored.
#[test]
#[ignore = "TODO: temporarily broken"]
fn fdt_dtc_check_content() {
    const MAX_CPUS: usize = 2;
    const BUF_SIZE: usize = 0x1000;

    // Expected image as (offset, bytes) spans; every byte not covered by a
    // span is expected to be zero.
    let dtb_data_spans: [(usize, &[u8]); 2] = [
        // Span starting at offset 0.
        (
            0,
            b"\xd0\x0d\xfe\xed\x00\x00\x10\x00\x00\x00\x04\x38\x00\x00\x00\x38\
            \x00\x00\x00\x28\x00\x00\x00\x11\x00\x00\x00\x10\x00\x00\x00\x00\
            \x00\x00\x00\x4a\x00\x00\x01\x6c\x00\x00\x00\x00\x00\x00\x00\x00\
            \x00\x00\x00\x00\x00\x00\x00\x00\x23\x61\x64\x64\x72\x65\x73\x73\
            \x2d\x63\x65\x6c\x6c\x73\x00\x23\x73\x69\x7a\x65\x2d\x63\x65\x6c\
            \x6c\x73\x00\x6d\x6f\x64\x65\x6c\x00\x72\x65\x67\x00\x64\x65\x76\
            \x69\x63\x65\x5f\x74\x79\x70\x65\x00\x73\x74\x61\x74\x75\x73\x00\
            \x63\x6f\x6d\x70\x61\x74\x69\x62\x6c\x65\x00\x72\x61\x6e\x67\x65\
            \x73",
        ),
        // Span starting at offset 0x430.
        (
            0x430,
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
            \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\
            \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x0f\x00\x00\x00\x00\
            \x00\x00\x00\x03\x00\x00\x00\x0f\x00\x00\x00\x1b\x6d\x73\x66\x74\
            \x2c\x75\x6e\x64\x65\x72\x68\x69\x6c\x6c\x00\x00\x00\x00\x00\x01\
            \x63\x70\x75\x73\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\
            \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x04\
            \x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x01\x63\x70\x75\x40\
            \x30\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x25\
            \x63\x70\x75\x00\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x21\
            \x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x05\x00\x00\x00\x31\
            \x6f\x6b\x61\x79\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x01\
            \x63\x70\x75\x40\x31\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x04\
            \x00\x00\x00\x25\x63\x70\x75\x00\x00\x00\x00\x03\x00\x00\x00\x04\
            \x00\x00\x00\x21\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x05\
            \x00\x00\x00\x31\x6f\x6b\x61\x79\x00\x00\x00\x00\x00\x00\x00\x02\
            \x00\x00\x00\x02\x00\x00\x00\x01\x76\x6d\x62\x75\x73\x00\x00\x00\
            \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x02\
            \x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x0f\x00\x00\x00\x01\
            \x00\x00\x00\x03\x00\x00\x00\x0b\x00\x00\x00\x38\x6d\x73\x66\x74\
            \x2c\x76\x6d\x62\x75\x73\x00\x00\x00\x00\x00\x03\x00\x00\x00\x14\
            \x00\x00\x00\x43\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\
            \xf0\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x02\
            \x00\x00\x00\x09",
        ),
    ];

    // Assemble the expected image: zero-filled, with each span copied in at
    // its offset.
    let mut sample_buf = [0u8; BUF_SIZE];
    for (span_start, bytes) in dtb_data_spans {
        sample_buf[span_start..span_start + bytes.len()].copy_from_slice(bytes);
    }

    // Produce the actual image with the same buffer size.
    let mut buf = [0u8; BUF_SIZE];
    write_dt(
        &mut buf,
        &new_partition_info(MAX_CPUS),
        &[],
        [],
        0..0,
        &ArrayString::from("test").unwrap_or_default(),
        None,
        None,
        IsolationType::None,
    )
    .unwrap();

    // Plain `assert!`, so a mismatch does not dump two 4 KiB arrays.
    assert!(sample_buf == buf);
}
1142
/// Writes a 2048-CPU device tree to `openhcl_boot.dtb` in the current
/// directory and checks that the external `dtc` tool can decompile it to
/// `openhcl_boot.dts`. Ignored by default because it needs `dtc` installed.
#[test]
#[ignore = "enabling the test requires installing additional software, \
            and developers will experience a break."]
fn fdt_dtc_decompile() {
    const MAX_CPUS: usize = 2048;

    let partition_info = new_partition_info(MAX_CPUS);
    let cmdline = ArrayString::from("test").unwrap_or_default();
    let mut dt_buf = [0; 0x40000];

    let outcome = write_dt(
        &mut dt_buf,
        &partition_info,
        &[],
        [],
        0..0,
        &cmdline,
        None,
        None,
        IsolationType::None,
    );
    outcome.unwrap();

    let dtb_path = "openhcl_boot.dtb";
    let dts_path = "openhcl_boot.dts";
    std::fs::write(dtb_path, dt_buf).unwrap();

    let dtc_status = std::process::Command::new("dtc")
        .args([dtb_path, "-I", "dtb", "-o", dts_path])
        .status()
        .unwrap();
    assert!(dtc_status.success());
}
1179
1180 fn partition_info_ram_ranges(
1181 ram: &[Range<u64>],
1182 parameter_range: MemoryRange,
1183 reclaim: Option<Range<u64>>,
1184 ) -> PartitionInfo {
1185 let mut info = PartitionInfo::new();
1186
1187 info.vtl2_ram = ram
1188 .iter()
1189 .map(|r| MemoryEntry {
1190 range: MemoryRange::try_new(r.clone()).unwrap(),
1191 mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
1192 vnode: 0,
1193 })
1194 .collect();
1195
1196 info.vtl2_full_config_region = parameter_range;
1197
1198 info.vtl2_config_region_reclaim = reclaim
1199 .map(|r| MemoryRange::try_new(r).unwrap())
1200 .unwrap_or(MemoryRange::EMPTY);
1201
1202 info
1203 }
1204
1205 fn check_e820(boot_params: &boot_params, ext: &E820Ext, expected: &[(Range<u64>, u32)]) {
1206 let actual = boot_params.e820_map[..boot_params.e820_entries as usize]
1207 .iter()
1208 .chain(
1209 ext.entries
1210 .iter()
1211 .take((ext.header.len as usize) / size_of::<e820entry>()),
1212 );
1213
1214 assert_eq!(actual.clone().count(), expected.len());
1215
1216 for (actual, (expected_range, expected_type)) in actual.zip(expected.iter()) {
1217 let addr: u64 = actual.addr.into();
1218 let size: u64 = actual.size.into();
1219 let typ: u32 = actual.typ.into();
1220 assert_eq!(addr, expected_range.start);
1221 assert_eq!(size, expected_range.end - expected_range.start);
1222 assert_eq!(typ, *expected_type);
1223 }
1224 }
1225
/// One mebibyte (0x10_0000 bytes); the unit used for ranges in the E820 tests.
const ONE_MB: u64 = 0x10_0000;
1227
/// Covers E820 layouts in which every reserved range is backed by RAM: a
/// plain config region, a config region with a reclaimed hole, and the same
/// hole with RAM split across two and across seven entries.
#[test]
fn test_e820_basic() {
    /// Builds the E820 map for one layout, asserts that building succeeds,
    /// and compares the resulting entries with `expected`.
    fn run_case(
        ram: &[Range<u64>],
        parameter_range: Range<u64>,
        reclaim: Option<Range<u64>>,
        expected: &[(Range<u64>, u32)],
    ) {
        let mut boot_params: boot_params = FromZeros::new_zeroed();
        let mut ext = FromZeros::new_zeroed();
        let parameter_range = MemoryRange::try_new(parameter_range).unwrap();
        let partition_info = partition_info_ram_ranges(ram, parameter_range, reclaim);

        let built = build_e820_map(
            &mut boot_params,
            &mut ext,
            &partition_info,
            reserved_memory_regions(&partition_info, None).as_ref(),
            partition_info.isolation,
            None,
        );
        assert!(built.is_ok());

        check_e820(&boot_params, &ext, expected);
    }

    // One RAM range with the config region in the middle of it.
    run_case(
        &[ONE_MB..4 * ONE_MB],
        2 * ONE_MB..3 * ONE_MB,
        None,
        &[
            (ONE_MB..2 * ONE_MB, E820_RAM),
            (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
            (3 * ONE_MB..4 * ONE_MB, E820_RAM),
        ],
    );

    // The reclaimed part of the config region is reported as RAM again.
    run_case(
        &[ONE_MB..6 * ONE_MB],
        2 * ONE_MB..5 * ONE_MB,
        Some(3 * ONE_MB..4 * ONE_MB),
        &[
            (ONE_MB..2 * ONE_MB, E820_RAM),
            (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
            (3 * ONE_MB..4 * ONE_MB, E820_RAM),
            (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
            (5 * ONE_MB..6 * ONE_MB, E820_RAM),
        ],
    );

    // Same config region, RAM split into two adjacent entries.
    run_case(
        &[ONE_MB..4 * ONE_MB, 4 * ONE_MB..10 * ONE_MB],
        2 * ONE_MB..5 * ONE_MB,
        Some(3 * ONE_MB..4 * ONE_MB),
        &[
            (ONE_MB..2 * ONE_MB, E820_RAM),
            (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
            (3 * ONE_MB..4 * ONE_MB, E820_RAM),
            (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
            (5 * ONE_MB..10 * ONE_MB, E820_RAM),
        ],
    );

    // Same config region, RAM split into seven 1 MB entries.
    run_case(
        &[
            ONE_MB..2 * ONE_MB,
            2 * ONE_MB..3 * ONE_MB,
            3 * ONE_MB..4 * ONE_MB,
            4 * ONE_MB..5 * ONE_MB,
            5 * ONE_MB..6 * ONE_MB,
            6 * ONE_MB..7 * ONE_MB,
            7 * ONE_MB..8 * ONE_MB,
        ],
        2 * ONE_MB..5 * ONE_MB,
        Some(3 * ONE_MB..4 * ONE_MB),
        &[
            (ONE_MB..2 * ONE_MB, E820_RAM),
            (2 * ONE_MB..3 * ONE_MB, E820_RESERVED),
            (3 * ONE_MB..4 * ONE_MB, E820_RAM),
            (4 * ONE_MB..5 * ONE_MB, E820_RESERVED),
            (5 * ONE_MB..6 * ONE_MB, E820_RAM),
            (6 * ONE_MB..7 * ONE_MB, E820_RAM),
            (7 * ONE_MB..8 * ONE_MB, E820_RAM),
        ],
    );
}
1371
1372 #[test]
1373 fn test_e820_param_not_covered() {
1374 let mut boot_params: boot_params = FromZeros::new_zeroed();
1376 let mut ext = FromZeros::new_zeroed();
1377 let parameter_range = MemoryRange::try_new(5 * ONE_MB..6 * ONE_MB).unwrap();
1378 let partition_info =
1379 partition_info_ram_ranges(&[ONE_MB..4 * ONE_MB], parameter_range, None);
1380
1381 assert!(
1382 build_e820_map(
1383 &mut boot_params,
1384 &mut ext,
1385 &partition_info,
1386 reserved_memory_regions(&partition_info, None).as_ref(),
1387 partition_info.isolation,
1388 None
1389 )
1390 .is_err()
1391 );
1392
1393 let mut boot_params: boot_params = FromZeros::new_zeroed();
1395 let mut ext = FromZeros::new_zeroed();
1396 let parameter_range = MemoryRange::try_new(3 * ONE_MB..6 * ONE_MB).unwrap();
1397 let partition_info =
1398 partition_info_ram_ranges(&[ONE_MB..4 * ONE_MB], parameter_range, None);
1399
1400 assert!(
1401 build_e820_map(
1402 &mut boot_params,
1403 &mut ext,
1404 &partition_info,
1405 reserved_memory_regions(&partition_info, None).as_ref(),
1406 partition_info.isolation,
1407 None
1408 )
1409 .is_err()
1410 );
1411
1412 let mut boot_params: boot_params = FromZeros::new_zeroed();
1414 let mut ext = FromZeros::new_zeroed();
1415 let parameter_range = MemoryRange::try_new(2 * ONE_MB..5 * ONE_MB).unwrap();
1416 let partition_info =
1417 partition_info_ram_ranges(&[4 * ONE_MB..6 * ONE_MB], parameter_range, None);
1418
1419 assert!(
1420 build_e820_map(
1421 &mut boot_params,
1422 &mut ext,
1423 &partition_info,
1424 reserved_memory_regions(&partition_info, None).as_ref(),
1425 partition_info.isolation,
1426 None
1427 )
1428 .is_err()
1429 );
1430
1431 let mut boot_params: boot_params = FromZeros::new_zeroed();
1433 let mut ext = FromZeros::new_zeroed();
1434 let parameter_range = MemoryRange::try_new(2 * ONE_MB..8 * ONE_MB).unwrap();
1435 let partition_info =
1436 partition_info_ram_ranges(&[4 * ONE_MB..6 * ONE_MB], parameter_range, None);
1437
1438 assert!(
1439 build_e820_map(
1440 &mut boot_params,
1441 &mut ext,
1442 &partition_info,
1443 reserved_memory_regions(&partition_info, None).as_ref(),
1444 partition_info.isolation,
1445 None
1446 )
1447 .is_err()
1448 );
1449
1450 let mut boot_params: boot_params = FromZeros::new_zeroed();
1452 let mut ext = FromZeros::new_zeroed();
1453 let parameter_range = MemoryRange::try_new(2 * ONE_MB..8 * ONE_MB).unwrap();
1454 let partition_info = partition_info_ram_ranges(
1455 &[ONE_MB..6 * ONE_MB, 7 * ONE_MB..10 * ONE_MB],
1456 parameter_range,
1457 None,
1458 );
1459
1460 assert!(
1461 build_e820_map(
1462 &mut boot_params,
1463 &mut ext,
1464 &partition_info,
1465 reserved_memory_regions(&partition_info, None).as_ref(),
1466 partition_info.isolation,
1467 None
1468 )
1469 .is_err()
1470 );
1471 }
1472
1473 #[test]
1474 fn test_e820_huge() {
1475 let mut boot_params: boot_params = FromZeros::new_zeroed();
1477 let mut ext = FromZeros::new_zeroed();
1478 let ram = MemoryRange::new(0..32 * ONE_MB);
1479 let partition_info = partition_info_ram_ranges(&[ram.into()], MemoryRange::EMPTY, None);
1480 let reserved = (0..256)
1481 .map(|i| {
1482 (
1483 MemoryRange::from_4k_gpn_range(i * 8 + 1..i * 8 + 3),
1484 ReservedMemoryType::Vtl2Config,
1485 )
1486 })
1487 .collect::<Vec<_>>();
1488
1489 build_e820_map(
1490 &mut boot_params,
1491 &mut ext,
1492 &partition_info,
1493 &reserved,
1494 partition_info.isolation,
1495 None,
1496 )
1497 .unwrap();
1498
1499 assert!(ext.header.len > 0);
1500
1501 let expected = walk_ranges([(ram, ())], reserved.iter().map(|&(r, _)| (r, ())))
1502 .flat_map(|(range, r)| match r {
1503 RangeWalkResult::Neither => None,
1504 RangeWalkResult::Left(_) => Some((range.into(), E820_RAM)),
1505 RangeWalkResult::Right(_) => unreachable!(),
1506 RangeWalkResult::Both(_, _) => Some((range.into(), E820_RESERVED)),
1507 })
1508 .collect::<Vec<_>>();
1509
1510 check_e820(&boot_params, &ext, &expected);
1511 }
1512}