openhcl_boot/host_params/
dt.rsuse super::PartitionInfo;
use super::shim_params::IsolationType;
use super::shim_params::ShimParams;
use crate::boot_logger::log;
use crate::host_params::COMMAND_LINE_SIZE;
use crate::host_params::MAX_CPU_COUNT;
use crate::host_params::MAX_ENTROPY_SIZE;
use crate::host_params::MAX_NUMA_NODES;
use crate::host_params::MAX_PARTITION_RAM_RANGES;
use crate::host_params::MAX_VTL2_USED_RANGES;
use crate::single_threaded::OffStackRef;
use crate::single_threaded::off_stack;
use arrayvec::ArrayVec;
use core::cmp::max;
use core::fmt::Display;
use core::fmt::Write;
use host_fdt_parser::MemoryAllocationMode;
use host_fdt_parser::MemoryEntry;
use host_fdt_parser::ParsedDeviceTree;
use hvdef::HV_PAGE_SIZE;
use igvm_defs::MemoryMapEntryType;
use loader_defs::paravisor::CommandLinePolicy;
use memory_range::MemoryRange;
use memory_range::flatten_ranges;
use memory_range::subtract_ranges;
use memory_range::walk_ranges;
#[derive(Debug)]
pub enum DtError {
DeviceTree(host_fdt_parser::Error<'static>),
CommandLineSize,
Vtl2Vmbus,
Vtl0Vmbus,
NotEnoughMmio,
}
impl Display for DtError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
DtError::DeviceTree(err) => {
f.write_fmt(format_args!("host provided device tree is invalid: {err}"))
}
DtError::CommandLineSize => {
f.write_str("commandline storage is too small to write the parsed command line")
}
DtError::Vtl2Vmbus => f.write_str("device tree did not contain a vmbus node for VTL2"),
DtError::Vtl0Vmbus => f.write_str("device tree did not contain a vmbus node for VTL0"),
DtError::NotEnoughMmio => {
f.write_str("host provided high MMIO range is insufficient to cover VTL0 and VTL2")
}
}
}
}
fn allocate_vtl2_ram(
params: &ShimParams,
partition_memory_map: &[MemoryEntry],
ram_size: Option<u64>,
) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
let mut numa_nodes = off_stack!(ArrayVec<u32, MAX_NUMA_NODES>, ArrayVec::new_const());
for entry in partition_memory_map.iter() {
match numa_nodes.binary_search(&entry.vnode) {
Ok(_) => {}
Err(index) => {
numa_nodes.insert(index, entry.vnode);
}
}
}
let numa_node_count = numa_nodes.len();
let vtl2_size = if let Some(ram_size) = ram_size {
if ram_size < params.memory_size {
panic!(
"host provided vtl2 ram size {:x} is smaller than measured size {:x}",
ram_size, params.memory_size
);
}
max(ram_size, params.memory_size)
} else {
params.memory_size
};
let ram_per_node = vtl2_size / numa_node_count as u64;
let mut memory_per_node = off_stack!(ArrayVec<u64, MAX_NUMA_NODES>, ArrayVec::new_const());
memory_per_node.extend((0..numa_node_count).map(|_| 0));
for entry in partition_memory_map.iter() {
memory_per_node[entry.vnode as usize] = ram_per_node;
}
let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
let mut free_memory_after_vtl2 = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
let file_memory_range = MemoryRange::new(
params.memory_start_address..(params.memory_start_address + params.memory_size),
);
for (range, result) in walk_ranges(
[(file_memory_range, ())],
partition_memory_map.iter().map(|e| (e.range, e)),
) {
match result {
memory_range::RangeWalkResult::Right(entry) => {
free_memory_after_vtl2.push(MemoryEntry {
range,
mem_type: entry.mem_type,
vnode: entry.vnode,
});
}
memory_range::RangeWalkResult::Both(_, entry) => {
vtl2_ram.push(MemoryEntry {
range,
mem_type: entry.mem_type,
vnode: entry.vnode,
});
}
memory_range::RangeWalkResult::Left(_) => {
panic!("used file range {range:#x?} is not reported as ram by host memmap")
}
memory_range::RangeWalkResult::Neither => {}
}
}
let mut free_memory = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
for (range, result) in walk_ranges(
params
.imported_regions()
.filter_map(|(range, _preaccepted)| {
if !file_memory_range.contains(&range) {
assert!(!file_memory_range.overlaps(&range), "imported range {range:#x?} overlaps vtl2 range and is not fully contained within vtl2 range");
Some((range, ()))
} else {
None
}
}),
free_memory_after_vtl2.iter().map(|e| (e.range, e)),
) {
match result {
memory_range::RangeWalkResult::Right(entry) => {
free_memory.push(MemoryEntry {
range,
mem_type: entry.mem_type,
vnode: entry.vnode,
});
}
memory_range::RangeWalkResult::Left(_) => {
#[cfg(target_arch = "x86_64")]
if params.isolation_type == IsolationType::Tdx && range.start_4k_gpn() == 0xFFFFF && range.len() == 0x1000 {
continue;
}
panic!("launch context range {range:#x?} is not reported as ram by host memmap")
}
memory_range::RangeWalkResult::Both(_, _) => {
}
memory_range::RangeWalkResult::Neither => {}
}
}
for entry in vtl2_ram.iter() {
let mem_req = &mut memory_per_node[entry.vnode as usize];
if entry.range.len() > *mem_req {
log!(
"entry {entry:?} is larger than required {mem_req} for vnode {}",
entry.vnode
);
*mem_req = 0;
} else {
*mem_req -= entry.range.len();
}
}
for (node, required_mem) in memory_per_node.iter().enumerate() {
let mut required_mem = *required_mem;
if required_mem == 0 {
continue;
}
for entry in free_memory.iter_mut().rev() {
if entry.vnode == node as u32 && !entry.range.is_empty() {
assert!(required_mem != 0);
let bytes_to_allocate = core::cmp::min(entry.range.len(), required_mem);
let offset = entry.range.len() - bytes_to_allocate;
let (remaining, alloc) = MemoryRange::split_at_offset(&entry.range, offset);
entry.range = remaining;
vtl2_ram.push(MemoryEntry {
range: alloc,
mem_type: entry.mem_type,
vnode: node as u32,
});
required_mem -= bytes_to_allocate;
if required_mem == 0 {
break;
}
}
}
if required_mem != 0 {
panic!(
"failed to allocate {required_mem:#x} for vnode {node:#x}, no memory remaining for vnode"
);
}
}
vtl2_ram.sort_unstable_by_key(|e| e.range.start());
vtl2_ram
}
fn parse_host_vtl2_ram(
params: &ShimParams,
memory: &[MemoryEntry],
) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
if params.isolation_type.is_hardware_isolated() {
let vtl2_size = memory.iter().fold(0, |acc, entry| {
if entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE {
acc + entry.range.len()
} else {
acc
}
});
log!(
"host provided vtl2 ram size is {:x}, measured size is {:x}",
vtl2_size,
params.memory_size
);
let vtl2_size = max(vtl2_size, params.memory_size);
vtl2_ram.push(MemoryEntry {
range: MemoryRange::new(
params.memory_start_address..(params.memory_start_address + vtl2_size),
),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
} else {
for &entry in memory
.iter()
.filter(|entry| entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE)
{
vtl2_ram.push(entry);
}
}
if vtl2_ram.is_empty() {
log!("using measured vtl2 ram");
vtl2_ram.push(MemoryEntry {
range: MemoryRange::try_new(
params.memory_start_address..(params.memory_start_address + params.memory_size),
)
.expect("range is valid"),
mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
vnode: 0,
});
}
vtl2_ram
}
impl PartitionInfo {
pub fn read_from_dt<'a>(
params: &'a ShimParams,
storage: &'a mut Self,
can_trust_host: bool,
) -> Result<Option<&'a mut Self>, DtError> {
let dt = params.device_tree();
if dt[0] == 0 {
log!("host did not provide a device tree");
return Ok(None);
}
let mut dt_storage = off_stack!(ParsedDeviceTree<MAX_PARTITION_RAM_RANGES, MAX_CPU_COUNT, COMMAND_LINE_SIZE, MAX_ENTROPY_SIZE>, ParsedDeviceTree::new());
let parsed = ParsedDeviceTree::parse(dt, &mut *dt_storage).map_err(DtError::DeviceTree)?;
let command_line = params.command_line();
write!(
storage.cmdline,
"{}",
command_line
.command_line()
.expect("measured command line should be valid")
)
.map_err(|_| DtError::CommandLineSize)?;
if can_trust_host && command_line.policy == CommandLinePolicy::APPEND_CHOSEN {
write!(storage.cmdline, " {}", &parsed.command_line)
.map_err(|_| DtError::CommandLineSize)?;
}
match parsed.memory_allocation_mode {
MemoryAllocationMode::Host => {
storage.vtl2_ram.clear();
storage
.vtl2_ram
.try_extend_from_slice(parse_host_vtl2_ram(params, &parsed.memory).as_ref())
.expect("vtl2 ram should only be 64 big");
storage.memory_allocation_mode = MemoryAllocationMode::Host;
}
MemoryAllocationMode::Vtl2 {
memory_size,
mmio_size,
} => {
storage.vtl2_ram.clear();
storage
.vtl2_ram
.try_extend_from_slice(
allocate_vtl2_ram(params, &parsed.memory, memory_size).as_ref(),
)
.expect("vtl2 ram should only be 64 big");
storage.memory_allocation_mode = MemoryAllocationMode::Vtl2 {
memory_size,
mmio_size,
};
}
}
storage.vmbus_vtl2 = parsed.vmbus_vtl2.clone().ok_or(DtError::Vtl2Vmbus)?;
storage.vmbus_vtl0 = parsed.vmbus_vtl0.clone().ok_or(DtError::Vtl0Vmbus)?;
if params.isolation_type != IsolationType::None
|| matches!(
parsed.memory_allocation_mode,
MemoryAllocationMode::Vtl2 { .. }
)
{
const MINIMUM_MMIO_SIZE: u64 = 128 * (1 << 20);
let mmio_size = max(
match parsed.memory_allocation_mode {
MemoryAllocationMode::Vtl2 { mmio_size, .. } => mmio_size.unwrap_or(0),
_ => 0,
},
MINIMUM_MMIO_SIZE,
);
let vtl2_mmio = storage.select_vtl2_mmio_range(mmio_size)?;
let vtl0_mmio = subtract_ranges(storage.vmbus_vtl0.mmio.iter().cloned(), [vtl2_mmio])
.collect::<ArrayVec<MemoryRange, 2>>();
assert_eq!(
vtl0_mmio.len(),
2,
"vtl0 mmio ranges are not 2 {:#x?}",
vtl0_mmio
);
storage.vmbus_vtl2.mmio.clear();
storage.vmbus_vtl2.mmio.push(vtl2_mmio);
storage.vmbus_vtl0.mmio = vtl0_mmio;
}
let reclaim_base = params.dt_start();
let reclaim_end = params.dt_start() + params.dt_size();
let vtl2_config_region_reclaim =
MemoryRange::try_new(reclaim_base..reclaim_end).expect("range is valid");
log!("reclaim device tree memory {reclaim_base:x}-{reclaim_end:x}");
for entry in &parsed.memory {
storage.partition_ram.push(*entry);
}
let mut used_ranges =
off_stack!(ArrayVec<MemoryRange, MAX_VTL2_USED_RANGES>, ArrayVec::new_const());
used_ranges.push(params.used);
used_ranges.sort_unstable_by_key(|r| r.start());
storage.vtl2_used_ranges.clear();
storage
.vtl2_used_ranges
.extend(flatten_ranges(used_ranges.iter().copied()));
let vtl2_gpa_pool_size = {
let dt_page_count = parsed.device_dma_page_count;
let cmdline_page_count =
crate::cmdline::parse_boot_command_line(storage.cmdline.as_str())
.enable_vtl2_gpa_pool;
max(dt_page_count.unwrap_or(0), cmdline_page_count.unwrap_or(0))
};
if vtl2_gpa_pool_size != 0 {
let pool_size_bytes = vtl2_gpa_pool_size * HV_PAGE_SIZE;
let free_memory = subtract_ranges(
storage.vtl2_ram.iter().map(|e| e.range),
storage.vtl2_used_ranges.iter().copied(),
);
let mut pool = MemoryRange::EMPTY;
for range in free_memory {
if range.len() >= pool_size_bytes {
pool = MemoryRange::new(range.start()..(range.start() + pool_size_bytes));
break;
}
}
if pool.is_empty() {
panic!(
"failed to find {pool_size_bytes} bytes of free VTL2 memory for VTL2 GPA pool"
);
}
used_ranges.clear();
used_ranges.extend(storage.vtl2_used_ranges.iter().copied());
used_ranges.push(pool);
used_ranges.sort_unstable_by_key(|r| r.start());
storage.vtl2_used_ranges.clear();
storage
.vtl2_used_ranges
.extend(flatten_ranges(used_ranges.iter().copied()));
storage.vtl2_pool_memory = pool;
}
if can_trust_host {
storage.vtl0_alias_map = parsed.vtl0_alias_map;
}
let Self {
vtl2_ram: _,
vtl2_full_config_region: vtl2_config_region,
vtl2_config_region_reclaim: vtl2_config_region_reclaim_struct,
vtl2_reserved_region,
vtl2_pool_memory: _,
vtl2_used_ranges,
partition_ram: _,
isolation,
bsp_reg,
cpus,
vmbus_vtl0: _,
vmbus_vtl2: _,
cmdline: _,
com3_serial_available: com3_serial,
gic,
memory_allocation_mode: _,
entropy,
vtl0_alias_map: _,
nvme_keepalive,
} = storage;
assert!(!vtl2_used_ranges.is_empty());
*isolation = params.isolation_type;
*vtl2_config_region = MemoryRange::new(
params.parameter_region_start
..(params.parameter_region_start + params.parameter_region_size),
);
*vtl2_config_region_reclaim_struct = vtl2_config_region_reclaim;
assert!(vtl2_config_region.contains(&vtl2_config_region_reclaim));
*vtl2_reserved_region = MemoryRange::new(
params.vtl2_reserved_region_start
..(params.vtl2_reserved_region_start + params.vtl2_reserved_region_size),
);
*bsp_reg = parsed.boot_cpuid_phys;
cpus.extend(parsed.cpus.iter().copied());
*com3_serial = parsed.com3_serial;
*gic = parsed.gic.clone();
*entropy = parsed.entropy.clone();
*nvme_keepalive = parsed.nvme_keepalive;
Ok(Some(storage))
}
}