extern crate alloc;

use super::PartitionInfo;
use super::shim_params::ShimParams;
use crate::cmdline::BootCommandLineOptions;
use crate::cmdline::SidecarOptions;
use crate::host_params::COMMAND_LINE_SIZE;
use crate::host_params::MAX_CPU_COUNT;
use crate::host_params::MAX_ENTROPY_SIZE;
use crate::host_params::MAX_NUMA_NODES;
use crate::host_params::MAX_PARTITION_RAM_RANGES;
use crate::host_params::MAX_VTL2_RAM_RANGES;
use crate::host_params::dt::dma_hint::pick_private_pool_size;
use crate::host_params::mmio::select_vtl2_mmio_range;
use crate::host_params::shim_params::IsolationType;
use crate::memory::AddressSpaceManager;
use crate::memory::AddressSpaceManagerBuilder;
use crate::memory::AllocationPolicy;
use crate::memory::AllocationType;
use crate::single_threaded::OffStackRef;
use crate::single_threaded::off_stack;
use alloc::vec::Vec;
use arrayvec::ArrayString;
use arrayvec::ArrayVec;
use bump_alloc::ALLOCATOR;
use core::cmp::max;
use core::fmt::Write;
use host_fdt_parser::MemoryAllocationMode;
use host_fdt_parser::MemoryEntry;
use host_fdt_parser::ParsedDeviceTree;
use host_fdt_parser::VmbusInfo;
use hvdef::HV_PAGE_SIZE;
use igvm_defs::MemoryMapEntryType;
use loader_defs::paravisor::CommandLinePolicy;
use loader_defs::shim::MemoryVtlType;
use loader_defs::shim::PersistedStateHeader;
use memory_range::MemoryRange;
use memory_range::subtract_ranges;
use memory_range::walk_ranges;
use thiserror::Error;
use zerocopy::FromBytes;

mod bump_alloc;
mod dma_hint;

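/// Errors that can occur while building [`PartitionInfo`] from the host
/// provided device tree.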
#[derive(Debug, Error)]
pub enum DtError {
    #[error("no device tree provided by host")]
    NoDeviceTree,
    #[error("host provided device tree is invalid")]
    DeviceTree(#[source] host_fdt_parser::Error<'static>),
    #[error("commandline storage is too small to write the parsed command line")]
    CommandLineSize,
    #[error("device tree did not contain a vmbus node for VTL2")]
    Vtl2Vmbus,
    #[error("device tree did not contain a vmbus node for VTL0")]
    Vtl0Vmbus,
    #[error("host provided high MMIO range is insufficient to cover VTL0 and VTL2")]
    NotEnoughVtl0Mmio,
    #[error("host provided MMIO range is insufficient to cover VTL2")]
    NotEnoughVtl2Mmio,
}

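/// Allocate the VTL2 private GPA pool from VTL2 ram.
///
/// The pool is allocated from high memory on NUMA node 0 when possible;
/// otherwise, or when `force_numa_split` is set, it is split evenly across
/// all NUMA nodes backing VTL2 ram. Panics if the pool cannot be allocated,
/// since the partition would not be usable as configured.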
fn allocate_private_pool(
    address_space: &mut AddressSpaceManager,
    vtl2_ram: &[MemoryEntry],
    pool_size_bytes: u64,
    force_numa_split: bool,
    enable_vtl2_gpa_pool: crate::cmdline::Vtl2GpaPoolConfig,
    device_dma_page_count: Option<u64>,
    vp_count: usize,
    mem_size: u64,
) {
    if !force_numa_split {
        // Prefer a single allocation from high memory on node 0. Only fall
        // back to splitting across NUMA nodes if node 0 cannot fit the pool.
        if let Some(pool) = address_space.allocate(
            Some(0),
            pool_size_bytes,
            AllocationType::GpaPool,
            AllocationPolicy::HighMemory,
        ) {
            log::info!("allocated VTL2 pool at {:#x?}", pool.range);
            return;
        }
        log::info!("node 0 cannot fit full pool, splitting across NUMA nodes");
    } else {
        log::info!("forcing VTL2 pool NUMA split across nodes");
    }

    // Collect the set of NUMA nodes backing VTL2 ram, kept sorted.
    let mut numa_nodes = off_stack!(ArrayVec<u32, MAX_NUMA_NODES>, ArrayVec::new_const());
    for entry in vtl2_ram.iter() {
        match numa_nodes.binary_search(&entry.vnode) {
            Ok(_) => {}
            Err(index) => {
                numa_nodes.insert(index, entry.vnode);
            }
        }
    }

    let num_nodes = numa_nodes.len() as u64;
    // Split the pool evenly across nodes, rounding each share down to a page
    // boundary; the last node absorbs the remainder.
    let per_node_size = (pool_size_bytes / num_nodes) & !(HV_PAGE_SIZE - 1);
    let last_node_size = pool_size_bytes - per_node_size * (num_nodes - 1);
    let mut remaining = pool_size_bytes;

    if per_node_size == 0 {
        // The pool is too small to split across this many nodes; report the
        // inputs that led to this sizing decision.
        panic!(
            "cannot split VTL2 pool of size {pool_size_bytes:#x} bytes across \
            {num_nodes} nodes, per node size {per_node_size:#x} bytes; \
            enable_vtl2_gpa_pool={enable_vtl2_gpa_pool:?}, \
            device_dma_page_count={device_dma_page_count:#x?}, \
            vp_count={vp_count}, mem_size={mem_size:#x}"
        );
    }

    for (i, vnode) in numa_nodes.iter().enumerate() {
        if remaining == 0 {
            break;
        }

        let is_last = i == numa_nodes.len() - 1;
        let alloc_size = if is_last {
            last_node_size
        } else {
            per_node_size
        };

        match address_space.allocate(
            Some(*vnode),
            alloc_size,
            AllocationType::GpaPool,
            AllocationPolicy::HighMemory,
        ) {
            Some(pool) => {
                remaining -= pool.range.len();
                log::info!(
                    "allocated VTL2 pool on node {} at {:#x?}",
                    vnode,
                    pool.range
                );
            }
            None => {
                // Allocation failed; gather the free ranges on every node to
                // make the panic message actionable.
                let mut free_ranges = off_stack!(ArrayString<2048>, ArrayString::new_const());
                for node in numa_nodes.iter() {
                    for range in address_space.free_ranges(*node) {
                        if write!(
                            free_ranges,
                            "n{}:[{:#x?}, {:#x?}) ",
                            node,
                            range.start(),
                            range.end()
                        )
                        .is_err()
                        {
                            let _ = write!(free_ranges, "...");
                            break;
                        }
                    }
                }
                let highest_numa_node = vtl2_ram.iter().map(|e| e.vnode).max().unwrap_or(0);
                panic!(
                    "failed to allocate VTL2 pool on node {vnode}: \
                    need {alloc_size:#x} bytes, pool total {pool_size_bytes:#x} bytes \
                    (enable_vtl2_gpa_pool={enable_vtl2_gpa_pool:?}, \
                    device_dma_page_count={device_dma_page_count:#x?}, \
                    vp_count={vp_count}, mem_size={mem_size:#x}), \
                    highest_numa_node={highest_numa_node}, \
                    free_ranges=[ {}]",
                    free_ranges.as_str()
                );
            }
        }
    }

    assert_eq!(
        remaining, 0,
        "pool allocation arithmetic error: {remaining:#x} bytes unallocated"
    );
}

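/// Select the ram VTL2 will use when the boot shim allocates memory itself
/// (`MemoryAllocationMode::Vtl2`).
///
/// The requested size is spread evenly across the NUMA nodes reported by the
/// host, starting from the ranges already covered by the IGVM file and then
/// claiming free partition ram, highest addresses first. Panics if a node
/// cannot supply its share.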
fn allocate_vtl2_ram(
    params: &ShimParams,
    partition_memory_map: &[MemoryEntry],
    ram_size: Option<u64>,
) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
    // Collect the set of NUMA nodes reported by the host, kept sorted.
    let mut numa_nodes = off_stack!(ArrayVec<u32, MAX_NUMA_NODES>, ArrayVec::new_const());

    for entry in partition_memory_map.iter() {
        match numa_nodes.binary_search(&entry.vnode) {
            Ok(_) => {}
            Err(index) => {
                numa_nodes.insert(index, entry.vnode);
            }
        }
    }

    let numa_node_count = numa_nodes.len();

    let vtl2_size = if let Some(ram_size) = ram_size {
        if ram_size < params.memory_size {
            panic!(
                "host provided vtl2 ram size {:x} is smaller than measured size {:x}",
                ram_size, params.memory_size
            );
        }
        max(ram_size, params.memory_size)
    } else {
        params.memory_size
    };

    // Spread the requested VTL2 size evenly across the NUMA nodes.
    let ram_per_node = vtl2_size / numa_node_count as u64;

    let mut memory_per_node = off_stack!(ArrayVec<u64, MAX_NUMA_NODES>, ArrayVec::new_const());
    memory_per_node.extend((0..numa_node_count).map(|_| 0));
    for entry in partition_memory_map.iter() {
        memory_per_node[entry.vnode as usize] = ram_per_node;
    }

    // Start with the ranges already covered by the IGVM file, and track the
    // rest of partition ram as candidate free memory.
    let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
    let mut free_memory_after_vtl2 = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
    let file_memory_range = MemoryRange::new(
        params.memory_start_address..(params.memory_start_address + params.memory_size),
    );

    for (range, result) in walk_ranges(
        [(file_memory_range, ())],
        partition_memory_map.iter().map(|e| (e.range, e)),
    ) {
        match result {
            memory_range::RangeWalkResult::Right(entry) => {
                free_memory_after_vtl2.push(MemoryEntry {
                    range,
                    mem_type: entry.mem_type,
                    vnode: entry.vnode,
                });
            }
            memory_range::RangeWalkResult::Both(_, entry) => {
                vtl2_ram.push(MemoryEntry {
                    range,
                    mem_type: entry.mem_type,
                    vnode: entry.vnode,
                });
            }
            memory_range::RangeWalkResult::Left(_) => {
                panic!("used file range {range:#x?} is not reported as ram by host memmap")
            }
            memory_range::RangeWalkResult::Neither => {}
        }
    }

    // Remove imported regions that lie outside the file range from the free
    // list, since they already hold launch-time contents.
    let mut free_memory = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
    for (range, result) in walk_ranges(
        params
            .imported_regions()
            .filter_map(|(range, _preaccepted)| {
                if !file_memory_range.contains(&range) {
                    assert!(
                        !file_memory_range.overlaps(&range),
                        "imported range {range:#x?} overlaps vtl2 range and is not fully contained within vtl2 range"
                    );
                    Some((range, ()))
                } else {
                    None
                }
            }),
        free_memory_after_vtl2.iter().map(|e| (e.range, e)),
    ) {
        match result {
            memory_range::RangeWalkResult::Right(entry) => {
                free_memory.push(MemoryEntry {
                    range,
                    mem_type: entry.mem_type,
                    vnode: entry.vnode,
                });
            }
            memory_range::RangeWalkResult::Left(_) => {
                // TDX imports the architectural reset page at GPN 0xFFFFF,
                // which the host does not report as ram; skip it.
                #[cfg(target_arch = "x86_64")]
                if params.isolation_type == IsolationType::Tdx
                    && range.start_4k_gpn() == 0xFFFFF
                    && range.len() == 0x1000
                {
                    continue;
                }

                panic!("launch context range {range:#x?} is not reported as ram by host memmap")
            }
            memory_range::RangeWalkResult::Both(_, _) => {
                // Ram covered by an imported region is not free for VTL2 to
                // claim; leave it out of the free list.
            }
            memory_range::RangeWalkResult::Neither => {}
        }
    }

    // Account for the ram each node has already contributed via the file
    // range.
    for entry in vtl2_ram.iter() {
        let mem_req = &mut memory_per_node[entry.vnode as usize];

        if entry.range.len() > *mem_req {
            log::warn!(
                "entry {entry:?} is larger than required {mem_req} for vnode {}",
                entry.vnode
            );
            *mem_req = 0;
        } else {
            *mem_req -= entry.range.len();
        }
    }

    // Claim the remainder for each node from free memory, taking the highest
    // addresses first.
    for (node, required_mem) in memory_per_node.iter().enumerate() {
        let mut required_mem = *required_mem;
        if required_mem == 0 {
            continue;
        }

        for entry in free_memory.iter_mut().rev() {
            if entry.vnode == node as u32 && !entry.range.is_empty() {
                assert!(required_mem != 0);
                let bytes_to_allocate = core::cmp::min(entry.range.len(), required_mem);

                // Take the allocation from the end of the range.
                let offset = entry.range.len() - bytes_to_allocate;
                let (remaining, alloc) = MemoryRange::split_at_offset(&entry.range, offset);

                entry.range = remaining;
                vtl2_ram.push(MemoryEntry {
                    range: alloc,
                    mem_type: entry.mem_type,
                    vnode: node as u32,
                });

                required_mem -= bytes_to_allocate;

                if required_mem == 0 {
                    break;
                }
            }
        }

        if required_mem != 0 {
            panic!(
                "failed to allocate {required_mem:#x} for vnode {node:#x}, no memory remaining for vnode"
            );
        }
    }

    vtl2_ram.sort_unstable_by_key(|e| e.range.start());

    vtl2_ram
}

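/// Read the VTL2 ram ranges reported by the host (`MemoryAllocationMode::Host`).
///
/// On hardware isolated platforms only the total size reported by the host is
/// honored and VTL2 ram is described as a single range at the measured base.
/// Falls back to the measured ram range if the host reports nothing.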
fn parse_host_vtl2_ram(
    params: &ShimParams,
    memory: &[MemoryEntry],
) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
    let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
    if params.isolation_type.is_hardware_isolated() {
        // On hardware isolated platforms, use only the size reported by the
        // host and describe VTL2 ram as a single range starting at the
        // measured base address.
        let vtl2_size = memory.iter().fold(0, |acc, entry| {
            if entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE {
                acc + entry.range.len()
            } else {
                acc
            }
        });

        log::info!(
            "host provided vtl2 ram size is {:x}, measured size is {:x}",
            vtl2_size,
            params.memory_size
        );

        let vtl2_size = max(vtl2_size, params.memory_size);
        vtl2_ram.push(MemoryEntry {
            range: MemoryRange::new(
                params.memory_start_address..(params.memory_start_address + vtl2_size),
            ),
            mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
            vnode: 0,
        });
    } else {
        for &entry in memory
            .iter()
            .filter(|entry| entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE)
        {
            vtl2_ram.push(entry);
        }
    }

    if vtl2_ram.is_empty() {
        log::info!("using measured vtl2 ram");
        vtl2_ram.push(MemoryEntry {
            range: MemoryRange::try_new(
                params.memory_start_address..(params.memory_start_address + params.memory_size),
            )
            .expect("range is valid"),
            mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
            vnode: 0,
        });
    }

    vtl2_ram
}

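/// Initialize the boot shim's bump allocator with the heap region described
/// by the shim parameters.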
fn init_heap(params: &ShimParams) {
    // SAFETY: `params.heap` describes memory set aside exclusively for the
    // boot shim's heap, and the allocator is initialized only once here.
    unsafe {
        ALLOCATOR.init(params.heap);
    }
}

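/// The host device tree parsed with the fixed capacities supported by the
/// boot shim.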
type ParsedDt =
    ParsedDeviceTree<MAX_PARTITION_RAM_RANGES, MAX_CPU_COUNT, COMMAND_LINE_SIZE, MAX_ENTROPY_SIZE>;

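/// Register the address space ranges common to both the fresh-boot and
/// persisted-state paths: the log buffer, the VTL2 reserved region, and the
/// sidecar image, when present.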
fn add_common_ranges<'a, I: Iterator<Item = MemoryRange>>(
    params: &ShimParams,
    mut builder: AddressSpaceManagerBuilder<'a, I>,
) -> AddressSpaceManagerBuilder<'a, I> {
    builder = builder.with_log_buffer(params.log_buffer);

    if params.vtl2_reserved_region_size != 0 {
        builder = builder.with_reserved_range(MemoryRange::new(
            params.vtl2_reserved_region_start
                ..(params.vtl2_reserved_region_start + params.vtl2_reserved_region_size),
        ));
    }

    if params.sidecar_size != 0 {
        builder = builder.with_sidecar_image(MemoryRange::new(
            params.sidecar_base..(params.sidecar_base + params.sidecar_size),
        ));
    }

    builder
}

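/// Memory and MMIO topology selected for the partition, either computed from
/// the host device tree or restored from persisted state.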
#[derive(Debug, PartialEq, Eq)]
struct PartitionTopology {
    vtl2_ram: &'static [MemoryEntry],
    vtl0_mmio: ArrayVec<MemoryRange, 2>,
    vtl2_mmio: ArrayVec<MemoryRange, 2>,
    memory_allocation_mode: MemoryAllocationMode,
}

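/// Topology restored from persisted state, along with the per-CPU state that
/// was carried across the servicing operation.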
#[derive(Debug, PartialEq, Eq)]
struct PersistedPartitionTopology {
    topology: PartitionTopology,
    cpus_with_mapped_interrupts_no_io: Vec<u32>,
    cpus_with_outstanding_io: Vec<u32>,
}

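/// Compute the default VTL2 MMIO size: half of the VTL0 high MMIO gap,
/// clamped to between 128 MiB and 1 GiB.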
fn calculate_default_mmio_size(parsed: &ParsedDt) -> Result<u64, DtError> {
    const MINIMUM_MMIO_SIZE: u64 = 128 * (1 << 20);
    const MAXIMUM_MMIO_SIZE: u64 = 1 << 30;
    let half_high_gap = parsed.vmbus_vtl0.as_ref().ok_or(DtError::Vtl0Vmbus)?.mmio[1].len() / 2;
    Ok(half_high_gap.clamp(MINIMUM_MMIO_SIZE, MAXIMUM_MMIO_SIZE))
}

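/// Compute the partition topology from the host device tree on a fresh boot
/// (no persisted state) and initialize the VTL2 address space manager.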
fn topology_from_host_dt(
    params: &ShimParams,
    parsed: &ParsedDt,
    options: &BootCommandLineOptions,
    address_space: &mut AddressSpaceManager,
) -> Result<PartitionTopology, DtError> {
    log::info!("reading topology from host device tree");

    let mut vtl2_ram =
        off_stack!(ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>, ArrayVec::new_const());

    // Determine VTL2 ram depending on who allocates it: the host reports the
    // ranges directly, or the shim carves them out of partition ram itself.
    let memory_allocation_mode = parsed.memory_allocation_mode;
    match memory_allocation_mode {
        MemoryAllocationMode::Host => {
            vtl2_ram
                .try_extend_from_slice(parse_host_vtl2_ram(params, &parsed.memory).as_ref())
                .expect("vtl2 ram should fit in MAX_VTL2_RAM_RANGES ranges");
        }
        MemoryAllocationMode::Vtl2 {
            memory_size,
            mmio_size: _,
        } => {
            vtl2_ram
                .try_extend_from_slice(
                    allocate_vtl2_ram(params, &parsed.memory, memory_size).as_ref(),
                )
                .expect("vtl2 ram should fit in MAX_VTL2_RAM_RANGES ranges");
        }
    }

    // On isolated platforms, or when the shim allocates memory itself, VTL2
    // selects its own MMIO out of the VTL0 ranges unless the host provided
    // dedicated VTL2 MMIO.
    let (vtl0_mmio, vtl2_mmio) = if params.isolation_type != IsolationType::None
        || matches!(
            parsed.memory_allocation_mode,
            MemoryAllocationMode::Vtl2 { .. }
        ) {
        // The host may have requested a specific VTL2 MMIO size.
        let host_provided_size = match parsed.memory_allocation_mode {
            MemoryAllocationMode::Vtl2 { mmio_size, .. } => mmio_size.unwrap_or(0),
            _ => 0,
        };
        let vmbus_vtl2 = parsed.vmbus_vtl2.as_ref().ok_or(DtError::Vtl2Vmbus)?;
        let vmbus_vtl2_mmio_size = vmbus_vtl2.mmio.iter().map(|r| r.len()).sum::<u64>();
        let mmio_size = if vmbus_vtl2_mmio_size != 0 {
            host_provided_size
        } else {
            max(host_provided_size, calculate_default_mmio_size(parsed)?)
        };

        log::info!("allocating vtl2 mmio size {mmio_size:#x} bytes");
        log::info!("host provided vtl2 mmio ranges are {vmbus_vtl2_mmio_size:#x} bytes");

        let vmbus_vtl0 = parsed.vmbus_vtl0.as_ref().ok_or(DtError::Vtl0Vmbus)?;
        if vmbus_vtl2_mmio_size != 0 {
            if vmbus_vtl2_mmio_size < mmio_size {
                return Err(DtError::NotEnoughVtl2Mmio);
            }

            log::info!("using host provided vtl2 mmio: {:x?}", vmbus_vtl2.mmio);
            (vmbus_vtl0.mmio.clone(), vmbus_vtl2.mmio.clone())
        } else {
            log::info!("no vtl2 mmio provided by host, allocating from vtl0 mmio");
            let selected_vtl2_mmio = select_vtl2_mmio_range(&vmbus_vtl0.mmio, mmio_size)?;

            // Remove the selected VTL2 range from the VTL0 ranges.
            let vtl0_mmio = subtract_ranges(vmbus_vtl0.mmio.iter().cloned(), [selected_vtl2_mmio])
                .collect::<ArrayVec<MemoryRange, 2>>();
            let vtl2_mmio = [selected_vtl2_mmio]
                .into_iter()
                .collect::<ArrayVec<MemoryRange, 2>>();

            assert_eq!(
                vtl0_mmio.len(),
                2,
                "vtl0 mmio ranges are not 2 {:#x?}",
                vtl0_mmio
            );

            log::info!("vtl0 mmio: {vtl0_mmio:x?}, vtl2 mmio: {vtl2_mmio:x?}");

            (vtl0_mmio, vtl2_mmio)
        }
    } else {
        (
            parsed
                .vmbus_vtl0
                .as_ref()
                .ok_or(DtError::Vtl0Vmbus)?
                .mmio
                .clone(),
            parsed
                .vmbus_vtl2
                .as_ref()
                .ok_or(DtError::Vtl2Vmbus)?
                .mmio
                .clone(),
        )
    };

    // The device tree portion of the config region can be reclaimed once it
    // has been parsed.
    let reclaim_base = params.dt_start();
    let reclaim_end = params.dt_start() + params.dt_size();
    let vtl2_config_region_reclaim =
        MemoryRange::try_new(reclaim_base..reclaim_end).expect("range is valid");

    log::info!("reclaim device tree memory {reclaim_base:x}-{reclaim_end:x}");

    let vtl2_config_region = MemoryRange::new(
        params.parameter_region_start
            ..(params.parameter_region_start + params.parameter_region_size),
    );

    // Carve a fixed 20-page region off the front of the persisted state range
    // for the saved state header and protobuf payload.
    const PERSISTED_REGION_SIZE: u64 = 20 * 4096;
    let (persisted_state_region, remainder) = params
        .persisted_state
        .split_at_offset(PERSISTED_REGION_SIZE);
    log::info!(
        "persisted state region sized to {persisted_state_region:#x?}, remainder {remainder:#x?}"
    );

    let mut address_space_builder = AddressSpaceManagerBuilder::new(
        address_space,
        &vtl2_ram,
        params.used,
        persisted_state_region,
        subtract_ranges([vtl2_config_region], [vtl2_config_region_reclaim]),
    );

    address_space_builder = add_common_ranges(params, address_space_builder);

    address_space_builder
        .init()
        .expect("failed to initialize address space manager");

    if params.isolation_type == IsolationType::None {
        let enable_vtl2_gpa_pool = options.enable_vtl2_gpa_pool;
        let device_dma_page_count = parsed.device_dma_page_count;
        let vp_count = parsed.cpu_count();
        let mem_size = vtl2_ram.iter().map(|e| e.range.len()).sum();
        if let Some(vtl2_gpa_pool_size) = pick_private_pool_size(
            enable_vtl2_gpa_pool,
            device_dma_page_count,
            vp_count,
            mem_size,
        ) {
            // Reserve the requested number of pages from VTL2 ram.
            let pool_size_bytes = vtl2_gpa_pool_size * HV_PAGE_SIZE;

            allocate_private_pool(
                address_space,
                &vtl2_ram,
                pool_size_bytes,
                options.vtl2_gpa_pool_numa_split,
                enable_vtl2_gpa_pool,
                device_dma_page_count,
                vp_count,
                mem_size,
            );
        }
    }

    Ok(PartitionTopology {
        vtl2_ram: OffStackRef::<'_, ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>>::leak(vtl2_ram),
        vtl0_mmio,
        vtl2_mmio,
        memory_allocation_mode,
    })
}

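/// Restore the partition topology from the persisted state written by a
/// previous instance before a servicing operation, and initialize the VTL2
/// address space manager from it.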
fn topology_from_persisted_state(
    header: PersistedStateHeader,
    params: &ShimParams,
    parsed: &ParsedDt,
    address_space: &mut AddressSpaceManager,
) -> Result<PersistedPartitionTopology, DtError> {
    log::info!("reading topology from persisted state");

    // Validate that the protobuf payload described by the header lies within
    // the persisted state region.
    let protobuf_region =
        MemoryRange::new(header.protobuf_base..(header.protobuf_base + header.protobuf_region_len));
    assert!(
        params.persisted_state.contains(&protobuf_region),
        "protobuf region {protobuf_region:#x?} is not contained within the persisted state region {:#x?}",
        params.persisted_state
    );

    assert!(
        header.protobuf_payload_len <= header.protobuf_region_len,
        "protobuf payload len {} is larger than region len {}",
        header.protobuf_payload_len,
        header.protobuf_region_len
    );

    // SAFETY: the payload bounds were validated against the persisted state
    // region above, and that region is not modified while this slice is live.
    let protobuf_raw = unsafe {
        core::slice::from_raw_parts(
            header.protobuf_base as *const u8,
            header.protobuf_payload_len as usize,
        )
    };

    let parsed_protobuf: loader_defs::shim::save_restore::SavedState =
        bump_alloc::with_global_alloc(|| {
            log::info!("decoding protobuf of size {}", protobuf_raw.len());
            mesh_protobuf::decode(protobuf_raw).expect("failed to decode protobuf")
        });

    let loader_defs::shim::save_restore::SavedState {
        partition_memory,
        partition_mmio,
        cpus_with_mapped_interrupts_no_io,
        cpus_with_outstanding_io,
    } = parsed_protobuf;

    log::info!(
        "persisted state: cpus_with_mapped_interrupts_no_io={:?}, cpus_with_outstanding_io={:?}",
        cpus_with_mapped_interrupts_no_io,
        cpus_with_outstanding_io,
    );

    let memory_allocation_mode = parsed.memory_allocation_mode;

    let mut vtl2_ram =
        off_stack!(ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>, ArrayVec::new_const());

    // Recover the VTL2 ram ranges recorded by the previous instance.
    let previous_vtl2_ram = partition_memory.iter().filter_map(|entry| {
        if entry.vtl_type.ram() && entry.vtl_type.vtl2() {
            Some(MemoryEntry {
                range: entry.range,
                mem_type: entry.igvm_type.clone().into(),
                vnode: entry.vnode,
            })
        } else {
            None
        }
    });

    // Merge adjacent ranges that share the same type and vnode back into
    // single entries.
    let previous_vtl2_ram = memory_range::merge_adjacent_ranges(
        previous_vtl2_ram.map(|entry| (entry.range, (entry.mem_type, entry.vnode))),
    );

    vtl2_ram.extend(
        previous_vtl2_ram.map(|(range, (mem_type, vnode))| MemoryEntry {
            range,
            mem_type,
            vnode,
        }),
    );

    // When the host allocates VTL2 ram, it must report the same ranges across
    // a servicing operation; verify the persisted ram still matches.
    if matches!(memory_allocation_mode, MemoryAllocationMode::Host) {
        let host_vtl2_ram = parse_host_vtl2_ram(params, &parsed.memory);
        assert_eq!(
            vtl2_ram.as_slice(),
            host_vtl2_ram.as_ref(),
            "vtl2 ram from persisted state does not match host provided ram"
        );
    }

    // Rediscover the persisted state header and protobuf regions so the whole
    // persisted range can be reserved again.
    let persisted_header = partition_memory
        .iter()
        .find(|entry| entry.vtl_type == MemoryVtlType::VTL2_PERSISTED_STATE_HEADER)
        .expect("persisted state header missing");
    let persisted_protobuf = partition_memory
        .iter()
        .find(|entry| entry.vtl_type == MemoryVtlType::VTL2_PERSISTED_STATE_PROTOBUF)
        .expect("persisted state protobuf region missing");
    assert_eq!(persisted_header.range.end(), protobuf_region.start());
    let persisted_state_region =
        MemoryRange::new(persisted_header.range.start()..persisted_protobuf.range.end());

    // The device tree portion of the config region can be reclaimed once it
    // has been parsed.
    let reclaim_base = params.dt_start();
    let reclaim_end = params.dt_start() + params.dt_size();
    let vtl2_config_region_reclaim =
        MemoryRange::try_new(reclaim_base..reclaim_end).expect("range is valid");

    log::info!("reclaim device tree memory {reclaim_base:x}-{reclaim_end:x}");

    let vtl2_config_region = MemoryRange::new(
        params.parameter_region_start
            ..(params.parameter_region_start + params.parameter_region_size),
    );

    let mut address_space_builder = AddressSpaceManagerBuilder::new(
        address_space,
        &vtl2_ram,
        params.used,
        persisted_state_region,
        subtract_ranges([vtl2_config_region], [vtl2_config_region_reclaim]),
    );

    // Re-register the previously allocated VTL2 GPA pool ranges so they are
    // preserved across the servicing operation.
    let pool_ranges = partition_memory.iter().filter_map(|entry| {
        if entry.vtl_type == MemoryVtlType::VTL2_GPA_POOL {
            Some(entry.range)
        } else {
            None
        }
    });

    address_space_builder = address_space_builder.with_pool_ranges(pool_ranges);

    address_space_builder = add_common_ranges(params, address_space_builder);

    address_space_builder
        .init()
        .expect("failed to initialize address space manager");

    let vtl0_mmio = partition_mmio
        .iter()
        .filter_map(|entry| {
            if entry.vtl_type == MemoryVtlType::VTL0_MMIO {
                Some(entry.range)
            } else {
                None
            }
        })
        .collect::<ArrayVec<MemoryRange, 2>>();
    let vtl2_mmio = partition_mmio
        .iter()
        .filter_map(|entry| {
            if entry.vtl_type == MemoryVtlType::VTL2_MMIO {
                Some(entry.range)
            } else {
                None
            }
        })
        .collect::<ArrayVec<MemoryRange, 2>>();

    Ok(PersistedPartitionTopology {
        topology: PartitionTopology {
            vtl2_ram: OffStackRef::<'_, ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>>::leak(vtl2_ram),
            vtl0_mmio,
            vtl2_mmio,
            memory_allocation_mode,
        },
        cpus_with_mapped_interrupts_no_io,
        cpus_with_outstanding_io,
    })
}

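/// Read the persisted state header, if present.
///
/// Returns `None` on isolated platforms, where persisted state is not read,
/// or when the header magic does not match, meaning no previous instance
/// wrote persisted state.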
fn read_persisted_region_header(params: &ShimParams) -> Option<PersistedStateHeader> {
    // Persisted state is not read on isolated platforms.
    if params.isolation_type != IsolationType::None {
        return None;
    }

    // SAFETY: the persisted state region described by the shim parameters is
    // large enough to hold the header and is not written while this slice is
    // alive.
    let buf = unsafe {
        core::slice::from_raw_parts(
            params.persisted_state.start() as *const u8,
            size_of::<PersistedStateHeader>(),
        )
    };

    let header = PersistedStateHeader::read_from_bytes(buf)
        .expect("region is page aligned and the correct size");

    if header.magic == PersistedStateHeader::MAGIC {
        Some(header)
    } else {
        None
    }
}

impl PartitionInfo {
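    /// Read the partition info from the host device tree and, when present,
    /// the persisted state left by a previous instance, filling out `storage`.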
    pub fn read_from_dt<'a>(
        params: &'a ShimParams,
        storage: &'a mut Self,
        address_space: &'_ mut AddressSpaceManager,
        mut options: BootCommandLineOptions,
        can_trust_host: bool,
    ) -> Result<&'a mut Self, DtError> {
        let dt = params.device_tree();

        if dt[0] == 0 {
            log::error!("host did not provide a device tree");
            return Err(DtError::NoDeviceTree);
        }

        let mut dt_storage = off_stack!(ParsedDt, ParsedDeviceTree::new());

        let parsed = ParsedDeviceTree::parse(dt, &mut *dt_storage).map_err(DtError::DeviceTree)?;

        let command_line = params.command_line();

        // Always start with the measured command line.
        write!(
            storage.cmdline,
            "{}",
            command_line
                .command_line()
                .expect("measured command line should be valid")
        )
        .map_err(|_| DtError::CommandLineSize)?;

        match command_line.policy {
            CommandLinePolicy::STATIC => {
                // Use only the measured command line written above.
            }
            CommandLinePolicy::APPEND_CHOSEN if can_trust_host => {
                // Parse and append the host provided command line.
                options.parse(&parsed.command_line);
                write!(storage.cmdline, " {}", &parsed.command_line)
                    .map_err(|_| DtError::CommandLineSize)?;
            }
            CommandLinePolicy::APPEND_CHOSEN if !can_trust_host => {
                // The host cannot be trusted; ignore its command line.
            }
            _ => unreachable!(),
        }

        init_heap(params);

        let persisted_state_header = read_persisted_region_header(params);
        log::info!(
            "read_from_dt: persisted_state_header present={}, sidecar={:?}",
            persisted_state_header.is_some(),
            options.sidecar,
        );
        let (topology, cpus_with_outstanding_io) = if let Some(header) = persisted_state_header {
            log::info!("found persisted state header");
            let persisted_topology =
                topology_from_persisted_state(header, params, parsed, address_space)?;
            (
                persisted_topology.topology,
                persisted_topology.cpus_with_outstanding_io,
            )
        } else {
            (
                topology_from_host_dt(params, parsed, &options, address_space)?,
                Vec::new(),
            )
        };

        let Self {
            vtl2_ram,
            partition_ram,
            isolation,
            bsp_reg,
            cpus,
            sidecar_cpu_overrides,
            vmbus_vtl0,
            vmbus_vtl2,
            cmdline: _,
            com3_serial_available: com3_serial,
            gic,
            pmu_gsiv,
            memory_allocation_mode,
            entropy,
            vtl0_alias_map,
            nvme_keepalive,
            boot_options,
        } = storage;

        // Exclude CPUs that had outstanding IO when state was saved from the
        // sidecar start list; if the per-CPU override table cannot represent
        // every CPU, disable sidecar for this boot instead.
        if let (SidecarOptions::Enabled { .. }, true) =
            (&boot_options.sidecar, !cpus_with_outstanding_io.is_empty())
        {
            let max_cpu_id = *cpus_with_outstanding_io.iter().max().unwrap() as usize;
            if parsed.cpu_count() <= sidecar_cpu_overrides.sidecar_starts_cpu.len()
                && max_cpu_id < sidecar_cpu_overrides.sidecar_starts_cpu.len()
            {
                sidecar_cpu_overrides.per_cpu_state_specified = true;
                for &cpu_id in &cpus_with_outstanding_io {
                    sidecar_cpu_overrides.sidecar_starts_cpu[cpu_id as usize] = false;
                }
                log::info!(
                    "sidecar: excluding CPUs {:?} due to outstanding IO",
                    cpus_with_outstanding_io,
                );
            } else {
                log::info!(
                    "sidecar: disabling, too many CPUs for per-CPU state (max id {max_cpu_id})"
                );
                boot_options.sidecar = SidecarOptions::DisabledServicing;
                options.sidecar = SidecarOptions::DisabledServicing;
            }
        }

        vtl2_ram.clear();
        vtl2_ram.extend(topology.vtl2_ram.iter().copied());
        partition_ram.clear();
        partition_ram.extend(parsed.memory.iter().copied());
        *memory_allocation_mode = topology.memory_allocation_mode;

        *vmbus_vtl0 = VmbusInfo {
            connection_id: parsed
                .vmbus_vtl0
                .as_ref()
                .ok_or(DtError::Vtl0Vmbus)?
                .connection_id,
            mmio: topology.vtl0_mmio,
        };
        *vmbus_vtl2 = VmbusInfo {
            connection_id: parsed
                .vmbus_vtl2
                .as_ref()
                .ok_or(DtError::Vtl2Vmbus)?
                .connection_id,
            mmio: topology.vtl2_mmio,
        };

        if can_trust_host {
            *vtl0_alias_map = parsed.vtl0_alias_map;
        }

        *isolation = params.isolation_type;

        *bsp_reg = parsed.boot_cpuid_phys;
        cpus.extend(parsed.cpus.iter().copied());
        *com3_serial = parsed.com3_serial;
        *gic = parsed.gic.clone();
        *pmu_gsiv = parsed.pmu_gsiv;
        *entropy = parsed.entropy.clone();
        *nvme_keepalive = parsed.nvme_keepalive;
        *boot_options = options;

        Ok(storage)
    }
}