1use super::PartitionInfo;
7use super::shim_params::ShimParams;
8use crate::cmdline::BootCommandLineOptions;
9use crate::cmdline::SidecarOptions;
10use crate::host_params::COMMAND_LINE_SIZE;
11use crate::host_params::MAX_CPU_COUNT;
12use crate::host_params::MAX_ENTROPY_SIZE;
13use crate::host_params::MAX_NUMA_NODES;
14use crate::host_params::MAX_PARTITION_RAM_RANGES;
15use crate::host_params::MAX_VTL2_RAM_RANGES;
16use crate::host_params::dt::dma_hint::pick_private_pool_size;
17use crate::host_params::mmio::select_vtl2_mmio_range;
18use crate::host_params::shim_params::IsolationType;
19use crate::memory::AddressSpaceManager;
20use crate::memory::AddressSpaceManagerBuilder;
21use crate::memory::AllocationPolicy;
22use crate::memory::AllocationType;
23use crate::single_threaded::OffStackRef;
24use crate::single_threaded::off_stack;
25use arrayvec::ArrayString;
26use arrayvec::ArrayVec;
27use bump_alloc::ALLOCATOR;
28use core::cmp::max;
29use core::fmt::Write;
30use host_fdt_parser::MemoryAllocationMode;
31use host_fdt_parser::MemoryEntry;
32use host_fdt_parser::ParsedDeviceTree;
33use host_fdt_parser::VmbusInfo;
34use hvdef::HV_PAGE_SIZE;
35use igvm_defs::MemoryMapEntryType;
36use loader_defs::paravisor::CommandLinePolicy;
37use loader_defs::shim::MemoryVtlType;
38use loader_defs::shim::PersistedStateHeader;
39use memory_range::MemoryRange;
40use memory_range::subtract_ranges;
41use memory_range::walk_ranges;
42use thiserror::Error;
43use zerocopy::FromBytes;
44
45mod bump_alloc;
46mod dma_hint;
47
/// Errors returned while reading partition information from the host
/// provided device tree.
#[derive(Debug, Error)]
pub enum DtError {
    /// The device tree buffer starts with a zero byte, which is treated as
    /// "no device tree supplied".
    #[error("no device tree provided by host")]
    NoDeviceTree,
    /// The device tree parser rejected the host supplied blob; the parser
    /// error is attached as the source.
    #[error("host provided device tree is invalid")]
    DeviceTree(#[source] host_fdt_parser::Error<'static>),
    /// Writing the command line into the fixed-size command line storage
    /// failed.
    #[error("commandline storage is too small to write the parsed command line")]
    CommandLineSize,
    /// The parsed device tree has no VTL2 vmbus node.
    #[error("device tree did not contain a vmbus node for VTL2")]
    Vtl2Vmbus,
    /// The parsed device tree has no VTL0 vmbus node.
    #[error("device tree did not contain a vmbus node for VTL0")]
    Vtl0Vmbus,
    /// The host's high MMIO range cannot be shared between VTL0 and VTL2.
    #[error("host provided high MMIO range is insufficient to cover VTL0 and VTL2")]
    NotEnoughVtl0Mmio,
    /// The MMIO ranges the host dedicated to VTL2 are smaller than the size
    /// VTL2 requires.
    #[error("host provided MMIO range is insufficient to cover VTL2")]
    NotEnoughVtl2Mmio,
}
74
75fn allocate_private_pool(
81 address_space: &mut AddressSpaceManager,
82 vtl2_ram: &[MemoryEntry],
83 pool_size_bytes: u64,
84 force_numa_split: bool,
85 enable_vtl2_gpa_pool: crate::cmdline::Vtl2GpaPoolConfig,
86 device_dma_page_count: Option<u64>,
87 vp_count: usize,
88 mem_size: u64,
89) {
90 if !force_numa_split {
100 if let Some(pool) = address_space.allocate(
101 Some(0),
102 pool_size_bytes,
103 AllocationType::GpaPool,
104 AllocationPolicy::HighMemory,
105 ) {
106 log::info!("allocated VTL2 pool at {:#x?}", pool.range);
107 return;
108 }
109 log::info!("node 0 cannot fit full pool, splitting across NUMA nodes");
110 } else {
111 log::info!("forcing VTL2 pool NUMA split across nodes");
112 }
113
114 let mut numa_nodes = off_stack!(ArrayVec<u32, MAX_NUMA_NODES>, ArrayVec::new_const());
118 for entry in vtl2_ram.iter() {
119 match numa_nodes.binary_search(&entry.vnode) {
120 Ok(_) => {}
121 Err(index) => {
122 numa_nodes.insert(index, entry.vnode);
123 }
124 }
125 }
126
127 let num_nodes = numa_nodes.len() as u64;
128 let per_node_size = (pool_size_bytes / num_nodes) & !(HV_PAGE_SIZE - 1);
131 let last_node_size = pool_size_bytes - per_node_size * (num_nodes - 1);
132 let mut remaining = pool_size_bytes;
133
134 if per_node_size == 0 {
139 panic!(
140 "cannot split VTL2 pool of size {pool_size_bytes:#x} bytes across \
141 {num_nodes} nodes, per node size {per_node_size:#x} bytes; \
142 enable_vtl2_gpa_pool={enable_vtl2_gpa_pool:?}, \
143 device_dma_page_count={device_dma_page_count:#x?}, \
144 vp_count={vp_count}, mem_size={mem_size:#x}"
145 );
146 }
147
148 for (i, vnode) in numa_nodes.iter().enumerate() {
149 if remaining == 0 {
150 break;
151 }
152
153 let is_last = i == numa_nodes.len() - 1;
154 let alloc_size = if is_last {
155 last_node_size
156 } else {
157 per_node_size
158 };
159
160 match address_space.allocate(
163 Some(*vnode),
164 alloc_size,
165 AllocationType::GpaPool,
166 AllocationPolicy::HighMemory,
167 ) {
168 Some(pool) => {
169 remaining -= pool.range.len();
170 log::info!(
171 "allocated VTL2 pool on node {} at {:#x?}",
172 vnode,
173 pool.range
174 );
175 }
176 None => {
177 let mut free_ranges = off_stack!(ArrayString<2048>, ArrayString::new_const());
178 for node in numa_nodes.iter() {
179 for range in address_space.free_ranges(*node) {
180 if write!(
181 free_ranges,
182 "n{}:[{:#x?}, {:#x?}) ",
183 node,
184 range.start(),
185 range.end()
186 )
187 .is_err()
188 {
189 let _ = write!(free_ranges, "...");
190 break;
191 }
192 }
193 }
194 let highest_numa_node = vtl2_ram.iter().map(|e| e.vnode).max().unwrap_or(0);
195 panic!(
196 "failed to allocate VTL2 pool on node {vnode}: \
197 need {alloc_size:#x} bytes, pool total {pool_size_bytes:#x} bytes \
198 (enable_vtl2_gpa_pool={enable_vtl2_gpa_pool:?}, \
199 device_dma_page_count={device_dma_page_count:#x?}, \
200 vp_count={vp_count}, mem_size={mem_size:#x}), \
201 highest_numa_node={highest_numa_node}, \
202 free_ranges=[ {}]",
203 free_ranges.as_str()
204 );
205 }
206 }
207 }
208
209 assert_eq!(
210 remaining, 0,
211 "pool allocation arithmetic error: {remaining:#x} bytes unallocated"
212 );
213}
214
215fn allocate_vtl2_ram(
217 params: &ShimParams,
218 partition_memory_map: &[MemoryEntry],
219 ram_size: Option<u64>,
220) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
221 let mut numa_nodes = off_stack!(ArrayVec<u32, MAX_NUMA_NODES>, ArrayVec::new_const());
224
225 for entry in partition_memory_map.iter() {
226 match numa_nodes.binary_search(&entry.vnode) {
227 Ok(_) => {}
228 Err(index) => {
229 numa_nodes.insert(index, entry.vnode);
230 }
231 }
232 }
233
234 let numa_node_count = numa_nodes.len();
235
236 let vtl2_size = if let Some(ram_size) = ram_size {
237 if ram_size < params.memory_size {
238 panic!(
239 "host provided vtl2 ram size {:x} is smaller than measured size {:x}",
240 ram_size, params.memory_size
241 );
242 }
243 max(ram_size, params.memory_size)
244 } else {
245 params.memory_size
246 };
247
248 let ram_per_node = vtl2_size / numa_node_count as u64;
251
252 let mut memory_per_node = off_stack!(ArrayVec<u64, MAX_NUMA_NODES>, ArrayVec::new_const());
254 memory_per_node.extend((0..numa_node_count).map(|_| 0));
255 for entry in partition_memory_map.iter() {
256 memory_per_node[entry.vnode as usize] = ram_per_node;
257 }
258
259 let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
264 let mut free_memory_after_vtl2 = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
265 let file_memory_range = MemoryRange::new(
266 params.memory_start_address..(params.memory_start_address + params.memory_size),
267 );
268
269 for (range, result) in walk_ranges(
270 [(file_memory_range, ())],
271 partition_memory_map.iter().map(|e| (e.range, e)),
272 ) {
273 match result {
274 memory_range::RangeWalkResult::Right(entry) => {
275 free_memory_after_vtl2.push(MemoryEntry {
277 range,
278 mem_type: entry.mem_type,
279 vnode: entry.vnode,
280 });
281 }
282 memory_range::RangeWalkResult::Both(_, entry) => {
283 vtl2_ram.push(MemoryEntry {
285 range,
286 mem_type: entry.mem_type,
287 vnode: entry.vnode,
288 });
289 }
290 memory_range::RangeWalkResult::Left(_) => {
291 panic!("used file range {range:#x?} is not reported as ram by host memmap")
292 }
293 memory_range::RangeWalkResult::Neither => {}
295 }
296 }
297
298 let mut free_memory = off_stack!(ArrayVec<MemoryEntry, 1024>, ArrayVec::new_const());
301 for (range, result) in walk_ranges(
302 params
303 .imported_regions()
304 .filter_map(|(range, _preaccepted)| {
305 if !file_memory_range.contains(&range) {
306 assert!(!file_memory_range.overlaps(&range), "imported range {range:#x?} overlaps vtl2 range and is not fully contained within vtl2 range");
310 Some((range, ()))
311 } else {
312 None
313 }
314 }),
315 free_memory_after_vtl2.iter().map(|e| (e.range, e)),
316 ) {
317 match result {
318 memory_range::RangeWalkResult::Right(entry) => {
319 free_memory.push(MemoryEntry {
320 range,
321 mem_type: entry.mem_type,
322 vnode: entry.vnode,
323 });
324 }
325 memory_range::RangeWalkResult::Left(_) => {
326 #[cfg(target_arch = "x86_64")]
329 if params.isolation_type == IsolationType::Tdx && range.start_4k_gpn() == 0xFFFFF && range.len() == 0x1000 {
330 continue;
331 }
332
333 panic!("launch context range {range:#x?} is not reported as ram by host memmap")
334 }
335 memory_range::RangeWalkResult::Both(_, _) => {
336 }
339 memory_range::RangeWalkResult::Neither => {}
341 }
342 }
343
344 for entry in vtl2_ram.iter() {
346 let mem_req = &mut memory_per_node[entry.vnode as usize];
347
348 if entry.range.len() > *mem_req {
349 log::warn!(
353 "entry {entry:?} is larger than required {mem_req} for vnode {}",
354 entry.vnode
355 );
356 *mem_req = 0;
357 } else {
358 *mem_req -= entry.range.len();
359 }
360 }
361
362 for (node, required_mem) in memory_per_node.iter().enumerate() {
364 let mut required_mem = *required_mem;
365 if required_mem == 0 {
366 continue;
367 }
368
369 for entry in free_memory.iter_mut().rev() {
372 if entry.vnode == node as u32 && !entry.range.is_empty() {
373 assert!(required_mem != 0);
374 let bytes_to_allocate = core::cmp::min(entry.range.len(), required_mem);
375
376 let offset = entry.range.len() - bytes_to_allocate;
378 let (remaining, alloc) = MemoryRange::split_at_offset(&entry.range, offset);
379
380 entry.range = remaining;
381 vtl2_ram.push(MemoryEntry {
382 range: alloc,
383 mem_type: entry.mem_type,
384 vnode: node as u32,
385 });
386
387 required_mem -= bytes_to_allocate;
388
389 if required_mem == 0 {
391 break;
392 }
393 }
394 }
395
396 if required_mem != 0 {
397 panic!(
400 "failed to allocate {required_mem:#x} for vnode {node:#x}, no memory remaining for vnode"
401 );
402 }
403 }
404
405 vtl2_ram.sort_unstable_by_key(|e| e.range.start());
407
408 vtl2_ram
409}
410
411fn parse_host_vtl2_ram(
413 params: &ShimParams,
414 memory: &[MemoryEntry],
415) -> OffStackRef<'static, impl AsRef<[MemoryEntry]> + use<>> {
416 let mut vtl2_ram = off_stack!(ArrayVec<MemoryEntry, MAX_NUMA_NODES>, ArrayVec::new_const());
419 if params.isolation_type.is_hardware_isolated() {
420 let vtl2_size = memory.iter().fold(0, |acc, entry| {
423 if entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE {
424 acc + entry.range.len()
425 } else {
426 acc
427 }
428 });
429
430 log::info!(
431 "host provided vtl2 ram size is {:x}, measured size is {:x}",
432 vtl2_size,
433 params.memory_size
434 );
435
436 let vtl2_size = max(vtl2_size, params.memory_size);
437 vtl2_ram.push(MemoryEntry {
438 range: MemoryRange::new(
439 params.memory_start_address..(params.memory_start_address + vtl2_size),
440 ),
441 mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
442 vnode: 0,
443 });
444 } else {
445 for &entry in memory
446 .iter()
447 .filter(|entry| entry.mem_type == MemoryMapEntryType::VTL2_PROTECTABLE)
448 {
449 vtl2_ram.push(entry);
450 }
451 }
452
453 if vtl2_ram.is_empty() {
454 log::info!("using measured vtl2 ram");
455 vtl2_ram.push(MemoryEntry {
456 range: MemoryRange::try_new(
457 params.memory_start_address..(params.memory_start_address + params.memory_size),
458 )
459 .expect("range is valid"),
460 mem_type: MemoryMapEntryType::VTL2_PROTECTABLE,
461 vnode: 0,
462 });
463 }
464
465 vtl2_ram
466}
467
/// Initializes the `bump_alloc` allocator with the heap described by
/// `params.heap`.
fn init_heap(params: &ShimParams) {
    // SAFETY: NOTE(review) - the contract of `ALLOCATOR.init` is not visible
    // in this file. Presumably `params.heap` must describe valid memory that
    // nothing else uses, and `init` must run only once; confirm against the
    // `bump_alloc` module.
    unsafe {
        ALLOCATOR.init(params.heap);
    }
}
479
/// The device tree parser type instantiated with the capacity constants
/// imported from `crate::host_params`.
type ParsedDt =
    ParsedDeviceTree<MAX_PARTITION_RAM_RANGES, MAX_CPU_COUNT, COMMAND_LINE_SIZE, MAX_ENTROPY_SIZE>;
482
483fn add_common_ranges<'a, I: Iterator<Item = MemoryRange>>(
486 params: &ShimParams,
487 mut builder: AddressSpaceManagerBuilder<'a, I>,
488) -> AddressSpaceManagerBuilder<'a, I> {
489 builder = builder.with_log_buffer(params.log_buffer);
491
492 if params.vtl2_reserved_region_size != 0 {
493 builder = builder.with_reserved_range(MemoryRange::new(
494 params.vtl2_reserved_region_start
495 ..(params.vtl2_reserved_region_start + params.vtl2_reserved_region_size),
496 ));
497 }
498
499 if params.sidecar_size != 0 {
500 builder = builder.with_sidecar_image(MemoryRange::new(
501 params.sidecar_base..(params.sidecar_base + params.sidecar_size),
502 ));
503 }
504
505 builder
506}
507
/// The memory and MMIO layout of the partition, as computed from the host
/// device tree or restored from persisted state.
#[derive(Debug, PartialEq, Eq)]
struct PartitionTopology {
    /// The ranges that make up VTL2 ram. The slice is obtained by leaking
    /// off-stack storage, hence the `'static` lifetime.
    vtl2_ram: &'static [MemoryEntry],
    /// MMIO ranges assigned to VTL0 (at most two).
    vtl0_mmio: ArrayVec<MemoryRange, 2>,
    /// MMIO ranges assigned to VTL2 (at most two).
    vtl2_mmio: ArrayVec<MemoryRange, 2>,
    /// The memory allocation mode reported by the parsed device tree.
    memory_allocation_mode: MemoryAllocationMode,
}
515
/// A topology restored from persisted state, together with the extra
/// information that only exists on that path.
#[derive(Debug, PartialEq, Eq)]
struct PersistedPartitionTopology {
    /// The restored memory and MMIO layout.
    topology: PartitionTopology,
    /// Sorted, de-duplicated ids of the CPUs the saved state listed as having
    /// outstanding IO or mapped interrupts; the caller excludes these from
    /// being started by the sidecar.
    sidecar_excluded_cpus: &'static [u32],
}
523
524fn calculate_default_mmio_size(parsed: &ParsedDt) -> Result<u64, DtError> {
529 const MINIMUM_MMIO_SIZE: u64 = 128 * (1 << 20);
530 const MAXIMUM_MMIO_SIZE: u64 = 1 << 30;
531 let half_high_gap = parsed.vmbus_vtl0.as_ref().ok_or(DtError::Vtl0Vmbus)?.mmio[1].len() / 2;
532 Ok(half_high_gap.clamp(MINIMUM_MMIO_SIZE, MAXIMUM_MMIO_SIZE))
533}
534
/// Computes the partition topology from the host provided device tree and
/// initializes `address_space` accordingly.
///
/// Steps, in order:
/// 1. Determine VTL2 ram, taken from the host or allocated by VTL2 depending
///    on the device tree's memory allocation mode.
/// 2. Determine the VTL0 and VTL2 MMIO ranges.
/// 3. Initialize the address space manager with VTL2 ram, the used range,
///    the persisted state region and the config region.
/// 4. On non-isolated partitions, allocate the VTL2 private pool if
///    `pick_private_pool_size` asks for one.
///
/// # Errors
///
/// Returns [`DtError::Vtl0Vmbus`] / [`DtError::Vtl2Vmbus`] when a required
/// vmbus node is missing, [`DtError::NotEnoughVtl2Mmio`] when the host's VTL2
/// MMIO is smaller than required, and any error from
/// `select_vtl2_mmio_range`.
///
/// # Panics
///
/// Panics if the VTL2 ram list exceeds its fixed capacity, if carving VTL2
/// MMIO out of VTL0 MMIO does not leave exactly two VTL0 ranges, or if the
/// address space manager fails to initialize.
fn topology_from_host_dt(
    params: &ShimParams,
    parsed: &ParsedDt,
    options: &BootCommandLineOptions,
    address_space: &mut AddressSpaceManager,
) -> Result<PartitionTopology, DtError> {
    log::info!("reading topology from host device tree");

    let mut vtl2_ram =
        off_stack!(ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>, ArrayVec::new_const());

    // Pick the VTL2 ram ranges according to who is responsible for
    // allocating VTL2 memory.
    let memory_allocation_mode = parsed.memory_allocation_mode;
    match memory_allocation_mode {
        MemoryAllocationMode::Host => {
            vtl2_ram
                .try_extend_from_slice(parse_host_vtl2_ram(params, &parsed.memory).as_ref())
                .expect("vtl2 ram should only be 64 big");
        }
        MemoryAllocationMode::Vtl2 {
            memory_size,
            mmio_size: _,
        } => {
            vtl2_ram
                .try_extend_from_slice(
                    allocate_vtl2_ram(params, &parsed.memory, memory_size).as_ref(),
                )
                .expect("vtl2 ram should only be 64 big");
        }
    }

    // VTL2 decides its own MMIO when the partition is isolated or when VTL2
    // is in charge of allocation; otherwise the host's ranges are used as is.
    let (vtl0_mmio, vtl2_mmio) = if params.isolation_type != IsolationType::None
        || matches!(
            parsed.memory_allocation_mode,
            MemoryAllocationMode::Vtl2 { .. }
        ) {
        // The size the host asked for, if any.
        let host_provided_size = match parsed.memory_allocation_mode {
            MemoryAllocationMode::Vtl2 { mmio_size, .. } => mmio_size.unwrap_or(0),
            _ => 0,
        };
        let vmbus_vtl2 = parsed.vmbus_vtl2.as_ref().ok_or(DtError::Vtl2Vmbus)?;
        let vmbus_vtl2_mmio_size = vmbus_vtl2.mmio.iter().map(|r| r.len()).sum::<u64>();
        // When the host already dedicated MMIO to VTL2, only the requested
        // size has to fit. Otherwise a default is used as a lower bound.
        let mmio_size = if vmbus_vtl2_mmio_size != 0 {
            host_provided_size
        } else {
            max(host_provided_size, calculate_default_mmio_size(parsed)?)
        };

        log::info!("allocating vtl2 mmio size {mmio_size:#x} bytes");
        log::info!("host provided vtl2 mmio ranges are {vmbus_vtl2_mmio_size:#x} bytes");

        let vmbus_vtl0 = parsed.vmbus_vtl0.as_ref().ok_or(DtError::Vtl0Vmbus)?;
        if vmbus_vtl2_mmio_size != 0 {
            // The host's VTL2 MMIO must cover the required size.
            if vmbus_vtl2_mmio_size < mmio_size {
                return Err(DtError::NotEnoughVtl2Mmio);
            }

            log::info!("using host provided vtl2 mmio: {:x?}", vmbus_vtl2.mmio);
            (vmbus_vtl0.mmio.clone(), vmbus_vtl2.mmio.clone())
        } else {
            log::info!("no vtl2 mmio provided by host, allocating from vtl0 mmio");
            // Carve the VTL2 range out of the VTL0 MMIO ranges.
            let selected_vtl2_mmio = select_vtl2_mmio_range(&vmbus_vtl0.mmio, mmio_size)?;

            let vtl0_mmio = subtract_ranges(vmbus_vtl0.mmio.iter().cloned(), [selected_vtl2_mmio])
                .collect::<ArrayVec<MemoryRange, 2>>();
            let vtl2_mmio = [selected_vtl2_mmio]
                .into_iter()
                .collect::<ArrayVec<MemoryRange, 2>>();

            // VTL0 must still have exactly two ranges after the carve-out.
            assert_eq!(
                vtl0_mmio.len(),
                2,
                "vtl0 mmio ranges are not 2 {:#x?}",
                vtl0_mmio
            );

            log::info!("vtl0 mmio: {vtl0_mmio:x?}, vtl2 mmio: {vtl2_mmio:x?}");

            (vtl0_mmio, vtl2_mmio)
        }
    } else {
        (
            parsed
                .vmbus_vtl0
                .as_ref()
                .ok_or(DtError::Vtl0Vmbus)?
                .mmio
                .clone(),
            parsed
                .vmbus_vtl2
                .as_ref()
                .ok_or(DtError::Vtl2Vmbus)?
                .mmio
                .clone(),
        )
    };

    // The device tree range is subtracted from the config region given to
    // the builder below.
    // NOTE(review): presumably this makes the device tree memory reclaimable
    // once parsing is done - confirm against `AddressSpaceManagerBuilder`.
    let reclaim_base = params.dt_start();
    let reclaim_end = params.dt_start() + params.dt_size();
    let vtl2_config_region_reclaim =
        MemoryRange::try_new(reclaim_base..reclaim_end).expect("range is valid");

    log::info!("reclaim device tree memory {reclaim_base:x}-{reclaim_end:x}");

    let vtl2_config_region = MemoryRange::new(
        params.parameter_region_start
            ..(params.parameter_region_start + params.parameter_region_size),
    );

    // Only the first 20 4K pages of the persisted state range are handed to
    // the builder; the remainder is logged and otherwise unused here.
    const PERSISTED_REGION_SIZE: u64 = 20 * 4096;
    let (persisted_state_region, remainder) = params
        .persisted_state
        .split_at_offset(PERSISTED_REGION_SIZE);
    log::info!(
        "persisted state region sized to {persisted_state_region:#x?}, remainder {remainder:#x?}"
    );

    let mut address_space_builder = AddressSpaceManagerBuilder::new(
        address_space,
        &vtl2_ram,
        params.used,
        persisted_state_region,
        subtract_ranges([vtl2_config_region], [vtl2_config_region_reclaim]),
    );

    address_space_builder = add_common_ranges(params, address_space_builder);

    address_space_builder
        .init()
        .expect("failed to initialize address space manager");

    // The private pool is only set up on non-isolated partitions. It must be
    // allocated after the address space manager is initialized, because the
    // allocation is made through it.
    if params.isolation_type == IsolationType::None {
        let enable_vtl2_gpa_pool = options.enable_vtl2_gpa_pool;
        let device_dma_page_count = parsed.device_dma_page_count;
        let vp_count = parsed.cpu_count();
        let mem_size = vtl2_ram.iter().map(|e| e.range.len()).sum();
        if let Some(vtl2_gpa_pool_size) = pick_private_pool_size(
            enable_vtl2_gpa_pool,
            device_dma_page_count,
            vp_count,
            mem_size,
        ) {
            // The picked size is in pages; convert it to bytes.
            let pool_size_bytes = vtl2_gpa_pool_size * HV_PAGE_SIZE;

            allocate_private_pool(
                address_space,
                &vtl2_ram,
                pool_size_bytes,
                options.vtl2_gpa_pool_numa_split,
                enable_vtl2_gpa_pool,
                device_dma_page_count,
                vp_count,
                mem_size,
            );
        }
    }

    Ok(PartitionTopology {
        // Leak the off-stack storage so the slice can be returned as 'static.
        vtl2_ram: OffStackRef::<'_, ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>>::leak(vtl2_ram),
        vtl0_mmio,
        vtl2_mmio,
        memory_allocation_mode,
    })
}
736
/// Restores the partition topology from the state persisted by a previous
/// instance and initializes `address_space` to match it.
///
/// `header` describes where the protobuf-encoded saved state lives. The
/// saved state supplies the previous VTL2 ram, private pool ranges, MMIO
/// ranges, and the CPUs that must be excluded from the sidecar.
///
/// # Panics
///
/// Panics if the header describes a protobuf region outside
/// `params.persisted_state`, if the payload is larger than its region, if
/// decoding fails, if the restored VTL2 ram disagrees with the host's (in
/// `Host` allocation mode), if the persisted header or protobuf entries are
/// missing from the saved memory map, or if the address space manager fails
/// to initialize.
fn topology_from_persisted_state(
    header: PersistedStateHeader,
    params: &ShimParams,
    parsed: &ParsedDt,
    address_space: &mut AddressSpaceManager,
) -> Result<PersistedPartitionTopology, DtError> {
    log::info!("reading topology from persisted state");

    // Validate the header before reading any memory it points at.
    let protobuf_region =
        MemoryRange::new(header.protobuf_base..(header.protobuf_base + header.protobuf_region_len));
    assert!(
        params.persisted_state.contains(&protobuf_region),
        "protobuf region {protobuf_region:#x?} is not contained within the persisted state region {:#x?}",
        params.persisted_state
    );

    assert!(
        header.protobuf_payload_len <= header.protobuf_region_len,
        "protobuf payload len {} is larger than region len {}",
        header.protobuf_payload_len,
        header.protobuf_region_len
    );

    // SAFETY: the asserts above establish that the payload lies inside the
    // protobuf region, which lies inside `params.persisted_state`.
    // NOTE(review): this relies on the persisted state region being mapped
    // and readable at these addresses - confirm.
    let protobuf_raw = unsafe {
        core::slice::from_raw_parts(
            header.protobuf_base as *const u8,
            header.protobuf_payload_len as usize,
        )
    };

    // Decoding runs inside `bump_alloc::with_global_alloc`.
    // NOTE(review): presumably because decoding needs heap allocation, which
    // is only enabled within that scope - confirm.
    let parsed_protobuf: loader_defs::shim::save_restore::SavedState =
        bump_alloc::with_global_alloc(|| {
            log::info!("decoding protobuf of size {}", protobuf_raw.len());
            mesh_protobuf::decode(protobuf_raw).expect("failed to decode protobuf")
        });

    let loader_defs::shim::save_restore::SavedState {
        partition_memory,
        partition_mmio,
        cpus_with_mapped_interrupts_no_io,
        cpus_with_outstanding_io,
    } = parsed_protobuf;

    log::info!(
        "persisted state: cpus_with_mapped_interrupts_no_io={:?}, cpus_with_outstanding_io={:?}",
        cpus_with_mapped_interrupts_no_io,
        cpus_with_outstanding_io,
    );

    // Build the sorted, de-duplicated union of both CPU lists.
    let mut sidecar_excluded_cpus = off_stack!(ArrayVec<u32, MAX_CPU_COUNT>, ArrayVec::new_const());
    sidecar_excluded_cpus.clear();
    for c in cpus_with_outstanding_io
        .iter()
        .chain(cpus_with_mapped_interrupts_no_io.iter())
        .copied()
    {
        if let Err(i) = sidecar_excluded_cpus.binary_search(&c) {
            sidecar_excluded_cpus.insert(i, c);
        }
    }

    let memory_allocation_mode = parsed.memory_allocation_mode;

    let mut vtl2_ram =
        off_stack!(ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>, ArrayVec::new_const());

    // Every saved entry that is VTL2 ram, whatever its specific sub-type.
    let previous_vtl2_ram = partition_memory.iter().filter_map(|entry| {
        if entry.vtl_type.ram() && entry.vtl_type.vtl2() {
            Some(MemoryEntry {
                range: entry.range,
                mem_type: entry.igvm_type.clone().into(),
                vnode: entry.vnode,
            })
        } else {
            None
        }
    });

    // The ranges are merged with `merge_adjacent_ranges`, carrying
    // (mem_type, vnode) as the per-range tag.
    // NOTE(review): presumably only adjacent ranges with equal tags are
    // merged - confirm against `memory_range`.
    let previous_vtl2_ram = memory_range::merge_adjacent_ranges(
        previous_vtl2_ram.map(|entry| (entry.range, (entry.mem_type, entry.vnode))),
    );

    vtl2_ram.extend(
        previous_vtl2_ram.map(|(range, (mem_type, vnode))| MemoryEntry {
            range,
            mem_type,
            vnode,
        }),
    );

    // When the host assigns VTL2 ram, it must describe exactly the ranges
    // that were in use before.
    if matches!(memory_allocation_mode, MemoryAllocationMode::Host) {
        let host_vtl2_ram = parse_host_vtl2_ram(params, &parsed.memory);
        assert_eq!(
            vtl2_ram.as_slice(),
            host_vtl2_ram.as_ref(),
            "vtl2 ram from persisted state does not match host provided ram"
        );
    }

    // Rebuild the persisted state region from the saved memory map: it runs
    // from the start of the header entry to the end of the protobuf entry,
    // and the header must end exactly where the protobuf region begins.
    let persisted_header = partition_memory
        .iter()
        .find(|entry| entry.vtl_type == MemoryVtlType::VTL2_PERSISTED_STATE_HEADER)
        .expect("persisted state header missing");
    let persisted_protobuf = partition_memory
        .iter()
        .find(|entry| entry.vtl_type == MemoryVtlType::VTL2_PERSISTED_STATE_PROTOBUF)
        .expect("persisted state protobuf region missing");
    assert_eq!(persisted_header.range.end(), protobuf_region.start());
    let persisted_state_region =
        MemoryRange::new(persisted_header.range.start()..persisted_protobuf.range.end());

    // The device tree range is subtracted from the config region given to
    // the builder below.
    let reclaim_base = params.dt_start();
    let reclaim_end = params.dt_start() + params.dt_size();
    let vtl2_config_region_reclaim =
        MemoryRange::try_new(reclaim_base..reclaim_end).expect("range is valid");

    log::info!("reclaim device tree memory {reclaim_base:x}-{reclaim_end:x}");

    let vtl2_config_region = MemoryRange::new(
        params.parameter_region_start
            ..(params.parameter_region_start + params.parameter_region_size),
    );

    let mut address_space_builder = AddressSpaceManagerBuilder::new(
        address_space,
        &vtl2_ram,
        params.used,
        persisted_state_region,
        subtract_ranges([vtl2_config_region], [vtl2_config_region_reclaim]),
    );

    // The private pool ranges are taken from the saved state and handed to
    // the builder; no new pool is allocated on this path.
    let pool_ranges = partition_memory.iter().filter_map(|entry| {
        if entry.vtl_type == MemoryVtlType::VTL2_GPA_POOL {
            Some(entry.range)
        } else {
            None
        }
    });

    address_space_builder = address_space_builder.with_pool_ranges(pool_ranges);

    address_space_builder = add_common_ranges(params, address_space_builder);

    address_space_builder
        .init()
        .expect("failed to initialize address space manager");

    // MMIO assignments come from the saved state, not from the device tree.
    let vtl0_mmio = partition_mmio
        .iter()
        .filter_map(|entry| {
            if entry.vtl_type == MemoryVtlType::VTL0_MMIO {
                Some(entry.range)
            } else {
                None
            }
        })
        .collect::<ArrayVec<MemoryRange, 2>>();
    let vtl2_mmio = partition_mmio
        .iter()
        .filter_map(|entry| {
            if entry.vtl_type == MemoryVtlType::VTL2_MMIO {
                Some(entry.range)
            } else {
                None
            }
        })
        .collect::<ArrayVec<MemoryRange, 2>>();

    Ok(PersistedPartitionTopology {
        topology: PartitionTopology {
            // Leak the off-stack storage so the slices can be 'static.
            vtl2_ram: OffStackRef::<'_, ArrayVec<MemoryEntry, MAX_VTL2_RAM_RANGES>>::leak(vtl2_ram),
            vtl0_mmio,
            vtl2_mmio,
            memory_allocation_mode,
        },
        sidecar_excluded_cpus: OffStackRef::leak(sidecar_excluded_cpus),
    })
}
949
950fn read_persisted_region_header(params: &ShimParams) -> Option<PersistedStateHeader> {
954 if params.isolation_type != IsolationType::None {
963 return None;
964 }
965
966 let buf = unsafe {
970 core::slice::from_raw_parts(
971 params.persisted_state.start() as *const u8,
972 size_of::<PersistedStateHeader>(),
973 )
974 };
975
976 let header = PersistedStateHeader::read_from_bytes(buf)
977 .expect("region is page aligned and the correct size");
978
979 if header.magic == PersistedStateHeader::MAGIC {
980 Some(header)
981 } else {
982 None
983 }
984}
985
986impl PartitionInfo {
987 pub fn read_from_dt<'a>(
989 params: &'a ShimParams,
990 storage: &'a mut Self,
991 address_space: &'_ mut AddressSpaceManager,
992 mut options: BootCommandLineOptions,
993 can_trust_host: bool,
994 ) -> Result<&'a mut Self, DtError> {
995 let dt = params.device_tree();
996
997 if dt[0] == 0 {
998 log::error!("host did not provide a device tree");
999 return Err(DtError::NoDeviceTree);
1000 }
1001
1002 let mut dt_storage = off_stack!(ParsedDt, ParsedDeviceTree::new());
1003
1004 let parsed = ParsedDeviceTree::parse(dt, &mut *dt_storage).map_err(DtError::DeviceTree)?;
1005
1006 let command_line = params.command_line();
1007
1008 write!(
1010 storage.cmdline,
1011 "{}",
1012 command_line
1013 .command_line()
1014 .expect("measured command line should be valid")
1015 )
1016 .map_err(|_| DtError::CommandLineSize)?;
1017
1018 match command_line.policy {
1019 CommandLinePolicy::STATIC => {
1020 }
1022 CommandLinePolicy::APPEND_CHOSEN if can_trust_host => {
1023 options.parse(&parsed.command_line);
1026 write!(storage.cmdline, " {}", &parsed.command_line)
1027 .map_err(|_| DtError::CommandLineSize)?;
1028 }
1029 CommandLinePolicy::APPEND_CHOSEN if !can_trust_host => {
1030 }
1032 _ => unreachable!(),
1033 }
1034
1035 init_heap(params);
1036
1037 let persisted_state_header = read_persisted_region_header(params);
1038 log::info!(
1039 "read_from_dt: persisted_state_header present={}, sidecar={:?}",
1040 persisted_state_header.is_some(),
1041 options.sidecar,
1042 );
1043 let (topology, sidecar_excluded_cpus) = if let Some(header) = persisted_state_header {
1044 log::info!("found persisted state header");
1045 let persisted_topology =
1046 topology_from_persisted_state(header, params, parsed, address_space)?;
1047 (
1048 persisted_topology.topology,
1049 persisted_topology.sidecar_excluded_cpus,
1050 )
1051 } else {
1052 (
1053 topology_from_host_dt(params, parsed, &options, address_space)?,
1054 &[][..],
1055 )
1056 };
1057
1058 let Self {
1059 vtl2_ram,
1060 partition_ram,
1061 isolation,
1062 bsp_reg,
1063 cpus,
1064 sidecar_cpu_overrides,
1065 vmbus_vtl0,
1066 vmbus_vtl2,
1067 cmdline: _,
1068 com3_serial,
1069 gic,
1070 pmu_gsiv,
1071 memory_allocation_mode,
1072 entropy,
1073 vtl0_alias_map,
1074 nvme_keepalive,
1075 boot_options,
1076 } = storage;
1077
1078 if let (SidecarOptions::Enabled { .. }, true) =
1098 (&boot_options.sidecar, !sidecar_excluded_cpus.is_empty())
1099 {
1100 let max_cpu_id = *sidecar_excluded_cpus.iter().max().unwrap() as usize;
1101 if parsed.cpu_count() <= sidecar_cpu_overrides.sidecar_starts_cpu.len()
1102 && max_cpu_id < sidecar_cpu_overrides.sidecar_starts_cpu.len()
1103 {
1104 sidecar_cpu_overrides.per_cpu_state_specified = true;
1106 for &cpu_id in sidecar_excluded_cpus {
1107 sidecar_cpu_overrides.sidecar_starts_cpu[cpu_id as usize] = false;
1108 }
1109 log::info!(
1110 "sidecar: excluding CPUs {:?} due to restored NVMe device state",
1111 sidecar_excluded_cpus,
1112 );
1113 } else {
1114 log::info!(
1116 "sidecar: disabling, too many CPUs for per-CPU state (max id {max_cpu_id})"
1117 );
1118 boot_options.sidecar = SidecarOptions::DisabledServicing;
1119 options.sidecar = SidecarOptions::DisabledServicing;
1120 }
1121 }
1122
1123 vtl2_ram.clear();
1125 vtl2_ram.extend(topology.vtl2_ram.iter().copied());
1126 partition_ram.clear();
1127 partition_ram.extend(parsed.memory.iter().copied());
1128 *memory_allocation_mode = topology.memory_allocation_mode;
1129
1130 *vmbus_vtl0 = VmbusInfo {
1133 connection_id: parsed
1134 .vmbus_vtl0
1135 .as_ref()
1136 .ok_or(DtError::Vtl0Vmbus)?
1137 .connection_id,
1138 mmio: topology.vtl0_mmio,
1139 };
1140 *vmbus_vtl2 = VmbusInfo {
1141 connection_id: parsed
1142 .vmbus_vtl2
1143 .as_ref()
1144 .ok_or(DtError::Vtl2Vmbus)?
1145 .connection_id,
1146 mmio: topology.vtl2_mmio,
1147 };
1148
1149 if can_trust_host {
1151 *vtl0_alias_map = parsed.vtl0_alias_map;
1152 }
1153
1154 *isolation = params.isolation_type;
1155
1156 *bsp_reg = parsed.boot_cpuid_phys;
1157 cpus.extend(parsed.cpus.iter().copied());
1158 *com3_serial = parsed.com3_serial.clone();
1159 *gic = parsed.gic.clone();
1160 *pmu_gsiv = parsed.pmu_gsiv;
1161 *entropy = parsed.entropy.clone();
1162 *nvme_keepalive = parsed.nvme_keepalive;
1163 *boot_options = options;
1164
1165 Ok(storage)
1166 }
1167}