openhcl_boot/arch/x86_64/memory.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Routines to prepare VTL2 memory for launching the kernel.

use super::address_space::LocalMap;
use super::address_space::init_local_map;
use crate::AddressSpaceManager;
use crate::ShimParams;
use crate::arch::TdxHypercallPage;
use crate::arch::x86_64::address_space::tdx_share_large_page;
use crate::host_params::PartitionInfo;
use crate::host_params::shim_params::IsolationType;
use crate::hypercall::hvcall;
use crate::memory::AllocationPolicy;
use crate::memory::AllocationType;
use crate::off_stack;
use arrayvec::ArrayVec;
use loader_defs::shim::MemoryVtlType;
use memory_range::MemoryRange;
use page_table::x64::MappedRange;
use page_table::x64::PAGE_TABLE_MAX_BYTES;
use page_table::x64::PAGE_TABLE_MAX_COUNT;
use page_table::x64::PageTable;
use page_table::x64::PageTableBuilder;
use sha2::Digest;
use sha2::Sha384;
use static_assertions::const_assert;
use x86defs::X64_LARGE_PAGE_SIZE;
use x86defs::tdx::TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT;
use zerocopy::FromZeros;

/// On isolated systems, transitions all VTL2 RAM to be private and accepted, with the appropriate
/// VTL permissions applied.
pub fn setup_vtl2_memory(
    shim_params: &ShimParams,
    partition_info: &PartitionInfo,
    address_space: &mut AddressSpaceManager,
) {
    // Only if the partition is VBS-isolated, accept memory and apply vtl 2 protections here.
    // Non-isolated partitions can undergo servicing, and additional information
    // would be needed to determine whether vtl 2 protections should be applied
    // or skipped, since the operation is expensive.
    // TODO: if applying vtl 2 protections for non-isolated VMs moves to the
    // boot shim, apply them here.
    if let IsolationType::None = shim_params.isolation_type {
        return;
    }

    if let IsolationType::Vbs = shim_params.isolation_type {
        // Enable VTL protection so that vtl 2 protections can be applied. All other config
        // should be set by user mode.
        let vsm_config = hvdef::HvRegisterVsmPartitionConfig::new()
            .with_default_vtl_protection_mask(0xF)
            .with_enable_vtl_protection(true);

        hvcall()
            .set_register(
                hvdef::HvX64RegisterName::VsmPartitionConfig.into(),
                hvdef::HvRegisterValue::from(u64::from(vsm_config)),
            )
            .expect("setting vsm config shouldn't fail");

        // VBS isolated VMs need to apply VTL2 protections to pages that were already accepted to
        // prevent VTL0 access. Only those pages that belong to the VTL2 RAM region should have
        // these protections applied - certain pages belonging to VTL0 are also among the accepted
        // regions and should not be processed here.
        let accepted_ranges =
            shim_params
                .imported_regions()
                .filter_map(|(imported_range, already_accepted)| {
                    already_accepted.then_some(imported_range)
                });
        for range in memory_range::overlapping_ranges(
            partition_info.vtl2_ram.iter().map(|entry| entry.range),
            accepted_ranges,
        ) {
            hvcall()
                .apply_vtl2_protections(range)
                .expect("applying vtl 2 protections cannot fail");
        }
    }

    // Initialize the local_map.
    // TODO: Consider moving this to ShimParams to pass around.
    let mut local_map = match shim_params.isolation_type {
        IsolationType::Snp | IsolationType::Tdx => Some(init_local_map(
            loader_defs::paravisor::PARAVISOR_LOCAL_MAP_VA,
        )),
        IsolationType::None | IsolationType::Vbs => None,
    };

    // Make sure imported regions are in increasing order. `last_range_end` holds the address
    // of the previous range's last page, so each range must start strictly above it (i.e. the
    // regions do not overlap).
    let mut last_range_end = None;
    for (imported_range, _) in shim_params.imported_regions() {
        assert!(last_range_end.is_none() || imported_range.start() > last_range_end.unwrap());
        last_range_end = Some(imported_range.end() - hvdef::HV_PAGE_SIZE);
    }

    // Iterate over all VTL2 RAM that is not part of an imported region and
    // accept it with appropriate VTL protections.
    for range in memory_range::subtract_ranges(
        partition_info.vtl2_ram.iter().map(|e| e.range),
        shim_params.imported_regions().map(|(r, _)| r),
    ) {
        accept_vtl2_memory(shim_params, &mut local_map, range);
    }

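    // Accept any part of the bounce buffer that lies outside VTL2 RAM, then map it; it is used
    // below as a staging buffer when accepting imported regions that are still pending (shared).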
    let ram_buffer = if let Some(bounce_buffer) = shim_params.bounce_buffer {
        assert!(bounce_buffer.start() % X64_LARGE_PAGE_SIZE == 0);
        assert!(bounce_buffer.len() >= X64_LARGE_PAGE_SIZE);

        for range in memory_range::subtract_ranges(
            core::iter::once(bounce_buffer),
            partition_info.vtl2_ram.iter().map(|e| e.range),
        ) {
            accept_vtl2_memory(shim_params, &mut local_map, range);
        }

        // SAFETY: The bounce buffer is trusted as it is obtained from measured
        // shim parameters. The bootloader is identity mapped, and the PA is
        // guaranteed to be mapped as the pagetable is prebuilt and measured.
        unsafe {
            core::slice::from_raw_parts_mut(
                bounce_buffer.start() as *mut u8,
                bounce_buffer.len() as usize,
            )
        }
    } else {
        &mut []
    };

    // Iterate over all imported regions that are not already accepted. They must be accepted here.
    // TODO: No VTL0 memory is currently marked as pending.
    for (imported_range, already_accepted) in shim_params.imported_regions() {
        if !already_accepted {
            accept_pending_vtl2_memory(shim_params, &mut local_map, ram_buffer, imported_range);
        }
    }

    // TDX has specific memory initialization logic. Create a set of page tables for the APs
    // to use during the mailbox spinloop, and carve out memory for TDCALL-based hypercalls.
    if shim_params.isolation_type == IsolationType::Tdx {
        // Allocate a range of memory for AP page tables.
        let page_table_region = address_space
            .allocate_aligned(
                None,
                PAGE_TABLE_MAX_BYTES as u64,
                AllocationType::TdxPageTables,
                AllocationPolicy::LowMemory,
                X64_LARGE_PAGE_SIZE,
            )
            .expect("allocation of space for TDX page tables must succeed");

        // The local map maps a single 2MB PTE per allocation, so the page table region must fit
        // within one large page and start on a 2MB boundary.
        const_assert!((PAGE_TABLE_MAX_BYTES as u64) < X64_LARGE_PAGE_SIZE);
        assert_eq!(page_table_region.range.start() % X64_LARGE_PAGE_SIZE, 0);

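        // Map the page table allocation through the local map and clear it before building the
        // tables in place.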
        let mut local_map = local_map.expect("must be present on TDX");
        let page_table_region_mapping = local_map.map_pages(page_table_region.range, false);
        page_table_region_mapping.data.fill(0);

        const MAX_RANGE_COUNT: usize = 64;
        let mut ranges = off_stack!(
            ArrayVec::<MappedRange, MAX_RANGE_COUNT>,
            ArrayVec::new_const()
        );

        // All VTL2_RAM ranges should be present as R+X in the AP page table mappings; the mailbox
        // wakeup vector will be somewhere in this range, below the 4GB boundary.
        const AP_MEMORY_BOUNDARY: u64 = 4 * 1024 * 1024 * 1024;
        let vtl2_ram = address_space
            .vtl2_ranges()
            .filter_map(|(range, typ)| match typ {
                MemoryVtlType::VTL2_RAM => {
                    if range.start() < AP_MEMORY_BOUNDARY {
                        let end = if range.end() < AP_MEMORY_BOUNDARY {
                            range.end()
                        } else {
                            AP_MEMORY_BOUNDARY
                        };
                        Some(MappedRange::new(range.start(), end).read_only())
                    } else {
                        None
                    }
                }
                _ => None,
            });

        ranges.extend(vtl2_ram);

        // Map the reset vector as executable and writable, as the mailbox protocol uses offsets
        // in the reset vector to communicate with the kernel.
        const PAGE_SIZE: u64 = 0x1000;
        ranges.push(MappedRange::new(
            x86defs::tdx::RESET_VECTOR_PAGE,
            x86defs::tdx::RESET_VECTOR_PAGE + PAGE_SIZE,
        ));

        ranges.sort_by_key(|r| r.start());

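        // Pre-allocate zeroed page tables off the stack for the builder to work with.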
        let mut page_table_work_buffer =
            off_stack!(ArrayVec<PageTable, PAGE_TABLE_MAX_COUNT>, ArrayVec::new_const());
        for _ in 0..PAGE_TABLE_MAX_COUNT {
            page_table_work_buffer.push(PageTable::new_zeroed());
        }

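        // Build page tables covering `ranges` and write them into the mapped page table region,
        // using the pre-zeroed work buffer as scratch space.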
        PageTableBuilder::new(
            page_table_region.range.start(),
            page_table_work_buffer.as_mut_slice(),
            page_table_region_mapping.data,
            ranges.as_slice(),
        )
        .expect("page table builder must return no error")
        .build()
        .expect("page table construction must succeed");

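        // Point the AP trampoline at the newly built page tables so the APs use them while parked
        // in the mailbox spinloop.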
        crate::arch::tdx::tdx_prepare_ap_trampoline(page_table_region.range.start());

        // For TDVMCALL based hypercalls, take the first 2 MB region from ram_buffer for
        // hypercall IO pages. ram_buffer must not be used again beyond this point.
        // TODO: find an approach that does not require re-using the ram_buffer.
        let free_buffer = ram_buffer.as_mut_ptr() as u64;
        assert!(free_buffer.is_multiple_of(X64_LARGE_PAGE_SIZE));
        // SAFETY: The bottom 2MB region of the ram_buffer is unused by the shim.
        // The region is aligned to 2MB, and mapped as a large page.
        let tdx_io_page = unsafe {
            tdx_share_large_page(free_buffer);
            TdxHypercallPage::new(free_buffer)
        };
        hvcall().initialize_tdx(tdx_io_page);
    }
}

/// Accepts VTL2 memory in the specified gpa range.
fn accept_vtl2_memory(
    shim_params: &ShimParams,
    local_map: &mut Option<LocalMap<'_>>,
    range: MemoryRange,
) {
    match shim_params.isolation_type {
        IsolationType::Vbs => {
            hvcall()
                .accept_vtl2_pages(range, hvdef::hypercall::AcceptMemoryType::RAM)
                .expect("accepting vtl 2 memory must not fail");
        }
        IsolationType::Snp => {
            super::snp::set_page_acceptance(local_map.as_mut().unwrap(), range, true)
                .expect("accepting vtl 2 memory must not fail");
        }
        IsolationType::Tdx => {
            super::tdx::accept_pages(range).expect("accepting vtl2 memory must not fail")
        }
        _ => unreachable!(),
    }
}

/// Accepts VTL2 memory in the specified range that is currently marked as pending, i.e. not
/// yet assigned as exclusive and private.
fn accept_pending_vtl2_memory(
    shim_params: &ShimParams,
    local_map: &mut Option<LocalMap<'_>>,
    ram_buffer: &mut [u8],
    range: MemoryRange,
) {
    let isolation_type = shim_params.isolation_type;

    match isolation_type {
        IsolationType::Vbs => {
            hvcall()
                .accept_vtl2_pages(range, hvdef::hypercall::AcceptMemoryType::RAM)
                .expect("accepting vtl 2 memory must not fail");
        }
        IsolationType::Snp | IsolationType::Tdx => {
            let local_map = local_map.as_mut().unwrap();
            // Accepting pending memory for SNP is somewhat more complicated. The pending regions
            // are unencrypted pages. Accepting them would result in their contents being scrambled.
            // Instead their contents must be copied out to a private region, then copied back once
            // the pages have been accepted. Additionally, the access to the unencrypted pages must
            // happen with the C-bit cleared.
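            // Process the range in chunks of at most 2MB so that each chunk fits in the bounce
            // buffer, which is at least one large page in size.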
            let mut remaining = range;
            while !remaining.is_empty() {
                // Copy up to the next 2MB boundary.
                let range = MemoryRange::new(
                    remaining.start()
                        ..remaining.end().min(
                            (remaining.start() + X64_LARGE_PAGE_SIZE) & !(X64_LARGE_PAGE_SIZE - 1),
                        ),
                );
                remaining = MemoryRange::new(range.end()..remaining.end());

                let ram_buffer = &mut ram_buffer[..range.len() as usize];

                // Map the pages as shared and copy the necessary number to the buffer.
                {
                    let map_range = if isolation_type == IsolationType::Tdx {
                        // Set the vTOM bit on the addresses so the pages are accessed through
                        // their shared alias.
                        MemoryRange::new(
                            range.start() | TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT
                                ..range.end() | TDX_SHARED_GPA_BOUNDARY_ADDRESS_BIT,
                        )
                    } else {
                        range
                    };

                    let mapping = local_map.map_pages(map_range, false);
                    ram_buffer.copy_from_slice(mapping.data);
                }

                // Change visibility on the pages for this iteration.
                match isolation_type {
                    IsolationType::Snp => {
                        super::snp::Ghcb::change_page_visibility(range, false);
                    }
                    IsolationType::Tdx => {
                        super::tdx::change_page_visibility(range, false);
                    }
                    _ => unreachable!(),
                }

                // Accept the pages.
                match isolation_type {
                    IsolationType::Snp => {
                        super::snp::set_page_acceptance(local_map, range, true)
                            .expect("accepting vtl 2 memory must not fail");
                    }
                    IsolationType::Tdx => {
                        super::tdx::accept_pages(range)
                            .expect("accepting vtl 2 memory must not fail");
                    }
                    _ => unreachable!(),
                }

                // Copy the buffer back. Use the identity map now that the memory has been accepted.
                {
                    // SAFETY: Known memory region that was just accepted.
                    let mapping = unsafe {
                        core::slice::from_raw_parts_mut(
                            range.start() as *mut u8,
                            range.len() as usize,
                        )
                    };

                    mapping.copy_from_slice(ram_buffer);
                }
            }
        }
        _ => unreachable!(),
    }
}

/// Verify the SHA384 hash of pages that were imported as unaccepted/shared. Compare against the
/// desired hash that is passed in as a measured parameter. Failures result in a panic.
pub fn verify_imported_regions_hash(shim_params: &ShimParams) {
    // Non-isolated VMs can undergo servicing, and thus the hash might no longer be valid,
    // as the memory regions can change during runtime.
    if let IsolationType::None = shim_params.isolation_type {
        return;
    }

    // If all imported pages are already accepted, there is no need to verify the hash.
    if shim_params
        .imported_regions()
        .all(|(_, already_accepted)| already_accepted)
    {
        return;
    }

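    // Hash only the regions that were imported as shared/unaccepted, in the order they appear in
    // the measured parameters.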
    let mut hasher = Sha384::new();
    shim_params
        .imported_regions()
        .filter(|(_, already_accepted)| !already_accepted)
        .for_each(|(range, _)| {
            // SAFETY: The location and identity of the range is trusted as it is obtained from
            // measured shim parameters.
            let mapping = unsafe {
                core::slice::from_raw_parts(range.start() as *const u8, range.len() as usize)
            };
            hasher.update(mapping);
        });

    if hasher.finalize().as_slice() != shim_params.imported_regions_hash() {
        panic!("Imported regions hash mismatch");
    }
}