underhill_mem/mapping.rs
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

// UNSAFETY: Implementing GuestMemoryAccess.
#![expect(unsafe_code)]

use crate::MshvVtlWithPolicy;
use crate::RegistrationError;
use crate::registrar::MemoryRegistrar;
use guestmem::GuestMemoryAccess;
use guestmem::GuestMemoryBackingError;
use guestmem::PAGE_SIZE;
use hcl::ioctl::Mshv;
use hcl::ioctl::MshvVtlLow;
use inspect::Inspect;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use sparse_mmap::SparseMapping;
use std::ptr::NonNull;
use thiserror::Error;
use vm_topology::memory::MemoryLayout;

/// An implementation of the [`GuestMemoryAccess`] trait for Underhill VMs.
#[derive(Debug, Inspect)]
pub struct GuestMemoryMapping {
#[inspect(skip)]
mapping: SparseMapping,
iova_offset: Option<u64>,
#[inspect(with = "Option::is_some")]
bitmap: Option<SparseMapping>,
#[inspect(skip)]
bitmap_lock: Mutex<()>,
registrar: Option<MemoryRegistrar<MshvVtlWithPolicy>>,
}

/// Error constructing a [`GuestMemoryMapping`].
#[derive(Debug, Error)]
pub enum MappingError {
#[error("failed to allocate VA space for guest memory")]
Reserve(#[source] std::io::Error),
#[error("failed to map guest memory pages")]
Map(#[source] std::io::Error),
#[error("failed to allocate VA space for bitmap")]
BitmapReserve(#[source] std::io::Error),
#[error("failed to map zero pages for bitmap")]
BitmapMap(#[source] std::io::Error),
#[error("failed to allocate pages for bitmap")]
BitmapAlloc(#[source] std::io::Error),
#[error("memory map entry {0} has insufficient alignment to support a bitmap")]
BadAlignment(MemoryRange),
#[error("failed to open device")]
OpenDevice(#[source] hcl::ioctl::Error),
}

/// A builder for [`GuestMemoryMapping`].
pub struct GuestMemoryMappingBuilder {
physical_address_base: u64,
bitmap_state: Option<bool>,
shared: bool,
for_kernel_access: bool,
dma_base_address: Option<u64>,
ignore_registration_failure: bool,
}

impl GuestMemoryMappingBuilder {
    /// Set whether to allocate a bitmap to track memory access, and specify
    /// the initial state of the bitmap.
///
/// This is used to support tracking the shared/encrypted state of each
/// page.
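    ///
    /// For example, passing `Some(true)` creates a bitmap with every bit set,
    /// so every page starts out marked as accessible; passing `None` disables
    /// the bitmap entirely.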
///
/// FUTURE: use bitmaps to track VTL permissions as well, to support guest
/// VSM for hardware-isolated VMs.
pub fn use_bitmap(&mut self, initial_state: Option<bool>) -> &mut Self {
self.bitmap_state = initial_state;
self
}
/// Set whether this is a mapping to access shared memory.
pub fn shared(&mut self, is_shared: bool) -> &mut Self {
self.shared = is_shared;
self
}
/// Set whether this mapping's memory can be locked to pass to the kernel.
///
/// If so, then the memory will be registered with the kernel as part of
/// `expose_va`, which is called when memory is locked.
pub fn for_kernel_access(&mut self, for_kernel_access: bool) -> &mut Self {
self.for_kernel_access = for_kernel_access;
self
}
/// Sets the base address to use for DMAs to this memory.
///
/// This may be `None` if DMA is not supported.
///
/// The address to use depends on the backing technology. For SNP VMs, it
/// should be either zero or the VTOM address, since shared memory is mapped
/// twice. For TDX VMs, shared memory is only mapped once, but the IOMMU
/// expects the SHARED bit to be set in DMA transactions, so it should be
/// set here. And for non-isolated/software-isolated VMs, it should be zero
/// or the VTL0 alias address, depending on which VTL this memory mapping is
/// for.
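    ///
    /// For example (illustrative only), if this is set to the VTOM address on
    /// an SNP VM, a page at guest physical address `g` is handed out for
    /// device DMA at IOVA `vtom + g`.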
pub fn dma_base_address(&mut self, dma_base_address: Option<u64>) -> &mut Self {
self.dma_base_address = dma_base_address;
self
}
/// Ignore registration failures when registering memory with the kernel.
///
/// This should be used when user mode is restarted for servicing but the
/// kernel is not. Since this is not currently a production scenario, this
/// is a simple way to avoid needing to track the state of the kernel
/// registration across user-mode restarts.
///
/// It is not a good idea to enable this otherwise, since the kernel very
/// noisily complains if memory is registered twice, so we don't want that
/// leaking into production scenarios.
///
    /// FUTURE: fix the kernel to silently succeed on duplicate registrations.
pub fn ignore_registration_failure(&mut self, ignore: bool) -> &mut Self {
self.ignore_registration_failure = ignore;
self
}
    /// Map the lower VTL address space.
    ///
    /// If the mapping was configured as shared, map the kernel mapping as
    /// shared memory.
    ///
    /// The file offset for each memory range is built from the physical
    /// address base passed to [`GuestMemoryMapping::builder`], with the high
    /// bit set when the mapping is shared to select decrypted/shared memory.
    /// The base can be the VTL0 alias map start for non-isolated VMs.
    ///
    /// When handing out IOVAs for device DMA, the configured DMA base address
    /// is added. This can be VTOM for SNP-isolated VMs, or it can be the VTL0
    /// alias map start for non-isolated VMs.
    ///
    /// If a bitmap state was configured via
    /// [`use_bitmap`](Self::use_bitmap), a bitmap is created to track the
    /// accessibility state of each page in the lower VTL memory. The bitmap
    /// is initialized to the provided state.
pub fn build(
&self,
mshv_vtl_low: &MshvVtlLow,
memory_layout: &MemoryLayout,
) -> Result<GuestMemoryMapping, MappingError> {
// Calculate the file offset within the `mshv_vtl_low` file.
let file_starting_offset = self.physical_address_base
| if self.shared {
MshvVtlLow::SHARED_MEMORY_FLAG
} else {
0
};
// Calculate the total size of the address space by looking at the ending region.
let last_entry = memory_layout
.ram()
.last()
.expect("memory map must have at least 1 entry");
let address_space_size = last_entry.range.end();
let mapping =
SparseMapping::new(address_space_size as usize).map_err(MappingError::Reserve)?;
tracing::trace!(?mapping, "map_lower_vtl_memory mapping");
let bitmap = if self.bitmap_state.is_some() {
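            // Reserve one bit per page of address space, rounded up to whole
            // bytes.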
let bitmap = SparseMapping::new((address_space_size as usize / PAGE_SIZE + 7) / 8)
.map_err(MappingError::BitmapReserve)?;
bitmap
.map_zero(0, bitmap.len())
.map_err(MappingError::BitmapMap)?;
Some(bitmap)
} else {
None
};
// Loop through each of the memory map entries and create a mapping for it.
for entry in memory_layout.ram() {
if entry.range.is_empty() {
continue;
}
let base_addr = entry.range.start();
let file_offset = file_starting_offset.checked_add(base_addr).unwrap();
tracing::trace!(base_addr, file_offset, "mapping lower ram");
mapping
.map_file(
base_addr as usize,
entry.range.len() as usize,
mshv_vtl_low.get(),
file_offset,
true,
)
.map_err(MappingError::Map)?;
if let Some(bitmap) = &bitmap {
// To simplify bitmap implementation, require that all memory
// regions be 8-page aligned. Relax this if necessary.
if entry.range.start() % (PAGE_SIZE as u64 * 8) != 0
|| entry.range.end() % (PAGE_SIZE as u64 * 8) != 0
{
return Err(MappingError::BadAlignment(entry.range));
}
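                // Convert the range to byte offsets within the bitmap, then
                // to the bitmap pages that must be backed by real memory.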
let bitmap_start = entry.range.start() as usize / PAGE_SIZE / 8;
let bitmap_end = (entry.range.end() - 1) as usize / PAGE_SIZE / 8;
let bitmap_page_start = bitmap_start / PAGE_SIZE;
let bitmap_page_end = bitmap_end / PAGE_SIZE;
let page_count = bitmap_page_end + 1 - bitmap_page_start;
// TODO SNP: map some pre-reserved lower VTL memory into the
// bitmap. Or just figure out how to hot add that memory to the
// kernel. Or have the boot loader reserve it at boot time.
bitmap
.alloc(bitmap_page_start * PAGE_SIZE, page_count * PAGE_SIZE)
.map_err(MappingError::BitmapAlloc)?;
}
tracing::trace!(?entry, "mapped memory map entry");
}
// Set the initial bitmap state.
if let Some((bitmap, true)) = bitmap.as_ref().zip(self.bitmap_state) {
for entry in memory_layout.ram() {
let start_gpn = entry.range.start() / PAGE_SIZE as u64;
let gpn_count = entry.range.len() / PAGE_SIZE as u64;
                assert_eq!(start_gpn % 8, 0);
assert_eq!(gpn_count % 8, 0);
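                // Each bitmap byte covers 8 pages, so whole bytes can be
                // filled with 0xff.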
bitmap
.fill_at(start_gpn as usize / 8, 0xff, gpn_count as usize / 8)
.unwrap();
}
}
let registrar = if self.for_kernel_access {
let mshv = Mshv::new().map_err(MappingError::OpenDevice)?;
let mshv_vtl = mshv.create_vtl().map_err(MappingError::OpenDevice)?;
Some(MemoryRegistrar::new(
memory_layout,
self.physical_address_base,
MshvVtlWithPolicy {
mshv_vtl,
ignore_registration_failure: self.ignore_registration_failure,
shared: self.shared,
},
))
} else {
None
};
Ok(GuestMemoryMapping {
mapping,
iova_offset: self.dma_base_address,
bitmap,
bitmap_lock: Default::default(),
registrar,
})
}
}

impl GuestMemoryMapping {
/// Create a new builder for a guest memory mapping.
///
/// Map all ranges with a physical address offset of
/// `physical_address_base`. This can be zero, or the VTOM address for SNP,
/// or the VTL0 alias address for non-isolated/software-isolated VMs.
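    ///
    /// A minimal construction sketch (illustrative only; assumes an
    /// [`MshvVtlLow`] device and a [`MemoryLayout`] are already available as
    /// `mshv_vtl_low` and `layout`):
    ///
    /// ```ignore
    /// let mapping = GuestMemoryMapping::builder(0)
    ///     .use_bitmap(Some(true))
    ///     .shared(false)
    ///     .for_kernel_access(true)
    ///     .build(&mshv_vtl_low, &layout)?;
    /// ```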
pub fn builder(physical_address_base: u64) -> GuestMemoryMappingBuilder {
GuestMemoryMappingBuilder {
physical_address_base,
bitmap_state: None,
shared: false,
for_kernel_access: false,
dma_base_address: None,
ignore_registration_failure: false,
}
}
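    /// Returns whether the bitmap bit for `gpn` is set.
    ///
    /// Panics if this mapping was constructed without a bitmap.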
pub(crate) fn check_bitmap(&self, gpn: u64) -> bool {
let bitmap = self.bitmap.as_ref().unwrap();
let mut b = 0;
bitmap
.read_at(gpn as usize / 8, std::slice::from_mut(&mut b))
.unwrap();
b & (1 << (gpn % 8)) != 0
}
    /// Sets the bitmap state for each page in `range`.
    ///
    /// Panics if the range is outside of guest RAM.
pub fn update_bitmap(&self, range: MemoryRange, state: bool) {
let bitmap = self.bitmap.as_ref().unwrap();
let _lock = self.bitmap_lock.lock();
for gpn in range.start() / PAGE_SIZE as u64..range.end() / PAGE_SIZE as u64 {
// TODO: use `fill_at` for the aligned part of the range.
let mut b = 0;
bitmap
.read_at(gpn as usize / 8, std::slice::from_mut(&mut b))
.unwrap();
if state {
b |= 1 << (gpn % 8);
} else {
b &= !(1 << (gpn % 8));
}
bitmap
.write_at(gpn as usize / 8, std::slice::from_ref(&b))
.unwrap();
}
}
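    /// Zeroes the mapped guest memory for `range`.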
pub(crate) fn zero_range(
&self,
range: MemoryRange,
) -> Result<(), sparse_mmap::SparseMappingError> {
self.mapping
.fill_at(range.start() as usize, 0, range.len() as usize)
}
}

/// SAFETY: Implementing the `GuestMemoryAccess` contract, including the
/// size and lifetime of the mappings and bitmaps.
unsafe impl GuestMemoryAccess for GuestMemoryMapping {
fn mapping(&self) -> Option<NonNull<u8>> {
NonNull::new(self.mapping.as_ptr().cast())
}
fn max_address(&self) -> u64 {
self.mapping.len() as u64
}
fn expose_va(&self, address: u64, len: u64) -> Result<(), GuestMemoryBackingError> {
if let Some(registrar) = &self.registrar {
registrar
.register(address, len)
.map_err(|start| GuestMemoryBackingError::new(start, RegistrationError))
} else {
// TODO: fail this call once we have a way to avoid calling this for
// user-mode-only accesses to locked memory (e.g., for vmbus ring
// buffers). We can't fail this for now because TDX cannot register
// encrypted memory.
Ok(())
}
}
fn base_iova(&self) -> Option<u64> {
// When the alias map is configured for this mapping, VTL2-mapped
// devices need to do DMA with the alias map bit set to avoid DMAing
// into VTL1 memory.
self.iova_offset
}
fn access_bitmap(&self) -> Option<guestmem::BitmapInfo> {
self.bitmap.as_ref().map(|bitmap| {
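            // The same bitmap is shared by read, write, and execute access:
            // it tracks per-page accessibility (e.g. shared vs. encrypted)
            // rather than per-access-type permissions.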
let ptr = NonNull::new(bitmap.as_ptr().cast()).unwrap();
guestmem::BitmapInfo {
read_bitmap: ptr,
write_bitmap: ptr,
execute_bitmap: ptr,
bit_offset: 0,
}
})
}
}