Skip to main content

underhill_mem/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Underhill VM memory management.
5
6#![cfg(target_os = "linux")]
7
8mod init;
9mod mapping;
10mod registrar;
11
12pub use init::BootInit;
13pub use init::Init;
14pub use init::MemoryMappings;
15pub use init::init;
16
17use aarch64defs::rsi::CcaMemPermIndex;
18use cvm_tracing::CVM_ALLOWED;
19use guestmem::GuestMemoryBackingError;
20use guestmem::PAGE_SIZE;
21use guestmem::ranges::PagedRange;
22use hcl::GuestVtl;
23use hcl::ioctl::AcceptPagesError;
24use hcl::ioctl::ApplyVtlProtectionsError;
25use hcl::ioctl::Mshv;
26use hcl::ioctl::MshvHvcall;
27use hcl::ioctl::MshvVtl;
28use hcl::ioctl::snp::SnpPageError;
29use hv1_structs::VtlArray;
30use hvdef::HV_MAP_GPA_PERMISSIONS_ALL;
31use hvdef::HV_MAP_GPA_PERMISSIONS_NONE;
32use hvdef::HV_PAGE_SHIFT;
33use hvdef::HV_PAGE_SIZE;
34use hvdef::HvError;
35use hvdef::HvMapGpaFlags;
36use hvdef::HypercallCode;
37use hvdef::hypercall::AcceptMemoryType;
38use hvdef::hypercall::HostVisibilityType;
39use hvdef::hypercall::HvInputVtl;
40use mapping::GuestMemoryMapping;
41use mapping::GuestValidMemory;
42use memory_range::MemoryRange;
43use parking_lot::Mutex;
44use parking_lot::MutexGuard;
45use registrar::RegisterMemory;
46use std::collections::VecDeque;
47use std::sync::Arc;
48use std::sync::atomic::AtomicBool;
49use thiserror::Error;
50use virt::IsolationType;
51use virt_mshv_vtl::GpnSource;
52use virt_mshv_vtl::ProtectIsolatedMemory;
53use virt_mshv_vtl::TlbFlushLockAccess;
54use vm_topology::memory::MemoryLayout;
55use x86defs::snp::SevRmpAdjust;
56use x86defs::tdx::GpaVmAttributes;
57use x86defs::tdx::GpaVmAttributesMask;
58use x86defs::tdx::TdgMemPageAttrWriteR8;
59use x86defs::tdx::TdgMemPageGpaAttr;
60
61/// Error querying vtl permissions on a page
62#[derive(Debug, Error)]
63pub enum QueryVtlPermissionsError {
64    /// An SNP-specific error
65    #[error("failed to query rmp permissions")]
66    Snp(#[source] SnpPageError),
67}
68
69#[derive(Debug)]
70struct MshvVtlWithPolicy {
71    mshv_vtl: MshvVtl,
72    ignore_registration_failure: bool,
73    shared: bool,
74}
75
76impl RegisterMemory for MshvVtlWithPolicy {
77    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
78        match self.mshv_vtl.add_vtl0_memory(range, self.shared) {
79            Ok(()) => Ok(()),
80            // TODO: remove this once the kernel driver tracks registration
81            Err(err) if self.ignore_registration_failure => {
82                tracing::warn!(
83                    CVM_ALLOWED,
84                    error = &err as &dyn std::error::Error,
85                    "registration failure, could be expected"
86                );
87                Ok(())
88            }
89            Err(err) => Err(err),
90        }
91    }
92}
93
94#[derive(Debug, Error)]
95#[error("failed to register memory with kernel")]
96struct RegistrationError;
97
98/// Currently built for hardware CVMs, which only define permissions for VTL
99/// 0 and VTL 1 to express what those VTLs have access to. If this were to
100/// extend to non-hardware CVMs, those would need to define permissions
101/// instead for VTL 2 and VTL 1 to express what the lower VTLs have access
102/// to.
103///
104/// Default VTL memory permissions applied to any mapped memory
105struct DefaultVtlPermissions {
106    vtl0: HvMapGpaFlags,
107    vtl1: Option<HvMapGpaFlags>,
108}
109
110impl DefaultVtlPermissions {
111    fn set(&mut self, vtl: GuestVtl, permissions: HvMapGpaFlags) {
112        match vtl {
113            GuestVtl::Vtl0 => self.vtl0 = permissions,
114            GuestVtl::Vtl1 => self.vtl1 = Some(permissions),
115        }
116    }
117}
118
119/// Represents the vtl permissions on a page for a given isolation type
120#[derive(Copy, Clone)]
121enum GpaVtlPermissions {
122    Vbs(HvMapGpaFlags),
123    Snp(SevRmpAdjust),
124    Tdx(TdgMemPageGpaAttr, TdgMemPageAttrWriteR8),
125    // TODO: CCA: we need to use the 'vtl' and 'protections' below to get the correct index
126    // This implies that we've set up the index list properly, and we just select the right one here
127    Cca(CcaMemPermIndex),
128}
129
130impl GpaVtlPermissions {
131    fn new(isolation: IsolationType, vtl: GuestVtl, protections: HvMapGpaFlags) -> Self {
132        match isolation {
133            IsolationType::None => unreachable!(),
134            IsolationType::Vbs => GpaVtlPermissions::Vbs(protections),
135            IsolationType::Snp => {
136                let mut vtl_permissions = GpaVtlPermissions::Snp(SevRmpAdjust::new());
137                vtl_permissions.set(vtl, protections);
138                vtl_permissions
139            }
140            IsolationType::Tdx => {
141                let mut vtl_permissions =
142                    GpaVtlPermissions::Tdx(TdgMemPageGpaAttr::new(), TdgMemPageAttrWriteR8::new());
143                vtl_permissions.set(vtl, protections);
144                vtl_permissions
145            }
146            IsolationType::Cca => {
147                let mut vtl_permissions = GpaVtlPermissions::Cca(CcaMemPermIndex::default());
148                vtl_permissions.set(vtl, protections);
149                vtl_permissions
150            }
151        }
152    }
153
154    fn set(&mut self, vtl: GuestVtl, protections: HvMapGpaFlags) {
155        match self {
156            GpaVtlPermissions::Vbs(flags) => *flags = protections,
157            GpaVtlPermissions::Snp(rmpadjust) => {
158                *rmpadjust = SevRmpAdjust::new()
159                    .with_enable_read(protections.readable())
160                    .with_enable_write(protections.writable())
161                    .with_enable_user_execute(protections.user_executable())
162                    .with_enable_kernel_execute(protections.kernel_executable())
163                    .with_target_vmpl(match vtl {
164                        GuestVtl::Vtl0 => x86defs::snp::Vmpl::Vmpl2.into(),
165                        GuestVtl::Vtl1 => x86defs::snp::Vmpl::Vmpl1.into(),
166                    });
167            }
168            GpaVtlPermissions::Tdx(attributes, mask) => {
169                let vm_attributes = GpaVmAttributes::new()
170                    .with_valid(true)
171                    .with_read(protections.readable())
172                    .with_write(protections.writable())
173                    .with_kernel_execute(protections.kernel_executable())
174                    .with_user_execute(protections.user_executable());
175
176                let (new_attributes, new_mask) = match vtl {
177                    GuestVtl::Vtl0 => {
178                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(vm_attributes);
179                        let mask = TdgMemPageAttrWriteR8::new()
180                            .with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
181                        (attributes, mask)
182                    }
183                    GuestVtl::Vtl1 => {
184                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm2(vm_attributes);
185                        let mask = TdgMemPageAttrWriteR8::new()
186                            .with_l2_vm2(GpaVmAttributesMask::ALL_CHANGED);
187                        (attributes, mask)
188                    }
189                };
190
191                *attributes = new_attributes;
192                *mask = new_mask;
193            }
194            GpaVtlPermissions::Cca(_index) => {
195                tracing::debug!("cca: GpaVtlPermissions::set is doing nothing now");
196            }
197        }
198    }
199}
200
201/// Error returned when modifying gpa visibility.
202#[derive(Debug, Error)]
203#[error("failed to modify gpa visibility, elements successfully processed {processed}")]
204pub struct ModifyGpaVisibilityError {
205    source: HvError,
206    processed: usize,
207}
208
209/// Interface to accept and manipulate lower VTL memory acceptance and page
210/// protections.
211///
212/// FUTURE: this should go away as a separate object once all the logic is moved
213/// into this crate.
214pub struct MemoryAcceptor {
215    mshv_hvcall: MshvHvcall,
216    mshv_vtl: MshvVtl,
217    isolation: IsolationType,
218}
219
220impl MemoryAcceptor {
221    /// Create a new instance.
222    pub fn new(isolation: IsolationType) -> Result<Self, hcl::ioctl::Error> {
223        let mshv = Mshv::new()?;
224        let mshv_vtl = mshv.create_vtl()?;
225        let mshv_hvcall = MshvHvcall::new()?;
226        mshv_hvcall.set_allowed_hypercalls(&[
227            HypercallCode::HvCallAcceptGpaPages,
228            HypercallCode::HvCallModifySparseGpaPageHostVisibility,
229            HypercallCode::HvCallModifyVtlProtectionMask,
230        ]);
231
232        // On boot, VTL 0 should have permissions.
233        Ok(Self {
234            mshv_hvcall,
235            mshv_vtl,
236            isolation,
237        })
238    }
239
240    /// Accept pages for lower VTLs.
241    pub fn accept_lower_vtl_pages(&self, range: MemoryRange) -> Result<(), AcceptPagesError> {
242        match self.isolation {
243            IsolationType::None => unreachable!(),
244            IsolationType::Vbs => self
245                .mshv_hvcall
246                .accept_gpa_pages(range, AcceptMemoryType::RAM),
247            IsolationType::Snp => {
248                self.mshv_vtl
249                    .pvalidate_pages(range, true, false)
250                    .map_err(|err| AcceptPagesError::Snp {
251                        failed_operation: err,
252                        range,
253                    })
254            }
255            IsolationType::Tdx => {
256                let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(GpaVmAttributes::FULL_ACCESS);
257                let mask =
258                    TdgMemPageAttrWriteR8::new().with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
259
260                self.mshv_vtl
261                    .tdx_accept_pages(range, Some((attributes, mask)))
262                    .map_err(|err| AcceptPagesError::Tdx { error: err, range })
263            }
264            IsolationType::Cca => {
265                // TODO: CCA: do we need to set RIPAS here?
266                Ok(())
267            }
268        }
269    }
270
271    fn unaccept_lower_vtl_pages(&self, range: MemoryRange) {
272        match self.isolation {
273            IsolationType::None => unreachable!(),
274            IsolationType::Vbs => {
275                // TODO VBS: is there something to do here?
276            }
277            IsolationType::Snp => {
278                // Revoke permissions before unaccepting pages. This is required
279                // because a subsequent page acceptance is not guaranteed to
280                // reset permissions unless the hypervisor executed RMPUPDATE,
281                // which it cannot be trusted to do. We set new permissions
282                // ourselves, but that still leaves open a tiny window where the
283                // guest could access the pages with the old permissions.
284                for lower_vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
285                    self.apply_protections(range, lower_vtl, HV_MAP_GPA_PERMISSIONS_NONE)
286                        .unwrap();
287                }
288                self.mshv_vtl.pvalidate_pages(range, false, false).unwrap()
289            }
290
291            IsolationType::Tdx => {
292                // Nothing to do for TDX.
293            }
294            IsolationType::Cca => {
295                // TODO: CCA: anything to do here?
296            }
297        }
298    }
299
300    /// Tell the host to change the visibility of the given GPAs.
301    pub fn modify_gpa_visibility(
302        &self,
303        host_visibility: HostVisibilityType,
304        gpns: &[u64],
305    ) -> Result<(), ModifyGpaVisibilityError> {
306        self.mshv_hvcall
307            .modify_gpa_visibility(host_visibility, gpns)
308            .map_err(|(e, processed)| ModifyGpaVisibilityError {
309                source: e,
310                processed,
311            })
312    }
313
314    /// Apply the initial protections on lower-vtl memory.
315    ///
316    /// After initialization, the default protections should be applied.
317    pub fn apply_initial_lower_vtl_protections(
318        &self,
319        range: MemoryRange,
320    ) -> Result<(), ApplyVtlProtectionsError> {
321        self.apply_protections(range, GuestVtl::Vtl0, HV_MAP_GPA_PERMISSIONS_ALL)
322    }
323
324    fn apply_protections(
325        &self,
326        range: MemoryRange,
327        vtl: GuestVtl,
328        flags: HvMapGpaFlags,
329    ) -> Result<(), ApplyVtlProtectionsError> {
330        let permissions = GpaVtlPermissions::new(self.isolation, vtl, flags);
331
332        match permissions {
333            GpaVtlPermissions::Vbs(flags) => {
334                // For VBS-isolated VMs, the permissions apply to all lower
335                // VTLs. Therefore VTL 0 cannot set its own permissions.
336                assert_ne!(vtl, GuestVtl::Vtl0);
337
338                self.mshv_hvcall
339                    .modify_vtl_protection_mask(range, flags, HvInputVtl::from(vtl))
340            }
341            GpaVtlPermissions::Snp(rmpadjust) => {
342                // For SNP VMs, the permissions apply to the specified VTL.
343                // Therefore VTL 2 cannot specify its own permissions.
344                self.mshv_vtl
345                    .rmpadjust_pages(range, rmpadjust, false)
346                    .map_err(|err| ApplyVtlProtectionsError::Snp {
347                        failed_operation: err,
348                        range,
349                        permissions: rmpadjust,
350                        vtl: vtl.into(),
351                    })
352            }
353            GpaVtlPermissions::Tdx(attributes, mask) => {
354                // For TDX VMs, the permissions apply to the specified VTL.
355                // Therefore VTL 2 cannot specify its own permissions.
356                self.mshv_vtl
357                    .tdx_set_page_attributes(range, attributes, mask)
358                    .map_err(|err| ApplyVtlProtectionsError::Tdx {
359                        error: err,
360                        range,
361                        permissions: attributes,
362                        vtl: vtl.into(),
363                    })
364            }
365            GpaVtlPermissions::Cca(_index) => {
366                self.mshv_vtl.rsi_set_mem_perm(vtl, &range).map_err(|_err| {
367                    ApplyVtlProtectionsError::Cca {
368                        range,
369                        vtl: vtl.into(),
370                    }
371                })
372            }
373        }
374    }
375}
376
377/// An implementation of [`ProtectIsolatedMemory`] for Underhill VMs.
378pub struct HardwareIsolatedMemoryProtector {
379    // Serves as a lock for synchronizing visibility and page-protection changes.
380    inner: Mutex<HardwareIsolatedMemoryProtectorInner>,
381    layout: MemoryLayout,
382    acceptor: Arc<MemoryAcceptor>,
383    vtl0: Arc<GuestMemoryMapping>,
384    vtl1_protections_enabled: AtomicBool,
385}
386
387struct HardwareIsolatedMemoryProtectorInner {
388    valid_encrypted: Arc<GuestValidMemory>,
389    valid_shared: Arc<GuestValidMemory>,
390    encrypted: Arc<GuestMemoryMapping>,
391    default_vtl_permissions: DefaultVtlPermissions,
392    overlay_pages: VtlArray<Vec<OverlayPage>, 2>,
393    locked_pages: VtlArray<Vec<Box<[u64]>>, 2>,
394}
395
396struct OverlayPage {
397    gpn: u64,
398    previous_permissions: HvMapGpaFlags,
399    overlay_permissions: HvMapGpaFlags,
400    ref_count: u16,
401    gpn_source: GpnSource,
402}
403
404impl HardwareIsolatedMemoryProtector {
405    /// Returns a new instance.
406    ///
407    /// `shared` provides the mapping for shared memory. `vtl0` provides the
408    /// mapping for encrypted memory.
409    pub fn new(
410        valid_encrypted: Arc<GuestValidMemory>,
411        valid_shared: Arc<GuestValidMemory>,
412        encrypted: Arc<GuestMemoryMapping>,
413        vtl0: Arc<GuestMemoryMapping>,
414        layout: MemoryLayout,
415        acceptor: Arc<MemoryAcceptor>,
416    ) -> Self {
417        Self {
418            inner: Mutex::new(HardwareIsolatedMemoryProtectorInner {
419                valid_encrypted,
420                valid_shared,
421                encrypted,
422                // Grant only VTL 0 all permissions. This will be altered
423                // later by VTL 1 enablement and by VTL 1 itself.
424                default_vtl_permissions: DefaultVtlPermissions {
425                    vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
426                    vtl1: None,
427                },
428                overlay_pages: VtlArray::from_fn(|_| Vec::new()),
429                locked_pages: VtlArray::from_fn(|_| Vec::new()),
430            }),
431            layout,
432            acceptor,
433            vtl0,
434            vtl1_protections_enabled: AtomicBool::new(false),
435        }
436    }
437
438    fn apply_protections_with_overlay_handling(
439        &self,
440        range: MemoryRange,
441        target_vtl: GuestVtl,
442        protections: HvMapGpaFlags,
443        inner: &mut MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
444    ) -> Result<(), ApplyVtlProtectionsError> {
445        let mut range_queue = VecDeque::new();
446        range_queue.push_back(range);
447
448        'outer: while let Some(range) = range_queue.pop_front() {
449            for overlay_page in inner.overlay_pages[target_vtl].iter_mut() {
450                let overlay_addr = overlay_page.gpn * HV_PAGE_SIZE;
451                if range.contains_addr(overlay_addr) {
452                    // If the overlay page is within the range, update the
453                    // permissions that will be restored when it is unlocked.
454                    overlay_page.previous_permissions = protections;
455                    // And split the range around it.
456                    let (left, right_with_overlay) =
457                        range.split_at_offset(range.offset_of(overlay_addr).unwrap());
458                    let (overlay, right) = right_with_overlay.split_at_offset(HV_PAGE_SIZE);
459                    debug_assert_eq!(overlay.start_4k_gpn(), overlay_page.gpn);
460                    debug_assert_eq!(overlay.len(), HV_PAGE_SIZE);
461                    if !left.is_empty() {
462                        range_queue.push_back(left);
463                    }
464                    if !right.is_empty() {
465                        range_queue.push_back(right);
466                    }
467                    continue 'outer;
468                }
469            }
470            // We can only reach here if the range does not contain any overlay
471            // pages, so now we can apply the protections to the range.
472            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)?
473        }
474
475        Ok(())
476    }
477
478    fn apply_protections(
479        &self,
480        range: MemoryRange,
481        target_vtl: GuestVtl,
482        protections: HvMapGpaFlags,
483        gpn_source: GpnSource,
484    ) -> Result<(), ApplyVtlProtectionsError> {
485        if gpn_source == GpnSource::GuestMemory && target_vtl == GuestVtl::Vtl0 {
486            // Only permissions imposed on VTL 0 guest memory are explicitly tracked
487            self.vtl0.update_permission_bitmaps(range, protections);
488        }
489        self.acceptor
490            .apply_protections(range, target_vtl, protections)
491    }
492
493    /// Get the permissions that the given VTL has to the given GPN.
494    ///
495    /// This function does not check for any protections applied by VTL 2,
496    /// only those applied by lower VTLs.
497    fn query_lower_vtl_permissions(
498        &self,
499        vtl: GuestVtl,
500        gpn: u64,
501    ) -> Result<HvMapGpaFlags, HvError> {
502        if !self.is_in_guest_memory(gpn) {
503            return Err(HvError::OperationDenied);
504        }
505
506        let res = match vtl {
507            GuestVtl::Vtl0 => self
508                .vtl0
509                .query_access_permission(gpn)
510                .unwrap_or(HV_MAP_GPA_PERMISSIONS_ALL),
511            GuestVtl::Vtl1 => HV_MAP_GPA_PERMISSIONS_ALL,
512        };
513
514        Ok(res)
515    }
516
517    fn check_gpn_not_locked(
518        &self,
519        inner: &MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
520        vtl: GuestVtl,
521        gpn: u64,
522    ) -> Result<(), HvError> {
523        // Overlay pages have special handling, being locked does not prevent that.
524        // TODO: When uh_mem implements the returning of overlay pages, rather than
525        // requiring them to also be locked through guestmem, the check for overlay
526        // pages can be removed, as locked and overlay pages will be mutually exclusive.
527        if inner.locked_pages[vtl].iter().flatten().any(|x| *x == gpn)
528            && !inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn)
529        {
530            return Err(HvError::OperationDenied);
531        }
532        Ok(())
533    }
534
535    /// Checks whether the given GPN is present in guest RAM.
536    fn is_in_guest_memory(&self, gpn: u64) -> bool {
537        let gpa = gpn << HV_PAGE_SHIFT;
538        self.layout.ram().iter().any(|r| r.range.contains_addr(gpa))
539    }
540}
541
542impl ProtectIsolatedMemory for HardwareIsolatedMemoryProtector {
543    fn change_host_visibility(
544        &self,
545        vtl: GuestVtl,
546        shared: bool,
547        gpns: &[u64],
548        tlb_access: &mut dyn TlbFlushLockAccess,
549    ) -> Result<(), (HvError, usize)> {
550        let inner = self.inner.lock();
551
552        for &gpn in gpns {
553            // Validate the ranges are RAM.
554            if !self.is_in_guest_memory(gpn) {
555                return Err((HvError::OperationDenied, 0));
556            }
557
558            // Validate they're not locked.
559            self.check_gpn_not_locked(&inner, vtl, gpn)
560                .map_err(|x| (x, 0))?;
561
562            // Don't allow overlay pages to be shared.
563            if shared && inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn) {
564                return Err((HvError::OperationDenied, 0));
565            }
566        }
567
568        // Filter out the GPNs that are already in the correct state. If the
569        // page is becoming shared, make sure the requesting VTL has read/write
570        // vtl permissions to the page.
571        let orig_gpns = gpns;
572        let mut failed_vtl_permission_index = None;
573        let gpns = gpns
574            .iter()
575            .copied()
576            .enumerate()
577            .take_while(|&(index, gpn)| {
578                if vtl == GuestVtl::Vtl0 && shared && self.vtl1_protections_enabled() {
579                    let permissions = self
580                        .vtl0
581                        .query_access_permission(gpn)
582                        .expect("vtl 1 protections enabled, vtl permissions should be tracked");
583                    if !permissions.readable() || !permissions.writable() {
584                        failed_vtl_permission_index = Some(index);
585                        false
586                    } else {
587                        true
588                    }
589                } else {
590                    true
591                }
592            })
593            .filter_map(|(_, gpn)| {
594                if inner.valid_shared.check_valid(gpn) != shared {
595                    Some(gpn)
596                } else {
597                    None
598                }
599            })
600            .collect::<Vec<_>>();
601
602        tracing::debug!(
603            orig = orig_gpns.len(),
604            len = gpns.len(),
605            first = gpns.first(),
606            shared,
607            "change vis"
608        );
609
610        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, &gpns)
611            .unwrap()
612            .ranges()
613            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
614            .collect::<Result<Vec<_>, _>>()
615            .unwrap(); // Ok to unwrap, we've validated the gpns above.
616
617        // Prevent accesses via the wrong address.
618        let clear_bitmap = if shared {
619            &inner.valid_encrypted
620        } else {
621            &inner.valid_shared
622        };
623
624        for &range in &ranges {
625            if shared && vtl == GuestVtl::Vtl0 {
626                // Accessing these pages through the encrypted mapping is now
627                // invalid. Make sure the VTL bitmaps reflect this. We could
628                // call apply_protections here but that would result in an extra
629                // hardware interaction that we don't need since we're about to
630                // unaccept the pages anyways.
631                self.vtl0
632                    .update_permission_bitmaps(range, HV_MAP_GPA_PERMISSIONS_NONE);
633            }
634
635            clear_bitmap.update_valid(range, false);
636        }
637
638        // There may be other threads concurrently accessing these pages. We
639        // cannot change the page visibility state until these threads have
640        // stopped those accesses. Flush the RCU domain that `guestmem` uses in
641        // order to flush any threads accessing the pages. After this, we are
642        // guaranteed no threads are accessing these pages (unless the pages are
643        // also locked), since no bitmap currently allows access.
644        guestmem::rcu().synchronize_blocking();
645
646        if let IsolationType::Snp = self.acceptor.isolation {
647            // We need to ensure that the guest TLB has been fully flushed since
648            // the unaccept operation is not guaranteed to do so in hardware,
649            // and the hypervisor is also not trusted with TLB hygiene.
650            tlb_access.flush_entire();
651        }
652
653        if shared {
654            // Unaccept the pages so that the hypervisor can reclaim them.
655            for &range in &ranges {
656                self.acceptor.unaccept_lower_vtl_pages(range);
657            }
658        }
659
660        // Ask the hypervisor to update visibility.
661        let host_visibility = if shared {
662            HostVisibilityType::SHARED
663        } else {
664            HostVisibilityType::PRIVATE
665        };
666
667        let (result, ranges) = match self.acceptor.modify_gpa_visibility(host_visibility, &gpns) {
668            Ok(()) => {
669                // All gpns succeeded, so the whole set of ranges should be
670                // processed.
671                (
672                    match failed_vtl_permission_index {
673                        Some(index) => Err((HvError::AccessDenied, index)),
674                        None => Ok(()),
675                    },
676                    ranges,
677                )
678            }
679            Err(err) => {
680                if shared {
681                    // A transition from private to shared should always
682                    // succeed. There is no safe rollback path, so we must
683                    // panic.
684                    panic!(
685                        "the hypervisor refused to transition pages to shared, we cannot safely roll back: {:?}",
686                        err
687                    );
688                }
689
690                // Only some ranges succeeded. Recreate ranges based on which
691                // gpns succeeded, for further processing.
692                let (successful_gpns, failed_gpns) = gpns.split_at(err.processed);
693                let ranges = PagedRange::new(
694                    0,
695                    successful_gpns.len() * PagedRange::PAGE_SIZE,
696                    successful_gpns,
697                )
698                .unwrap()
699                .ranges()
700                .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
701                .collect::<Result<Vec<_>, _>>()
702                .expect("previous gpns was already checked");
703
704                // Roll back the cleared bitmap for failed gpns, as they should
705                // be still in their original state of shared.
706                let rollback_ranges =
707                    PagedRange::new(0, failed_gpns.len() * PagedRange::PAGE_SIZE, failed_gpns)
708                        .unwrap()
709                        .ranges()
710                        .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
711                        .collect::<Result<Vec<_>, _>>()
712                        .expect("previous gpns was already checked");
713
714                for &range in &rollback_ranges {
715                    clear_bitmap.update_valid(range, true);
716                }
717
718                // Figure out the index of the gpn that failed, in the
719                // pre-filtered list that will be reported back to the caller.
720                let failed_index = orig_gpns
721                    .iter()
722                    .position(|gpn| *gpn == failed_gpns[0])
723                    .expect("failed gpn should be present in the list");
724
725                (Err((err.source, failed_index)), ranges)
726            }
727        };
728
729        if !shared {
730            // Accept the pages so that the guest can use them.
731            for &range in &ranges {
732                self.acceptor
733                    .accept_lower_vtl_pages(range)
734                    .expect("everything should be in a state where we can accept VTL0 pages");
735
736                // For SNP, zero the memory before allowing the guest to access
737                // them. For TDX, this is done by the TDX module. For mshv, this is
738                // done by the hypervisor.
739                if self.acceptor.isolation == IsolationType::Snp {
740                    inner.encrypted.zero_range(range).expect("VTL 2 should have access to lower VTL memory, the page should be accepted, there should be no vtl protections yet.")
741                }
742            }
743        }
744
745        // Allow accesses via the correct address.
746        let set_bitmap = if shared {
747            &inner.valid_shared
748        } else {
749            &inner.valid_encrypted
750        };
751        for &range in &ranges {
752            set_bitmap.update_valid(range, true);
753        }
754
755        if !shared {
756            // Apply vtl protections so that the guest can use them. Any
757            // overlay pages won't be host visible, so just apply the default
758            // protections directly without handling them.
759            for &range in &ranges {
760                // Make sure we reset the permissions bitmaps for VTL 0.
761                self.apply_protections(
762                    range,
763                    GuestVtl::Vtl0,
764                    inner.default_vtl_permissions.vtl0,
765                    GpnSource::GuestMemory,
766                )
767                .expect("should be able to apply default protections");
768
769                if let Some(vtl1_protections) = inner.default_vtl_permissions.vtl1 {
770                    self.apply_protections(
771                        range,
772                        GuestVtl::Vtl1,
773                        vtl1_protections,
774                        GpnSource::GuestMemory,
775                    )
776                    .expect("everything should be in a state where we can apply VTL protections");
777                }
778            }
779        }
780
781        // Return the original result of the underlying page visibility
782        // transition call to the caller.
783        result
784    }
785
786    fn query_host_visibility(
787        &self,
788        gpns: &[u64],
789        host_visibility: &mut [HostVisibilityType],
790    ) -> Result<(), (HvError, usize)> {
791        // Validate the ranges are RAM.
792        for (i, &gpn) in gpns.iter().enumerate() {
793            if !self.is_in_guest_memory(gpn) {
794                return Err((HvError::OperationDenied, i));
795            }
796        }
797
798        let inner = self.inner.lock();
799
800        // Set GPN sharing status in output.
801        for (gpn, host_vis) in gpns.iter().zip(host_visibility.iter_mut()) {
802            *host_vis = if inner.valid_shared.check_valid(*gpn) {
803                HostVisibilityType::SHARED
804            } else {
805                HostVisibilityType::PRIVATE
806            };
807        }
808        Ok(())
809    }
810
811    fn default_vtl0_protections(&self) -> HvMapGpaFlags {
812        self.inner.lock().default_vtl_permissions.vtl0
813    }
814
815    fn change_default_vtl_protections(
816        &self,
817        target_vtl: GuestVtl,
818        vtl_protections: HvMapGpaFlags,
819        tlb_access: &mut dyn TlbFlushLockAccess,
820    ) -> Result<(), HvError> {
821        // Prevent visibility changes while VTL protections are being
822        // applied.
823        //
824        // TODO: This does not need to be synchronized against other
825        // threads performing VTL protection changes; whichever thread
826        // finishes last will control the outcome.
827        let mut inner = self.inner.lock();
828
829        inner
830            .default_vtl_permissions
831            .set(target_vtl, vtl_protections);
832
833        let mut ranges = Vec::new();
834        for ram_range in self.layout.ram().iter() {
835            let mut protect_start = ram_range.range.start();
836            let mut page_count = 0;
837
838            for gpn in
839                ram_range.range.start() / PAGE_SIZE as u64..ram_range.range.end() / PAGE_SIZE as u64
840            {
841                // TODO GUEST VSM: for now, use the encrypted mapping to
842                // find all accepted memory. When lazy acceptance exists,
843                // this should track all pages that have been accepted and
844                // should be used instead.
845                // Also don't attempt to change the permissions of locked pages.
846                if inner.valid_encrypted.check_valid(gpn) {
847                    self.check_gpn_not_locked(&inner, target_vtl, gpn)?;
848                    page_count += 1;
849                } else {
850                    if page_count > 0 {
851                        let end_address = protect_start + (page_count * PAGE_SIZE as u64);
852                        ranges.push(MemoryRange::new(protect_start..end_address));
853                    }
854                    protect_start = (gpn + 1) * PAGE_SIZE as u64;
855                    page_count = 0;
856                }
857            }
858
859            if page_count > 0 {
860                let end_address = protect_start + (page_count * PAGE_SIZE as u64);
861                ranges.push(MemoryRange::new(protect_start..end_address));
862            }
863        }
864
865        for range in ranges {
866            self.apply_protections_with_overlay_handling(
867                range,
868                target_vtl,
869                vtl_protections,
870                &mut inner,
871            )
872            .unwrap();
873        }
874
875        // Flush any threads accessing pages that had their VTL protections
876        // changed.
877        guestmem::rcu().synchronize_blocking();
878
879        // Invalidate the entire VTL 0 TLB to ensure that the new permissions
880        // are observed.
881        tlb_access.flush(GuestVtl::Vtl0);
882        tlb_access.set_wait_for_tlb_locks(target_vtl);
883
884        Ok(())
885    }
886
887    fn change_vtl_protections(
888        &self,
889        target_vtl: GuestVtl,
890        gpns: &[u64],
891        protections: HvMapGpaFlags,
892        tlb_access: &mut dyn TlbFlushLockAccess,
893    ) -> Result<(), (HvError, usize)> {
894        // Prevent visibility changes while VTL protections are being
895        // applied. This does not need to be synchronized against other
896        // threads performing VTL protection changes; whichever thread
897        // finishes last will control the outcome.
898        let inner = self.inner.lock();
899
900        // Validate the ranges are RAM.
901        for &gpn in gpns {
902            if !self.is_in_guest_memory(gpn) {
903                return Err((HvError::OperationDenied, 0));
904            }
905
906            // Validate they're not locked.
907            self.check_gpn_not_locked(&inner, target_vtl, gpn)
908                .map_err(|x| (x, 0))?;
909
910            // Validate they're not overlay pages.
911            if inner.overlay_pages[target_vtl].iter().any(|p| p.gpn == gpn) {
912                return Err((HvError::OperationDenied, 0));
913            }
914        }
915
916        // Protections cannot be applied to a host-visible page
917        if gpns.iter().any(|&gpn| inner.valid_shared.check_valid(gpn)) {
918            return Err((HvError::OperationDenied, 0));
919        }
920
921        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, gpns)
922            .unwrap()
923            .ranges()
924            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
925            .collect::<Result<Vec<_>, _>>()
926            .unwrap(); // Ok to unwrap, we've validated the gpns above.
927
928        for range in ranges {
929            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)
930                .unwrap();
931        }
932
933        // Flush any threads accessing pages that had their VTL protections
934        // changed.
935        guestmem::rcu().synchronize_blocking();
936
937        // Since page protections were modified, we must invalidate the entire
938        // VTL 0 TLB to ensure that the new permissions are observed, and wait for
939        // other CPUs to release all guest mappings before declaring that the VTL
940        // protection change has completed.
941        tlb_access.flush(GuestVtl::Vtl0);
942        tlb_access.set_wait_for_tlb_locks(target_vtl);
943
944        Ok(())
945    }
946
947    fn register_overlay_page(
948        &self,
949        vtl: GuestVtl,
950        gpn: u64,
951        gpn_source: GpnSource,
952        check_perms: HvMapGpaFlags,
953        new_perms: Option<HvMapGpaFlags>,
954        tlb_access: &mut dyn TlbFlushLockAccess,
955    ) -> Result<(), HvError> {
956        let mut inner = self.inner.lock();
957
958        // If the page is already registered as an overlay page, just check
959        // the permissions are adequate. If the permissions requested are
960        // different from the ones already registered just do best effort,
961        // there is no spec-guarantee of which one "wins".
962        if let Some(registered) = inner.overlay_pages[vtl].iter_mut().find(|p| p.gpn == gpn) {
963            let needed_perms = new_perms.unwrap_or(check_perms);
964            if registered.overlay_permissions.into_bits() | needed_perms.into_bits()
965                != registered.overlay_permissions.into_bits()
966            {
967                return Err(HvError::OperationDenied);
968            }
969            registered.ref_count += 1;
970            return Ok(());
971        }
972
973        let current_perms = match gpn_source {
974            GpnSource::GuestMemory => {
975                // Check that the required permissions are present.
976                let current_perms = self.query_lower_vtl_permissions(vtl, gpn)?;
977                if current_perms.into_bits() | check_perms.into_bits() != current_perms.into_bits()
978                {
979                    return Err(HvError::OperationDenied);
980                }
981
982                // Protections cannot be applied to a host-visible page.
983                if inner.valid_shared.check_valid(gpn) {
984                    return Err(HvError::OperationDenied);
985                }
986
987                current_perms
988            }
989            GpnSource::Dma => {
990                if self.is_in_guest_memory(gpn) {
991                    // DMA memory must not be in guest RAM.
992                    return Err(HvError::OperationDenied);
993                }
994
995                HV_MAP_GPA_PERMISSIONS_NONE
996            }
997        };
998
999        // Or a locked page.
1000        self.check_gpn_not_locked(&inner, vtl, gpn)?;
1001
1002        // Everything's validated, change the permissions.
1003        if let Some(new_perms) = new_perms {
1004            self.apply_protections(
1005                MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
1006                vtl,
1007                new_perms,
1008                gpn_source,
1009            )
1010            .map_err(|_| HvError::OperationDenied)?;
1011        }
1012
1013        // Nothing from this point on can fail, so we can safely register the overlay page.
1014        inner.overlay_pages[vtl].push(OverlayPage {
1015            gpn,
1016            previous_permissions: current_perms,
1017            overlay_permissions: new_perms.unwrap_or(current_perms),
1018            ref_count: 1,
1019            gpn_source,
1020        });
1021
1022        // Flush any threads accessing pages that had their VTL protections
1023        // changed.
1024        guestmem::rcu().synchronize_blocking();
1025
1026        // Since page protections were modified, we must invalidate the TLB to
1027        // ensure that the new permissions are observed, and wait for other CPUs
1028        // to release all guest mappings before declaring that the VTL
1029        // protection change has completed.
1030        tlb_access.flush(vtl);
1031        tlb_access.set_wait_for_tlb_locks(vtl);
1032
1033        Ok(())
1034    }
1035
1036    fn unregister_overlay_page(
1037        &self,
1038        vtl: GuestVtl,
1039        gpn: u64,
1040        tlb_access: &mut dyn TlbFlushLockAccess,
1041    ) -> Result<(), HvError> {
1042        let mut inner = self.inner.lock();
1043        let overlay_pages = &mut inner.overlay_pages[vtl];
1044
1045        // Find the overlay page.
1046        let index = overlay_pages
1047            .iter()
1048            .position(|p| p.gpn == gpn)
1049            .ok_or(HvError::OperationDenied)?;
1050
1051        // If this overlay page has been registered multiple times, just
1052        // decrement the reference count and return. We don't implement
1053        // full handling of multiple registrations with different permissions,
1054        // since it's best effort anyways.
1055        if overlay_pages[index].ref_count > 1 {
1056            overlay_pages[index].ref_count -= 1;
1057            return Ok(());
1058        }
1059
1060        // Restore its permissions.
1061        self.apply_protections(
1062            MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
1063            vtl,
1064            overlay_pages[index].previous_permissions,
1065            overlay_pages[index].gpn_source,
1066        )
1067        .map_err(|_| HvError::OperationDenied)?;
1068
1069        // Nothing from this point on can fail, so we can safely unregister the overlay page.
1070        overlay_pages.remove(index);
1071
1072        // Flush any threads accessing pages that had their VTL protections
1073        // changed.
1074        guestmem::rcu().synchronize_blocking();
1075
1076        // Since page protections were modified, we must invalidate the TLB to
1077        // ensure that the new permissions are observed, and wait for other CPUs
1078        // to release all guest mappings before declaring that the VTL
1079        // protection change has completed.
1080        tlb_access.flush(vtl);
1081        tlb_access.set_wait_for_tlb_locks(vtl);
1082        Ok(())
1083    }
1084
1085    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool {
1086        self.inner.lock().overlay_pages[vtl]
1087            .iter()
1088            .any(|p| p.gpn == gpn)
1089    }
1090
1091    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError> {
1092        // Locking a page multiple times is allowed, so no need to check
1093        // for duplicates.
1094        // We also need to allow locking overlay pages for now.
1095        // TODO: We probably don't want to allow locking overlay pages once
1096        // we return the pointer for them instead of going through guestmem::lock.
1097        // TODO: other preconditions?
1098        self.inner.lock().locked_pages[vtl].push(gpns.to_vec().into_boxed_slice());
1099        Ok(())
1100    }
1101
1102    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) {
1103        let mut inner = self.inner.lock();
1104        let locked_pages = &mut inner.locked_pages[vtl];
1105        for (i, w) in locked_pages.iter().enumerate() {
1106            if **w == *gpns {
1107                locked_pages.swap_remove(i);
1108                return;
1109            }
1110        }
1111
1112        // Don't change protections on locked pages to avoid conflicting
1113        // with unregister_overlay_page.
1114        // TODO: Is this the right decision even after we separate overlay and
1115        // locked pages?
1116
1117        panic!("Tried to unlock pages that were not locked");
1118    }
1119
1120    fn set_vtl1_protections_enabled(&self) {
1121        self.vtl1_protections_enabled
1122            .store(true, std::sync::atomic::Ordering::Relaxed);
1123    }
1124
1125    fn vtl1_protections_enabled(&self) -> bool {
1126        self.vtl1_protections_enabled
1127            .load(std::sync::atomic::Ordering::Relaxed)
1128    }
1129}