underhill_mem/
lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Underhill VM memory management.

#![cfg(target_os = "linux")]

mod init;
mod mapping;
mod registrar;

pub use init::BootInit;
pub use init::Init;
pub use init::MemoryMappings;
pub use init::init;

use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemoryBackingError;
use guestmem::PAGE_SIZE;
use guestmem::ranges::PagedRange;
use hcl::GuestVtl;
use hcl::ioctl::AcceptPagesError;
use hcl::ioctl::ApplyVtlProtectionsError;
use hcl::ioctl::Mshv;
use hcl::ioctl::MshvHvcall;
use hcl::ioctl::MshvVtl;
use hcl::ioctl::snp::SnpPageError;
use hv1_structs::VtlArray;
use hvdef::HV_MAP_GPA_PERMISSIONS_ALL;
use hvdef::HV_MAP_GPA_PERMISSIONS_NONE;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HypercallCode;
use hvdef::hypercall::AcceptMemoryType;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvInputVtl;
use mapping::GuestMemoryMapping;
use mapping::GuestValidMemory;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use parking_lot::MutexGuard;
use registrar::RegisterMemory;
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use thiserror::Error;
use virt::IsolationType;
use virt_mshv_vtl::GpnSource;
use virt_mshv_vtl::ProtectIsolatedMemory;
use virt_mshv_vtl::TlbFlushLockAccess;
use vm_topology::memory::MemoryLayout;
use x86defs::snp::SevRmpAdjust;
use x86defs::tdx::GpaVmAttributes;
use x86defs::tdx::GpaVmAttributesMask;
use x86defs::tdx::TdgMemPageAttrWriteR8;
use x86defs::tdx::TdgMemPageGpaAttr;

/// Error querying vtl permissions on a page
#[derive(Debug, Error)]
pub enum QueryVtlPermissionsError {
    /// An SNP-specific error
    #[error("failed to query rmp permissions")]
    Snp(#[source] SnpPageError),
}

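/// [`MshvVtl`] wrapper that carries the policy for handling memory
/// registration failures reported by the kernel driver.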
#[derive(Debug)]
struct MshvVtlWithPolicy {
    mshv_vtl: MshvVtl,
    ignore_registration_failure: bool,
    shared: bool,
}

impl RegisterMemory for MshvVtlWithPolicy {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
        match self.mshv_vtl.add_vtl0_memory(range, self.shared) {
            Ok(()) => Ok(()),
            // TODO: remove this once the kernel driver tracks registration
            Err(err) if self.ignore_registration_failure => {
                tracing::warn!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    "registration failure, could be expected"
                );
                Ok(())
            }
            Err(err) => Err(err),
        }
    }
}

#[derive(Debug, Error)]
#[error("failed to register memory with kernel")]
struct RegistrationError;

/// Default VTL memory permissions applied to any mapped memory.
///
/// Currently built for hardware CVMs, which only define permissions for VTL
/// 0 and VTL 1 to express what those VTLs have access to. If this were to
/// extend to non-hardware CVMs, those would need to define permissions
/// instead for VTL 2 and VTL 1 to express what the lower VTLs have access
/// to.
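///
/// A minimal sketch of the intended lifecycle (illustrative values; see
/// [`HardwareIsolatedMemoryProtector::new`] for the real initialization):
///
/// ```ignore
/// // At boot, only VTL 0 has access.
/// let mut perms = DefaultVtlPermissions {
///     vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
///     vtl1: None,
/// };
/// // Once VTL 1 is enabled, it gains its own default permissions.
/// perms.set(GuestVtl::Vtl1, HV_MAP_GPA_PERMISSIONS_ALL);
/// ```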
struct DefaultVtlPermissions {
    vtl0: HvMapGpaFlags,
    vtl1: Option<HvMapGpaFlags>,
}

impl DefaultVtlPermissions {
    fn set(&mut self, vtl: GuestVtl, permissions: HvMapGpaFlags) {
        match vtl {
            GuestVtl::Vtl0 => self.vtl0 = permissions,
            GuestVtl::Vtl1 => self.vtl1 = Some(permissions),
        }
    }
}

/// Represents the vtl permissions on a page for a given isolation type
#[derive(Copy, Clone)]
enum GpaVtlPermissions {
    Vbs(HvMapGpaFlags),
    Snp(SevRmpAdjust),
    Tdx(TdgMemPageGpaAttr, TdgMemPageAttrWriteR8),
}

impl GpaVtlPermissions {
    fn new(isolation: IsolationType, vtl: GuestVtl, protections: HvMapGpaFlags) -> Self {
        match isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => GpaVtlPermissions::Vbs(protections),
            IsolationType::Snp => {
                let mut vtl_permissions = GpaVtlPermissions::Snp(SevRmpAdjust::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
            IsolationType::Tdx => {
                let mut vtl_permissions =
                    GpaVtlPermissions::Tdx(TdgMemPageGpaAttr::new(), TdgMemPageAttrWriteR8::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
        }
    }

    fn set(&mut self, vtl: GuestVtl, protections: HvMapGpaFlags) {
        match self {
            GpaVtlPermissions::Vbs(flags) => *flags = protections,
            GpaVtlPermissions::Snp(rmpadjust) => {
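                // Note: lower VTLs map to *higher* VMPLs here: VTL 0 runs at
                // VMPL 2 and VTL 1 at VMPL 1 (VTL 2, the paravisor, runs at
                // VMPL 0).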
                *rmpadjust = SevRmpAdjust::new()
                    .with_enable_read(protections.readable())
                    .with_enable_write(protections.writable())
                    .with_enable_user_execute(protections.user_executable())
                    .with_enable_kernel_execute(protections.kernel_executable())
                    .with_target_vmpl(match vtl {
                        GuestVtl::Vtl0 => x86defs::snp::Vmpl::Vmpl2.into(),
                        GuestVtl::Vtl1 => x86defs::snp::Vmpl::Vmpl1.into(),
                    });
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                let vm_attributes = GpaVmAttributes::new()
                    .with_valid(true)
                    .with_read(protections.readable())
                    .with_write(protections.writable())
                    .with_kernel_execute(protections.kernel_executable())
                    .with_user_execute(protections.user_executable());

                let (new_attributes, new_mask) = match vtl {
                    GuestVtl::Vtl0 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                    GuestVtl::Vtl1 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm2(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm2(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                };

                *attributes = new_attributes;
                *mask = new_mask;
            }
        }
    }
}

/// Error returned when modifying gpa visibility.
#[derive(Debug, Error)]
#[error("failed to modify gpa visibility, elements successfully processed {processed}")]
pub struct ModifyGpaVisibilityError {
    source: HvError,
    processed: usize,
}

/// Interface to accept lower VTL memory and manipulate its page
/// protections.
///
/// FUTURE: this should go away as a separate object once all the logic is moved
/// into this crate.
pub struct MemoryAcceptor {
    mshv_hvcall: MshvHvcall,
    mshv_vtl: MshvVtl,
    isolation: IsolationType,
}

impl MemoryAcceptor {
    /// Create a new instance.
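    ///
    /// A minimal usage sketch (the range value is illustrative):
    ///
    /// ```ignore
    /// let acceptor = MemoryAcceptor::new(IsolationType::Snp)?;
    /// acceptor.accept_lower_vtl_pages(MemoryRange::new(0x10000..0x20000))?;
    /// ```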
    pub fn new(isolation: IsolationType) -> Result<Self, hcl::ioctl::Error> {
        let mshv = Mshv::new()?;
        let mshv_vtl = mshv.create_vtl()?;
        let mshv_hvcall = MshvHvcall::new()?;
        mshv_hvcall.set_allowed_hypercalls(&[
            HypercallCode::HvCallAcceptGpaPages,
            HypercallCode::HvCallModifySparseGpaPageHostVisibility,
            HypercallCode::HvCallModifyVtlProtectionMask,
        ]);

        // On boot, VTL 0 should already have full permissions, so there is
        // nothing else to apply here.
        Ok(Self {
            mshv_hvcall,
            mshv_vtl,
            isolation,
        })
    }

    /// Accept pages for lower VTLs.
    pub fn accept_lower_vtl_pages(&self, range: MemoryRange) -> Result<(), AcceptPagesError> {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => self
                .mshv_hvcall
                .accept_gpa_pages(range, AcceptMemoryType::RAM),
            IsolationType::Snp => {
                self.mshv_vtl
                    .pvalidate_pages(range, true, false)
                    .map_err(|err| AcceptPagesError::Snp {
                        failed_operation: err,
                        range,
                    })
            }
            IsolationType::Tdx => {
                let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(GpaVmAttributes::FULL_ACCESS);
                let mask =
                    TdgMemPageAttrWriteR8::new().with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);

                self.mshv_vtl
                    .tdx_accept_pages(range, Some((attributes, mask)))
                    .map_err(|err| AcceptPagesError::Tdx { error: err, range })
            }
        }
    }

    fn unaccept_lower_vtl_pages(&self, range: MemoryRange) {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => {
                // TODO VBS: is there something to do here?
            }
            IsolationType::Snp => {
                // Revoke permissions before unaccepting pages. This is required
                // because a subsequent page acceptance is not guaranteed to
                // reset permissions unless the hypervisor executed RMPUPDATE,
                // which it cannot be trusted to do. We set new permissions
                // ourselves, but that still leaves open a tiny window where the
                // guest could access the pages with the old permissions.
                for lower_vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
                    self.apply_protections(range, lower_vtl, HV_MAP_GPA_PERMISSIONS_NONE)
                        .unwrap();
                }
                self.mshv_vtl.pvalidate_pages(range, false, false).unwrap()
            }

            IsolationType::Tdx => {
                // Nothing to do for TDX.
            }
        }
    }
    /// Tell the host to change the visibility of the given GPAs.
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        gpns: &[u64],
    ) -> Result<(), ModifyGpaVisibilityError> {
        self.mshv_hvcall
            .modify_gpa_visibility(host_visibility, gpns)
            .map_err(|(e, processed)| ModifyGpaVisibilityError {
                source: e,
                processed,
            })
    }

    /// Apply the initial protections on lower-vtl memory.
    ///
    /// After initialization, the default protections should be applied.
    pub fn apply_initial_lower_vtl_protections(
        &self,
        range: MemoryRange,
    ) -> Result<(), ApplyVtlProtectionsError> {
        self.apply_protections(range, GuestVtl::Vtl0, HV_MAP_GPA_PERMISSIONS_ALL)
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        vtl: GuestVtl,
        flags: HvMapGpaFlags,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let permissions = GpaVtlPermissions::new(self.isolation, vtl, flags);

        match permissions {
            GpaVtlPermissions::Vbs(flags) => {
                // For VBS-isolated VMs, the permissions apply to all lower
                // VTLs. Therefore VTL 0 cannot set its own permissions.
                assert_ne!(vtl, GuestVtl::Vtl0);

                self.mshv_hvcall
                    .modify_vtl_protection_mask(range, flags, HvInputVtl::from(vtl))
            }
            GpaVtlPermissions::Snp(rmpadjust) => {
                // For SNP VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .rmpadjust_pages(range, rmpadjust, false)
                    .map_err(|err| ApplyVtlProtectionsError::Snp {
                        failed_operation: err,
                        range,
                        permissions: rmpadjust,
                        vtl: vtl.into(),
                    })
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                // For TDX VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .tdx_set_page_attributes(range, attributes, mask)
                    .map_err(|err| ApplyVtlProtectionsError::Tdx {
                        error: err,
                        range,
                        permissions: attributes,
                        vtl: vtl.into(),
                    })
            }
        }
    }
}

/// An implementation of [`ProtectIsolatedMemory`] for Underhill VMs.
pub struct HardwareIsolatedMemoryProtector {
    // Serves as a lock for synchronizing visibility and page-protection changes.
    inner: Mutex<HardwareIsolatedMemoryProtectorInner>,
    layout: MemoryLayout,
    acceptor: Arc<MemoryAcceptor>,
    vtl0: Arc<GuestMemoryMapping>,
    vtl1_protections_enabled: AtomicBool,
}

struct HardwareIsolatedMemoryProtectorInner {
    valid_encrypted: Arc<GuestValidMemory>,
    valid_shared: Arc<GuestValidMemory>,
    encrypted: Arc<GuestMemoryMapping>,
    default_vtl_permissions: DefaultVtlPermissions,
    overlay_pages: VtlArray<Vec<OverlayPage>, 2>,
    locked_pages: VtlArray<Vec<Box<[u64]>>, 2>,
}

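/// State tracked for a registered overlay page so that its previous
/// protections can be restored when the overlay is unregistered.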
struct OverlayPage {
    gpn: u64,
    previous_permissions: HvMapGpaFlags,
    overlay_permissions: HvMapGpaFlags,
    ref_count: u16,
    gpn_source: GpnSource,
}

impl HardwareIsolatedMemoryProtector {
    /// Returns a new instance.
    ///
    /// `valid_encrypted` and `valid_shared` track which pages are currently
    /// valid to access via the encrypted and shared mappings, respectively.
    /// `encrypted` provides the mapping for encrypted memory, and `vtl0`
    /// provides the VTL 0 mapping, which also tracks per-page permission
    /// bitmaps.
    pub fn new(
        valid_encrypted: Arc<GuestValidMemory>,
        valid_shared: Arc<GuestValidMemory>,
        encrypted: Arc<GuestMemoryMapping>,
        vtl0: Arc<GuestMemoryMapping>,
        layout: MemoryLayout,
        acceptor: Arc<MemoryAcceptor>,
    ) -> Self {
        Self {
            inner: Mutex::new(HardwareIsolatedMemoryProtectorInner {
                valid_encrypted,
                valid_shared,
                encrypted,
                // Grant only VTL 0 all permissions. This will be altered
                // later by VTL 1 enablement and by VTL 1 itself.
                default_vtl_permissions: DefaultVtlPermissions {
                    vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
                    vtl1: None,
                },
                overlay_pages: VtlArray::from_fn(|_| Vec::new()),
                locked_pages: VtlArray::from_fn(|_| Vec::new()),
            }),
            layout,
            acceptor,
            vtl0,
            vtl1_protections_enabled: AtomicBool::new(false),
        }
    }

    fn apply_protections_with_overlay_handling(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        inner: &mut MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let mut range_queue = VecDeque::new();
        range_queue.push_back(range);

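        // Worked example: applying protections to [0x0, 0x5000) with an
        // overlay page at GPN 2 splits the range into [0x0, 0x2000) and
        // [0x3000, 0x5000), both of which are re-queued. The overlay page
        // itself keeps its overlay permissions; only its saved
        // `previous_permissions` are updated to the new value.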
        'outer: while let Some(range) = range_queue.pop_front() {
            for overlay_page in inner.overlay_pages[target_vtl].iter_mut() {
                let overlay_addr = overlay_page.gpn * HV_PAGE_SIZE;
                if range.contains_addr(overlay_addr) {
                    // If the overlay page is within the range, update the
                    // permissions that will be restored when it is unlocked.
                    overlay_page.previous_permissions = protections;
                    // And split the range around it.
                    let (left, right_with_overlay) =
                        range.split_at_offset(range.offset_of(overlay_addr).unwrap());
                    let (overlay, right) = right_with_overlay.split_at_offset(HV_PAGE_SIZE);
                    debug_assert_eq!(overlay.start_4k_gpn(), overlay_page.gpn);
                    debug_assert_eq!(overlay.len(), HV_PAGE_SIZE);
                    if !left.is_empty() {
                        range_queue.push_back(left);
                    }
                    if !right.is_empty() {
                        range_queue.push_back(right);
                    }
                    continue 'outer;
                }
            }
            // We can only reach here if the range does not contain any overlay
            // pages, so now we can apply the protections to the range.
            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)?
        }

        Ok(())
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        gpn_source: GpnSource,
    ) -> Result<(), ApplyVtlProtectionsError> {
        if gpn_source == GpnSource::GuestMemory && target_vtl == GuestVtl::Vtl0 {
            // Only permissions imposed on VTL 0 guest memory are explicitly tracked.
            self.vtl0.update_permission_bitmaps(range, protections);
        }
        self.acceptor
            .apply_protections(range, target_vtl, protections)
    }

    /// Get the permissions that the given VTL has to the given GPN.
    ///
    /// This function does not check for any protections applied by VTL 2,
    /// only those applied by lower VTLs.
    fn query_lower_vtl_permissions(
        &self,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<HvMapGpaFlags, HvError> {
        if !self.is_in_guest_memory(gpn) {
            return Err(HvError::OperationDenied);
        }

        let res = match vtl {
            GuestVtl::Vtl0 => self
                .vtl0
                .query_access_permission(gpn)
                .unwrap_or(HV_MAP_GPA_PERMISSIONS_ALL),
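            // VTL 1 permissions are not tracked, so assume full access.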
            GuestVtl::Vtl1 => HV_MAP_GPA_PERMISSIONS_ALL,
        };

        Ok(res)
    }

    fn check_gpn_not_locked(
        &self,
        inner: &MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<(), HvError> {
        // Overlay pages have special handling; being locked does not prevent
        // that.
        // TODO: When uh_mem implements returning overlay page mappings, rather
        // than requiring them to also be locked through guestmem, the check for
        // overlay pages can be removed, as locked and overlay pages will then
        // be mutually exclusive.
        if inner.locked_pages[vtl].iter().flatten().any(|x| *x == gpn)
            && !inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn)
        {
            return Err(HvError::OperationDenied);
        }
        Ok(())
    }

    /// Checks whether the given GPN is present in guest RAM.
    fn is_in_guest_memory(&self, gpn: u64) -> bool {
        let gpa = gpn << HV_PAGE_SHIFT;
        self.layout.ram().iter().any(|r| r.range.contains_addr(gpa))
    }
}

impl ProtectIsolatedMemory for HardwareIsolatedMemoryProtector {
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
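        // Overview: after validating and filtering the GPNs, the transition
        // proceeds as follows: (1) clear the valid bitmap for the old
        // mapping, (2) flush concurrent accessors via RCU (and, on SNP, the
        // guest TLB), (3) unaccept the pages when going private -> shared,
        // (4) ask the hypervisor to change visibility, (5) accept the pages
        // (zeroing them on SNP) when going shared -> private, (6) set the
        // valid bitmap for the new mapping, and (7) apply default
        // protections to newly private pages.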
        let inner = self.inner.lock();

        for &gpn in gpns {
            // Validate the ranges are RAM.
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Don't allow overlay pages to be shared.
            if shared && inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Filter out the GPNs that are already in the correct state. If the
        // page is becoming shared, make sure the requesting VTL has read/write
        // vtl permissions to the page.
        let orig_gpns = gpns;
        let mut failed_vtl_permission_index = None;
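        // Note: `take_while` stops at the first GPN that fails the permission
        // check, so no GPNs after it are processed; the failing index is
        // reported to the caller once the visibility update completes.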
        let gpns = gpns
            .iter()
            .copied()
            .enumerate()
            .take_while(|&(index, gpn)| {
                if vtl == GuestVtl::Vtl0 && shared && self.vtl1_protections_enabled() {
                    let permissions = self
                        .vtl0
                        .query_access_permission(gpn)
                        .expect("vtl 1 protections enabled, vtl permissions should be tracked");
                    if !permissions.readable() || !permissions.writable() {
                        failed_vtl_permission_index = Some(index);
                        false
                    } else {
                        true
                    }
                } else {
                    true
                }
            })
            .filter_map(|(_, gpn)| {
                if inner.valid_shared.check_valid(gpn) != shared {
                    Some(gpn)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        tracing::debug!(
            orig = orig_gpns.len(),
            len = gpns.len(),
            first = gpns.first(),
            shared,
            "change vis"
        );

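        // Convert the GPN list into contiguous memory ranges: `PagedRange`
        // treats the GPNs as a page-granular scatter list, and `ranges()`
        // yields the maximal GPA-contiguous runs within it.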
        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, &gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        // Prevent accesses via the wrong address.
        let clear_bitmap = if shared {
            &inner.valid_encrypted
        } else {
            &inner.valid_shared
        };

        for &range in &ranges {
            if shared && vtl == GuestVtl::Vtl0 {
                // Accessing these pages through the encrypted mapping is now
                // invalid. Make sure the VTL bitmaps reflect this. We could
                // call apply_protections here but that would result in an extra
                // hardware interaction that we don't need since we're about to
                // unaccept the pages anyways.
                self.vtl0
                    .update_permission_bitmaps(range, HV_MAP_GPA_PERMISSIONS_NONE);
            }

            clear_bitmap.update_valid(range, false);
        }

        // There may be other threads concurrently accessing these pages. We
        // cannot change the page visibility state until these threads have
        // stopped those accesses. Flush the RCU domain that `guestmem` uses in
        // order to flush any threads accessing the pages. After this, we are
        // guaranteed no threads are accessing these pages (unless the pages are
        // also locked), since no bitmap currently allows access.
        guestmem::rcu().synchronize_blocking();

        if let IsolationType::Snp = self.acceptor.isolation {
            // We need to ensure that the guest TLB has been fully flushed since
            // the unaccept operation is not guaranteed to do so in hardware,
            // and the hypervisor is also not trusted with TLB hygiene.
            tlb_access.flush_entire();
        }

        if shared {
            // Unaccept the pages so that the hypervisor can reclaim them.
            for &range in &ranges {
                self.acceptor.unaccept_lower_vtl_pages(range);
            }
        }

        // Ask the hypervisor to update visibility.
        let host_visibility = if shared {
            HostVisibilityType::SHARED
        } else {
            HostVisibilityType::PRIVATE
        };

        let (result, ranges) = match self.acceptor.modify_gpa_visibility(host_visibility, &gpns) {
            Ok(()) => {
                // All gpns succeeded, so the whole set of ranges should be
                // processed.
                (
                    match failed_vtl_permission_index {
                        Some(index) => Err((HvError::AccessDenied, index)),
                        None => Ok(()),
                    },
                    ranges,
                )
            }
            Err(err) => {
                if shared {
                    // A transition from private to shared should always
                    // succeed. There is no safe rollback path, so we must
                    // panic.
                    panic!(
                        "the hypervisor refused to transition pages to shared, we cannot safely roll back: {:?}",
                        err
                    );
                }

                // Only some ranges succeeded. Recreate ranges based on which
                // gpns succeeded, for further processing.
                let (successful_gpns, failed_gpns) = gpns.split_at(err.processed);
                let ranges = PagedRange::new(
                    0,
                    successful_gpns.len() * PagedRange::PAGE_SIZE,
                    successful_gpns,
                )
                .unwrap()
                .ranges()
                .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                .collect::<Result<Vec<_>, _>>()
                .expect("previous gpns was already checked");

                // Roll back the cleared bitmap for failed gpns, as they should
                // still be in their original state of shared.
                let rollback_ranges =
                    PagedRange::new(0, failed_gpns.len() * PagedRange::PAGE_SIZE, failed_gpns)
                        .unwrap()
                        .ranges()
                        .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                        .collect::<Result<Vec<_>, _>>()
                        .expect("previous gpns was already checked");

                for &range in &rollback_ranges {
                    clear_bitmap.update_valid(range, true);
                }

                // Figure out the index of the gpn that failed, in the
                // pre-filtered list that will be reported back to the caller.
                let failed_index = orig_gpns
                    .iter()
                    .position(|gpn| *gpn == failed_gpns[0])
                    .expect("failed gpn should be present in the list");

                (Err((err.source, failed_index)), ranges)
            }
        };

        if !shared {
            // Accept the pages so that the guest can use them.
            for &range in &ranges {
                self.acceptor
                    .accept_lower_vtl_pages(range)
                    .expect("everything should be in a state where we can accept VTL0 pages");

                // For SNP, zero the pages before allowing the guest to access
                // them. For TDX, this is done by the TDX module. For mshv, this
                // is done by the hypervisor.
                if self.acceptor.isolation == IsolationType::Snp {
                    inner.encrypted.zero_range(range).expect("VTL 2 should have access to lower VTL memory, the page should be accepted, there should be no vtl protections yet.")
                }
            }
        }

        // Allow accesses via the correct address.
        let set_bitmap = if shared {
            &inner.valid_shared
        } else {
            &inner.valid_encrypted
        };
        for &range in &ranges {
            set_bitmap.update_valid(range, true);
        }

        if !shared {
            // Apply VTL protections so that the guest can use the pages. Any
            // overlay pages won't be host visible, so just apply the default
            // protections directly without special handling.
            for &range in &ranges {
                // Make sure we reset the permissions bitmaps for VTL 0.
                self.apply_protections(
                    range,
                    GuestVtl::Vtl0,
                    inner.default_vtl_permissions.vtl0,
                    GpnSource::GuestMemory,
                )
                .expect("should be able to apply default protections");

                if let Some(vtl1_protections) = inner.default_vtl_permissions.vtl1 {
                    self.apply_protections(
                        range,
                        GuestVtl::Vtl1,
                        vtl1_protections,
                        GpnSource::GuestMemory,
                    )
                    .expect("everything should be in a state where we can apply VTL protections");
                }
            }
        }

        // Return the original result of the underlying page visibility
        // transition call to the caller.
        result
    }

    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)> {
        // Validate the ranges are RAM.
        for (i, &gpn) in gpns.iter().enumerate() {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, i));
            }
        }

        let inner = self.inner.lock();

        // Set GPN sharing status in output.
        for (gpn, host_vis) in gpns.iter().zip(host_visibility.iter_mut()) {
            *host_vis = if inner.valid_shared.check_valid(*gpn) {
                HostVisibilityType::SHARED
            } else {
                HostVisibilityType::PRIVATE
            };
        }
        Ok(())
    }

    fn default_vtl0_protections(&self) -> HvMapGpaFlags {
        self.inner.lock().default_vtl_permissions.vtl0
    }

    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        vtl_protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        // Prevent visibility changes while VTL protections are being
        // applied.
        //
        // TODO: This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        inner
            .default_vtl_permissions
            .set(target_vtl, vtl_protections);

        let mut ranges = Vec::new();
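        // Coalesce runs of consecutive accepted (and unlocked) GPNs into
        // ranges so that protections can be applied in bulk. For example, if
        // a RAM range starts at GPA 0 and GPNs 0, 1, 2, 5, and 6 are
        // accepted, this produces [0x0, 0x3000) and [0x5000, 0x7000).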
        for ram_range in self.layout.ram().iter() {
            let mut protect_start = ram_range.range.start();
            let mut page_count = 0;

            for gpn in
                ram_range.range.start() / PAGE_SIZE as u64..ram_range.range.end() / PAGE_SIZE as u64
            {
                // TODO GUEST VSM: for now, use the encrypted mapping to
                // find all accepted memory. When lazy acceptance exists,
                // this should track all pages that have been accepted and
                // should be used instead.
                // Also don't attempt to change the permissions of locked pages.
                if inner.valid_encrypted.check_valid(gpn) {
                    self.check_gpn_not_locked(&inner, target_vtl, gpn)?;
                    page_count += 1;
                } else {
                    if page_count > 0 {
                        let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                        ranges.push(MemoryRange::new(protect_start..end_address));
                    }
                    protect_start = (gpn + 1) * PAGE_SIZE as u64;
                    page_count = 0;
                }
            }

            if page_count > 0 {
                let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                ranges.push(MemoryRange::new(protect_start..end_address));
            }
        }

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                target_vtl,
                vtl_protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Invalidate the entire VTL 0 TLB to ensure that the new permissions
        // are observed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        // Prevent visibility changes while VTL protections are being
        // applied. This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let inner = self.inner.lock();

        // Validate the ranges are RAM.
        for &gpn in gpns {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, target_vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Validate they're not overlay pages.
            if inner.overlay_pages[target_vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Protections cannot be applied to a host-visible page.
        if gpns.iter().any(|&gpn| inner.valid_shared.check_valid(gpn)) {
            return Err((HvError::OperationDenied, 0));
        }

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        for range in ranges {
            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)
                .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the entire
        // VTL 0 TLB to ensure that the new permissions are observed, and wait for
        // other CPUs to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();

        // If the page is already registered as an overlay page, just check
        // that the permissions are adequate. If the requested permissions
        // differ from the ones already registered, this is best effort;
        // there is no spec guarantee of which one "wins".
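        // Note: `a | b != a` holds exactly when `b` requires permission bits
        // that `a` does not already grant.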
        if let Some(registered) = inner.overlay_pages[vtl].iter_mut().find(|p| p.gpn == gpn) {
            let needed_perms = new_perms.unwrap_or(check_perms);
            if registered.overlay_permissions.into_bits() | needed_perms.into_bits()
                != registered.overlay_permissions.into_bits()
            {
                return Err(HvError::OperationDenied);
            }
            registered.ref_count += 1;
            return Ok(());
        }

        let current_perms = match gpn_source {
            GpnSource::GuestMemory => {
                // Check that the required permissions are present.
                let current_perms = self.query_lower_vtl_permissions(vtl, gpn)?;
                if current_perms.into_bits() | check_perms.into_bits() != current_perms.into_bits()
                {
                    return Err(HvError::OperationDenied);
                }

                // Protections cannot be applied to a host-visible page.
                if inner.valid_shared.check_valid(gpn) {
                    return Err(HvError::OperationDenied);
                }

                current_perms
            }
            GpnSource::Dma => {
                if self.is_in_guest_memory(gpn) {
                    // DMA memory must not be in guest RAM.
                    return Err(HvError::OperationDenied);
                }

                HV_MAP_GPA_PERMISSIONS_NONE
            }
        };

        // The page also must not be locked.
        self.check_gpn_not_locked(&inner, vtl, gpn)?;

        // Everything's validated, change the permissions.
        if let Some(new_perms) = new_perms {
            self.apply_protections(
                MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
                vtl,
                new_perms,
                gpn_source,
            )
            .map_err(|_| HvError::OperationDenied)?;
        }

        // Nothing from this point on can fail, so we can safely register the overlay page.
        inner.overlay_pages[vtl].push(OverlayPage {
            gpn,
            previous_permissions: current_perms,
            overlay_permissions: new_perms.unwrap_or(current_perms),
            ref_count: 1,
            gpn_source,
        });

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);

        Ok(())
    }

    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();
        let overlay_pages = &mut inner.overlay_pages[vtl];

        // Find the overlay page.
        let index = overlay_pages
            .iter()
            .position(|p| p.gpn == gpn)
            .ok_or(HvError::OperationDenied)?;

        // If this overlay page has been registered multiple times, just
        // decrement the reference count and return. We don't implement
        // full handling of multiple registrations with different permissions,
        // since it's best effort anyways.
        if overlay_pages[index].ref_count > 1 {
            overlay_pages[index].ref_count -= 1;
            return Ok(());
        }

        // Restore its permissions.
        self.apply_protections(
            MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
            vtl,
            overlay_pages[index].previous_permissions,
            overlay_pages[index].gpn_source,
        )
        .map_err(|_| HvError::OperationDenied)?;

        // Nothing from this point on can fail, so we can safely unregister the overlay page.
        overlay_pages.remove(index);

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);
        Ok(())
    }

    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool {
        self.inner.lock().overlay_pages[vtl]
            .iter()
            .any(|p| p.gpn == gpn)
    }

    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError> {
        // Locking a page multiple times is allowed, so no need to check
        // for duplicates.
        // We also need to allow locking overlay pages for now.
        // TODO: We probably don't want to allow locking overlay pages once
        // we return the pointer for them instead of going through guestmem::lock.
        // TODO: other preconditions?
        self.inner.lock().locked_pages[vtl].push(gpns.to_vec().into_boxed_slice());
        Ok(())
    }

    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) {
        let mut inner = self.inner.lock();
        let locked_pages = &mut inner.locked_pages[vtl];
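        // Unlock by exact list match: the caller must pass the same GPN list
        // it originally passed to `lock_gpns`.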
        // Note that protections are deliberately not changed when unlocking,
        // to avoid conflicting with unregister_overlay_page.
        // TODO: Is this the right decision even after we separate overlay and
        // locked pages?
        for (i, w) in locked_pages.iter().enumerate() {
            if **w == *gpns {
                locked_pages.swap_remove(i);
                return;
            }
        }

        panic!("Tried to unlock pages that were not locked");
    }

    fn set_vtl1_protections_enabled(&self) {
        self.vtl1_protections_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
    }

    fn vtl1_protections_enabled(&self) -> bool {
        self.vtl1_protections_enabled
            .load(std::sync::atomic::Ordering::Relaxed)
    }
}