underhill_mem/
lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Underhill VM memory management.

#![cfg(target_os = "linux")]

mod init;
mod mapping;
mod registrar;

pub use init::BootInit;
pub use init::Init;
pub use init::MemoryMappings;
pub use init::init;

use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemoryBackingError;
use guestmem::PAGE_SIZE;
use guestmem::ranges::PagedRange;
use hcl::GuestVtl;
use hcl::ioctl::AcceptPagesError;
use hcl::ioctl::ApplyVtlProtectionsError;
use hcl::ioctl::Mshv;
use hcl::ioctl::MshvHvcall;
use hcl::ioctl::MshvVtl;
use hcl::ioctl::snp::SnpPageError;
use hv1_structs::VtlArray;
use hvdef::HV_MAP_GPA_PERMISSIONS_ALL;
use hvdef::HV_MAP_GPA_PERMISSIONS_NONE;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HypercallCode;
use hvdef::hypercall::AcceptMemoryType;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvInputVtl;
use mapping::GuestMemoryMapping;
use mapping::GuestValidMemory;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use parking_lot::MutexGuard;
use registrar::RegisterMemory;
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use thiserror::Error;
use virt::IsolationType;
use virt_mshv_vtl::GpnSource;
use virt_mshv_vtl::ProtectIsolatedMemory;
use virt_mshv_vtl::TlbFlushLockAccess;
use vm_topology::memory::MemoryLayout;
use x86defs::snp::SevRmpAdjust;
use x86defs::tdx::GpaVmAttributes;
use x86defs::tdx::GpaVmAttributesMask;
use x86defs::tdx::TdgMemPageAttrWriteR8;
use x86defs::tdx::TdgMemPageGpaAttr;

/// Error querying VTL permissions on a page
#[derive(Debug, Error)]
pub enum QueryVtlPermissionsError {
    /// An SNP-specific error
    #[error("failed to query rmp permissions")]
    Snp(#[source] SnpPageError),
}

#[derive(Debug)]
struct MshvVtlWithPolicy {
    mshv_vtl: MshvVtl,
    ignore_registration_failure: bool,
    shared: bool,
}

impl RegisterMemory for MshvVtlWithPolicy {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
        match self.mshv_vtl.add_vtl0_memory(range, self.shared) {
            Ok(()) => Ok(()),
            // TODO: remove this once the kernel driver tracks registration
            Err(err) if self.ignore_registration_failure => {
                tracing::warn!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    "registration failure, could be expected"
                );
                Ok(())
            }
            Err(err) => Err(err),
        }
    }
}

#[derive(Debug, Error)]
#[error("failed to register memory with kernel")]
struct RegistrationError;

/// Default VTL memory permissions applied to any mapped memory.
///
/// Currently built for hardware CVMs, which only define permissions for VTL
/// 0 and VTL 1 to express what those VTLs have access to. If this were to
/// extend to non-hardware CVMs, those would need to define permissions
/// instead for VTL 2 and VTL 1 to express what the lower VTLs have access
/// to.
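///
/// A minimal sketch of how this is used (illustrative only; the type is
/// crate-private, so this is not run as a doctest):
///
/// ```ignore
/// let mut permissions = DefaultVtlPermissions {
///     vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
///     vtl1: None,
/// };
/// // Once VTL 1 is enabled it gets its own default mask.
/// permissions.set(GuestVtl::Vtl1, HV_MAP_GPA_PERMISSIONS_ALL);
/// ```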
struct DefaultVtlPermissions {
    vtl0: HvMapGpaFlags,
    vtl1: Option<HvMapGpaFlags>,
}

impl DefaultVtlPermissions {
    fn set(&mut self, vtl: GuestVtl, permissions: HvMapGpaFlags) {
        match vtl {
            GuestVtl::Vtl0 => self.vtl0 = permissions,
            GuestVtl::Vtl1 => self.vtl1 = Some(permissions),
        }
    }
}

/// Represents the VTL permissions on a page for a given isolation type.
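///
/// For example, a sketch of how an SNP permission set is built (crate-private,
/// so not run as a doctest):
///
/// ```ignore
/// // VTL 0 read/write/execute is expressed as an RMPADJUST targeting VMPL 2.
/// let perms = GpaVtlPermissions::new(
///     IsolationType::Snp,
///     GuestVtl::Vtl0,
///     HV_MAP_GPA_PERMISSIONS_ALL,
/// );
/// ```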
#[derive(Copy, Clone)]
enum GpaVtlPermissions {
    Vbs(HvMapGpaFlags),
    Snp(SevRmpAdjust),
    Tdx(TdgMemPageGpaAttr, TdgMemPageAttrWriteR8),
}

impl GpaVtlPermissions {
    fn new(isolation: IsolationType, vtl: GuestVtl, protections: HvMapGpaFlags) -> Self {
        match isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => GpaVtlPermissions::Vbs(protections),
            IsolationType::Snp => {
                let mut vtl_permissions = GpaVtlPermissions::Snp(SevRmpAdjust::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
            IsolationType::Tdx => {
                let mut vtl_permissions =
                    GpaVtlPermissions::Tdx(TdgMemPageGpaAttr::new(), TdgMemPageAttrWriteR8::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
        }
    }

    fn set(&mut self, vtl: GuestVtl, protections: HvMapGpaFlags) {
        match self {
            GpaVtlPermissions::Vbs(flags) => *flags = protections,
            GpaVtlPermissions::Snp(rmpadjust) => {
                *rmpadjust = SevRmpAdjust::new()
                    .with_enable_read(protections.readable())
                    .with_enable_write(protections.writable())
                    .with_enable_user_execute(protections.user_executable())
                    .with_enable_kernel_execute(protections.kernel_executable())
                    .with_target_vmpl(match vtl {
                        GuestVtl::Vtl0 => x86defs::snp::Vmpl::Vmpl2.into(),
                        GuestVtl::Vtl1 => x86defs::snp::Vmpl::Vmpl1.into(),
                    });
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                let vm_attributes = GpaVmAttributes::new()
                    .with_valid(true)
                    .with_read(protections.readable())
                    .with_write(protections.writable())
                    .with_kernel_execute(protections.kernel_executable())
                    .with_user_execute(protections.user_executable());

                let (new_attributes, new_mask) = match vtl {
                    GuestVtl::Vtl0 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                    GuestVtl::Vtl1 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm2(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm2(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                };

                *attributes = new_attributes;
                *mask = new_mask;
            }
        }
    }
}

/// Error returned when modifying GPA visibility.
#[derive(Debug, Error)]
#[error("failed to modify gpa visibility, elements successfully processed {processed}")]
pub struct ModifyGpaVisibilityError {
    source: HvError,
    processed: usize,
}

/// Interface to accept and manipulate lower VTL memory acceptance and page
/// protections.
///
/// FUTURE: this should go away as a separate object once all the logic is moved
/// into this crate.
pub struct MemoryAcceptor {
    mshv_hvcall: MshvHvcall,
    mshv_vtl: MshvVtl,
    isolation: IsolationType,
}

impl MemoryAcceptor {
    /// Create a new instance.
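    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the mshv kernel driver is present (not run
    /// as a doctest):
    ///
    /// ```ignore
    /// let acceptor = MemoryAcceptor::new(IsolationType::Snp)?;
    /// // Accept a page-aligned range on behalf of the lower VTLs.
    /// acceptor.accept_lower_vtl_pages(MemoryRange::new(0x1000..0x3000))?;
    /// ```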
    pub fn new(isolation: IsolationType) -> Result<Self, hcl::ioctl::Error> {
        let mshv = Mshv::new()?;
        let mshv_vtl = mshv.create_vtl()?;
        let mshv_hvcall = MshvHvcall::new()?;
        mshv_hvcall.set_allowed_hypercalls(&[
            HypercallCode::HvCallAcceptGpaPages,
            HypercallCode::HvCallModifySparseGpaPageHostVisibility,
            HypercallCode::HvCallModifyVtlProtectionMask,
        ]);

        // On boot, VTL 0 should have permissions.
        Ok(Self {
            mshv_hvcall,
            mshv_vtl,
            isolation,
        })
    }

    /// Accept pages for lower VTLs.
    pub fn accept_lower_vtl_pages(&self, range: MemoryRange) -> Result<(), AcceptPagesError> {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => self
                .mshv_hvcall
                .accept_gpa_pages(range, AcceptMemoryType::RAM),
            IsolationType::Snp => {
                self.mshv_vtl
                    .pvalidate_pages(range, true, false)
                    .map_err(|err| AcceptPagesError::Snp {
                        failed_operation: err,
                        range,
                    })
            }
            IsolationType::Tdx => {
                let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(GpaVmAttributes::FULL_ACCESS);
                let mask =
                    TdgMemPageAttrWriteR8::new().with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);

                self.mshv_vtl
                    .tdx_accept_pages(range, Some((attributes, mask)))
                    .map_err(|err| AcceptPagesError::Tdx { error: err, range })
            }
        }
    }

    fn unaccept_lower_vtl_pages(&self, range: MemoryRange) {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => {
                // TODO VBS: is there something to do here?
            }
            IsolationType::Snp => {
                // Revoke permissions before unaccepting pages. This is required
                // because a subsequent page acceptance is not guaranteed to
                // reset permissions unless the hypervisor executed RMPUPDATE,
                // which it cannot be trusted to do. We set new permissions
                // ourselves, but that still leaves open a tiny window where the
                // guest could access the pages with the old permissions.
                for lower_vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
                    self.apply_protections(range, lower_vtl, HV_MAP_GPA_PERMISSIONS_NONE)
                        .unwrap();
                }
                self.mshv_vtl.pvalidate_pages(range, false, false).unwrap()
            }

            IsolationType::Tdx => {
                // Nothing to do for TDX.
            }
        }
    }

    /// Tell the host to change the visibility of the given GPAs.
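    ///
    /// On partial failure, the returned [`ModifyGpaVisibilityError`] records
    /// how many GPNs were processed before the hypercall failed.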
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        gpns: &[u64],
    ) -> Result<(), ModifyGpaVisibilityError> {
        self.mshv_hvcall
            .modify_gpa_visibility(host_visibility, gpns)
            .map_err(|(e, processed)| ModifyGpaVisibilityError {
                source: e,
                processed,
            })
    }

    /// Apply the initial protections on lower-VTL memory.
    ///
    /// After initialization, the default protections should be applied.
    pub fn apply_initial_lower_vtl_protections(
        &self,
        range: MemoryRange,
    ) -> Result<(), ApplyVtlProtectionsError> {
        self.apply_protections(range, GuestVtl::Vtl0, HV_MAP_GPA_PERMISSIONS_ALL)
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        vtl: GuestVtl,
        flags: HvMapGpaFlags,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let permissions = GpaVtlPermissions::new(self.isolation, vtl, flags);

        match permissions {
            GpaVtlPermissions::Vbs(flags) => {
                // For VBS-isolated VMs, the permissions apply to all lower
                // VTLs. Therefore VTL 0 cannot set its own permissions.
                assert_ne!(vtl, GuestVtl::Vtl0);

                self.mshv_hvcall
                    .modify_vtl_protection_mask(range, flags, HvInputVtl::from(vtl))
            }
            GpaVtlPermissions::Snp(rmpadjust) => {
                // For SNP VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .rmpadjust_pages(range, rmpadjust, false)
                    .map_err(|err| ApplyVtlProtectionsError::Snp {
                        failed_operation: err,
                        range,
                        permissions: rmpadjust,
                        vtl: vtl.into(),
                    })
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                // For TDX VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .tdx_set_page_attributes(range, attributes, mask)
                    .map_err(|err| ApplyVtlProtectionsError::Tdx {
                        error: err,
                        range,
                        permissions: attributes,
                        vtl: vtl.into(),
                    })
            }
        }
    }
}

/// An implementation of [`ProtectIsolatedMemory`] for Underhill VMs.
pub struct HardwareIsolatedMemoryProtector {
    // Serves as a lock for synchronizing visibility and page-protection changes.
    inner: Mutex<HardwareIsolatedMemoryProtectorInner>,
    layout: MemoryLayout,
    acceptor: Arc<MemoryAcceptor>,
    vtl0: Arc<GuestMemoryMapping>,
    vtl1_protections_enabled: AtomicBool,
}

struct HardwareIsolatedMemoryProtectorInner {
    valid_encrypted: Arc<GuestValidMemory>,
    valid_shared: Arc<GuestValidMemory>,
    encrypted: Arc<GuestMemoryMapping>,
    default_vtl_permissions: DefaultVtlPermissions,
    overlay_pages: VtlArray<Vec<OverlayPage>, 2>,
    locked_pages: VtlArray<Vec<Box<[u64]>>, 2>,
}

struct OverlayPage {
    gpn: u64,
    previous_permissions: HvMapGpaFlags,
    overlay_permissions: HvMapGpaFlags,
    ref_count: u16,
    gpn_source: GpnSource,
}

impl HardwareIsolatedMemoryProtector {
    /// Returns a new instance.
    ///
    /// `encrypted` provides the mapping for encrypted memory. `vtl0` provides
    /// the VTL 0 view of guest memory, used to track VTL permissions.
    pub fn new(
        valid_encrypted: Arc<GuestValidMemory>,
        valid_shared: Arc<GuestValidMemory>,
        encrypted: Arc<GuestMemoryMapping>,
        vtl0: Arc<GuestMemoryMapping>,
        layout: MemoryLayout,
        acceptor: Arc<MemoryAcceptor>,
    ) -> Self {
        Self {
            inner: Mutex::new(HardwareIsolatedMemoryProtectorInner {
                valid_encrypted,
                valid_shared,
                encrypted,
                // Grant only VTL 0 all permissions. This will be altered
                // later by VTL 1 enablement and by VTL 1 itself.
                default_vtl_permissions: DefaultVtlPermissions {
                    vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
                    vtl1: None,
                },
                overlay_pages: VtlArray::from_fn(|_| Vec::new()),
                locked_pages: VtlArray::from_fn(|_| Vec::new()),
            }),
            layout,
            acceptor,
            vtl0,
            vtl1_protections_enabled: AtomicBool::new(false),
        }
    }

    fn apply_protections_with_overlay_handling(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        inner: &mut MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let mut range_queue = VecDeque::new();
        range_queue.push_back(range);

        'outer: while let Some(range) = range_queue.pop_front() {
            for overlay_page in inner.overlay_pages[target_vtl].iter_mut() {
                let overlay_addr = overlay_page.gpn * HV_PAGE_SIZE;
                if range.contains_addr(overlay_addr) {
                    // If the overlay page is within the range, update the
                    // permissions that will be restored when it is unlocked.
                    overlay_page.previous_permissions = protections;
                    // And split the range around it.
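                    // For example (hypothetical GPNs): protecting GPNs 0x100..0x200
                    // with an overlay page at GPN 0x180 requeues GPNs 0x100..0x180 and
                    // 0x181..0x200, leaving the overlay page itself untouched.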
                    let (left, right_with_overlay) =
                        range.split_at_offset(range.offset_of(overlay_addr).unwrap());
                    let (overlay, right) = right_with_overlay.split_at_offset(HV_PAGE_SIZE);
                    debug_assert_eq!(overlay.start_4k_gpn(), overlay_page.gpn);
                    debug_assert_eq!(overlay.len(), HV_PAGE_SIZE);
                    if !left.is_empty() {
                        range_queue.push_back(left);
                    }
                    if !right.is_empty() {
                        range_queue.push_back(right);
                    }
                    continue 'outer;
                }
            }
            // We can only reach here if the range does not contain any overlay
            // pages, so now we can apply the protections to the range.
            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)?
        }

        Ok(())
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        gpn_source: GpnSource,
    ) -> Result<(), ApplyVtlProtectionsError> {
        if gpn_source == GpnSource::GuestMemory && target_vtl == GuestVtl::Vtl0 {
            // Only permissions imposed on VTL 0 guest memory are explicitly tracked
            self.vtl0.update_permission_bitmaps(range, protections);
        }
        self.acceptor
            .apply_protections(range, target_vtl, protections)
    }

    /// Get the permissions that the given VTL has to the given GPN.
    ///
    /// This function does not check for any protections applied by VTL 2,
    /// only those applied by lower VTLs.
    fn query_lower_vtl_permissions(
        &self,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<HvMapGpaFlags, HvError> {
        if !self.is_in_guest_memory(gpn) {
            return Err(HvError::OperationDenied);
        }

        let res = match vtl {
            GuestVtl::Vtl0 => self
                .vtl0
                .query_access_permission(gpn)
                .unwrap_or(HV_MAP_GPA_PERMISSIONS_ALL),
            GuestVtl::Vtl1 => HV_MAP_GPA_PERMISSIONS_ALL,
        };

        Ok(res)
    }

    fn check_gpn_not_locked(
        &self,
        inner: &MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<(), HvError> {
        // Overlay pages have special handling; being locked does not prevent that.
        // TODO: When uh_mem implements the returning of overlay pages, rather than
        // requiring them to also be locked through guestmem, the check for overlay
        // pages can be removed, as locked and overlay pages will be mutually exclusive.
        if inner.locked_pages[vtl].iter().flatten().any(|x| *x == gpn)
            && !inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn)
        {
            return Err(HvError::OperationDenied);
        }
        Ok(())
    }

    /// Checks whether the given GPN is present in guest RAM.
    fn is_in_guest_memory(&self, gpn: u64) -> bool {
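        // GPNs are 4KiB page indices; e.g. GPN 0x100 corresponds to GPA 0x10_0000.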
        let gpa = gpn << HV_PAGE_SHIFT;
        self.layout.ram().iter().any(|r| r.range.contains_addr(gpa))
    }
}

impl ProtectIsolatedMemory for HardwareIsolatedMemoryProtector {
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        let inner = self.inner.lock();

        for &gpn in gpns {
            // Validate the ranges are RAM.
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Don't allow overlay pages to be shared.
            if shared && inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Filter out the GPNs that are already in the correct state. If the
        // page is becoming shared, make sure the requesting VTL has read/write
        // VTL permissions to the page.
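        // The pipeline below stops (`take_while`) at the first GPN that VTL 1
        // protections forbid VTL 0 from sharing, then drops (`filter_map`) any
        // GPN whose visibility already matches the request.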
        let orig_gpns = gpns;
        let mut failed_vtl_permission_index = None;
        let gpns = gpns
            .iter()
            .copied()
            .enumerate()
            .take_while(|&(index, gpn)| {
                if vtl == GuestVtl::Vtl0 && shared && self.vtl1_protections_enabled() {
                    let permissions = self
                        .vtl0
                        .query_access_permission(gpn)
                        .expect("vtl 1 protections enabled, vtl permissions should be tracked");
                    if !permissions.readable() || !permissions.writable() {
                        failed_vtl_permission_index = Some(index);
                        false
                    } else {
                        true
                    }
                } else {
                    true
                }
            })
            .filter_map(|(_, gpn)| {
                if inner.valid_shared.check_valid(gpn) != shared {
                    Some(gpn)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        tracing::debug!(
            orig = orig_gpns.len(),
            len = gpns.len(),
            first = gpns.first(),
            shared,
            "change vis"
        );

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, &gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        // Prevent accesses via the wrong address.
        let clear_bitmap = if shared {
            &inner.valid_encrypted
        } else {
            &inner.valid_shared
        };

        for &range in &ranges {
            if shared && vtl == GuestVtl::Vtl0 {
                // Accessing these pages through the encrypted mapping is now
                // invalid. Make sure the VTL bitmaps reflect this. We could
                // call apply_protections here but that would result in an extra
                // hardware interaction that we don't need since we're about to
                // unaccept the pages anyways.
                self.vtl0
                    .update_permission_bitmaps(range, HV_MAP_GPA_PERMISSIONS_NONE);
            }

            clear_bitmap.update_valid(range, false);
        }

        // There may be other threads concurrently accessing these pages. We
        // cannot change the page visibility state until these threads have
        // stopped those accesses. Flush the RCU domain that `guestmem` uses in
        // order to flush any threads accessing the pages. After this, we are
        // guaranteed no threads are accessing these pages (unless the pages are
        // also locked), since no bitmap currently allows access.
        guestmem::rcu().synchronize_blocking();

        if let IsolationType::Snp = self.acceptor.isolation {
            // We need to ensure that the guest TLB has been fully flushed since
            // the unaccept operation is not guaranteed to do so in hardware,
            // and the hypervisor is also not trusted with TLB hygiene.
            tlb_access.flush_entire();
        }

        // TODO SNP: check list of locks, roll back bitmap changes if there was one.

        if shared {
            // Unaccept the pages so that the hypervisor can reclaim them.
            for &range in &ranges {
                self.acceptor.unaccept_lower_vtl_pages(range);
            }
        }

        // Ask the hypervisor to update visibility.
        let host_visibility = if shared {
            HostVisibilityType::SHARED
        } else {
            HostVisibilityType::PRIVATE
        };

        let (result, ranges) = match self.acceptor.modify_gpa_visibility(host_visibility, &gpns) {
            Ok(()) => {
                // All gpns succeeded, so the whole set of ranges should be
                // processed.
                (
                    match failed_vtl_permission_index {
                        Some(index) => Err((HvError::AccessDenied, index)),
                        None => Ok(()),
                    },
                    ranges,
                )
            }
            Err(err) => {
                if shared {
                    // A transition from private to shared should always
                    // succeed. There is no safe rollback path, so we must
                    // panic.
                    panic!(
                        "the hypervisor refused to transition pages to shared, we cannot safely roll back: {:?}",
                        err
                    );
                }

                // Only some ranges succeeded. Recreate ranges based on which
                // gpns succeeded, for further processing.
                let (successful_gpns, failed_gpns) = gpns.split_at(err.processed);
                let ranges = PagedRange::new(
                    0,
                    successful_gpns.len() * PagedRange::PAGE_SIZE,
                    successful_gpns,
                )
                .unwrap()
                .ranges()
                .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                .collect::<Result<Vec<_>, _>>()
                .expect("previous gpns was already checked");

                // Roll back the cleared bitmap for failed gpns, as they should
                // still be in their original shared state.
                let rollback_ranges =
                    PagedRange::new(0, failed_gpns.len() * PagedRange::PAGE_SIZE, failed_gpns)
                        .unwrap()
                        .ranges()
                        .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                        .collect::<Result<Vec<_>, _>>()
                        .expect("previous gpns was already checked");

                for &range in &rollback_ranges {
                    clear_bitmap.update_valid(range, true);
                }

                // Figure out the index of the gpn that failed, in the
                // pre-filtered list that will be reported back to the caller.
                let failed_index = orig_gpns
                    .iter()
                    .position(|gpn| *gpn == failed_gpns[0])
                    .expect("failed gpn should be present in the list");

                (Err((err.source, failed_index)), ranges)
            }
        };

        if !shared {
            // Accept the pages so that the guest can use them.
            for &range in &ranges {
                self.acceptor
                    .accept_lower_vtl_pages(range)
                    .expect("everything should be in a state where we can accept VTL0 pages");

                // For SNP, zero the memory before allowing the guest to access
                // it. For TDX, this is done by the TDX module. For mshv, this is
                // done by the hypervisor.
                if self.acceptor.isolation == IsolationType::Snp {
                    inner.encrypted.zero_range(range).expect("VTL 2 should have access to lower VTL memory, the page should be accepted, there should be no vtl protections yet.")
                }
            }
        }

        // Allow accesses via the correct address.
        let set_bitmap = if shared {
            &inner.valid_shared
        } else {
            &inner.valid_encrypted
        };
        for &range in &ranges {
            set_bitmap.update_valid(range, true);
        }

        if !shared {
            // Apply VTL protections so that the guest can use them. Any
            // overlay pages won't be host visible, so just apply the default
            // protections directly without handling them.
            for &range in &ranges {
                // Make sure we reset the permissions bitmaps for VTL 0.
                self.apply_protections(
                    range,
                    GuestVtl::Vtl0,
                    inner.default_vtl_permissions.vtl0,
                    GpnSource::GuestMemory,
                )
                .expect("should be able to apply default protections");

                if let Some(vtl1_protections) = inner.default_vtl_permissions.vtl1 {
                    self.apply_protections(
                        range,
                        GuestVtl::Vtl1,
                        vtl1_protections,
                        GpnSource::GuestMemory,
                    )
                    .expect("everything should be in a state where we can apply VTL protections");
                }
            }
        }

        // Return the original result of the underlying page visibility
        // transition call to the caller.
        result
    }

    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)> {
        // Validate the ranges are RAM.
        for (i, &gpn) in gpns.iter().enumerate() {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, i));
            }
        }

        let inner = self.inner.lock();

        // Set GPN sharing status in output.
        for (gpn, host_vis) in gpns.iter().zip(host_visibility.iter_mut()) {
            *host_vis = if inner.valid_shared.check_valid(*gpn) {
                HostVisibilityType::SHARED
            } else {
                HostVisibilityType::PRIVATE
            };
        }
        Ok(())
    }

    fn default_vtl0_protections(&self) -> HvMapGpaFlags {
        self.inner.lock().default_vtl_permissions.vtl0
    }

    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        vtl_protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        // Prevent visibility changes while VTL protections are being
        // applied.
        //
        // TODO: This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        inner
            .default_vtl_permissions
            .set(target_vtl, vtl_protections);

        let mut ranges = Vec::new();
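        // Coalesce contiguous runs of accepted, unlocked pages into ranges so
        // protections are applied per run rather than per page. For example
        // (hypothetical GPNs): if GPNs 0x100..=0x17f are accepted but 0x180 is
        // not, the run is pushed as a single range covering 0x100..0x180.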
        for ram_range in self.layout.ram().iter() {
            let mut protect_start = ram_range.range.start();
            let mut page_count = 0;

            for gpn in
                ram_range.range.start() / PAGE_SIZE as u64..ram_range.range.end() / PAGE_SIZE as u64
            {
                // TODO GUEST VSM: for now, use the encrypted mapping to
                // find all accepted memory. When lazy acceptance exists,
                // this should track all pages that have been accepted and
                // should be used instead.
                // Also don't attempt to change the permissions of locked pages.
                if inner.valid_encrypted.check_valid(gpn) {
                    self.check_gpn_not_locked(&inner, target_vtl, gpn)?;
                    page_count += 1;
                } else {
                    if page_count > 0 {
                        let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                        ranges.push(MemoryRange::new(protect_start..end_address));
                    }
                    protect_start = (gpn + 1) * PAGE_SIZE as u64;
                    page_count = 0;
                }
            }

            if page_count > 0 {
                let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                ranges.push(MemoryRange::new(protect_start..end_address));
            }
        }

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                target_vtl,
                vtl_protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Invalidate the entire VTL 0 TLB to ensure that the new permissions
        // are observed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        // Prevent visibility changes while VTL protections are being
        // applied. This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let inner = self.inner.lock();

        // Validate the ranges are RAM.
        for &gpn in gpns {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, target_vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Validate they're not overlay pages.
            if inner.overlay_pages[target_vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Protections cannot be applied to a host-visible page.
        if gpns.iter().any(|&gpn| inner.valid_shared.check_valid(gpn)) {
            return Err((HvError::OperationDenied, 0));
        }

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        for range in ranges {
            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)
                .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the entire
        // VTL 0 TLB to ensure that the new permissions are observed, and wait for
        // other CPUs to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();

        // If the page is already registered as an overlay page, just check
        // that the permissions are adequate. If the permissions requested
        // differ from the ones already registered, this is best effort;
        // there is no spec guarantee of which one "wins".
        if let Some(registered) = inner.overlay_pages[vtl].iter_mut().find(|p| p.gpn == gpn) {
            let needed_perms = new_perms.unwrap_or(check_perms);
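            // The check below requires `needed_perms` to be a subset of the
            // permissions already granted to the existing registration.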
            if registered.overlay_permissions.into_bits() | needed_perms.into_bits()
                != registered.overlay_permissions.into_bits()
            {
                return Err(HvError::OperationDenied);
            }
            registered.ref_count += 1;
            return Ok(());
        }

        let current_perms = match gpn_source {
            GpnSource::GuestMemory => {
                // Check that the required permissions are present.
                let current_perms = self.query_lower_vtl_permissions(vtl, gpn)?;
                if current_perms.into_bits() | check_perms.into_bits() != current_perms.into_bits()
                {
                    return Err(HvError::OperationDenied);
                }

                // Protections cannot be applied to a host-visible page.
                if inner.valid_shared.check_valid(gpn) {
                    return Err(HvError::OperationDenied);
                }

                current_perms
            }
            GpnSource::Dma => {
                if self.is_in_guest_memory(gpn) {
                    // DMA memory must not be in guest RAM.
                    return Err(HvError::OperationDenied);
                }

                HV_MAP_GPA_PERMISSIONS_NONE
            }
        };

        // Nor can the overlay be placed on a locked page.
        self.check_gpn_not_locked(&inner, vtl, gpn)?;

        // Everything's validated, change the permissions.
        if let Some(new_perms) = new_perms {
            self.apply_protections(
                MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
                vtl,
                new_perms,
                gpn_source,
            )
            .map_err(|_| HvError::OperationDenied)?;
        }

        // Nothing from this point on can fail, so we can safely register the overlay page.
        inner.overlay_pages[vtl].push(OverlayPage {
            gpn,
            previous_permissions: current_perms,
            overlay_permissions: new_perms.unwrap_or(current_perms),
            ref_count: 1,
            gpn_source,
        });

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);

        Ok(())
    }

    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();
        let overlay_pages = &mut inner.overlay_pages[vtl];

        // Find the overlay page.
        let index = overlay_pages
            .iter()
            .position(|p| p.gpn == gpn)
            .ok_or(HvError::OperationDenied)?;

        // If this overlay page has been registered multiple times, just
        // decrement the reference count and return. We don't implement
        // full handling of multiple registrations with different permissions,
        // since it's best effort anyways.
        if overlay_pages[index].ref_count > 1 {
            overlay_pages[index].ref_count -= 1;
            return Ok(());
        }

        // Restore its permissions.
        self.apply_protections(
            MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
            vtl,
            overlay_pages[index].previous_permissions,
            overlay_pages[index].gpn_source,
        )
        .map_err(|_| HvError::OperationDenied)?;

        // Nothing from this point on can fail, so we can safely unregister the overlay page.
        overlay_pages.remove(index);

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);
        Ok(())
    }

    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool {
        self.inner.lock().overlay_pages[vtl]
            .iter()
            .any(|p| p.gpn == gpn)
    }

    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError> {
        // Locking a page multiple times is allowed, so no need to check
        // for duplicates.
        // We also need to allow locking overlay pages for now.
        // TODO: We probably don't want to allow locking overlay pages once
        // we return the pointer for them instead of going through guestmem::lock.
        // TODO: other preconditions?
        self.inner.lock().locked_pages[vtl].push(gpns.to_vec().into_boxed_slice());
        Ok(())
    }

    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) {
        let mut inner = self.inner.lock();
        let locked_pages = &mut inner.locked_pages[vtl];
        for (i, w) in locked_pages.iter().enumerate() {
            if **w == *gpns {
                locked_pages.swap_remove(i);
                return;
            }
        }

        // Don't change protections on locked pages to avoid conflicting
        // with unregister_overlay_page.
        // TODO: Is this the right decision even after we separate overlay and
        // locked pages?

        panic!("Tried to unlock pages that were not locked");
    }

    fn set_vtl1_protections_enabled(&self) {
        self.vtl1_protections_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
    }

    fn vtl1_protections_enabled(&self) -> bool {
        self.vtl1_protections_enabled
            .load(std::sync::atomic::Ordering::Relaxed)
    }
}