underhill_mem/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Underhill VM memory management.
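//!
//! This crate sets up the VM's memory mappings and manages lower-VTL memory
//! acceptance, host-visibility transitions, and VTL page protections for
//! VBS-, SNP-, and TDX-isolated VMs.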

#![cfg(target_os = "linux")]

mod init;
mod mapping;
mod registrar;

pub use init::BootInit;
pub use init::Init;
pub use init::MemoryMappings;
pub use init::init;

use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemoryBackingError;
use guestmem::PAGE_SIZE;
use guestmem::ranges::PagedRange;
use hcl::GuestVtl;
use hcl::ioctl::AcceptPagesError;
use hcl::ioctl::ApplyVtlProtectionsError;
use hcl::ioctl::Mshv;
use hcl::ioctl::MshvHvcall;
use hcl::ioctl::MshvVtl;
use hcl::ioctl::snp::SnpPageError;
use hv1_structs::VtlArray;
use hvdef::HV_MAP_GPA_PERMISSIONS_ALL;
use hvdef::HV_MAP_GPA_PERMISSIONS_NONE;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HypercallCode;
use hvdef::hypercall::AcceptMemoryType;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvInputVtl;
use mapping::GuestMemoryMapping;
use mapping::GuestValidMemory;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use parking_lot::MutexGuard;
use registrar::RegisterMemory;
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use thiserror::Error;
use virt::IsolationType;
use virt_mshv_vtl::GpnSource;
use virt_mshv_vtl::ProtectIsolatedMemory;
use virt_mshv_vtl::TlbFlushLockAccess;
use vm_topology::memory::MemoryLayout;
use x86defs::snp::SevRmpAdjust;
use x86defs::tdx::GpaVmAttributes;
use x86defs::tdx::GpaVmAttributesMask;
use x86defs::tdx::TdgMemPageAttrWriteR8;
use x86defs::tdx::TdgMemPageGpaAttr;

/// Error querying vtl permissions on a page
#[derive(Debug, Error)]
pub enum QueryVtlPermissionsError {
    /// An SNP-specific error
    #[error("failed to query rmp permissions")]
    Snp(#[source] SnpPageError),
}

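/// [`MshvVtl`] paired with the policy used when registering lower-VTL memory
/// with the kernel driver: whether registration failures are tolerated and
/// whether the memory is shared.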
#[derive(Debug)]
struct MshvVtlWithPolicy {
    mshv_vtl: MshvVtl,
    ignore_registration_failure: bool,
    shared: bool,
}

impl RegisterMemory for MshvVtlWithPolicy {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
        match self.mshv_vtl.add_vtl0_memory(range, self.shared) {
            Ok(()) => Ok(()),
            // TODO: remove this once the kernel driver tracks registration
            Err(err) if self.ignore_registration_failure => {
                tracing::warn!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    "registration failure, could be expected"
                );
                Ok(())
            }
            Err(err) => Err(err),
        }
    }
}

#[derive(Debug, Error)]
#[error("failed to register memory with kernel")]
struct RegistrationError;

/// Default VTL memory permissions applied to any mapped memory.
///
/// Currently built for hardware CVMs, which only define permissions for VTL
/// 0 and VTL 1 to express what those VTLs have access to. If this were to
/// extend to non-hardware CVMs, those would need to define permissions
/// instead for VTL 2 and VTL 1 to express what the lower VTLs have access
/// to.
struct DefaultVtlPermissions {
    vtl0: HvMapGpaFlags,
    vtl1: Option<HvMapGpaFlags>,
}

impl DefaultVtlPermissions {
    fn set(&mut self, vtl: GuestVtl, permissions: HvMapGpaFlags) {
        match vtl {
            GuestVtl::Vtl0 => self.vtl0 = permissions,
            GuestVtl::Vtl1 => self.vtl1 = Some(permissions),
        }
    }
}

/// Represents the vtl permissions on a page for a given isolation type
#[derive(Copy, Clone)]
enum GpaVtlPermissions {
    Vbs(HvMapGpaFlags),
    Snp(SevRmpAdjust),
    Tdx(TdgMemPageGpaAttr, TdgMemPageAttrWriteR8),
}

impl GpaVtlPermissions {
    fn new(isolation: IsolationType, vtl: GuestVtl, protections: HvMapGpaFlags) -> Self {
        match isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => GpaVtlPermissions::Vbs(protections),
            IsolationType::Snp => {
                let mut vtl_permissions = GpaVtlPermissions::Snp(SevRmpAdjust::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
            IsolationType::Tdx => {
                let mut vtl_permissions =
                    GpaVtlPermissions::Tdx(TdgMemPageGpaAttr::new(), TdgMemPageAttrWriteR8::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
        }
    }

    fn set(&mut self, vtl: GuestVtl, protections: HvMapGpaFlags) {
        match self {
            GpaVtlPermissions::Vbs(flags) => *flags = protections,
            GpaVtlPermissions::Snp(rmpadjust) => {
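                // For SNP, VTL permissions are expressed as RMPADJUST
                // attributes targeting a VMPL; lower VTLs map to
                // higher-numbered VMPLs (VTL 0 is VMPL 2, VTL 1 is VMPL 1,
                // with VTL 2 itself running at VMPL 0).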
                *rmpadjust = SevRmpAdjust::new()
                    .with_enable_read(protections.readable())
                    .with_enable_write(protections.writable())
                    .with_enable_user_execute(protections.user_executable())
                    .with_enable_kernel_execute(protections.kernel_executable())
                    .with_target_vmpl(match vtl {
                        GuestVtl::Vtl0 => x86defs::snp::Vmpl::Vmpl2.into(),
                        GuestVtl::Vtl1 => x86defs::snp::Vmpl::Vmpl1.into(),
                    });
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
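                // For TDX, lower VTLs run as L2 VMs: VTL 0 corresponds to
                // L2 VM 1 and VTL 1 to L2 VM 2, so the attributes and the
                // mask of attributes to change are built for the matching VM.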
                let vm_attributes = GpaVmAttributes::new()
                    .with_valid(true)
                    .with_read(protections.readable())
                    .with_write(protections.writable())
                    .with_kernel_execute(protections.kernel_executable())
                    .with_user_execute(protections.user_executable());

                let (new_attributes, new_mask) = match vtl {
                    GuestVtl::Vtl0 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                    GuestVtl::Vtl1 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm2(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm2(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                };

                *attributes = new_attributes;
                *mask = new_mask;
            }
        }
    }
}

/// Error returned when modifying gpa visibility.
#[derive(Debug, Error)]
#[error("failed to modify gpa visibility, elements successfully processed {processed}")]
pub struct ModifyGpaVisibilityError {
    source: HvError,
    processed: usize,
}

/// Interface to manage lower VTL memory acceptance and page protections.
///
/// FUTURE: this should go away as a separate object once all the logic is moved
/// into this crate.
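///
/// # Example
///
/// A minimal usage sketch (illustrative only and not compiled; the isolation
/// type and the memory ranges to accept come from VM configuration elsewhere):
///
/// ```ignore
/// let acceptor = MemoryAcceptor::new(IsolationType::Snp)?;
/// // Accept a lower-VTL range and grant VTL 0 full access to it.
/// acceptor.accept_lower_vtl_pages(range)?;
/// acceptor.apply_initial_lower_vtl_protections(range)?;
/// ```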
pub struct MemoryAcceptor {
    mshv_hvcall: MshvHvcall,
    mshv_vtl: MshvVtl,
    isolation: IsolationType,
}

impl MemoryAcceptor {
    /// Create a new instance.
    pub fn new(isolation: IsolationType) -> Result<Self, hcl::ioctl::Error> {
        let mshv = Mshv::new()?;
        let mshv_vtl = mshv.create_vtl()?;
        let mshv_hvcall = MshvHvcall::new()?;
        mshv_hvcall.set_allowed_hypercalls(&[
            HypercallCode::HvCallAcceptGpaPages,
            HypercallCode::HvCallModifySparseGpaPageHostVisibility,
            HypercallCode::HvCallModifyVtlProtectionMask,
        ]);

        // On boot, VTL 0 should have permissions.
        Ok(Self {
            mshv_hvcall,
            mshv_vtl,
            isolation,
        })
    }

    /// Accept pages for lower VTLs.
    pub fn accept_lower_vtl_pages(&self, range: MemoryRange) -> Result<(), AcceptPagesError> {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => self
                .mshv_hvcall
                .accept_gpa_pages(range, AcceptMemoryType::RAM),
            IsolationType::Snp => {
                self.mshv_vtl
                    .pvalidate_pages(range, true, false)
                    .map_err(|err| AcceptPagesError::Snp {
                        failed_operation: err,
                        range,
                    })
            }
            IsolationType::Tdx => {
                let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(GpaVmAttributes::FULL_ACCESS);
                let mask =
                    TdgMemPageAttrWriteR8::new().with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);

                self.mshv_vtl
                    .tdx_accept_pages(range, Some((attributes, mask)))
                    .map_err(|err| AcceptPagesError::Tdx { error: err, range })
            }
        }
    }

    fn unaccept_lower_vtl_pages(&self, range: MemoryRange) {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => {
                // TODO VBS: is there something to do here?
            }
            IsolationType::Snp => self
                .mshv_vtl
                .pvalidate_pages(range, false, false)
                .expect("pvalidate should not fail"),
            IsolationType::Tdx => {
                // Nothing to do for TDX.
            }
        }
    }

    /// Tell the host to change the visibility of the given GPAs.
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        gpns: &[u64],
    ) -> Result<(), ModifyGpaVisibilityError> {
        self.mshv_hvcall
            .modify_gpa_visibility(host_visibility, gpns)
            .map_err(|(e, processed)| ModifyGpaVisibilityError {
                source: e,
                processed,
            })
    }

    /// Apply the initial protections on lower-vtl memory.
    ///
    /// After initialization, the default protections should be applied.
    pub fn apply_initial_lower_vtl_protections(
        &self,
        range: MemoryRange,
    ) -> Result<(), ApplyVtlProtectionsError> {
        self.apply_protections(range, GuestVtl::Vtl0, HV_MAP_GPA_PERMISSIONS_ALL)
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        vtl: GuestVtl,
        flags: HvMapGpaFlags,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let permissions = GpaVtlPermissions::new(self.isolation, vtl, flags);

        match permissions {
            GpaVtlPermissions::Vbs(flags) => {
                // For VBS-isolated VMs, the permissions apply to all lower
                // VTLs. Therefore VTL 0 cannot set its own permissions.
                assert_ne!(vtl, GuestVtl::Vtl0);

                self.mshv_hvcall
                    .modify_vtl_protection_mask(range, flags, HvInputVtl::from(vtl))
            }
            GpaVtlPermissions::Snp(rmpadjust) => {
                // For SNP VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .rmpadjust_pages(range, rmpadjust, false)
                    .map_err(|err| ApplyVtlProtectionsError::Snp {
                        failed_operation: err,
                        range,
                        permissions: rmpadjust,
                        vtl: vtl.into(),
                    })
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                // For TDX VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .tdx_set_page_attributes(range, attributes, mask)
                    .map_err(|err| ApplyVtlProtectionsError::Tdx {
                        error: err,
                        range,
                        permissions: attributes,
                        vtl: vtl.into(),
                    })
            }
        }
    }
}

/// An implementation of [`ProtectIsolatedMemory`] for Underhill VMs.
pub struct HardwareIsolatedMemoryProtector {
    // Serves as a lock for synchronizing visibility and page-protection changes.
    inner: Mutex<HardwareIsolatedMemoryProtectorInner>,
    layout: MemoryLayout,
    acceptor: Arc<MemoryAcceptor>,
    vtl0: Arc<GuestMemoryMapping>,
    vtl1_protections_enabled: AtomicBool,
}

struct HardwareIsolatedMemoryProtectorInner {
    valid_encrypted: Arc<GuestValidMemory>,
    valid_shared: Arc<GuestValidMemory>,
    encrypted: Arc<GuestMemoryMapping>,
    default_vtl_permissions: DefaultVtlPermissions,
    overlay_pages: VtlArray<Vec<OverlayPage>, 2>,
    locked_pages: VtlArray<Vec<Box<[u64]>>, 2>,
}

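/// Bookkeeping for a page currently in use as an overlay page: the
/// permissions to restore when the overlay is unregistered, the permissions
/// granted to the overlay itself, a reference count for repeated
/// registrations of the same GPN, and whether the page backs guest memory or
/// DMA.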
struct OverlayPage {
    gpn: u64,
    previous_permissions: HvMapGpaFlags,
    overlay_permissions: HvMapGpaFlags,
    ref_count: u16,
    gpn_source: GpnSource,
}

impl HardwareIsolatedMemoryProtector {
    /// Returns a new instance.
    ///
    /// `valid_encrypted` and `valid_shared` track which pages are currently
    /// accessible via the encrypted and shared mappings, respectively.
    /// `encrypted` provides the mapping for encrypted memory, and `vtl0`
    /// provides the VTL 0 mapping whose permission bitmaps are kept in sync
    /// with the applied protections.
    pub fn new(
        valid_encrypted: Arc<GuestValidMemory>,
        valid_shared: Arc<GuestValidMemory>,
        encrypted: Arc<GuestMemoryMapping>,
        vtl0: Arc<GuestMemoryMapping>,
        layout: MemoryLayout,
        acceptor: Arc<MemoryAcceptor>,
    ) -> Self {
        Self {
            inner: Mutex::new(HardwareIsolatedMemoryProtectorInner {
                valid_encrypted,
                valid_shared,
                encrypted,
                // Grant only VTL 0 all permissions. This will be altered
                // later by VTL 1 enablement and by VTL 1 itself.
                default_vtl_permissions: DefaultVtlPermissions {
                    vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
                    vtl1: None,
                },
                overlay_pages: VtlArray::from_fn(|_| Vec::new()),
                locked_pages: VtlArray::from_fn(|_| Vec::new()),
            }),
            layout,
            acceptor,
            vtl0,
            vtl1_protections_enabled: AtomicBool::new(false),
        }
    }

    fn apply_protections_with_overlay_handling(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        inner: &mut MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let mut range_queue = VecDeque::new();
        range_queue.push_back(range);

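        // Drain the queue, carving each range apart around any overlay pages
        // it contains. As an illustration (GPNs are made up): applying
        // protections to GPNs 0x100..0x200 with an overlay at 0x180 records
        // the new protections as that overlay's restore value, then re-queues
        // 0x100..0x180 and 0x181..0x200 to be protected normally.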
        'outer: while let Some(range) = range_queue.pop_front() {
            for overlay_page in inner.overlay_pages[target_vtl].iter_mut() {
                let overlay_addr = overlay_page.gpn * HV_PAGE_SIZE;
                if range.contains_addr(overlay_addr) {
                    // If the overlay page is within the range, update the
                    // permissions that will be restored when it is unregistered.
                    overlay_page.previous_permissions = protections;
                    // And split the range around it.
                    let (left, right_with_overlay) =
                        range.split_at_offset(range.offset_of(overlay_addr).unwrap());
                    let (overlay, right) = right_with_overlay.split_at_offset(HV_PAGE_SIZE);
                    debug_assert_eq!(overlay.start_4k_gpn(), overlay_page.gpn);
                    debug_assert_eq!(overlay.len(), HV_PAGE_SIZE);
                    if !left.is_empty() {
                        range_queue.push_back(left);
                    }
                    if !right.is_empty() {
                        range_queue.push_back(right);
                    }
                    continue 'outer;
                }
            }
            // We can only reach here if the range does not contain any overlay
            // pages, so now we can apply the protections to the range.
            self.apply_protections(range, target_vtl, protections, GpnSource::GuestMemory)?
        }

        Ok(())
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        gpn_source: GpnSource,
    ) -> Result<(), ApplyVtlProtectionsError> {
        if gpn_source == GpnSource::GuestMemory && target_vtl == GuestVtl::Vtl0 {
            // Only permissions imposed on VTL 0 guest memory are explicitly tracked
            self.vtl0.update_permission_bitmaps(range, protections);
        }
        self.acceptor
            .apply_protections(range, target_vtl, protections)
    }

    /// Get the permissions that the given VTL has to the given GPN.
    ///
    /// This function does not check for any protections applied by VTL 2,
    /// only those applied by lower VTLs.
    fn query_lower_vtl_permissions(
        &self,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<HvMapGpaFlags, HvError> {
        if !self.is_in_guest_memory(gpn) {
            return Err(HvError::OperationDenied);
        }

        let res = match vtl {
            GuestVtl::Vtl0 => self
                .vtl0
                .query_access_permission(gpn)
                .unwrap_or(HV_MAP_GPA_PERMISSIONS_ALL),
            GuestVtl::Vtl1 => HV_MAP_GPA_PERMISSIONS_ALL,
        };

        Ok(res)
    }

    fn check_gpn_not_locked(
        &self,
        inner: &MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<(), HvError> {
        // Overlay pages have special handling; being locked does not prevent that.
        // TODO: When uh_mem implements the returning of overlay pages, rather than
        // requiring them to also be locked through guestmem, the check for overlay
        // pages can be removed, as locked and overlay pages will be mutually exclusive.
        if inner.locked_pages[vtl].iter().flatten().any(|x| *x == gpn)
            && !inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn)
        {
            return Err(HvError::OperationDenied);
        }
        Ok(())
    }

    /// Checks whether the given GPN is present in guest RAM.
    fn is_in_guest_memory(&self, gpn: u64) -> bool {
        let gpa = gpn << HV_PAGE_SHIFT;
        self.layout.ram().iter().any(|r| r.range.contains_addr(gpa))
    }
}

impl ProtectIsolatedMemory for HardwareIsolatedMemoryProtector {
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
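        // High-level flow: validate the request, clear the valid bits for the
        // mapping that is becoming inaccessible, wait for in-flight accessors
        // to drain (RCU), unaccept or accept pages around the hypervisor
        // visibility change as appropriate, then re-enable the correct
        // mapping and reapply the default VTL protections.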
        let inner = self.inner.lock();

        for &gpn in gpns {
            // Validate the ranges are RAM.
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Don't allow overlay pages to be shared.
            if shared && inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Filter out the GPNs that are already in the correct state. If the
        // page is becoming shared, make sure the requesting VTL has read/write
        // vtl permissions to the page.
        let orig_gpns = gpns;
        let mut failed_vtl_permission_index = None;
        let gpns = gpns
            .iter()
            .copied()
            .enumerate()
            .take_while(|&(index, gpn)| {
                if vtl == GuestVtl::Vtl0 && shared && self.vtl1_protections_enabled() {
                    let permissions = self
                        .vtl0
                        .query_access_permission(gpn)
                        .expect("vtl 1 protections enabled, vtl permissions should be tracked");
                    if !permissions.readable() || !permissions.writable() {
                        failed_vtl_permission_index = Some(index);
                        false
                    } else {
                        true
                    }
                } else {
                    true
                }
            })
            .filter_map(|(_, gpn)| {
                if inner.valid_shared.check_valid(gpn) != shared {
                    Some(gpn)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        tracing::debug!(
            orig = orig_gpns.len(),
            len = gpns.len(),
            first = gpns.first(),
            shared,
            "change vis"
        );

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, &gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        // Prevent accesses via the wrong address.
        let clear_bitmap = if shared {
            &inner.valid_encrypted
        } else {
            &inner.valid_shared
        };

        for &range in &ranges {
            if shared && vtl == GuestVtl::Vtl0 {
                // Accessing these pages through the encrypted mapping is now
                // invalid. Make sure the VTL bitmaps reflect this. We could
                // call apply_protections here but that would result in an extra
                // hardware interaction that we don't need since we're about to
                // unaccept the pages anyways.
                self.vtl0
                    .update_permission_bitmaps(range, HV_MAP_GPA_PERMISSIONS_NONE);
            }

            clear_bitmap.update_valid(range, false);
        }

        // There may be other threads concurrently accessing these pages. We
        // cannot change the page visibility state until these threads have
        // stopped those accesses. Flush the RCU domain that `guestmem` uses in
        // order to flush any threads accessing the pages. After this, we are
        // guaranteed no threads are accessing these pages (unless the pages are
        // also locked), since no bitmap currently allows access.
        guestmem::rcu().synchronize_blocking();

        if let IsolationType::Snp = self.acceptor.isolation {
            // We need to ensure that the guest TLB has been fully flushed since
            // the unaccept operation is not guaranteed to do so in hardware,
            // and the hypervisor is also not trusted with TLB hygiene.
            tlb_access.flush_entire();
        }

        // TODO SNP: check list of locks, roll back bitmap changes if there was one.

        if shared {
            // Unaccept the pages so that the hypervisor can reclaim them.
            for &range in &ranges {
                self.acceptor.unaccept_lower_vtl_pages(range);
            }
        }

        // Ask the hypervisor to update visibility.
        let host_visibility = if shared {
            HostVisibilityType::SHARED
        } else {
            HostVisibilityType::PRIVATE
        };

        let (result, ranges) = match self.acceptor.modify_gpa_visibility(host_visibility, &gpns) {
            Ok(()) => {
                // All gpns succeeded, so the whole set of ranges should be
                // processed.
                (
                    match failed_vtl_permission_index {
                        Some(index) => Err((HvError::AccessDenied, index)),
                        None => Ok(()),
                    },
                    ranges,
                )
            }
            Err(err) => {
                if shared {
                    // A transition from private to shared should always
                    // succeed. There is no safe rollback path, so we must
                    // panic.
                    panic!(
                        "the hypervisor refused to transition pages to shared, we cannot safely roll back: {:?}",
                        err
                    );
                }

                // Only some ranges succeeded. Recreate ranges based on which
                // gpns succeeded, for further processing.
                let (successful_gpns, failed_gpns) = gpns.split_at(err.processed);
                let ranges = PagedRange::new(
                    0,
                    successful_gpns.len() * PagedRange::PAGE_SIZE,
                    successful_gpns,
                )
                .unwrap()
                .ranges()
                .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                .collect::<Result<Vec<_>, _>>()
                .expect("previous gpns was already checked");

                // Roll back the cleared bitmap for failed gpns, as they should
                // still be in their original state of shared.
                let rollback_ranges =
                    PagedRange::new(0, failed_gpns.len() * PagedRange::PAGE_SIZE, failed_gpns)
                        .unwrap()
                        .ranges()
                        .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                        .collect::<Result<Vec<_>, _>>()
                        .expect("previous gpns was already checked");

                for &range in &rollback_ranges {
                    clear_bitmap.update_valid(range, true);
                }

                // Figure out the index of the gpn that failed, in the
                // pre-filtered list that will be reported back to the caller.
                let failed_index = orig_gpns
                    .iter()
                    .position(|gpn| *gpn == failed_gpns[0])
                    .expect("failed gpn should be present in the list");

                (Err((err.source, failed_index)), ranges)
            }
        };

        if !shared {
            // Accept the pages so that the guest can use them.
            for &range in &ranges {
                self.acceptor
                    .accept_lower_vtl_pages(range)
                    .expect("everything should be in a state where we can accept VTL0 pages");

                // For SNP, zero the memory before allowing the guest to access
                // it. For TDX, this is done by the TDX module. For mshv, this is
                // done by the hypervisor.
                if self.acceptor.isolation == IsolationType::Snp {
                    inner.encrypted.zero_range(range).expect("VTL 2 should have access to lower VTL memory, the page should be accepted, there should be no vtl protections yet.")
                }
            }
        }

        // Allow accesses via the correct address.
        let set_bitmap = if shared {
            &inner.valid_shared
        } else {
            &inner.valid_encrypted
        };
        for &range in &ranges {
            set_bitmap.update_valid(range, true);
        }

        if !shared {
            // Apply vtl protections so that the guest can use them. Any
            // overlay pages won't be host visible, so just apply the default
            // protections directly without handling them.
            for &range in &ranges {
                // Make sure we reset the permissions bitmaps for VTL 0.
                self.apply_protections(
                    range,
                    GuestVtl::Vtl0,
                    inner.default_vtl_permissions.vtl0,
                    GpnSource::GuestMemory,
                )
                .expect("should be able to apply default protections");

                if let Some(vtl1_protections) = inner.default_vtl_permissions.vtl1 {
                    self.apply_protections(
                        range,
                        GuestVtl::Vtl1,
                        vtl1_protections,
                        GpnSource::GuestMemory,
                    )
                    .expect("everything should be in a state where we can apply VTL protections");
                }
            }
        }

        // Return the original result of the underlying page visibility
        // transition call to the caller.
        result
    }

    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)> {
        // Validate the ranges are RAM.
        for (i, &gpn) in gpns.iter().enumerate() {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, i));
            }
        }

        let inner = self.inner.lock();

        // Set GPN sharing status in output.
        for (gpn, host_vis) in gpns.iter().zip(host_visibility.iter_mut()) {
            *host_vis = if inner.valid_shared.check_valid(*gpn) {
                HostVisibilityType::SHARED
            } else {
                HostVisibilityType::PRIVATE
            };
        }
        Ok(())
    }

    fn default_vtl0_protections(&self) -> HvMapGpaFlags {
        self.inner.lock().default_vtl_permissions.vtl0
    }

    fn change_default_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        vtl_protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        // Prevent visibility changes while VTL protections are being
        // applied.
        //
        // TODO: This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        inner
            .default_vtl_permissions
            .set(target_vtl, vtl_protections);

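        // Build maximal contiguous ranges of pages that are both accepted and
        // not locked, so protections can be applied over whole ranges rather
        // than page by page; any page that breaks the run ends the current
        // range and starts a new one after it.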
        let mut ranges = Vec::new();
        for ram_range in self.layout.ram().iter() {
            let mut protect_start = ram_range.range.start();
            let mut page_count = 0;

            for gpn in
                ram_range.range.start() / PAGE_SIZE as u64..ram_range.range.end() / PAGE_SIZE as u64
            {
                // TODO GUEST VSM: for now, use the encrypted mapping to
                // find all accepted memory. When lazy acceptance exists,
                // this should track all pages that have been accepted and
                // should be used instead.
                // Also don't attempt to change the permissions of locked pages.
                if !inner.valid_encrypted.check_valid(gpn)
                    || self.check_gpn_not_locked(&inner, target_vtl, gpn).is_err()
                {
                    if page_count > 0 {
                        let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                        ranges.push(MemoryRange::new(protect_start..end_address));
                    }
                    protect_start = (gpn + 1) * PAGE_SIZE as u64;
                    page_count = 0;
                } else {
                    page_count += 1;
                }
            }

            if page_count > 0 {
                let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                ranges.push(MemoryRange::new(protect_start..end_address));
            }
        }

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                target_vtl,
                vtl_protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Invalidate the entire VTL 0 TLB to ensure that the new permissions
        // are observed.
        tlb_access.flush(GuestVtl::Vtl0);

        Ok(())
    }

    fn change_vtl_protections(
        &self,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        // Prevent visibility changes while VTL protections are being
        // applied. This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        // Validate the ranges are RAM.
        for &gpn in gpns {
            if !self.is_in_guest_memory(gpn) {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, target_vtl, gpn)
                .map_err(|x| (x, 0))?;
        }

        // Protections cannot be applied to a host-visible page.
        if gpns.iter().any(|&gpn| inner.valid_shared.check_valid(gpn)) {
            return Err((HvError::OperationDenied, 0));
        }

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                target_vtl,
                protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the entire
        // VTL 0 TLB to ensure that the new permissions are observed, and wait for
        // other CPUs to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        gpn_source: GpnSource,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();

        // If the page is already registered as an overlay page, just check
        // that the existing permissions are adequate. If the permissions
        // requested differ from the ones already registered, this is best
        // effort; there is no spec guarantee of which one "wins".
        if let Some(registered) = inner.overlay_pages[vtl].iter_mut().find(|p| p.gpn == gpn) {
            let needed_perms = new_perms.unwrap_or(check_perms);
            if registered.overlay_permissions.into_bits() | needed_perms.into_bits()
                != registered.overlay_permissions.into_bits()
            {
                return Err(HvError::OperationDenied);
            }
            registered.ref_count += 1;
            return Ok(());
        }

        let current_perms = match gpn_source {
            GpnSource::GuestMemory => {
                // Check that the required permissions are present.
                let current_perms = self.query_lower_vtl_permissions(vtl, gpn)?;
                if current_perms.into_bits() | check_perms.into_bits() != current_perms.into_bits()
                {
                    return Err(HvError::OperationDenied);
                }

                // Protections cannot be applied to a host-visible page.
                if inner.valid_shared.check_valid(gpn) {
                    return Err(HvError::OperationDenied);
                }

                current_perms
            }
            GpnSource::Dma => {
                if self.is_in_guest_memory(gpn) {
                    // DMA memory must not be in guest RAM.
                    return Err(HvError::OperationDenied);
                }

                HV_MAP_GPA_PERMISSIONS_NONE
            }
        };

        // The page also must not be locked.
        self.check_gpn_not_locked(&inner, vtl, gpn)?;

        // Everything's validated, change the permissions.
        if let Some(new_perms) = new_perms {
            self.apply_protections(
                MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
                vtl,
                new_perms,
                gpn_source,
            )
            .map_err(|_| HvError::OperationDenied)?;
        }

        // Nothing from this point on can fail, so we can safely register the overlay page.
        inner.overlay_pages[vtl].push(OverlayPage {
            gpn,
            previous_permissions: current_perms,
            overlay_permissions: new_perms.unwrap_or(current_perms),
            ref_count: 1,
            gpn_source,
        });

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);

        Ok(())
    }

    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();
        let overlay_pages = &mut inner.overlay_pages[vtl];

        // Find the overlay page.
        let index = overlay_pages
            .iter()
            .position(|p| p.gpn == gpn)
            .ok_or(HvError::OperationDenied)?;

        // If this overlay page has been registered multiple times, just
        // decrement the reference count and return. We don't implement
        // full handling of multiple registrations with different permissions,
        // since it's best effort anyways.
        if overlay_pages[index].ref_count > 1 {
            overlay_pages[index].ref_count -= 1;
            return Ok(());
        }

        // Restore its permissions.
        self.apply_protections(
            MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
            vtl,
            overlay_pages[index].previous_permissions,
            overlay_pages[index].gpn_source,
        )
        .map_err(|_| HvError::OperationDenied)?;

        // Nothing from this point on can fail, so we can safely unregister the overlay page.
        overlay_pages.remove(index);

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);
        Ok(())
    }

    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool {
        self.inner.lock().overlay_pages[vtl]
            .iter()
            .any(|p| p.gpn == gpn)
    }

    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError> {
        // Locking a page multiple times is allowed, so no need to check
        // for duplicates.
        // We also need to allow locking overlay pages for now.
        // TODO: We probably don't want to allow locking overlay pages once
        // we return the pointer for them instead of going through guestmem::lock.
        // TODO: other preconditions?
        self.inner.lock().locked_pages[vtl].push(gpns.to_vec().into_boxed_slice());
        Ok(())
    }

    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) {
        let mut inner = self.inner.lock();
        let locked_pages = &mut inner.locked_pages[vtl];
        for (i, w) in locked_pages.iter().enumerate() {
            if **w == *gpns {
                locked_pages.swap_remove(i);
                return;
            }
        }

        // Don't change protections on locked pages to avoid conflicting
        // with unregister_overlay_page.
        // TODO: Is this the right decision even after we separate overlay and
        // locked pages?

        panic!("Tried to unlock pages that were not locked");
    }

    fn set_vtl1_protections_enabled(&self) {
        self.vtl1_protections_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
    }

    fn vtl1_protections_enabled(&self) -> bool {
        self.vtl1_protections_enabled
            .load(std::sync::atomic::Ordering::Relaxed)
    }
}