underhill_mem/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Underhill VM memory management.

#![cfg(target_os = "linux")]

mod init;
mod mapping;
mod registrar;

pub use init::BootInit;
pub use init::Init;
pub use init::MemoryMappings;
pub use init::init;

use cvm_tracing::CVM_ALLOWED;
use guestmem::GuestMemoryBackingError;
use guestmem::PAGE_SIZE;
use guestmem::ranges::PagedRange;
use hcl::GuestVtl;
use hcl::ioctl::AcceptPagesError;
use hcl::ioctl::ApplyVtlProtectionsError;
use hcl::ioctl::Mshv;
use hcl::ioctl::MshvHvcall;
use hcl::ioctl::MshvVtl;
use hcl::ioctl::snp::SnpPageError;
use hv1_structs::VtlArray;
use hvdef::HV_MAP_GPA_PERMISSIONS_ALL;
use hvdef::HV_MAP_GPA_PERMISSIONS_NONE;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HypercallCode;
use hvdef::Vtl;
use hvdef::hypercall::AcceptMemoryType;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvInputVtl;
use mapping::GuestMemoryMapping;
use mapping::GuestValidMemory;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use parking_lot::MutexGuard;
use registrar::RegisterMemory;
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use thiserror::Error;
use virt::IsolationType;
use virt_mshv_vtl::ProtectIsolatedMemory;
use virt_mshv_vtl::TlbFlushLockAccess;
use vm_topology::memory::MemoryLayout;
use x86defs::snp::SevRmpAdjust;
use x86defs::tdx::GpaVmAttributes;
use x86defs::tdx::GpaVmAttributesMask;
use x86defs::tdx::TdgMemPageAttrWriteR8;
use x86defs::tdx::TdgMemPageGpaAttr;

/// Error querying vtl permissions on a page
#[derive(Debug, Error)]
pub enum QueryVtlPermissionsError {
    /// An SNP-specific error
    #[error("failed to query rmp permissions")]
    Snp(#[source] SnpPageError),
}

#[derive(Debug)]
struct MshvVtlWithPolicy {
    mshv_vtl: MshvVtl,
    ignore_registration_failure: bool,
    shared: bool,
}

impl RegisterMemory for MshvVtlWithPolicy {
    fn register_range(&self, range: MemoryRange) -> Result<(), impl 'static + std::error::Error> {
        match self.mshv_vtl.add_vtl0_memory(range, self.shared) {
            Ok(()) => Ok(()),
            // TODO: remove this once the kernel driver tracks registration
            Err(err) if self.ignore_registration_failure => {
                tracing::warn!(
                    CVM_ALLOWED,
                    error = &err as &dyn std::error::Error,
                    "registration failure, could be expected"
                );
                Ok(())
            }
            Err(err) => Err(err),
        }
    }
}

#[derive(Debug, Error)]
#[error("failed to register memory with kernel")]
struct RegistrationError;

/// Default VTL memory permissions applied to any mapped memory.
///
/// Currently built for hardware CVMs, which only define permissions for VTL
/// 0 and VTL 1 to express what those VTLs have access to. If this were to
/// extend to non-hardware CVMs, those would need to define permissions
/// instead for VTL 2 and VTL 1 to express what the lower VTLs have access
/// to.
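///
/// A minimal sketch of how these defaults evolve (illustrative only; the
/// values shown are an assumption, not required behavior):
///
/// ```ignore
/// // At construction only VTL 0 has a default; VTL 1 has none yet.
/// let mut perms = DefaultVtlPermissions {
///     vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
///     vtl1: None,
/// };
/// // Once VTL 1 is enabled, it gets its own default as well.
/// perms.set(GuestVtl::Vtl1, HV_MAP_GPA_PERMISSIONS_ALL);
/// assert!(perms.vtl1.is_some());
/// ```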
struct DefaultVtlPermissions {
    vtl0: HvMapGpaFlags,
    vtl1: Option<HvMapGpaFlags>,
}

impl DefaultVtlPermissions {
    fn set(&mut self, vtl: GuestVtl, permissions: HvMapGpaFlags) {
        match vtl {
            GuestVtl::Vtl0 => self.vtl0 = permissions,
            GuestVtl::Vtl1 => self.vtl1 = Some(permissions),
        }
    }
}

/// Represents the vtl permissions on a page for a given isolation type
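///
/// A small sketch of how this is used (illustrative only; the isolation type
/// and target VTL are assumptions for the example):
///
/// ```ignore
/// // Encode "VTL 0 gets full access" in the TDX-specific attribute format.
/// let perms = GpaVtlPermissions::new(
///     IsolationType::Tdx,
///     GuestVtl::Vtl0,
///     HV_MAP_GPA_PERMISSIONS_ALL,
/// );
/// // `set` re-encodes the same value for a different VTL or protection set.
/// ```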
#[derive(Copy, Clone)]
enum GpaVtlPermissions {
    Vbs(HvMapGpaFlags),
    Snp(SevRmpAdjust),
    Tdx(TdgMemPageGpaAttr, TdgMemPageAttrWriteR8),
}

impl GpaVtlPermissions {
    fn new(isolation: IsolationType, vtl: GuestVtl, protections: HvMapGpaFlags) -> Self {
        match isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => GpaVtlPermissions::Vbs(protections),
            IsolationType::Snp => {
                let mut vtl_permissions = GpaVtlPermissions::Snp(SevRmpAdjust::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
            IsolationType::Tdx => {
                let mut vtl_permissions =
                    GpaVtlPermissions::Tdx(TdgMemPageGpaAttr::new(), TdgMemPageAttrWriteR8::new());
                vtl_permissions.set(vtl, protections);
                vtl_permissions
            }
        }
    }

    fn set(&mut self, vtl: GuestVtl, protections: HvMapGpaFlags) {
        match self {
            GpaVtlPermissions::Vbs(flags) => *flags = protections,
            GpaVtlPermissions::Snp(rmpadjust) => {
                *rmpadjust = SevRmpAdjust::new()
                    .with_enable_read(protections.readable())
                    .with_enable_write(protections.writable())
                    .with_enable_user_execute(protections.user_executable())
                    .with_enable_kernel_execute(protections.kernel_executable())
                    .with_target_vmpl(match vtl {
                        GuestVtl::Vtl0 => x86defs::snp::Vmpl::Vmpl2.into(),
                        GuestVtl::Vtl1 => x86defs::snp::Vmpl::Vmpl1.into(),
                    });
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                let vm_attributes = GpaVmAttributes::new()
                    .with_valid(true)
                    .with_read(protections.readable())
                    .with_write(protections.writable())
                    .with_kernel_execute(protections.kernel_executable())
                    .with_user_execute(protections.user_executable());

                let (new_attributes, new_mask) = match vtl {
                    GuestVtl::Vtl0 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                    GuestVtl::Vtl1 => {
                        let attributes = TdgMemPageGpaAttr::new().with_l2_vm2(vm_attributes);
                        let mask = TdgMemPageAttrWriteR8::new()
                            .with_l2_vm2(GpaVmAttributesMask::ALL_CHANGED);
                        (attributes, mask)
                    }
                };

                *attributes = new_attributes;
                *mask = new_mask;
            }
        }
    }
}

/// Error returned when modifying gpa visibility.
#[derive(Debug, Error)]
#[error("failed to modify gpa visibility, elements successfully processed {processed}")]
pub struct ModifyGpaVisibilityError {
    source: HvError,
    processed: usize,
}

/// Interface for accepting lower VTL memory and manipulating its host
/// visibility and page protections.
///
/// FUTURE: this should go away as a separate object once all the logic is moved
/// into this crate.
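///
/// A minimal usage sketch (illustrative only; the isolation type and `range`
/// are assumptions for the example, and errors are simply propagated):
///
/// ```ignore
/// let acceptor = MemoryAcceptor::new(IsolationType::Snp)?;
/// // Accept a lower-VTL range, then grant VTL 0 full access to it.
/// acceptor.accept_lower_vtl_pages(range)?;
/// acceptor.apply_initial_lower_vtl_protections(range)?;
/// ```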
pub struct MemoryAcceptor {
    mshv_hvcall: MshvHvcall,
    mshv_vtl: MshvVtl,
    isolation: IsolationType,
}

impl MemoryAcceptor {
    /// Create a new instance.
    pub fn new(isolation: IsolationType) -> Result<Self, hcl::ioctl::Error> {
        let mshv = Mshv::new()?;
        let mshv_vtl = mshv.create_vtl()?;
        let mshv_hvcall = MshvHvcall::new()?;
        mshv_hvcall.set_allowed_hypercalls(&[
            HypercallCode::HvCallAcceptGpaPages,
            HypercallCode::HvCallModifySparseGpaPageHostVisibility,
            HypercallCode::HvCallModifyVtlProtectionMask,
        ]);

        // On boot, VTL 0 should have permissions.
        Ok(Self {
            mshv_hvcall,
            mshv_vtl,
            isolation,
        })
    }

    /// Accept pages for lower VTLs.
    pub fn accept_lower_vtl_pages(&self, range: MemoryRange) -> Result<(), AcceptPagesError> {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => self
                .mshv_hvcall
                .accept_gpa_pages(range, AcceptMemoryType::RAM),
            IsolationType::Snp => {
                self.mshv_vtl
                    .pvalidate_pages(range, true, false)
                    .map_err(|err| AcceptPagesError::Snp {
                        failed_operation: err,
                        range,
                    })
            }
            IsolationType::Tdx => {
                let attributes = TdgMemPageGpaAttr::new().with_l2_vm1(GpaVmAttributes::FULL_ACCESS);
                let mask =
                    TdgMemPageAttrWriteR8::new().with_l2_vm1(GpaVmAttributesMask::ALL_CHANGED);

                self.mshv_vtl
                    .tdx_accept_pages(range, Some((attributes, mask)))
                    .map_err(|err| AcceptPagesError::Tdx { error: err, range })
            }
        }
    }

    fn unaccept_lower_vtl_pages(&self, range: MemoryRange) {
        match self.isolation {
            IsolationType::None => unreachable!(),
            IsolationType::Vbs => {
                // TODO VBS: is there something to do here?
            }
            IsolationType::Snp => self
                .mshv_vtl
                .pvalidate_pages(range, false, false)
                .expect("pvalidate should not fail"),
            IsolationType::Tdx => {
                // Nothing to do for TDX.
            }
        }
    }

    /// Tell the host to change the visibility of the given GPAs.
    pub fn modify_gpa_visibility(
        &self,
        host_visibility: HostVisibilityType,
        gpns: &[u64],
    ) -> Result<(), ModifyGpaVisibilityError> {
        self.mshv_hvcall
            .modify_gpa_visibility(host_visibility, gpns)
            .map_err(|(e, processed)| ModifyGpaVisibilityError {
                source: e,
                processed,
            })
    }

    /// Apply the initial protections on lower-vtl memory.
    ///
    /// After initialization, the default protections should be applied.
    pub fn apply_initial_lower_vtl_protections(
        &self,
        range: MemoryRange,
    ) -> Result<(), ApplyVtlProtectionsError> {
        self.apply_protections(range, GuestVtl::Vtl0, HV_MAP_GPA_PERMISSIONS_ALL)
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        vtl: GuestVtl,
        flags: HvMapGpaFlags,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let permissions = GpaVtlPermissions::new(self.isolation, vtl, flags);

        match permissions {
            GpaVtlPermissions::Vbs(flags) => {
                // For VBS-isolated VMs, the permissions apply to all lower
                // VTLs. Therefore VTL 0 cannot set its own permissions.
                assert_ne!(vtl, GuestVtl::Vtl0);

                self.mshv_hvcall
                    .modify_vtl_protection_mask(range, flags, HvInputVtl::from(vtl))
            }
            GpaVtlPermissions::Snp(rmpadjust) => {
                // For SNP VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .rmpadjust_pages(range, rmpadjust, false)
                    .map_err(|err| ApplyVtlProtectionsError::Snp {
                        failed_operation: err,
                        range,
                        permissions: rmpadjust,
                        vtl: vtl.into(),
                    })
            }
            GpaVtlPermissions::Tdx(attributes, mask) => {
                // For TDX VMs, the permissions apply to the specified VTL.
                // Therefore VTL 2 cannot specify its own permissions.
                self.mshv_vtl
                    .tdx_set_page_attributes(range, attributes, mask)
                    .map_err(|err| ApplyVtlProtectionsError::Tdx {
                        error: err,
                        range,
                        permissions: attributes,
                        vtl: vtl.into(),
                    })
            }
        }
    }
}

/// An implementation of [`ProtectIsolatedMemory`] for Underhill VMs.
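///
/// A rough usage sketch through the [`ProtectIsolatedMemory`] trait
/// (illustrative only; `protector`, `tlb`, and the GPN list `gpns` are assumed
/// to exist, and the calling/target VTLs are just examples):
///
/// ```ignore
/// // Make a set of guest pages host-visible (shared) on behalf of VTL 0.
/// protector.change_host_visibility(GuestVtl::Vtl0, true, &gpns, tlb)?;
/// // Later, have VTL 1 restrict VTL 0's access to the same pages.
/// protector.change_vtl_protections(
///     Vtl::Vtl1,
///     GuestVtl::Vtl0,
///     &gpns,
///     HV_MAP_GPA_PERMISSIONS_NONE,
///     tlb,
/// )?;
/// ```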
pub struct HardwareIsolatedMemoryProtector {
    // Serves as a lock for synchronizing visibility and page-protection changes.
    inner: Mutex<HardwareIsolatedMemoryProtectorInner>,
    layout: MemoryLayout,
    acceptor: Arc<MemoryAcceptor>,
    vtl0: Arc<GuestMemoryMapping>,
    vtl1_protections_enabled: AtomicBool,
}

struct HardwareIsolatedMemoryProtectorInner {
    valid_encrypted: Arc<GuestValidMemory>,
    valid_shared: Arc<GuestValidMemory>,
    encrypted: Arc<GuestMemoryMapping>,
    default_vtl_permissions: DefaultVtlPermissions,
    overlay_pages: VtlArray<Vec<OverlayPage>, 2>,
    locked_pages: VtlArray<Vec<Box<[u64]>>, 2>,
}

struct OverlayPage {
    gpn: u64,
    previous_permissions: HvMapGpaFlags,
    overlay_permissions: HvMapGpaFlags,
}

impl HardwareIsolatedMemoryProtector {
    /// Returns a new instance.
    ///
    /// `encrypted` provides the mapping for encrypted memory, and `vtl0`
    /// provides the VTL 0 view of guest memory, whose permission bitmaps track
    /// VTL protections. `valid_encrypted` and `valid_shared` track which pages
    /// are currently valid to access through the encrypted and shared
    /// mappings, respectively.
    pub fn new(
        valid_encrypted: Arc<GuestValidMemory>,
        valid_shared: Arc<GuestValidMemory>,
        encrypted: Arc<GuestMemoryMapping>,
        vtl0: Arc<GuestMemoryMapping>,
        layout: MemoryLayout,
        acceptor: Arc<MemoryAcceptor>,
    ) -> Self {
        Self {
            inner: Mutex::new(HardwareIsolatedMemoryProtectorInner {
                valid_encrypted,
                valid_shared,
                encrypted,
                // Grant only VTL 0 all permissions. This will be altered
                // later by VTL 1 enablement and by VTL 1 itself.
                default_vtl_permissions: DefaultVtlPermissions {
                    vtl0: HV_MAP_GPA_PERMISSIONS_ALL,
                    vtl1: None,
                },
                overlay_pages: VtlArray::from_fn(|_| Vec::new()),
                locked_pages: VtlArray::from_fn(|_| Vec::new()),
            }),
            layout,
            acceptor,
            vtl0,
            vtl1_protections_enabled: AtomicBool::new(false),
        }
    }

    fn apply_protections_with_overlay_handling(
        &self,
        range: MemoryRange,
        calling_vtl: Vtl,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
        inner: &mut MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
    ) -> Result<(), ApplyVtlProtectionsError> {
        let mut range_queue = VecDeque::new();
        range_queue.push_back(range);

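        // Worked example (illustrative): protecting a three-page range
        // [A, A+3) that has an overlay page at A+1 splits the work into
        // [A, A+1) and [A+2, A+3); the overlay page keeps its overlay
        // permissions and only its saved "previous" permissions are updated.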
        'outer: while let Some(range) = range_queue.pop_front() {
            for overlay_page in inner.overlay_pages[target_vtl].iter_mut() {
                let overlay_addr = overlay_page.gpn * HV_PAGE_SIZE;
                if range.contains_addr(overlay_addr) {
                    // If the overlay page is within the range, update the
                    // permissions that will be restored when it is unlocked.
                    overlay_page.previous_permissions = protections;
                    // And split the range around it.
                    let (left, right_with_overlay) =
                        range.split_at_offset(range.offset_of(overlay_addr).unwrap());
                    let (overlay, right) = right_with_overlay.split_at_offset(HV_PAGE_SIZE);
                    debug_assert_eq!(overlay.start_4k_gpn(), overlay_page.gpn);
                    debug_assert_eq!(overlay.len(), HV_PAGE_SIZE);
                    if !left.is_empty() {
                        range_queue.push_back(left);
                    }
                    if !right.is_empty() {
                        range_queue.push_back(right);
                    }
                    continue 'outer;
                }
            }
            // We can only reach here if the range does not contain any overlay
            // pages, so now we can apply the protections to the range.
            self.apply_protections(range, calling_vtl, target_vtl, protections)?
        }

        Ok(())
    }

    fn apply_protections(
        &self,
        range: MemoryRange,
        calling_vtl: Vtl,
        target_vtl: GuestVtl,
        protections: HvMapGpaFlags,
    ) -> Result<(), ApplyVtlProtectionsError> {
        if calling_vtl == Vtl::Vtl1 && target_vtl == GuestVtl::Vtl0 {
            // Only VTL 1 permissions imposed on VTL 0 are explicitly tracked
            self.vtl0.update_permission_bitmaps(range, protections);
        }
        self.acceptor
            .apply_protections(range, target_vtl, protections)
    }

    /// Get the permissions that the given VTL has to the given GPN.
    ///
    /// This function does not check for any protections applied by VTL 2,
    /// only those applied by lower VTLs.
    fn query_lower_vtl_permissions(
        &self,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<HvMapGpaFlags, HvError> {
        if !self
            .layout
            .ram()
            .iter()
            .any(|r| r.range.contains_addr(gpn * HV_PAGE_SIZE))
        {
            return Err(HvError::OperationDenied);
        }

        let res = match vtl {
            GuestVtl::Vtl0 => self
                .vtl0
                .query_access_permission(gpn)
                .unwrap_or(HV_MAP_GPA_PERMISSIONS_ALL),
            GuestVtl::Vtl1 => HV_MAP_GPA_PERMISSIONS_ALL,
        };

        Ok(res)
    }

    fn check_gpn_not_locked(
        &self,
        inner: &MutexGuard<'_, HardwareIsolatedMemoryProtectorInner>,
        vtl: GuestVtl,
        gpn: u64,
    ) -> Result<(), HvError> {
        // Overlay pages have special handling; being locked does not prevent that.
        // TODO: When uh_mem implements the returning of overlay pages, rather than
        // requiring them to also be locked through guestmem, the check for overlay
        // pages can be removed, as locked and overlay pages will be mutually exclusive.
        if inner.locked_pages[vtl].iter().flatten().any(|x| *x == gpn)
            && !inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn)
        {
            return Err(HvError::OperationDenied);
        }
        Ok(())
    }
}

impl ProtectIsolatedMemory for HardwareIsolatedMemoryProtector {
    fn change_host_visibility(
        &self,
        vtl: GuestVtl,
        shared: bool,
        gpns: &[u64],
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        let inner = self.inner.lock();

        for &gpn in gpns {
            // Validate the ranges are RAM.
            if !self
                .layout
                .ram()
                .iter()
                .any(|r| r.range.contains_addr(gpn * HV_PAGE_SIZE))
            {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, vtl, gpn)
                .map_err(|x| (x, 0))?;

            // Don't allow overlay pages to be shared.
            if shared && inner.overlay_pages[vtl].iter().any(|p| p.gpn == gpn) {
                return Err((HvError::OperationDenied, 0));
            }
        }

        // Filter out the GPNs that are already in the correct state. If the
        // page is becoming shared, make sure the requesting VTL has read/write
        // vtl permissions to the page.
        let orig_gpns = gpns;
        let mut failed_vtl_permission_index = None;
        let gpns = gpns
            .iter()
            .copied()
            .enumerate()
            .take_while(|&(index, gpn)| {
                if vtl == GuestVtl::Vtl0 && shared && self.vtl1_protections_enabled() {
                    let permissions = self
                        .vtl0
                        .query_access_permission(gpn)
                        .expect("vtl 1 protections enabled, vtl permissions should be tracked");
                    if !permissions.readable() || !permissions.writable() {
                        failed_vtl_permission_index = Some(index);
                        false
                    } else {
                        true
                    }
                } else {
                    true
                }
            })
            .filter_map(|(_, gpn)| {
                if inner.valid_shared.check_valid(gpn) != shared {
                    Some(gpn)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();

        tracing::debug!(
            orig = orig_gpns.len(),
            len = gpns.len(),
            first = gpns.first(),
            shared,
            "change vis"
        );

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, &gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        // Prevent accesses via the wrong address.
        let clear_bitmap = if shared {
            &inner.valid_encrypted
        } else {
            &inner.valid_shared
        };

        for &range in &ranges {
            if shared && vtl == GuestVtl::Vtl0 {
                // Accessing these pages through the encrypted mapping is now
                // invalid. Make sure the VTL bitmaps reflect this.
                self.vtl0
                    .update_permission_bitmaps(range, HV_MAP_GPA_PERMISSIONS_NONE);
            }

            clear_bitmap.update_valid(range, false);
        }

        // There may be other threads concurrently accessing these pages. We
        // cannot change the page visibility state until these threads have
        // stopped those accesses. Flush the RCU domain that `guestmem` uses in
        // order to flush any threads accessing the pages. After this, we are
        // guaranteed no threads are accessing these pages (unless the pages are
        // also locked), since no bitmap currently allows access.
        guestmem::rcu().synchronize_blocking();

        if let IsolationType::Snp = self.acceptor.isolation {
            // We need to ensure that the guest TLB has been fully flushed since
            // the unaccept operation is not guaranteed to do so in hardware,
            // and the hypervisor is also not trusted with TLB hygiene.
            tlb_access.flush_entire();
        }

        // TODO SNP: check list of locks, roll back bitmap changes if there was one.

        if shared {
            // Unaccept the pages so that the hypervisor can reclaim them.
            for &range in &ranges {
                self.acceptor.unaccept_lower_vtl_pages(range);
            }
        }

        // Ask the hypervisor to update visibility.
        let host_visibility = if shared {
            HostVisibilityType::SHARED
        } else {
            HostVisibilityType::PRIVATE
        };

        let (result, ranges) = match self.acceptor.modify_gpa_visibility(host_visibility, &gpns) {
            Ok(()) => {
                // All gpns succeeded, so the whole set of ranges should be
                // processed.
                (
                    match failed_vtl_permission_index {
                        Some(index) => Err((HvError::AccessDenied, index)),
                        None => Ok(()),
                    },
                    ranges,
                )
            }
            Err(err) => {
                if shared {
                    // A transition from private to shared should always
                    // succeed. There is no safe rollback path, so we must
                    // panic.
                    panic!(
                        "the hypervisor refused to transition pages to shared, we cannot safely roll back: {:?}",
                        err
                    );
                }

                // Only some ranges succeeded. Recreate ranges based on which
                // gpns succeeded, for further processing.
                let (successful_gpns, failed_gpns) = gpns.split_at(err.processed);
                let ranges = PagedRange::new(
                    0,
                    successful_gpns.len() * PagedRange::PAGE_SIZE,
                    successful_gpns,
                )
                .unwrap()
                .ranges()
                .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                .collect::<Result<Vec<_>, _>>()
                .expect("previous gpns was already checked");

                // Roll back the cleared bitmap for failed gpns, as they should
                // be still in their original state of shared.
                let rollback_ranges =
                    PagedRange::new(0, failed_gpns.len() * PagedRange::PAGE_SIZE, failed_gpns)
                        .unwrap()
                        .ranges()
                        .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
                        .collect::<Result<Vec<_>, _>>()
                        .expect("previous gpns was already checked");

                for &range in &rollback_ranges {
                    clear_bitmap.update_valid(range, true);
                }

                // Figure out the index of the gpn that failed, in the
                // pre-filtered list that will be reported back to the caller.
                let failed_index = orig_gpns
                    .iter()
                    .position(|gpn| *gpn == failed_gpns[0])
                    .expect("failed gpn should be present in the list");

                (Err((err.source, failed_index)), ranges)
            }
        };

        if !shared {
            // Accept the pages so that the guest can use them.
            for &range in &ranges {
                self.acceptor
                    .accept_lower_vtl_pages(range)
                    .expect("everything should be in a state where we can accept VTL0 pages");

                // For SNP, zero the memory before allowing the guest to access
                // it. For TDX, this is done by the TDX module. For mshv, this is
                // done by the hypervisor.
                if self.acceptor.isolation == IsolationType::Snp {
                    inner.encrypted.zero_range(range).expect("VTL 2 should have access to lower VTL memory, the page should be accepted, there should be no vtl protections yet.")
                }
            }
        }

        // Allow accesses via the correct address.
        let set_bitmap = if shared {
            &inner.valid_shared
        } else {
            &inner.valid_encrypted
        };
        for &range in &ranges {
            set_bitmap.update_valid(range, true);
        }

        if !shared {
            // Apply vtl protections so that the guest can use them. Any
            // overlay pages won't be host visible, so just apply the default
            // protections directly without handling them.
            for &range in &ranges {
                self.apply_protections(
                    range,
                    if self.vtl1_protections_enabled() {
                        Vtl::Vtl1
                    } else {
                        Vtl::Vtl2
                    },
                    GuestVtl::Vtl0,
                    inner.default_vtl_permissions.vtl0,
                )
                .expect("should be able to apply default protections");

                if let Some(vtl1_protections) = inner.default_vtl_permissions.vtl1 {
                    self.apply_protections(range, Vtl::Vtl2, GuestVtl::Vtl1, vtl1_protections)
                        .expect(
                            "everything should be in a state where we can apply VTL protections",
                        );
                }
            }
        }

        // Return the original result of the underlying page visibility
        // transition call to the caller.
        result
    }

    fn query_host_visibility(
        &self,
        gpns: &[u64],
        host_visibility: &mut [HostVisibilityType],
    ) -> Result<(), (HvError, usize)> {
        // Validate the ranges are RAM.
        for (i, &gpn) in gpns.iter().enumerate() {
            if !self
                .layout
                .ram()
                .iter()
                .any(|r| r.range.contains_addr(gpn * HV_PAGE_SIZE))
            {
                return Err((HvError::OperationDenied, i));
            }
        }

        let inner = self.inner.lock();

        // Set GPN sharing status in output.
        for (gpn, host_vis) in gpns.iter().zip(host_visibility.iter_mut()) {
            *host_vis = if inner.valid_shared.check_valid(*gpn) {
                HostVisibilityType::SHARED
            } else {
                HostVisibilityType::PRIVATE
            };
        }
        Ok(())
    }

    fn default_vtl0_protections(&self) -> HvMapGpaFlags {
        self.inner.lock().default_vtl_permissions.vtl0
    }

    fn change_default_vtl_protections(
        &self,
        calling_vtl: Vtl,
        target_vtl: GuestVtl,
        vtl_protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        // Prevent visibility changes while VTL protections are being
        // applied.
        //
        // TODO: This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        inner
            .default_vtl_permissions
            .set(target_vtl, vtl_protections);

        let mut ranges = Vec::new();
        for ram_range in self.layout.ram().iter() {
            let mut protect_start = ram_range.range.start();
            let mut page_count = 0;

            for gpn in
                ram_range.range.start() / PAGE_SIZE as u64..ram_range.range.end() / PAGE_SIZE as u64
            {
                // TODO GUEST VSM: for now, use the encrypted mapping to
                // find all accepted memory. When lazy acceptance exists,
                // this should track all pages that have been accepted and
                // should be used instead.
                // Also don't attempt to change the permissions of locked pages.
                if !inner.valid_encrypted.check_valid(gpn)
                    || self.check_gpn_not_locked(&inner, target_vtl, gpn).is_err()
                {
                    if page_count > 0 {
                        let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                        ranges.push(MemoryRange::new(protect_start..end_address));
                    }
                    protect_start = (gpn + 1) * PAGE_SIZE as u64;
                    page_count = 0;
                } else {
                    page_count += 1;
                }
            }

            if page_count > 0 {
                let end_address = protect_start + (page_count * PAGE_SIZE as u64);
                ranges.push(MemoryRange::new(protect_start..end_address));
            }
        }

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                calling_vtl,
                target_vtl,
                vtl_protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Invalidate the entire VTL 0 TLB to ensure that the new permissions
        // are observed.
        tlb_access.flush(GuestVtl::Vtl0);

        Ok(())
    }

    fn change_vtl_protections(
        &self,
        calling_vtl: Vtl,
        target_vtl: GuestVtl,
        gpns: &[u64],
        protections: HvMapGpaFlags,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), (HvError, usize)> {
        // Prevent visibility changes while VTL protections are being
        // applied. This does not need to be synchronized against other
        // threads performing VTL protection changes; whichever thread
        // finishes last will control the outcome.
        let mut inner = self.inner.lock();

        // Validate the ranges are RAM.
        for &gpn in gpns {
            if !self
                .layout
                .ram()
                .iter()
                .any(|r| r.range.contains_addr(gpn * HV_PAGE_SIZE))
            {
                return Err((HvError::OperationDenied, 0));
            }

            // Validate they're not locked.
            self.check_gpn_not_locked(&inner, target_vtl, gpn)
                .map_err(|x| (x, 0))?;
        }

        // Protections cannot be applied to a host-visible page
        if gpns.iter().any(|&gpn| inner.valid_shared.check_valid(gpn)) {
            return Err((HvError::OperationDenied, 0));
        }

        let ranges = PagedRange::new(0, gpns.len() * PagedRange::PAGE_SIZE, gpns)
            .unwrap()
            .ranges()
            .map(|r| r.map(|r| MemoryRange::new(r.start..r.end)))
            .collect::<Result<Vec<_>, _>>()
            .unwrap(); // Ok to unwrap, we've validated the gpns above.

        for range in ranges {
            self.apply_protections_with_overlay_handling(
                range,
                calling_vtl,
                target_vtl,
                protections,
                &mut inner,
            )
            .unwrap();
        }

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the entire
        // VTL 0 TLB to ensure that the new permissions are observed, and wait for
        // other CPUs to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(GuestVtl::Vtl0);
        tlb_access.set_wait_for_tlb_locks(target_vtl);

        Ok(())
    }

    fn register_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        check_perms: HvMapGpaFlags,
        new_perms: Option<HvMapGpaFlags>,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();

        // If the page is already registered as an overlay page, just check
        // that the permissions are adequate. If the permissions requested are
        // different from the ones already registered, do best effort; there is
        // no spec guarantee of which one "wins".
        if let Some(registered) = inner.overlay_pages[vtl].iter().find(|p| p.gpn == gpn) {
            let needed_perms = new_perms.unwrap_or(check_perms);
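            // The needed permissions must be a subset of the overlay
            // permissions already in force: OR-ing them in must not add bits.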
            if registered.overlay_permissions.into_bits() | needed_perms.into_bits()
                != registered.overlay_permissions.into_bits()
            {
                return Err(HvError::OperationDenied);
            }
            return Ok(());
        }

        // Check that the required permissions are present.
        let current_perms = self.query_lower_vtl_permissions(vtl, gpn)?;
        if current_perms.into_bits() | check_perms.into_bits() != current_perms.into_bits() {
            return Err(HvError::OperationDenied);
        }

        // Protections cannot be applied to a host-visible page.
        if inner.valid_shared.check_valid(gpn) {
            return Err(HvError::OperationDenied);
        }

        // Or a locked page.
        self.check_gpn_not_locked(&inner, vtl, gpn)?;

        // Everything's validated, change the permissions.
        if let Some(new_perms) = new_perms {
            self.apply_protections(
                MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
                Vtl::Vtl2,
                vtl,
                new_perms,
            )
            .map_err(|_| HvError::OperationDenied)?;
        }

        // Nothing from this point on can fail, so we can safely register the overlay page.
        inner.overlay_pages[vtl].push(OverlayPage {
            gpn,
            previous_permissions: current_perms,
            overlay_permissions: new_perms.unwrap_or(current_perms),
        });

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);

        Ok(())
    }

    fn unregister_overlay_page(
        &self,
        vtl: GuestVtl,
        gpn: u64,
        tlb_access: &mut dyn TlbFlushLockAccess,
    ) -> Result<(), HvError> {
        let mut inner = self.inner.lock();
        let overlay_pages = &mut inner.overlay_pages[vtl];

        // Find the overlay page.
        let index = overlay_pages
            .iter()
            .position(|p| p.gpn == gpn)
            .ok_or(HvError::OperationDenied)?;

        // Restore its permissions.
        self.apply_protections(
            MemoryRange::from_4k_gpn_range(gpn..gpn + 1),
            Vtl::Vtl2,
            vtl,
            overlay_pages[index].previous_permissions,
        )
        .map_err(|_| HvError::OperationDenied)?;

        // Nothing from this point on can fail, so we can safely unregister the overlay page.
        overlay_pages.remove(index);

        // Flush any threads accessing pages that had their VTL protections
        // changed.
        guestmem::rcu().synchronize_blocking();

        // Since page protections were modified, we must invalidate the TLB to
        // ensure that the new permissions are observed, and wait for other CPUs
        // to release all guest mappings before declaring that the VTL
        // protection change has completed.
        tlb_access.flush(vtl);
        tlb_access.set_wait_for_tlb_locks(vtl);
        Ok(())
    }

    fn is_overlay_page(&self, vtl: GuestVtl, gpn: u64) -> bool {
        self.inner.lock().overlay_pages[vtl]
            .iter()
            .any(|p| p.gpn == gpn)
    }

    fn lock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) -> Result<(), GuestMemoryBackingError> {
        // Locking a page multiple times is allowed, so no need to check
        // for duplicates.
        // We also need to allow locking overlay pages for now.
        // TODO: We probably don't want to allow locking overlay pages once
        // we return the pointer for them instead of going through guestmem::lock.
        // TODO: other preconditions?
        self.inner.lock().locked_pages[vtl].push(gpns.to_vec().into_boxed_slice());
        Ok(())
    }

    fn unlock_gpns(&self, vtl: GuestVtl, gpns: &[u64]) {
        let mut inner = self.inner.lock();
        let locked_pages = &mut inner.locked_pages[vtl];
        for (i, w) in locked_pages.iter().enumerate() {
            if **w == *gpns {
                locked_pages.swap_remove(i);
                return;
            }
        }

        // Don't change protections on locked pages to avoid conflicting
        // with unregister_overlay_page.
        // TODO: Is this the right decision even after we separate overlay and
        // locked pages?

        panic!("Tried to unlock pages that were not locked");
    }

    fn set_vtl1_protections_enabled(&self) {
        self.vtl1_protections_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
    }

    fn vtl1_protections_enabled(&self) -> bool {
        self.vtl1_protections_enabled
            .load(std::sync::atomic::Ordering::Relaxed)
    }
}