// virt_support_x86emu/translate.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! x86-64 page table walking.
5
6#![warn(missing_docs)]
7
8use guestmem::GuestMemory;
9use hvdef::hypercall::TranslateGvaControlFlagsX64;
10use hvdef::hypercall::TranslateGvaResultCode;
11use thiserror::Error;
12use x86defs::LargePde;
13use x86defs::Pte;
14use x86defs::RFlags;
15use x86defs::SegmentRegister;
16use x86defs::X64_CR0_PG;
17use x86defs::X64_CR4_LA57;
18use x86defs::X64_CR4_PAE;
19use x86defs::X64_CR4_PSE;
20use x86defs::X64_CR4_SMAP;
21use x86defs::X64_CR4_SMEP;
22use x86defs::X64_EFER_LMA;
23use x86defs::X64_EFER_NXE;
24
/// Registers needed to walk the page table.
///
/// A snapshot of the VP's control state; the translation is performed against
/// these values, not against live processor state.
#[derive(Debug, Clone)]
pub struct TranslationRegisters {
    /// CR0 (consulted for the paging-enable bit).
    pub cr0: u64,
    /// CR4 (consulted for PAE, PSE, LA57, SMEP, and SMAP).
    pub cr4: u64,
    /// EFER (consulted for long-mode-active and no-execute-enable).
    pub efer: u64,
    /// CR3 (provides the physical base of the root page table).
    pub cr3: u64,
    /// RFLAGS (the AC flag can relax SMAP enforcement).
    pub rflags: u64,
    /// SS (its DPL determines the current privilege level).
    pub ss: SegmentRegister,
    /// The way the processor uses to determine if an access is to encrypted
    /// memory. This is used to enforce that page tables and executable code are
    /// in encrypted memory.
    pub encryption_mode: EncryptionMode,
}
45
/// The way the processor uses to determine if an access is to encrypted memory.
#[derive(Debug, Copy, Clone)]
pub enum EncryptionMode {
    /// Memory accesses below the virtual top of memory address are encrypted.
    /// The contained value is the VTOM boundary address.
    Vtom(u64),
    /// No memory is encrypted.
    None,
}
54
/// Flags to control the page table walk.
#[derive(Debug, Clone)]
pub struct TranslateFlags {
    /// Validate a VP in the current state can execute from this GVA.
    pub validate_execute: bool,
    /// Validate a VP in the current state can read from this GVA.
    pub validate_read: bool,
    /// Validate a VP in the current state can write to this GVA.
    pub validate_write: bool,
    /// Allow access even if SMAP would prevent it.
    pub override_smap: bool,
    /// Enforce SMAP even if it is disabled via the AC flag.
    pub enforce_smap: bool,
    /// The type of privilege check to perform.
    pub privilege_check: TranslatePrivilegeCheck,
    /// Update the page table entries' access and dirty bits as appropriate.
    pub set_page_table_bits: bool,
}
73
/// The type of privilege check to perform.
#[derive(Debug, Copy, Clone)]
pub enum TranslatePrivilegeCheck {
    /// No privilege checks.
    None,
    /// Validate user-mode access.
    User,
    /// Validate supervisor access.
    Supervisor,
    /// Validate both supervisor and user-mode access.
    Both,
    /// Validate according to the current privilege level (derived from the
    /// SS descriptor privilege level).
    CurrentPrivilegeLevel,
}
88
89impl TranslateFlags {
90    /// Return flags based on the `HvTranslateVirtualAddress` hypercall input
91    /// flags.
92    ///
93    /// Note that not all flags are considered.
94    pub fn from_hv_flags(flags: TranslateGvaControlFlagsX64) -> Self {
95        Self {
96            validate_execute: flags.validate_execute(),
97            validate_read: flags.validate_read(),
98            validate_write: flags.validate_write(),
99            override_smap: flags.override_smap(),
100            enforce_smap: flags.enforce_smap(),
101            privilege_check: if flags.privilege_exempt() {
102                TranslatePrivilegeCheck::None
103            } else if flags.user_access() {
104                if flags.supervisor_access() {
105                    TranslatePrivilegeCheck::Both
106                } else {
107                    TranslatePrivilegeCheck::User
108                }
109            } else if flags.supervisor_access() {
110                TranslatePrivilegeCheck::Supervisor
111            } else {
112                TranslatePrivilegeCheck::CurrentPrivilegeLevel
113            },
114            set_page_table_bits: flags.set_page_table_bits(),
115        }
116    }
117}
118
/// Result of translation
pub struct TranslateResult {
    /// The translated GPA.
    pub gpa: u64,

    /// Information from the walk that can be used to determine memory type
    pub cache_info: TranslateCachingInfo,
}
127
/// Information from a translation walk that can be used to determine memory
/// type.
pub enum TranslateCachingInfo {
    /// Paging wasn't enabled for the translation.
    NoPaging,
    /// State from a page table walk
    Paging {
        /// Index that can be used into the pat register to determine cache type
        /// (bit 0 = write-through, bit 1 = cache-disable, bit 2 = PAT bit).
        pat_index: u64,
    },
}
139
/// Translation error.
#[derive(Debug, Error)]
pub enum Error {
    /// The page table flags were invalid.
    #[error("invalid page table flags")]
    InvalidPageTableFlags,
    /// The requested GVA is a non-canonical address.
    #[error("non-canonical address")]
    NonCanonicalAddress,
    /// A page table GPA was not mapped.
    #[error("gpa unmapped")]
    GpaUnmapped,
    /// The page was not present in the page table.
    #[error("page not present")]
    PageNotPresent,
    /// Accessing the GVA would create a privilege violation.
    #[error("privilege violation")]
    PrivilegeViolation,
}
159
160impl From<Error> for TranslateGvaResultCode {
161    fn from(err: Error) -> TranslateGvaResultCode {
162        match err {
163            Error::InvalidPageTableFlags | Error::NonCanonicalAddress => {
164                TranslateGvaResultCode::INVALID_PAGE_TABLE_FLAGS
165            }
166            Error::GpaUnmapped => TranslateGvaResultCode::GPA_UNMAPPED,
167            Error::PageNotPresent => TranslateGvaResultCode::PAGE_NOT_PRESENT,
168            Error::PrivilegeViolation => TranslateGvaResultCode::PRIVILEGE_VIOLATION,
169        }
170    }
171}
172
/// Translate a GVA by walking the processor's page tables.
///
/// Walks the guest's page tables in `guest_memory` using the paging mode
/// implied by `registers` (paging disabled, 32-bit, 32-bit PAE, or 4-/5-level
/// long mode), performing the access and privilege checks requested by
/// `flags`, and optionally setting accessed/dirty bits.
///
/// # Errors
///
/// Returns an [`Error`] describing why the walk failed; see the variant docs
/// for the mapping to hypervisor result codes.
pub fn translate_gva_to_gpa(
    guest_memory: &GuestMemory,
    gva: u64,
    registers: &TranslationRegisters,
    mut flags: TranslateFlags,
) -> Result<TranslateResult, Error> {
    tracing::trace!(gva, ?registers, ?flags, "translating gva");

    let long_mode = registers.efer & X64_EFER_LMA != 0;
    // Truncate the address if operating in 32-bit mode.
    let gva = if long_mode { gva } else { gva as u32 as u64 };

    // If paging is disabled, just return the GVA as the GPA.
    if registers.cr0 & X64_CR0_PG == 0 {
        return Ok(TranslateResult {
            gpa: gva,
            cache_info: TranslateCachingInfo::NoPaging,
        });
    }

    // Determine the linear address width and PTE size from the paging mode.
    let address_bits;
    let large_pte;
    if long_mode {
        large_pte = true;
        address_bits = if registers.cr4 & X64_CR4_LA57 != 0 {
            57 // 5-level paging
        } else {
            48 // 4-level paging
        };

        // Canonicality is only checked in long mode.
        if !is_canonical_address(gva, address_bits) {
            return Err(Error::NonCanonicalAddress);
        }
    } else if registers.cr4 & X64_CR4_PAE != 0 {
        large_pte = true;
        // Only 32 bits are used from the input address; higher bits are zeroed
        // above. Bits 30..32 are used on x86 to index into the PDP table, but
        // for simplicity the code below uses the full 9-bit range 30..39.
        address_bits = 39;
    } else {
        large_pte = false;
        address_bits = 32;
    }

    // Determine the permission requirements of the walk according to the
    // current mode.
    if registers.efer & X64_EFER_NXE == 0 {
        // Without NXE there is no no-execute enforcement to validate.
        flags.validate_execute = false;
    }

    let (user_access, supervisor_access) = match flags.privilege_check {
        TranslatePrivilegeCheck::None => (false, false),
        TranslatePrivilegeCheck::User => (true, false),
        TranslatePrivilegeCheck::Both => (true, true),
        // CPL is derived from the SS descriptor privilege level; DPL 3 means
        // the access is treated as a user access.
        TranslatePrivilegeCheck::CurrentPrivilegeLevel
            if registers.ss.attributes.descriptor_privilege_level() == 3 =>
        {
            (true, false)
        }
        TranslatePrivilegeCheck::Supervisor | TranslatePrivilegeCheck::CurrentPrivilegeLevel => {
            (false, true)
        }
    };

    // SMEP/SMAP: a supervisor access to a user-accessible page may be
    // disallowed. SMAP can be bypassed by the caller (`override_smap`) or,
    // unless `enforce_smap` is set, by the AC flag in RFLAGS.
    let mut no_user_access = supervisor_access
        && ((flags.validate_execute && registers.cr4 & X64_CR4_SMEP != 0)
            || ((flags.validate_read || flags.validate_write)
                && !flags.override_smap
                && registers.cr4 & X64_CR4_SMAP != 0
                && (flags.enforce_smap || !RFlags::from(registers.rflags).alignment_check())));

    // Start the walk at the root table; the low 12 bits of CR3 are not part
    // of the table address.
    let mut gpa_base = registers.cr3 & !0xfff;
    let mut remaining_bits: u32 = address_bits;
    let cache_disable: bool;
    let write_through: bool;
    let pat_supported: bool;
    loop {
        // Compute the PTE address.
        let pte_address = if large_pte {
            // Consume the next 9 bits as an index into the table.
            //
            // Note that for 32-bit with PAE, the PDP table is only 4 entries,
            // but the high 7 bits of the index (bits 32..39 of the address)
            // were zeroed above.
            remaining_bits -= 9;
            gpa_base + (((gva >> remaining_bits) & 0x1ff) * 8)
        } else {
            // Consume the next 10 bits as an index into the table.
            remaining_bits -= 10;
            gpa_base + (((gva >> remaining_bits) & 0x3ff) * 4)
        };

        // All PTE accesses occur to encrypted memory. If VTOM is enabled, then
        // just fail the translation in shared memory since there is no way to
        // set the c bit. In theory we could just mask off the VTOM bit to get
        // to an encrypted address, but that depends on the hypervisor aliasing
        // the memory identically across VTOM, which is not guaranteed at this
        // layer in the stack.
        let pte_address = match registers.encryption_mode {
            EncryptionMode::Vtom(vtom) => {
                if pte_address >= vtom {
                    return Err(Error::InvalidPageTableFlags);
                }
                pte_address
            }
            EncryptionMode::None => pte_address,
        };

        // Read the entry: 64-bit entries for PAE/long mode, 32-bit otherwise.
        let mut pte_access = if large_pte {
            guest_memory.read_plain::<u64>(pte_address).map(Pte::from)
        } else {
            guest_memory
                .read_plain::<u32>(pte_address)
                .map(|n| Pte::from(n as u64))
        };

        // Loop on updating PTE a/d flags.
        let (pte, done) = loop {
            // TODO: different fault for VTL violation
            let pte = pte_access.map_err(|_| Error::GpaUnmapped)?;
            gpa_base = pte.pfn() << 12;

            if registers.efer & X64_EFER_LMA == 0 {
                // Reject entry bits that are not valid outside of long mode
                // (NX is only honored when EFER.NXE is set).
                if pte.available1() != 0 || (registers.efer & X64_EFER_NXE != 0 && pte.no_execute())
                {
                    return Err(Error::InvalidPageTableFlags);
                }
            }

            if !pte.present() {
                tracing::trace!(pte_address, ?pte, "page not present");
                return Err(Error::PageNotPresent);
            }

            // Writability, executability, and user access must be granted at
            // every level of the hierarchy.
            if (flags.validate_write && !pte.read_write())
                || (flags.validate_execute && pte.no_execute())
                || (user_access && !pte.user())
            {
                return Err(Error::PrivilegeViolation);
            }

            // Determine whether this is the terminal PTE.
            let done = remaining_bits == 12
                || (registers.cr4 & (X64_CR4_PAE | X64_CR4_PSE) != 0 && pte.pat());

            if done {
                // Apply the SMEP/SMAP restriction computed above: a supervisor
                // access to a user-accessible terminal page is disallowed.
                if no_user_access && pte.user() {
                    return Err(Error::PrivilegeViolation);
                }

                // Only allow execute from encrypted memory.
                if flags.validate_execute {
                    let encrypted = match registers.encryption_mode {
                        EncryptionMode::Vtom(vtom) => gpa_base < vtom,
                        EncryptionMode::None => true,
                    };
                    if !encrypted {
                        return Err(Error::InvalidPageTableFlags);
                    }
                }
            }

            // Update access and dirty bits.
            let mut new_pte = pte;
            if flags.set_page_table_bits {
                new_pte.set_accessed(true);
                if flags.validate_write && done {
                    new_pte.set_dirty(true);
                }
            }

            if new_pte != pte {
                // Update the entry atomically so concurrent updates to the
                // same entry are not lost.
                let r = if large_pte {
                    guest_memory.compare_exchange(pte_address, pte, new_pte)
                } else {
                    guest_memory
                        .compare_exchange(
                            pte_address,
                            u64::from(pte) as u32,
                            u64::from(new_pte) as u32,
                        )
                        .map(|r| {
                            r.map(|n| Pte::from(n as u64))
                                .map_err(|n| Pte::from(n as u64))
                        })
                };

                match r {
                    Ok(Ok(_)) => {
                        // Compare exchange succeeded, so continue.
                    }
                    Ok(Err(pte)) => {
                        // Compare exchange failed. Loop around again.
                        pte_access = Ok(pte);
                        continue;
                    }
                    Err(err) => {
                        // Memory access failed. Loop around again to handle the
                        // failure consistently.
                        pte_access = Err(err);
                        continue;
                    }
                }
            }

            break (pte, done);
        };

        // When user permission is revoked at any level of the hierarchy,
        // supervisor access will always be permitted regardless of the user bit
        // in the terminal PTE.
        if !pte.user() {
            no_user_access = false;
        }

        if done {
            // Capture the caching attributes from the terminal entry. The PAT
            // bit is read from a different field for non-4KB mappings.
            cache_disable = pte.cache_disable();
            write_through = pte.write_through();
            pat_supported = if remaining_bits == 12 {
                pte.pat()
            } else {
                let large_pde = LargePde::from(u64::from(pte));
                large_pde.pat()
            };

            break;
        }
    }

    // The bits that didn't get used for page table indexes form the offset into
    // the page (of whatever size).
    let address_mask = !0 << remaining_bits;
    // PAT index layout: bit 0 = write-through, bit 1 = cache-disable,
    // bit 2 = PAT bit.
    let pat_index =
        ((cache_disable as u64) << 1) | (write_through as u64) | ((pat_supported as u64) << 2);
    Ok(TranslateResult {
        gpa: (gpa_base & address_mask) | (gva & !address_mask),
        cache_info: TranslateCachingInfo::Paging { pat_index },
    })
}
413
/// Returns whether a virtual address is canonical. On x86-64, this means that
/// the N top unused bits are equal to the top used bit, where N is 64 minus the
/// number of effective address bits (48 or 57).
fn is_canonical_address(gva: u64, address_bits: u32) -> bool {
    // An arithmetic shift by (address_bits - 1) sign-extends from the top
    // implemented bit, so a canonical address collapses to all zeros or all
    // ones.
    let sign_extended = (gva as i64) >> (address_bits - 1);
    matches!(sign_extended, 0 | -1)
}
423
#[cfg(test)]
mod tests {
    /// Table-driven check of canonical-address detection for both 48-bit
    /// (4-level) and 57-bit (5-level) linear address widths.
    #[test]
    fn test_canonical() {
        const CASES: &[(u64, u32, bool)] = &[
            (0, 48, true),
            (0x0000_4000_0000_0000, 48, true),
            (0x0000_8000_0000_0000, 48, false),
            (0x0000_8000_0000_0000, 57, true),
            (0x0100_0000_0000_0000, 57, false),
            (0xffff_ffff_0000_0000, 48, true),
            (0xffff_8000_0000_0000, 48, true),
            (0xffff_0000_0000_0000, 48, false),
            (0xffff_0000_0000_0000, 57, true),
            (0xff00_0000_0000_0000, 57, true),
            (0xfc00_0000_0000_0000, 57, false),
        ];

        for &(addr, bits, expected) in CASES {
            assert_eq!(
                super::is_canonical_address(addr, bits),
                expected,
                "{addr:#x} {bits}"
            );
        }
    }
}