virt/x86/
mod.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! x86-specific state.
5
6pub mod apic_software_device;
7pub mod topology;
8pub mod vm;
9pub mod vp;
10
11use crate::state::StateElement;
12use inspect::Inspect;
13use mesh_protobuf::Protobuf;
14use std::fmt::Debug;
15use thiserror::Error;
16use vm_topology::processor::ProcessorTopology;
17use vm_topology::processor::x86::ApicMode;
18use vm_topology::processor::x86::X86Topology;
19use vm_topology::processor::x86::X86VpInfo;
20use x86defs::cpuid::CpuidFunction;
21use x86defs::cpuid::SgxCpuidSubleafEax;
22use x86defs::cpuid::Vendor;
23use x86defs::xsave::XSAVE_VARIABLE_OFFSET;
24
/// VP state that can be set for initial boot.
///
/// [`X86InitialRegs::at_reset`] builds each field from the matching
/// `at_reset` constructor in the [`vp`] module.
#[derive(Debug, PartialEq, Eq, Protobuf)]
pub struct X86InitialRegs {
    /// Register state to be set on the BSP.
    pub registers: vp::Registers,
    /// MTRR state to be set on all processors.
    pub mtrrs: vp::Mtrrs,
    /// PAT state to be set on all processors.
    pub pat: vp::Pat,
}
35
36impl X86InitialRegs {
37    pub fn at_reset(caps: &X86PartitionCapabilities, bsp: &X86VpInfo) -> Self {
38        Self {
39            registers: vp::Registers::at_reset(caps, bsp),
40            mtrrs: vp::Mtrrs::at_reset(caps, bsp),
41            pat: vp::Pat::at_reset(caps, bsp),
42        }
43    }
44}
45
/// Partition capabilities, used to determine which state is active on a
/// partition and what the reset state should be.
///
/// [`X86PartitionCapabilities::from_cpuid`] populates the CPUID-derived
/// fields; it leaves `can_freeze_time`, `xsaves_state_bv_broken`,
/// `dr6_tsx_broken`, and `nxe_forced_on` set to `false`.
#[derive(Debug, Inspect)]
pub struct X86PartitionCapabilities {
    /// The processor vendor.
    #[inspect(display)]
    pub vendor: Vendor,
    /// The MS hypervisor is available.
    pub hv1: bool,
    /// The reference TSC page is available.
    pub hv1_reference_tsc_page: bool,
    /// Xsave information.
    pub xsave: XsaveCapabilities,
    /// X2apic is supported.
    pub x2apic: bool,
    /// X2apic is enabled at boot.
    pub x2apic_enabled: bool,
    /// The initial value for rdx.
    ///
    /// `from_cpuid` takes this from EAX of the version-and-features leaf.
    #[inspect(hex)]
    pub reset_rdx: u64,
    /// CET is supported.
    pub cet: bool,
    /// CET-SS is supported.
    pub cet_ss: bool,
    /// SGX is enabled.
    pub sgx: bool,
    /// TSC_AUX is supported.
    ///
    /// `from_cpuid` sets this when CPUID reports either RDTSCP or RDPID.
    pub tsc_aux: bool,
    /// The address of the virtual top of memory, for encrypted VMs.
    ///
    /// This is computed from the Hyper-V isolation leaf. It is guaranteed to be
    /// a power of 2, if present.
    #[inspect(hex)]
    pub vtom: Option<u64>,
    /// The physical address width of the CPU, as reported by CPUID.
    pub physical_address_width: u8,

    /// The hypervisor can freeze time across state manipulation.
    pub can_freeze_time: bool,
    /// The hypervisor has a broken implementation querying xsave state, where
    /// supervisor states are not correctly set in xstate_bv.
    pub xsaves_state_bv_broken: bool,
    /// The hypervisor has a broken implementation setting dr6, where bit 16 is
    /// forced on even if the processor supports TSX.
    pub dr6_tsx_broken: bool,
    /// EFER.NXE is forced on. This is set for TDX 1.5 partitions, which require
    /// this.
    pub nxe_forced_on: bool,
}
95
/// Errors returned by [`X86PartitionCapabilities::from_cpuid`].
#[derive(Error, Debug)]
pub enum X86PartitionCapabilitiesError {
    /// The xsave area size advertised by CPUID (`advertised`) is smaller than
    /// the size computed from the enumerated state components (`required`).
    #[error(
        "advertised xsave length ({advertised}) too small for features, requires ({required}) bytes"
    )]
    XSaveLengthTooSmall { advertised: u32, required: u32 },
    /// The APIC mode of the processor topology disagrees with the x2apic bit
    /// reported by CPUID. `expected` is what the topology requires and `found`
    /// is what CPUID reported.
    #[error("x2apic topology and cpuid mismatch, expected x2apic={expected}, found {found}")]
    X2ApicMismatch { expected: bool, found: bool },
}
105
106impl X86PartitionCapabilities {
107    pub fn from_cpuid(
108        processor_topology: &ProcessorTopology<X86Topology>,
109        f: &mut dyn FnMut(u32, u32) -> [u32; 4],
110    ) -> Result<Self, X86PartitionCapabilitiesError> {
111        let mut this = Self {
112            vendor: Vendor([0; 12]),
113            hv1: false,
114            hv1_reference_tsc_page: false,
115            xsave: XsaveCapabilities {
116                features: 0,
117                supervisor_features: 0,
118                standard_len: XSAVE_VARIABLE_OFFSET as u32,
119                compact_len: XSAVE_VARIABLE_OFFSET as u32,
120                feature_info: [Default::default(); 63],
121            },
122            x2apic: false,
123            x2apic_enabled: false,
124            reset_rdx: 0,
125            cet: false,
126            cet_ss: false,
127            sgx: false,
128            tsc_aux: false,
129            vtom: None,
130            physical_address_width: max_physical_address_size_from_cpuid(&mut *f),
131            can_freeze_time: false,
132            xsaves_state_bv_broken: false,
133            dr6_tsx_broken: false,
134            nxe_forced_on: false,
135        };
136
137        let max_function = {
138            let [eax, ebx, ecx, edx] = f(CpuidFunction::VendorAndMaxFunction.0, 0);
139            this.vendor = Vendor::from_ebx_ecx_edx(ebx, ecx, edx);
140            eax
141        };
142
143        let mut hypervisor = false;
144        let mut xsave = false;
145        if max_function >= CpuidFunction::VersionAndFeatures.0 {
146            let result = f(CpuidFunction::VersionAndFeatures.0, 0);
147            this.reset_rdx = result[0].into();
148            let features = result[2] as u64 | ((result[3] as u64) << 32);
149            this.x2apic = features & (1 << 21) != 0;
150            xsave = features & (1 << 26) != 0;
151            hypervisor = features & (1 << 31) != 0;
152        }
153
154        let extended_features = if max_function >= CpuidFunction::ExtendedFeatures.0 {
155            f(CpuidFunction::ExtendedFeatures.0, 0)
156        } else {
157            Default::default()
158        };
159
160        if max_function >= CpuidFunction::ExtendedFeatures.0 {
161            if extended_features[2] & (1 << 7) != 0 {
162                this.cet = true;
163                this.cet_ss = true;
164            }
165            if extended_features[3] & (1 << 20) != 0 {
166                this.cet = true;
167            }
168        }
169
170        if max_function >= CpuidFunction::SgxEnumeration.0 {
171            let sgx_result: SgxCpuidSubleafEax =
172                SgxCpuidSubleafEax::from(f(CpuidFunction::SgxEnumeration.0, 2)[0]);
173            if sgx_result.sgx_type() != 0 {
174                this.sgx = true;
175            }
176        }
177
178        if xsave {
179            let result = f(CpuidFunction::ExtendedStateEnumeration.0, 0);
180            this.xsave.features = result[0] as u64 | ((result[3] as u64) << 32);
181            let standard_len = result[2];
182
183            let result = f(CpuidFunction::ExtendedStateEnumeration.0, 1);
184            this.xsave.supervisor_features = result[2] as u64 | ((result[3] as u64) << 32);
185
186            let mut n = (this.xsave.features | this.xsave.supervisor_features) & !3;
187            while n != 0 {
188                let i = n.trailing_zeros();
189                n -= 1 << i;
190                let result = f(CpuidFunction::ExtendedStateEnumeration.0, i);
191                let feature = XsaveFeature {
192                    offset: result[1],
193                    len: result[0],
194                    align: result[2] & 2 != 0,
195                };
196                this.xsave.feature_info[i as usize] = feature;
197            }
198            this.xsave.compact_len = this.xsave.compact_len_for(!0);
199            this.xsave.standard_len = this.xsave.standard_len_for(!0);
200
201            if this.xsave.standard_len > standard_len {
202                return Err(X86PartitionCapabilitiesError::XSaveLengthTooSmall {
203                    advertised: standard_len,
204                    required: this.xsave.standard_len,
205                });
206            }
207        }
208
209        // Hypervisor info.
210        if hypervisor {
211            let hv_max = f(hvdef::HV_CPUID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION, 0)[0];
212            if hv_max >= hvdef::HV_CPUID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION
213                && f(hvdef::HV_CPUID_FUNCTION_HV_INTERFACE, 0)[0] == u32::from_le_bytes(*b"Hv#1")
214            {
215                this.hv1 = true;
216                let result = f(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
217                let privs = hvdef::HvPartitionPrivilege::from(
218                    result[0] as u64 | ((result[1] as u64) << 32),
219                );
220                this.hv1_reference_tsc_page = privs.access_partition_reference_tsc();
221                if privs.isolation()
222                    && hv_max >= hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION
223                {
224                    let [eax, ebx, ecx, edx] =
225                        f(hvdef::HV_CPUID_FUNCTION_MS_HV_ISOLATION_CONFIGURATION, 0);
226                    let config = hvdef::HvIsolationConfiguration::from(
227                        eax as u128
228                            | ((ebx as u128) << 32)
229                            | ((ecx as u128) << 64)
230                            | ((edx as u128) << 96),
231                    );
232                    if config.shared_gpa_boundary_active() {
233                        this.vtom = Some(1 << config.shared_gpa_boundary_bits());
234                    }
235                }
236            }
237        }
238
239        match (processor_topology.apic_mode(), this.x2apic) {
240            (ApicMode::XApic, true) => {
241                return Err(X86PartitionCapabilitiesError::X2ApicMismatch {
242                    expected: false,
243                    found: true,
244                });
245            }
246            (ApicMode::X2ApicSupported | ApicMode::X2ApicEnabled, false) => {
247                return Err(X86PartitionCapabilitiesError::X2ApicMismatch {
248                    expected: true,
249                    found: false,
250                });
251            }
252            (ApicMode::XApic, false) | (ApicMode::X2ApicSupported, true) => {}
253            (ApicMode::X2ApicEnabled, true) => {
254                this.x2apic_enabled = true;
255            }
256        }
257
258        this.tsc_aux = {
259            let rdtscp = {
260                let extended_max_function = f(CpuidFunction::ExtendedMaxFunction.0, 0)[0];
261                if extended_max_function >= CpuidFunction::ExtendedVersionAndFeatures.0 {
262                    x86defs::cpuid::ExtendedVersionAndFeaturesEdx::from(
263                        f(CpuidFunction::ExtendedVersionAndFeatures.0, 0)[3],
264                    )
265                    .rdtscp()
266                } else {
267                    false
268                }
269            };
270
271            let rdpid =
272                x86defs::cpuid::ExtendedFeatureSubleaf0Ecx::from(extended_features[2]).rd_pid();
273
274            rdtscp || rdpid
275        };
276
277        Ok(this)
278    }
279}
280
/// Description of the xsave state components available to a partition.
#[derive(Debug, Copy, Clone, Inspect)]
pub struct XsaveCapabilities {
    /// Bitmask of state components, as read by
    /// `X86PartitionCapabilities::from_cpuid` from EAX (low half) and EDX
    /// (high half) of subleaf 0 of the extended state enumeration leaf.
    pub features: u64,
    /// Bitmask of supervisor state components, as read from ECX (low half) and
    /// EDX (high half) of subleaf 1 of the same leaf.
    pub supervisor_features: u64,
    /// Result of [`XsaveCapabilities::standard_len_for`] over all components.
    pub standard_len: u32,
    /// Result of [`XsaveCapabilities::compact_len_for`] over all components.
    pub compact_len: u32,
    /// Per-component layout, indexed by the component's bit number in the
    /// masks above.
    #[inspect(skip)] // TODO
    pub feature_info: [XsaveFeature; 63],
}
290
/// Layout of a single xsave state component.
///
/// `X86PartitionCapabilities::from_cpuid` fills this from the component's
/// subleaf of the extended state enumeration leaf.
#[derive(Default, Debug, Copy, Clone)]
pub struct XsaveFeature {
    /// Taken from EBX of the subleaf; used as the component's byte offset when
    /// computing the standard-format length.
    pub offset: u32,
    /// Taken from EAX of the subleaf; the component's size in bytes.
    pub len: u32,
    /// Taken from bit 1 of ECX of the subleaf; when set, the component is
    /// placed on a 64-byte boundary in the compact-format length computation.
    pub align: bool,
}
297
298impl XsaveCapabilities {
299    pub fn standard_len_for(&self, xfem: u64) -> u32 {
300        let mut len = XSAVE_VARIABLE_OFFSET as u32;
301        for i in 2..63 {
302            if xfem & (1 << i) != 0 {
303                let feature = &self.feature_info[i as usize];
304                len = len.max(feature.offset + feature.len);
305            }
306        }
307        len
308    }
309
310    pub fn compact_len_for(&self, xfem: u64) -> u32 {
311        let mut len = XSAVE_VARIABLE_OFFSET as u32;
312        for i in 2..63 {
313            if xfem & (1 << i) != 0 {
314                let feature = &self.feature_info[i as usize];
315                if feature.align {
316                    len = (len + 63) & !63;
317                }
318                len += feature.len;
319            }
320        }
321        len
322    }
323}
324
/// Table register state (a base and a limit), convertible to and from
/// [`hvdef::HvX64TableRegister`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Protobuf, Inspect)]
#[mesh(package = "virt.x86")]
pub struct TableRegister {
    /// The base value of the register.
    #[inspect(hex)]
    #[mesh(1)]
    pub base: u64,
    /// The limit value of the register.
    #[inspect(hex)]
    #[mesh(2)]
    pub limit: u16,
}
335
336impl From<hvdef::HvX64TableRegister> for TableRegister {
337    fn from(table: hvdef::HvX64TableRegister) -> Self {
338        Self {
339            base: table.base,
340            limit: table.limit,
341        }
342    }
343}
344
345impl From<TableRegister> for hvdef::HvX64TableRegister {
346    fn from(table: TableRegister) -> Self {
347        Self {
348            base: table.base,
349            limit: table.limit,
350            pad: [0; 3],
351        }
352    }
353}
354
/// Segment register state, convertible to and from both
/// [`x86defs::SegmentRegister`] and [`hvdef::HvX64SegmentRegister`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Protobuf, Inspect)]
#[mesh(package = "virt.x86")]
pub struct SegmentRegister {
    /// The segment base.
    #[inspect(hex)]
    #[mesh(1)]
    pub base: u64,
    /// The segment limit.
    #[inspect(hex)]
    #[mesh(2)]
    pub limit: u32,
    /// The segment selector.
    #[inspect(hex)]
    #[mesh(3)]
    pub selector: u16,
    /// The segment attributes, stored as a raw 16-bit value.
    #[inspect(hex)]
    #[mesh(4)]
    pub attributes: u16,
}
371
372impl From<x86defs::SegmentRegister> for SegmentRegister {
373    fn from(seg: x86defs::SegmentRegister) -> Self {
374        Self {
375            base: seg.base,
376            limit: seg.limit,
377            selector: seg.selector,
378            attributes: seg.attributes.into(),
379        }
380    }
381}
382
383impl From<SegmentRegister> for x86defs::SegmentRegister {
384    fn from(seg: SegmentRegister) -> Self {
385        Self {
386            base: seg.base,
387            limit: seg.limit,
388            selector: seg.selector,
389            attributes: seg.attributes.into(),
390        }
391    }
392}
393
394impl From<hvdef::HvX64SegmentRegister> for SegmentRegister {
395    fn from(seg: hvdef::HvX64SegmentRegister) -> Self {
396        Self {
397            base: seg.base,
398            limit: seg.limit,
399            selector: seg.selector,
400            attributes: seg.attributes,
401        }
402    }
403}
404
405impl From<SegmentRegister> for hvdef::HvX64SegmentRegister {
406    fn from(seg: SegmentRegister) -> Self {
407        Self {
408            base: seg.base,
409            limit: seg.limit,
410            selector: seg.selector,
411            attributes: seg.attributes,
412        }
413    }
414}
415
/// Guest debugging state, for gdbstub or similar use cases.
#[derive(Debug, Copy, Clone, Protobuf)]
pub struct DebugState {
    /// Single step the VP.
    pub single_step: bool,
    /// Hardware breakpoints/watchpoints.
    ///
    /// There are four slots; a slot holding `None` has no breakpoint.
    pub breakpoints: [Option<HardwareBreakpoint>; 4],
}
424
/// A single hardware breakpoint or watchpoint.
///
/// Convert to and from the DR7 control bits with
/// [`HardwareBreakpoint::dr7_bits`] and [`HardwareBreakpoint::from_dr7`].
#[derive(Debug, Copy, Clone, Protobuf, PartialEq, Eq)]
pub struct HardwareBreakpoint {
    /// The address to watch.
    pub address: u64,
    /// The breakpoint type.
    pub ty: BreakpointType,
    /// The size of the memory location to watch.
    pub size: BreakpointSize,
}
434
435impl HardwareBreakpoint {
436    /// Parses the hardware breakpoint from DR7, the address of the breakpoint,
437    /// and the debug register index (0-3).
438    pub fn from_dr7(dr7: u64, address: u64, reg: usize) -> Self {
439        let v = dr7 >> (16 + reg * 4);
440        let ty = match v & 3 {
441            0 => BreakpointType::Execute,
442            1 => BreakpointType::Invalid,
443            2 => BreakpointType::Write,
444            3 => BreakpointType::ReadOrWrite,
445            _ => unreachable!(),
446        };
447        let size = match (v >> 2) & 3 {
448            0 => BreakpointSize::Byte,
449            1 => BreakpointSize::Word,
450            2 => BreakpointSize::QWord,
451            3 => BreakpointSize::DWord,
452            _ => unreachable!(),
453        };
454        Self { address, ty, size }
455    }
456
457    /// Returns a value to OR into DR7 to enable this breakpoint.
458    pub fn dr7_bits(&self, reg: usize) -> u64 {
459        ((self.ty as u64 | ((self.size as u64) << 2)) << (16 + reg * 4)) | (1 << (1 + reg * 2))
460    }
461}
462
/// A hardware breakpoint type.
///
/// NOTE(review): the Intel SDM encodes the DR7 R/W field as 01 = data write
/// and 10 = I/O read/write, so these discriminants should not be assumed to
/// be the hardware encoding. Verify any code that casts this enum to an
/// integer.
#[derive(Debug, Copy, Clone, Protobuf, PartialEq, Eq)]
pub enum BreakpointType {
    /// Break on execute. Size should be [`BreakpointSize::Byte`].
    Execute = 0,
    /// Invalid type, not used on x86.
    Invalid = 1,
    /// Break on write.
    Write = 2,
    /// Break on read or write.
    ReadOrWrite = 3,
}
475
/// The size of the debug breakpoint.
///
/// The discriminants are not in size order: they follow the two-bit length
/// values that [`HardwareBreakpoint::from_dr7`] decodes (0 = 1 byte,
/// 1 = 2 bytes, 2 = 8 bytes, 3 = 4 bytes).
#[derive(Debug, Copy, Clone, Protobuf, PartialEq, Eq)]
pub enum BreakpointSize {
    /// 1 byte.
    Byte = 0,
    /// 2 bytes.
    Word = 1,
    /// 4 bytes.
    DWord = 3,
    /// 8 bytes.
    QWord = 2,
}
488
/// The requested breakpoint size is not supported.
///
/// Returned by `BreakpointSize::try_from` for any byte count other than 1, 2,
/// 4, or 8.
#[derive(Debug)]
pub struct UnsupportedBreakpointSize;
492
493impl TryFrom<usize> for BreakpointSize {
494    type Error = UnsupportedBreakpointSize;
495
496    fn try_from(value: usize) -> Result<Self, Self::Error> {
497        Ok(match value {
498            1 => BreakpointSize::Byte,
499            2 => BreakpointSize::Word,
500            4 => BreakpointSize::DWord,
501            8 => BreakpointSize::QWord,
502            _ => return Err(UnsupportedBreakpointSize),
503        })
504    }
505}
506
507/// Query the max physical address size of the system.
508pub fn max_physical_address_size_from_cpuid(mut cpuid: impl FnMut(u32, u32) -> [u32; 4]) -> u8 {
509    const DEFAULT_PHYSICAL_ADDRESS_SIZE: u8 = 32;
510
511    let max_extended = {
512        let result = cpuid(CpuidFunction::ExtendedMaxFunction.0, 0);
513        result[0]
514    };
515
516    if max_extended >= CpuidFunction::ExtendedAddressSpaceSizes.0 {
517        let result = cpuid(CpuidFunction::ExtendedAddressSpaceSizes.0, 0);
518        (result[0] & 0xFF) as u8
519    } else {
520        DEFAULT_PHYSICAL_ADDRESS_SIZE
521    }
522}
523
/// Error returned by MSR routines.
///
/// [`MsrErrorExt::or_else_if_unknown`] can be used to fall through to another
/// handler when the error is [`MsrError::Unknown`].
#[derive(Debug)]
pub enum MsrError {
    /// The MSR is not implemented. Depending on the configuration, this should
    /// either be ignored (returning 0 for reads) or should result in a #GP.
    Unknown,
    /// The MSR is implemented but this is an invalid read or write and should
    /// always result in a #GP.
    InvalidAccess,
}
534
/// Extension trait to chain MSR accesses together.
pub trait MsrErrorExt: Sized {
    /// Calls `f` if `self` is `Err(MsrError::Unknown)`.
    ///
    /// In the `Result<T, MsrError>` implementation, any other value is
    /// returned unchanged without calling `f`.
    fn or_else_if_unknown(self, f: impl FnOnce() -> Self) -> Self;
}
540
541impl<T> MsrErrorExt for Result<T, MsrError> {
542    fn or_else_if_unknown(self, f: impl FnOnce() -> Self) -> Self {
543        match self {
544            Err(MsrError::Unknown) => f(),
545            r => r,
546        }
547    }
548}