hv1_emulator/hv.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Hypervisor MSR emulation.
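//!
//! This module implements the partition-wide ([`GlobalHv`]) and per-processor
//! ([`ProcessorVtlHv`]) pieces of the HV#1 synthetic MSR interface: the guest
//! OS ID, hypercall page, reference TSC page, and VP assist page MSRs, with
//! the synthetic interrupt controller MSRs routed to the per-VP synic state.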

use super::synic::GlobalSynic;
use super::synic::ProcessorSynic;
use crate::VtlProtectAccess;
use crate::pages::LockedPage;
use crate::pages::OverlayPage;
use hv1_structs::VtlArray;
use hvdef::HV_REFERENCE_TSC_SEQUENCE_INVALID;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvRegisterVpAssistPage;
use hvdef::HvVpVtlControl;
use hvdef::HvVtlEntryReason;
use hvdef::Vtl;
use inspect::Inspect;
use parking_lot::Mutex;
use safeatomic::AtomicSliceOps;
use std::mem::offset_of;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use virt::x86::MsrError;
use vm_topology::processor::VpIndex;
use vmcore::reference_time::ReferenceTimeSource;
use x86defs::cpuid::Vendor;
use zerocopy::FromZeros;

/// The partition-wide hypervisor state.
#[derive(Inspect)]
pub struct GlobalHv<const VTL_COUNT: usize> {
    #[inspect(flatten)]
    partition_state: Arc<GlobalHvState>,
    /// Mutable state, per VTL.
    vtl_mutable_state: VtlArray<Arc<Mutex<MutableHvState>>, VTL_COUNT>,
    /// The per-VTL synic state.
    pub synic: VtlArray<GlobalSynic, VTL_COUNT>,
}

#[derive(Inspect)]
struct GlobalHvState {
    #[inspect(display)]
    vendor: Vendor,
    ref_time: ReferenceTimeSource,
    tsc_frequency: u64,
    is_ref_time_backed_by_tsc: bool,
}

#[derive(Inspect)]
struct MutableHvState {
    #[inspect(hex, with = "|&x| u64::from(x)")]
    hypercall_reg: hvdef::hypercall::MsrHypercallContents,
    hypercall_page: ReadOnlyLockedPage,
    #[inspect(hex, with = "|&x| u64::from(x)")]
    guest_os_id: hvdef::hypercall::HvGuestOsId,
    #[inspect(hex, with = "|&x| u64::from(x)")]
    reference_tsc_reg: hvdef::HvRegisterReferenceTsc,
    reference_tsc_page: ReadOnlyLockedPage,
    tsc_sequence: u32,
}

impl MutableHvState {
    fn new() -> Self {
        Self {
            hypercall_reg: hvdef::hypercall::MsrHypercallContents::new(),
            hypercall_page: ReadOnlyLockedPage::default(),
            guest_os_id: hvdef::hypercall::HvGuestOsId::new(),
            reference_tsc_reg: hvdef::HvRegisterReferenceTsc::new(),
            reference_tsc_page: ReadOnlyLockedPage::default(),
            tsc_sequence: 0,
        }
    }

    fn reset(&mut self, prot_access: &mut dyn VtlProtectAccess) {
        let Self {
            hypercall_reg,
            hypercall_page,
            guest_os_id,
            reference_tsc_reg,
            reference_tsc_page,
            tsc_sequence,
        } = self;

        *hypercall_reg = hvdef::hypercall::MsrHypercallContents::new();
        hypercall_page.unmap(prot_access);
        *guest_os_id = hvdef::hypercall::HvGuestOsId::new();
        *reference_tsc_reg = hvdef::HvRegisterReferenceTsc::new();
        reference_tsc_page.unmap(prot_access);
        *tsc_sequence = 0;
    }
}

/// Parameters used when constructing a [`GlobalHv`].
pub struct GlobalHvParams<const VTL_COUNT: usize> {
    /// The maximum VP count for the VM.
    pub max_vp_count: u32,
    /// The vendor of the virtual processor.
    pub vendor: Vendor,
    /// The TSC frequency.
    pub tsc_frequency: u64,
    /// The reference time system to use.
    pub ref_time: ReferenceTimeSource,
    /// If true, the reference time is backed by the TSC, with an implicit
    /// offset of zero.
    pub is_ref_time_backed_by_tsc: bool,
}

impl<const VTL_COUNT: usize> GlobalHv<VTL_COUNT> {
    /// Returns a new hypervisor emulator instance.
    pub fn new(params: GlobalHvParams<VTL_COUNT>) -> Self {
        Self {
            partition_state: Arc::new(GlobalHvState {
                vendor: params.vendor,
                tsc_frequency: params.tsc_frequency,
                is_ref_time_backed_by_tsc: params.is_ref_time_backed_by_tsc,
                ref_time: params.ref_time,
            }),
            vtl_mutable_state: VtlArray::from_fn(|_| Arc::new(Mutex::new(MutableHvState::new()))),
            synic: VtlArray::from_fn(|_| GlobalSynic::new(params.max_vp_count)),
        }
    }

    /// Adds a virtual processor to the VTL.
    pub fn add_vp(&self, vp_index: VpIndex, vtl: Vtl) -> ProcessorVtlHv {
        ProcessorVtlHv {
            vp_index,
            partition_state: self.partition_state.clone(),
            vtl_state: self.vtl_mutable_state[vtl].clone(),
            synic: self.synic[vtl].add_vp(vp_index),
            vp_assist_page_reg: Default::default(),
            vp_assist_page: OverlayPage::default(),
        }
    }

    /// Resets the global (but not per-processor) state.
    pub fn reset(&self, mut prot_access: VtlArray<&mut dyn VtlProtectAccess, VTL_COUNT>) {
        for (state, overlay_access) in self.vtl_mutable_state.iter().zip(prot_access.iter_mut()) {
            state.lock().reset(*overlay_access);
        }
        // There is no global synic state to reset, since the synic is per-VP.
    }

    /// The current guest_os_id value.
    pub fn guest_os_id(&self, vtl: Vtl) -> hvdef::hypercall::HvGuestOsId {
        self.vtl_mutable_state[vtl].lock().guest_os_id
    }

    /// Returns the reference time source.
    pub fn ref_time_source(&self) -> &ReferenceTimeSource {
        &self.partition_state.ref_time
    }
}

/// A virtual processor's per-VTL hypervisor state.
#[derive(Inspect)]
pub struct ProcessorVtlHv {
    vp_index: VpIndex,
    #[inspect(skip)]
    partition_state: Arc<GlobalHvState>,
    vtl_state: Arc<Mutex<MutableHvState>>,
    /// The virtual processor's synic state.
    pub synic: ProcessorSynic,
    #[inspect(hex, with = "|&x| u64::from(x)")]
    vp_assist_page_reg: HvRegisterVpAssistPage,
    vp_assist_page: OverlayPage,
}

impl ProcessorVtlHv {
    /// The current reference time.
    pub fn ref_time_now(&self) -> u64 {
        self.partition_state.ref_time.now().ref_time
    }

    /// Resets the processor's state.
    pub fn reset(&mut self) {
        let Self {
            vp_index: _,
            partition_state: _,
            vtl_state: _,
            synic,
            vp_assist_page_reg,
            vp_assist_page,
        } = self;

        synic.reset();
        *vp_assist_page_reg = Default::default();
        *vp_assist_page = OverlayPage::default();
    }

    /// Emulates an MSR write for the guest OS ID MSR.
    pub fn msr_write_guest_os_id(&mut self, v: u64) {
        self.vtl_state.lock().guest_os_id = v.into();
    }

    /// Emulates an MSR write for the VP assist page MSR.
    pub fn msr_write_vp_assist_page(
        &mut self,
        v: u64,
        prot_access: &mut dyn VtlProtectAccess,
    ) -> Result<(), MsrError> {
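        // Only the bits defined by the register layout may be set: the enable
        // bit and the GPA page number. Writes that set any other (reserved)
        // bits are rejected.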
        if v & !u64::from(
            HvRegisterVpAssistPage::new()
                .with_enabled(true)
                .with_gpa_page_number(!0 >> 12),
        ) != 0
        {
            return Err(MsrError::InvalidAccess);
        }
        let new_vp_assist_page_reg = HvRegisterVpAssistPage::from(v);

        if new_vp_assist_page_reg.enabled()
            && (!self.vp_assist_page_reg.enabled()
                || new_vp_assist_page_reg.gpa_page_number()
                    != self.vp_assist_page_reg.gpa_page_number())
        {
            self.vp_assist_page
                .remap(new_vp_assist_page_reg.gpa_page_number(), prot_access)
                .map_err(|_| MsrError::InvalidAccess)?
        } else if !new_vp_assist_page_reg.enabled() {
            self.vp_assist_page.unmap(prot_access);
        }

        self.vp_assist_page_reg = new_vp_assist_page_reg;

        Ok(())
    }

    fn msr_write_hypercall_page(
        &mut self,
        v: u64,
        prot_access: &mut dyn VtlProtectAccess,
    ) -> Result<(), MsrError> {
        let mut mutable = self.vtl_state.lock();

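        // Once the guest has set the lock bit in the hypercall MSR, further
        // writes are rejected until the state is reset.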
        if mutable.hypercall_reg.locked() {
            return Err(MsrError::InvalidAccess);
        }

        let hc = hvdef::hypercall::MsrHypercallContents::from(v);
        if hc.reserved_p() != 0 {
            return Err(MsrError::InvalidAccess);
        }

        if hc.enable()
            && (!mutable.hypercall_reg.enable() || hc.gpn() != mutable.hypercall_reg.gpn())
        {
            let new_page = mutable
                .hypercall_page
                .remap(hc.gpn(), prot_access, true)
                .map_err(|_| MsrError::InvalidAccess)?;
            self.write_hypercall_page(new_page);
        } else if !hc.enable() {
            mutable.hypercall_page.unmap(prot_access);
        }

        mutable.hypercall_reg = hc;
        Ok(())
    }

    fn msr_write_reference_tsc(
        &mut self,
        v: u64,
        prot_access: &mut dyn VtlProtectAccess,
    ) -> Result<(), MsrError> {
        let mut mutable = self.vtl_state.lock();
        let v = hvdef::HvRegisterReferenceTsc::from(v);

        if v.reserved_p() != 0 {
            return Err(MsrError::InvalidAccess);
        }

        if v.enable()
            && (!mutable.reference_tsc_reg.enable() || v.gpn() != mutable.reference_tsc_reg.gpn())
        {
            let MutableHvState {
                reference_tsc_page,
                tsc_sequence,
                ..
            } = &mut *mutable;
            let new_page = reference_tsc_page
                .remap(v.gpn(), prot_access, false)
                .map_err(|_| MsrError::InvalidAccess)?;
            new_page[..4].atomic_write_obj(&HV_REFERENCE_TSC_SEQUENCE_INVALID);

            if self.partition_state.is_ref_time_backed_by_tsc {
                // TDX TODO: offset might need to be included
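                // The reference TSC page lets the guest compute the reference
                // time (in 100ns units) directly from the TSC as
                //   ref_time = ((tsc * tsc_scale) >> 64) + tsc_offset.
                // With the implicit offset of zero, tsc_scale is the 64.64
                // fixed-point ratio 10MHz / tsc_frequency: e.g. for a
                // (hypothetical) 2GHz TSC, (10_000_000 << 64) / 2_000_000_000
                // is 0.005 in 64.64 fixed point, so 2e9 ticks advance the
                // reference time by 1e7 units, i.e. one second.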
                let tsc_scale =
                    (((10_000_000_u128) << 64) / self.partition_state.tsc_frequency as u128) as u64;
                *tsc_sequence = tsc_sequence.wrapping_add(1);
                if *tsc_sequence == HV_REFERENCE_TSC_SEQUENCE_INVALID {
                    *tsc_sequence = tsc_sequence.wrapping_add(1);
                }
                let reference_page = hvdef::HvReferenceTscPage {
                    tsc_sequence: *tsc_sequence,
                    tsc_scale,
                    ..FromZeros::new_zeroed()
                };
                new_page.atomic_write_obj(&reference_page);
            }
        } else if !v.enable() {
            mutable.reference_tsc_page.unmap(prot_access);
        }

        mutable.reference_tsc_reg = v;
        Ok(())
    }

    /// Emulates an MSR write for an HV#1 synthetic MSR.
    pub fn msr_write(
        &mut self,
        n: u32,
        v: u64,
        prot_access: &mut dyn VtlProtectAccess,
    ) -> Result<(), MsrError> {
        match n {
            hvdef::HV_X64_MSR_GUEST_OS_ID => self.msr_write_guest_os_id(v),
            hvdef::HV_X64_MSR_HYPERCALL => self.msr_write_hypercall_page(v, prot_access)?,
            hvdef::HV_X64_MSR_VP_INDEX => return Err(MsrError::InvalidAccess),
            hvdef::HV_X64_MSR_TIME_REF_COUNT => return Err(MsrError::InvalidAccess),
            hvdef::HV_X64_MSR_REFERENCE_TSC => self.msr_write_reference_tsc(v, prot_access)?,
            hvdef::HV_X64_MSR_TSC_FREQUENCY => return Err(MsrError::InvalidAccess),
            hvdef::HV_X64_MSR_VP_ASSIST_PAGE => self.msr_write_vp_assist_page(v, prot_access)?,
            msr @ hvdef::HV_X64_MSR_SCONTROL..=hvdef::HV_X64_MSR_STIMER3_COUNT => {
                self.synic.write_msr(msr, v, prot_access)?
            }
            _ => return Err(MsrError::Unknown),
        }
        Ok(())
    }

    fn write_hypercall_page(&self, page: &LockedPage) {
        // Fill the page with int3 to catch invalid jumps into the page.
        let int3 = 0xcc;
        page.atomic_fill(int3);

        let page_contents: &[u8] = if self.partition_state.vendor.is_amd_compatible() {
            &AMD_HYPERCALL_PAGE.page
        } else if self.partition_state.vendor.is_intel_compatible() {
            &INTEL_HYPERCALL_PAGE.page
        } else {
            unreachable!()
        };

        page[..page_contents.len()].atomic_write(page_contents);
    }

    /// Gets the VSM code page offset register that corresponds to the hypercall
    /// page generated by this emulator.
    pub fn vsm_code_page_offsets(&self, bit64: bool) -> hvdef::HvRegisterVsmCodePageOffsets {
        // The code page offsets are the same for all VTLs.
        let page = if self.partition_state.vendor.is_amd_compatible() {
            &AMD_HYPERCALL_PAGE
        } else if self.partition_state.vendor.is_intel_compatible() {
            &INTEL_HYPERCALL_PAGE
        } else {
            unreachable!()
        };
        if bit64 {
            page.offsets64
        } else {
            page.offsets32
        }
    }

    /// Emulates an MSR read for an HV#1 synthetic MSR.
    pub fn msr_read(&self, msr: u32) -> Result<u64, MsrError> {
        let v = match msr {
            hvdef::HV_X64_MSR_GUEST_OS_ID => self.vtl_state.lock().guest_os_id.into(),
            hvdef::HV_X64_MSR_HYPERCALL => self.vtl_state.lock().hypercall_reg.into(),
            hvdef::HV_X64_MSR_VP_INDEX => self.vp_index.index() as u64, // VP index
            hvdef::HV_X64_MSR_TIME_REF_COUNT => self.partition_state.ref_time.now().ref_time,
            hvdef::HV_X64_MSR_REFERENCE_TSC => self.vtl_state.lock().reference_tsc_reg.into(),
            hvdef::HV_X64_MSR_TSC_FREQUENCY => self.partition_state.tsc_frequency,
            hvdef::HV_X64_MSR_VP_ASSIST_PAGE => self.vp_assist_page_reg.into(),
            msr @ hvdef::HV_X64_MSR_SCONTROL..=hvdef::HV_X64_MSR_STIMER3_COUNT => {
                self.synic.read_msr(msr)?
            }
            _ => {
                return Err(MsrError::Unknown);
            }
        };
        Ok(v)
    }

    /// Returns the current value of the VP assist page register.
    pub fn vp_assist_page(&self) -> u64 {
        self.vp_assist_page_reg.into()
    }

    /// Sets the lazy EOI bit in the VP assist page.
    ///
    /// If this returns true, the caller must call `clear_lazy_eoi` after the
    /// next VP exit but before manipulating the APIC.
    #[must_use]
    pub fn set_lazy_eoi(&mut self) -> bool {
        if !self.vp_assist_page_reg.enabled() {
            return false;
        }

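        // Set the "no EOI required" bit (bit 0 of the APIC assist field) so
        // the guest may skip its EOI register write; `clear_lazy_eoi` later
        // checks whether the guest did so, in which case the caller performs
        // the EOI on its behalf.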
        let offset = offset_of!(hvdef::HvVpAssistPage, apic_assist);
        let v = 1u32;
        self.vp_assist_page[offset..offset + 4].atomic_write_obj(&v);
        true
    }

    /// Clears the lazy EOI bit in the VP assist page.
    ///
    /// Must only be called if `set_lazy_eoi` returned true.
    ///
    /// If the bit was already clear, returns true; the caller must then send an
    /// EOI to the APIC.
    #[must_use]
    pub fn clear_lazy_eoi(&mut self) -> bool {
        let offset = offset_of!(hvdef::HvVpAssistPage, apic_assist);
        let v: u32 = self.vp_assist_page[offset..offset + 4].atomic_read_obj();

        if v & 1 == 0 {
            // The guest cleared the bit. The caller will perform the EOI to the
            // APIC.
            true
        } else {
            // Clear the bit in case the EOI state changes before the guest runs
            // again.
            let v = v & !1;
            self.vp_assist_page[offset..offset + 4].atomic_write_obj(&v);
            false
        }
    }

    /// Gets the register values to restore on VTL return.
    pub fn return_registers(&self) -> [u64; 2] {
        let offset =
            offset_of!(hvdef::HvVpAssistPage, vtl_control) + offset_of!(HvVpVtlControl, registers);
        self.vp_assist_page[offset..offset + 16].atomic_read_obj()
    }

    /// Sets the reason for the VTL return in the VP assist page.
    pub fn set_return_reason(&mut self, reason: HvVtlEntryReason) {
        let offset = offset_of!(hvdef::HvVpAssistPage, vtl_control)
            + offset_of!(HvVpVtlControl, entry_reason);
        self.vp_assist_page[offset..offset + 4].atomic_write_obj(&reason);
    }

    /// Gets whether VINA is currently asserted.
    pub fn vina_asserted(&self) -> bool {
        let offset = offset_of!(hvdef::HvVpAssistPage, vtl_control)
            + offset_of!(HvVpVtlControl, vina_status);
        self.vp_assist_page[offset].load(Ordering::Relaxed) != 0
    }

    /// Sets whether VINA is currently asserted.
    pub fn set_vina_asserted(&mut self, value: bool) {
        let offset = offset_of!(hvdef::HvVpAssistPage, vtl_control)
            + offset_of!(HvVpVtlControl, vina_status);
        self.vp_assist_page[offset].store(value as u8, Ordering::Relaxed);
    }
}

struct HypercallPage {
    page: [u8; 50],
    offsets32: hvdef::HvRegisterVsmCodePageOffsets,
    offsets64: hvdef::HvRegisterVsmCodePageOffsets,
}

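/// Builds the guest-visible hypercall page: the plain hypercall sequence at
/// offset 0, followed by 32-bit and 64-bit VTL call and VTL return stubs that
/// load the HvCallVtlCall (0x11) and HvCallVtlReturn (0x12) call codes before
/// issuing the hypercall instruction (`vmmcall` on AMD, `vmcall` on Intel).
/// The recorded offsets are reported to the guest via
/// [`ProcessorVtlHv::vsm_code_page_offsets`].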
const fn hypercall_page(use_vmmcall: bool) -> HypercallPage {
    let [hc0, hc1, hc2] = if use_vmmcall {
        [0x0f, 0x01, 0xd9] // vmmcall
    } else {
        [0x0f, 0x01, 0xc1] // vmcall
    };

    #[rustfmt::skip]
    let page = [
        // Normal entry
        hc0, hc1, hc2,                  // 0:  0f 01 d9                vmmcall
        0xc3,                           // 3:  c3                      ret
        // 32-bit VTL call
        0x89, 0xc1,                     // 4:  89 c1                   mov    ecx,eax
        0xb8, 0x11, 0x00, 0x00, 0x00,   // 6:  b8 11 00 00 00          mov    eax,0x11
        hc0, hc1, hc2,                  // b:  0f 01 d9                vmmcall
        0xc3,                           // e:  c3                      ret
        // 64-bit VTL call
        0x48, 0x89, 0xc8,               // f:  48 89 c8                mov    rax,rcx
        0xb9, 0x11, 0x00, 0x00, 0x00,   // 12: b9 11 00 00 00          mov    ecx,0x11
        hc0, hc1, hc2,                  // 17: 0f 01 d9                vmmcall
        0xc3,                           // 1a: c3                      ret
        // 32-bit VTL return
        0x89, 0xc1,                     // 1b: 89 c1                   mov    ecx,eax
        0xb8, 0x12, 0x00, 0x00, 0x00,   // 1d: b8 12 00 00 00          mov    eax,0x12
        hc0, hc1, hc2,                  // 22: 0f 01 d9                vmmcall
        0xc3,                           // 25: c3                      ret
        // 64-bit VTL return
        0x48, 0x89, 0xc8,               // 26: 48 89 c8                mov    rax,rcx
        0xb9, 0x12, 0x00, 0x00, 0x00,   // 29: b9 12 00 00 00          mov    ecx,0x12
        hc0, hc1, hc2,                  // 2e: 0f 01 d9                vmmcall
        0xc3,                           // 31: c3                      ret
    ];

    HypercallPage {
        page,
        offsets32: hvdef::HvRegisterVsmCodePageOffsets::new()
            .with_call_offset(0x4)
            .with_return_offset(0x1b),
        offsets64: hvdef::HvRegisterVsmCodePageOffsets::new()
            .with_call_offset(0xf)
            .with_return_offset(0x26),
    }
}

const AMD_HYPERCALL_PAGE: HypercallPage = hypercall_page(true);
const INTEL_HYPERCALL_PAGE: HypercallPage = hypercall_page(false);

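/// A guest overlay page that has been locked and, through the
/// [`VtlProtectAccess`] interface, made read-only (and optionally executable)
/// to the guest. Used for the hypercall page and the reference TSC page.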
#[derive(Default, Inspect)]
#[inspect(transparent)]
struct ReadOnlyLockedPage(Option<ReadOnlyLockedPageInner>);

#[derive(Inspect)]
struct ReadOnlyLockedPageInner {
    #[inspect(skip)]
    page: LockedPage,
}

impl ReadOnlyLockedPage {
    pub fn remap(
        &mut self,
        gpn: u64,
        prot_access: &mut dyn VtlProtectAccess,
        exec: bool,
    ) -> Result<&LockedPage, HvError> {
        // First try to acquire the new page.
        let new_page = prot_access.check_modify_and_lock_overlay_page(
            gpn,
            HvMapGpaFlags::new().with_readable(true).with_writable(true),
            Some(
                HvMapGpaFlags::new()
                    .with_readable(true)
                    .with_user_executable(exec)
                    .with_kernel_executable(exec),
            ),
        )?;
        let new_page = LockedPage::new(gpn, new_page);

        // If we got the new page without error, we can now unmap the previous page, if any.
        self.unmap(prot_access);

        // Store and return the new page.
        *self = ReadOnlyLockedPage(Some(ReadOnlyLockedPageInner { page: new_page }));

        Ok(&self.0.as_ref().unwrap().page)
    }

    pub fn unmap(&mut self, prot_access: &mut dyn VtlProtectAccess) {
        if let Some(ReadOnlyLockedPageInner { page }) = self.0.take() {
            prot_access.unlock_overlay_page(page.gpn).unwrap();
        }
    }
}