virt_support_x86emu/
emulate.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Wrapper around x86emu for emulating single instructions to handle VM exits.
5
6use crate::translate::TranslateFlags;
7use crate::translate::TranslatePrivilegeCheck;
8use crate::translate::translate_gva_to_gpa;
9use guestmem::GuestMemory;
10use guestmem::GuestMemoryError;
11use hvdef::HV_PAGE_SIZE;
12use hvdef::HvInterceptAccessType;
13use hvdef::HvMapGpaFlags;
14use thiserror::Error;
15use virt::VpHaltReason;
16use virt::io::CpuIo;
17use vm_topology::processor::VpIndex;
18use x86defs::Exception;
19use x86defs::RFlags;
20use x86defs::SegmentRegister;
21use x86emu::AlignmentMode;
22use x86emu::Gp;
23use x86emu::RegisterIndex;
24use x86emu::Segment;
25use zerocopy::FromBytes;
26use zerocopy::IntoBytes;
27
28/// Support routines for the emulator.
29pub trait EmulatorSupport {
30    /// The hypervisor error type.
31    type Error: 'static + std::error::Error + Send + Sync;
32
33    /// The current VP index.
34    fn vp_index(&self) -> VpIndex;
35
36    /// The processor vendor.
37    fn vendor(&self) -> x86defs::cpuid::Vendor;
38
39    /// Read a general-purpose register.
40    fn gp(&mut self, index: Gp) -> u64;
41
42    /// Set a general-purpose register.
43    fn set_gp(&mut self, reg: Gp, v: u64);
44
45    /// Read the instruction pointer
46    fn rip(&mut self) -> u64;
47
48    /// Set the instruction pointer
49    fn set_rip(&mut self, v: u64);
50
51    /// Read a segment register
52    fn segment(&mut self, index: Segment) -> SegmentRegister;
53
54    /// Read the EFER register.
55    fn efer(&mut self) -> u64;
56
57    /// Read CR0.
58    fn cr0(&mut self) -> u64;
59
60    /// Read rflags
61    fn rflags(&mut self) -> RFlags;
62
63    /// Set rflags
64    fn set_rflags(&mut self, v: RFlags);
65
66    /// Gets the value of an XMM* register.
67    fn xmm(&mut self, reg: usize) -> u128;
68
69    /// Sets the value of an XMM* register.
70    fn set_xmm(&mut self, reg: usize, value: u128) -> Result<(), Self::Error>;
71
72    /// Flush registers in the emulation cache to the backing
73    fn flush(&mut self) -> Result<(), Self::Error>;
74
75    /// The instruction bytes, if available.
76    fn instruction_bytes(&self) -> &[u8];
77
78    /// The physical address that caused the fault.
79    fn physical_address(&self) -> Option<u64>;
80
81    /// The gva translation included in the intercept message header, if valid.
82    fn initial_gva_translation(&mut self) -> Option<InitialTranslation>;
83
84    /// Whether an interruption is marked pending in the intercept message.
85    fn interruption_pending(&self) -> bool;
86
87    /// Check that the given GPA is valid for the current VTL to access with the specified access mode.
88    /// Returns `Ok(())` if the access is allowed.
89    fn check_vtl_access(
90        &mut self,
91        gpa: u64,
92        mode: TranslateMode,
93    ) -> Result<(), EmuCheckVtlAccessError<Self::Error>>;
94
95    /// Translates a GVA to a GPA.
96    fn translate_gva(
97        &mut self,
98        gva: u64,
99        mode: TranslateMode,
100    ) -> Result<Result<EmuTranslateResult, EmuTranslateError>, Self::Error>;
101
102    /// Generates an event (exception, guest nested page fault, etc.) in the guest.
103    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent);
104
105    /// Check if the specified write is wholly inside the monitor page, and signal the associated
106    /// connection ID if it is.
107    fn check_monitor_write(&self, gpa: u64, bytes: &[u8]) -> bool {
108        let _ = (gpa, bytes);
109        false
110    }
111
112    /// Returns true if `gpa` is mapped for the specified permissions.
113    ///
114    /// If true, then the emulator will use [`GuestMemory`] to access the GPA,
115    /// and any failures will be fatal to the VM.
116    ///
117    /// If false, then the emulator will use [`CpuIo`] to access the GPA as
118    /// MMIO.
119    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool;
120
121    /// Returns the page-aligned base address of the enabled local APIC in xapic
122    /// mode.
123    fn lapic_base_address(&self) -> Option<u64>;
124
125    /// Read from the current processor's local APIC memory mapped interface.
126    ///
127    /// This will only be called on an address in the page returned by
128    /// `lapic_base_address`.
129    fn lapic_read(&mut self, address: u64, data: &mut [u8]);
130
131    /// Write to the current processor's local APIC memory mapped interface.
132    ///
133    /// This will only be called on an address in the page returned by
134    /// `lapic_base_address`.
135    fn lapic_write(&mut self, address: u64, data: &[u8]);
136}
137
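/// Support routines for emulating a guest page table walk via
/// [`emulate_translate_gva`].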
138pub trait TranslateGvaSupport {
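    /// The error type returned by [`Self::registers`].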
139    type Error;
140
141    /// Gets the object used to access the guest memory.
142    fn guest_memory(&self) -> &GuestMemory;
143
144    /// Acquires the TLB lock for this processor.
145    fn acquire_tlb_lock(&mut self);
146
147    /// Returns the registers used to walk the page table.
148    fn registers(&mut self) -> Result<crate::translate::TranslationRegisters, Self::Error>;
149}
150
151/// Emulates a page table walk.
152///
153/// This is suitable for implementing [`EmulatorSupport::translate_gva`].
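///
/// For example, a backend that implements both [`EmulatorSupport`] and
/// [`TranslateGvaSupport`] with the same error type might delegate its
/// `translate_gva` method to this helper. This is an illustrative sketch;
/// `MyVp` is a hypothetical type.
///
/// ```ignore
/// impl EmulatorSupport for MyVp {
///     // ...other trait methods elided...
///     fn translate_gva(
///         &mut self,
///         gva: u64,
///         mode: TranslateMode,
///     ) -> Result<Result<EmuTranslateResult, EmuTranslateError>, Self::Error> {
///         emulate_translate_gva(self, gva, mode)
///     }
/// }
/// ```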
154pub fn emulate_translate_gva<T: TranslateGvaSupport>(
155    support: &mut T,
156    gva: u64,
157    mode: TranslateMode,
158) -> Result<Result<EmuTranslateResult, EmuTranslateError>, T::Error> {
159    // Always acquire the TLB lock for this path.
160    support.acquire_tlb_lock();
161
162    let flags = TranslateFlags {
163        validate_execute: matches!(mode, TranslateMode::Execute),
164        validate_read: matches!(mode, TranslateMode::Read | TranslateMode::Write),
165        validate_write: matches!(mode, TranslateMode::Write),
166        override_smap: false,
167        enforce_smap: false,
168        privilege_check: TranslatePrivilegeCheck::CurrentPrivilegeLevel,
169        set_page_table_bits: true,
170    };
171
172    let registers = support.registers()?;
173
174    let r = match translate_gva_to_gpa(support.guest_memory(), gva, &registers, flags) {
175        Ok(crate::translate::TranslateResult { gpa, cache_info: _ }) => Ok(EmuTranslateResult {
176            gpa,
177            overlay_page: None,
178        }),
179        Err(err) => Err(EmuTranslateError {
180            code: err.into(),
181            event_info: None,
182        }),
183    };
184    Ok(r)
185}
186
187/// The result of translate_gva on [`EmulatorSupport`].
188pub struct EmuTranslateResult {
189    /// The GPA result of the translation.
190    pub gpa: u64,
191    /// Whether the page is an overlay page.
192    /// Not all implementations return the overlay page or event info yet, so these values are optional.
193    pub overlay_page: Option<bool>,
194}
195
196/// The translation, if any, provided in the intercept message and reported by [`EmulatorSupport`].
197#[derive(Debug)]
198pub struct InitialTranslation {
199    /// GVA for the translation
200    pub gva: u64,
201    /// Translated gpa for the gva
202    pub gpa: u64,
203    /// Whether the translation has read, write, or execute permissions.
204    pub translate_mode: TranslateMode,
205}
206
207#[derive(Error, Debug)]
208pub enum EmuCheckVtlAccessError<E> {
209    #[error(transparent)]
210    Hypervisor(#[from] E),
211    #[error("failed vtl permissions access for vtl {vtl:?} and access flags {denied_flags:?}")]
212    AccessDenied {
213        vtl: hvdef::Vtl,
214        denied_flags: HvMapGpaFlags,
215    },
216}
217
218#[derive(Error, Debug)]
219#[error("translate gva to gpa returned non-successful code {code:?}")]
220/// Error for a failed gva translation from [`EmulatorSupport`].
221pub struct EmuTranslateError {
222    /// Translation result code of type [`hvdef::hypercall::TranslateGvaResultCode`].
223    /// This will never be `SUCCESS`.
224    pub code: hvdef::hypercall::TranslateGvaResultCode,
225    /// Pending event, if any, returned by hypervisor to go with the translate code.
226    pub event_info: Option<hvdef::HvX64PendingEvent>,
227}
228
229/// The access type for a GVA translation in [`EmulatorSupport`].
230#[derive(Debug, Copy, Clone, PartialEq, Eq)]
231pub enum TranslateMode {
232    /// A read operation.
233    Read,
234    /// A write operation.
235    Write,
236    /// An execute operation.
237    Execute,
238}
239
240/// The requested intercept access type isn't supported
241#[derive(Debug)]
242pub struct UnsupportedInterceptAccessType;
243
244impl TryFrom<HvInterceptAccessType> for TranslateMode {
245    type Error = UnsupportedInterceptAccessType;
246
247    fn try_from(access_type: HvInterceptAccessType) -> Result<Self, Self::Error> {
248        match access_type {
249            HvInterceptAccessType::READ => Ok(TranslateMode::Read),
250            HvInterceptAccessType::WRITE => Ok(TranslateMode::Write),
251            HvInterceptAccessType::EXECUTE => Ok(TranslateMode::Execute),
252            _ => Err(UnsupportedInterceptAccessType),
253        }
254    }
255}
256
257#[derive(Debug, Error)]
258enum EmulationError<E> {
259    #[error("an interrupt caused the memory access exit")]
260    InterruptionPending,
261    #[error("linear IP was not within CS segment limit")]
262    LinearIpPastCsLimit,
263    #[error("failed to flush the emulator cache")]
264    CacheFlushFailed(#[source] E),
265    #[error("failed to read instruction stream")]
266    InstructionRead(#[source] E),
267    #[error("emulator error (instruction {bytes:02x?})")]
268    Emulator {
269        bytes: Vec<u8>,
270        #[source]
271        error: x86emu::Error<E>,
272    },
273}
274
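/// Guest memory mappings used by the emulator: `gm` for data reads and writes,
/// `kx_gm` for kernel-mode instruction fetches, and `ux_gm` for user-mode
/// instruction fetches.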
275pub struct EmulatorMemoryAccess<'a> {
276    pub gm: &'a GuestMemory,
277    pub kx_gm: &'a GuestMemory,
278    pub ux_gm: &'a GuestMemory,
279}
280
281enum EmulatorMemoryAccessType {
282    ReadWrite,
283    InstructionRead { is_user_mode: bool },
284}
285
286impl EmulatorMemoryAccess<'_> {
287    fn gm(&self, access_type: EmulatorMemoryAccessType) -> &GuestMemory {
288        match access_type {
289            EmulatorMemoryAccessType::ReadWrite => self.gm,
290            EmulatorMemoryAccessType::InstructionRead { is_user_mode } => {
291                if is_user_mode {
292                    self.ux_gm
293                } else {
294                    self.kx_gm
295                }
296            }
297        }
298    }
299}
300
301/// Emulates an instruction.
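///
/// A typical call site in a memory/MMIO exit handler might look like the
/// following. This is an illustrative sketch; where the `support`, guest
/// memory, and `dev` values come from is specific to the caller.
///
/// ```ignore
/// let emu_mem = EmulatorMemoryAccess {
///     gm: &guest_memory,
///     kx_gm: &kernel_exec_guest_memory,
///     ux_gm: &user_exec_guest_memory,
/// };
/// emulate(&mut support, &emu_mem, &dev).await?;
/// ```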
302pub async fn emulate<T: EmulatorSupport>(
303    support: &mut T,
304    emu_mem: &EmulatorMemoryAccess<'_>,
305    dev: &impl CpuIo,
306) -> Result<(), VpHaltReason<T::Error>> {
307    let vendor = support.vendor();
308
309    let mut bytes = [0; 16];
310    let mut valid_bytes;
311    {
312        let instruction_bytes = support.instruction_bytes();
313        valid_bytes = instruction_bytes.len();
314        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
315    }
316    let instruction_bytes = &bytes[..valid_bytes];
317
318    tracing::trace!(
319        ?instruction_bytes,
320        physical_address = support.physical_address(),
321        "emulating"
322    );
323
324    if support.interruption_pending() {
325        // This means a fault or interruption *caused* the intercept
326        // (and only really applies to memory intercept handling).
327        // An example of how this could happen is if the
328        // interrupt vector table itself is in mmio space; taking an
329        // interrupt at that point requires that the processor reads the
330        // vector out of the table, which generates an mmio intercept,
331        // but not one associated with any particular instruction.
332        // Therefore, there is nothing to emulate.
333        //
334        // A fault can't be injected into the guest because that could
335        // cause an infinite loop (as the processor tries to get the trap
336        // vector out of the mmio-ed vector table).  Just give up.
337
338        return Err(VpHaltReason::EmulationFailure(
339            EmulationError::<T::Error>::InterruptionPending.into(),
340        ));
341    }
342
343    let initial_alignment_check = support.rflags().alignment_check();
344
345    let mut cpu = EmulatorCpu::new(
346        emu_mem.gm(EmulatorMemoryAccessType::ReadWrite),
347        dev,
348        support,
349    );
350    let result = loop {
351        let instruction_bytes = &bytes[..valid_bytes];
352        let mut emu = x86emu::Emulator::new(&mut cpu, vendor, instruction_bytes);
353        let res = emu.run().await;
354
355        if let Err(e) = &res {
356            if let x86emu::Error::NotEnoughBytes = **e {
357                assert!(valid_bytes < bytes.len());
358
359                // TODO: inject #GP due to segmentation fault.
360                let linear_ip =
361                    emu.linear_ip(valid_bytes as u64)
362                        .ok_or(VpHaltReason::EmulationFailure(
363                            EmulationError::<T::Error>::LinearIpPastCsLimit.into(),
364                        ))?;
365
366                let is_user_mode = emu.is_user_mode();
367
368                let translate_result =
369                    cpu.translate_gva(linear_ip, TranslateMode::Execute, is_user_mode);
370
371                let phys_ip = match translate_result {
372                    Ok(ip) => ip,
373                    Err(translate_error) => {
374                        if inject_memory_access_fault(linear_ip, &translate_error, support) {
375                            return Ok(());
376                        } else {
377                            return Err(VpHaltReason::EmulationFailure(
378                                EmulationError::InstructionRead(translate_error).into(),
379                            ));
380                        }
381                    }
382                };
383
384                // TODO: fold this access check into the GuestMemory object for
385                // each of the backings, if possible.
386                if let Err(err) = cpu.check_vtl_access(phys_ip, TranslateMode::Execute) {
387                    if inject_memory_access_fault(linear_ip, &err, support) {
388                        return Ok(());
389                    } else {
390                        return Err(VpHaltReason::EmulationFailure(
391                            EmulationError::InstructionRead(err).into(),
392                        ));
393                    };
394                }
395
396                tracing::trace!(linear_ip, phys_ip, "fetching instruction bytes");
397
398                let len = (bytes.len() - valid_bytes)
399                    .min((HV_PAGE_SIZE - (phys_ip & (HV_PAGE_SIZE - 1))) as usize);
400
401                let instruction_gm =
402                    emu_mem.gm(EmulatorMemoryAccessType::InstructionRead { is_user_mode });
403
404                if let Err(err) =
405                    instruction_gm.read_at(phys_ip, &mut bytes[valid_bytes..valid_bytes + len])
406                {
407                    tracing::error!(error = &err as &dyn std::error::Error, "read failed");
408                    support.inject_pending_event(gpf_event());
409                    return Ok(());
410                }
411
412                valid_bytes += len;
413                continue;
414            }
415        }
416
417        break res;
418    };
419
420    cpu.support.flush().map_err(|err| {
421        VpHaltReason::EmulationFailure(EmulationError::<T::Error>::CacheFlushFailed(err).into())
422    })?;
423
424    // If the alignment check flag is not in sync with the hypervisor because the instruction emulator
425    // modifies it internally, then the appropriate SMAP enforcement flags need to be passed to the
426    // hypervisor during the translation of GVAs to GPAs.
427    //
428    // Note: also applies if the instruction emulator emulates instructions resulting in implicit
429    // memory accesses, which is currently not done. See Intel Spec 4.6 Access Rights:
430    // "Some operations implicitly access system data structures with linear addresses;
431    // the resulting accesses to those data structures are supervisor-mode accesses regardless of CPL.
432    // Examples of such accesses include the following: accesses to the global descriptor table (GDT)
433    // or local descriptor table (LDT) to load a segment descriptor; accesses to the interrupt
434    // descriptor table (IDT) when delivering an interrupt or exception; and accesses to the task-state
435    // segment (TSS) as part of a task switch or change of CPL."
436    assert_eq!(
437        initial_alignment_check,
438        cpu.support.rflags().alignment_check()
439    );
440
441    let instruction_bytes = &bytes[..valid_bytes];
442    if let Err(e) = result {
443        match *e {
444            err @ (x86emu::Error::DecodeFailure | x86emu::Error::UnsupportedInstruction { .. }) => {
445                tracelimit::error_ratelimited!(
446                    error = &err as &dyn std::error::Error,
447                    ?instruction_bytes,
448                    physical_address = cpu.support.physical_address(),
449                    "unsupported instruction"
450                );
451
452                cpu.support.inject_pending_event(make_exception_event(
453                    Exception::INVALID_OPCODE,
454                    None,
455                    None,
456                ));
457            }
458            err @ x86emu::Error::NonMemoryOrPortInstruction { .. } => {
459                tracelimit::error_ratelimited!(
460                    error = &err as &dyn std::error::Error,
461                    ?instruction_bytes,
462                    physical_address = cpu.support.physical_address(),
463                    "given an instruction that we shouldn't have been asked to emulate - likely a bug in the caller"
464                );
465
466                return Err(VpHaltReason::EmulationFailure(
467                    EmulationError::Emulator {
468                        bytes: instruction_bytes.to_vec(),
469                        error: err,
470                    }
471                    .into(),
472                ));
473            }
474            x86emu::Error::InstructionException(exception, error_code, cause) => {
475                tracing::trace!(
476                    ?exception,
477                    ?error_code,
478                    ?cause,
479                    "emulated instruction caused exception"
480                );
481                cpu.support
482                    .inject_pending_event(make_exception_event(exception, error_code, None));
483            }
484            x86emu::Error::MemoryAccess(addr, kind, err) => {
485                if !inject_memory_access_fault(addr, &err, support) {
486                    return Err(VpHaltReason::EmulationFailure(
487                        EmulationError::Emulator {
488                            bytes: instruction_bytes.to_vec(),
489                            error: x86emu::Error::MemoryAccess(addr, kind, err),
490                        }
491                        .into(),
492                    ));
493                }
494            }
495            err @ (x86emu::Error::IoPort { .. } | x86emu::Error::XmmRegister { .. }) => {
496                return Err(VpHaltReason::EmulationFailure(
497                    EmulationError::Emulator {
498                        bytes: instruction_bytes.to_vec(),
499                        error: err,
500                    }
501                    .into(),
502                ));
503            }
504            x86emu::Error::NotEnoughBytes => unreachable!(),
505        }
506    }
507
508    Ok(())
509}
510
511/// Performs a memory operation as if it had been performed by an emulated instruction.
512///
513/// "As if it had been performed by an emulated instruction" means that the given
514/// GVA will be translated to a GPA, subject to applicable segmentation, permission,
515/// and alignment checks, may be determined to be MMIO instead of RAM, etc.
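///
/// For example, to read 8 bytes through the DS segment as an emulated
/// instruction would (an illustrative sketch; `support`, `guest_memory`,
/// `dev`, `gva`, and `alignment` are assumed to be provided by the caller):
///
/// ```ignore
/// let mut data = [0u8; 8];
/// emulate_insn_memory_op(
///     &mut support,
///     &guest_memory,
///     &dev,
///     gva,
///     Segment::DS,
///     alignment,
///     EmulatedMemoryOperation::Read(&mut data),
/// )
/// .await?;
/// ```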
516pub async fn emulate_insn_memory_op<T: EmulatorSupport>(
517    support: &mut T,
518    gm: &GuestMemory,
519    dev: &impl CpuIo,
520    gva: u64,
521    segment: Segment,
522    alignment: AlignmentMode,
523    op: EmulatedMemoryOperation<'_>,
524) -> Result<(), VpHaltReason<T::Error>> {
525    assert!(!support.interruption_pending());
526
527    let vendor = support.vendor();
528    let mut cpu = EmulatorCpu::new(gm, dev, support);
529    let mut emu = x86emu::Emulator::new(&mut cpu, vendor, &[]);
530
531    match op {
532        EmulatedMemoryOperation::Read(data) => emu.read_memory(segment, gva, alignment, data).await,
533        EmulatedMemoryOperation::Write(data) => {
534            emu.write_memory(segment, gva, alignment, data).await
535        }
536    }
537    .map_err(|e| VpHaltReason::EmulationFailure(e.into()))
538
539    // No need to flush the cache, we have not modified any registers.
540}
541
542pub enum EmulatedMemoryOperation<'a> {
543    Read(&'a mut [u8]),
544    Write(&'a [u8]),
545}
546
547/// A cached GVA-to-GPA translation entry used by [`EmulatorCpu`].
548struct GvaGpaCacheEntry {
549    gva_page: u64,
550    gpa_page: u64,
551    translate_mode: TranslateMode,
552}
553
554impl GvaGpaCacheEntry {
555    pub fn new(gva: u64, gpa: u64, translate_mode: TranslateMode) -> Self {
556        GvaGpaCacheEntry {
557            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
558            gpa_page: gpa >> hvdef::HV_PAGE_SHIFT,
559            translate_mode,
560        }
561    }
562}
563
564struct EmulatorCpu<'a, T, U> {
565    gm: &'a GuestMemory,
566    support: &'a mut T,
567    dev: &'a U,
568    // TODO: this should be able to hold at least two entries for effective use with
569    // rep move instructions.
570    cached_translation: Option<GvaGpaCacheEntry>,
571}
572
573#[derive(Debug, Error)]
574enum Error<E> {
575    #[error(transparent)]
576    Hypervisor(#[from] E),
577    #[error("translation error")]
578    Translate(
579        #[source] TranslateGvaError,
580        Option<hvdef::HvX64PendingEvent>,
581    ),
582    #[error("vtl permissions denied access for gpa {gpa}")]
583    NoVtlAccess {
584        gpa: u64,
585        intercepting_vtl: hvdef::Vtl,
586        denied_flags: HvMapGpaFlags,
587    },
588    #[error("failed to access mapped memory")]
589    Memory(#[source] GuestMemoryError),
590}
591
592/// Errors from a failed GVA translation in [`EmulatorCpu`].
593#[derive(Error, Debug)]
594enum TranslateGvaError {
595    #[error("gpa access denied code {0:?}")]
596    AccessDenied(hvdef::hypercall::TranslateGvaResultCode),
597    #[error("write on overlay page")]
598    OverlayPageWrite,
599    #[error("translation failed with unknown code {0:?}")]
600    UnknownCode(hvdef::hypercall::TranslateGvaResultCode),
601    #[error("translation failed with an intercept code")]
602    Intercept,
603    #[error("translation failed with a page fault-related code {0:?}")]
604    PageFault(hvdef::hypercall::TranslateGvaResultCode),
605}
606
607impl<T: EmulatorSupport, U> EmulatorCpu<'_, T, U> {
608    pub fn new<'a>(gm: &'a GuestMemory, dev: &'a U, support: &'a mut T) -> EmulatorCpu<'a, T, U> {
609        let init_cache = {
610            if let Some(InitialTranslation {
611                gva,
612                gpa,
613                translate_mode,
614            }) = support.initial_gva_translation()
615            {
616                tracing::trace!(
617                    ?gva,
618                    ?gpa,
619                    ?translate_mode,
620                    "adding initial translation to cache"
621                );
622                Some(GvaGpaCacheEntry::new(gva, gpa, translate_mode))
623            } else {
624                None
625            }
626        };
627
628        EmulatorCpu {
629            gm,
630            dev,
631            support,
632            cached_translation: init_cache,
633        }
634    }
635
636    pub fn translate_gva(
637        &mut self,
638        gva: u64,
639        mode: TranslateMode,
640        is_user_mode: bool,
641    ) -> Result<u64, Error<T::Error>> {
642        type TranslateCode = hvdef::hypercall::TranslateGvaResultCode;
643
644        if let Some(GvaGpaCacheEntry {
645            gva_page: cached_gva_page,
646            gpa_page: cached_gpa_page,
647            translate_mode: cached_mode,
648        }) = self.cached_translation
649        {
650            if ((gva >> hvdef::HV_PAGE_SHIFT) == cached_gva_page) && (cached_mode == mode) {
651                tracing::trace!(
652                    ?gva,
653                    ?cached_gva_page,
654                    cached_gpa_page,
655                    ?cached_mode,
656                    "using cached entry"
657                );
658                return Ok((cached_gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)));
659            }
660        };
661
662        match self.support.translate_gva(gva, mode) {
663            Ok(Ok(EmuTranslateResult { gpa, overlay_page })) => {
664                if overlay_page == Some(true) && mode == TranslateMode::Write {
669                    // We will support reads to overlay pages in order to support Win2k3
670                    // crash dumps (which do direct port I/O to the IDE for all of memory,
671                    // including overlay pages).  Writes, though, are right out.  There is
672                    // no known scenario where guests should be writing to overlay pages,
673                    // and doing so would be difficult and expensive.  Overlay pages are
674                    // special purpose pages set up by the hypervisor (to implement hypercalls,
675                    // for instance), and there should be no reason that guests should be doing
676                    // io to them.
677                    //
678                    // At this time, there is no infrastructure to allow us to actually
679                    // read the overlay page.  We will instead return 0xff's for such reads.
680                    // This is an emulation hole.  It is felt to be acceptable at this time.
681                    // But for this reason, we give up if this wasn't the intercepting instruction
682                    // and let the processor run the instruction directly.
683                    return Err(Error::Translate(
684                        TranslateGvaError::OverlayPageWrite,
685                        Some(gpf_event()),
686                    ));
687                }
688
689                let new_cache_entry = GvaGpaCacheEntry::new(gva, gpa, mode);
690
691                self.cached_translation = Some(new_cache_entry);
692                Ok(gpa)
693            }
694            Ok(Err(EmuTranslateError { code, event_info })) => {
695                match code {
696                    TranslateCode::INTERCEPT => {
697                        tracing::trace!("translate gva to gpa returned an intercept event");
698                        Err(Error::Translate(TranslateGvaError::Intercept, event_info))
699                    }
700                    TranslateCode::GPA_NO_READ_ACCESS
701                    | TranslateCode::GPA_NO_WRITE_ACCESS
702                    | TranslateCode::GPA_UNMAPPED
703                    | TranslateCode::GPA_ILLEGAL_OVERLAY_ACCESS
704                    | TranslateCode::GPA_UNACCEPTED => {
705                        // The page table walk failed because one of the page
706                        // table entries was inaccessible in the second-level
707                        // page tables.
708                        //
709                        // Inject a #GP.
710                        tracing::trace!(
711                            "translate gva to gpa returned no access to page {:?}",
712                            code
713                        );
714                        Err(Error::Translate(
715                            TranslateGvaError::AccessDenied(code),
716                            Some(gpf_event()),
717                        ))
718                    }
719                    TranslateCode::PAGE_NOT_PRESENT
720                    | TranslateCode::PRIVILEGE_VIOLATION
721                    | TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
722                        // The page table walk failed for ordinary reasons not
723                        // having to do with second-level address translation.
724                        // We need to inject a page fault.
725                        //
726                        // It should be rare to get to this point even for a
727                        // misbehaving guest, since the processor usually should
728                        // have detected and injected this fault without
729                        // requiring an exit.
730                        //
731                        // Trace since this is more likely to indicate a bug in
732                        // our page table walking code, but rate limit the trace
733                        // since there are still cases where this could be
734                        // triggered by guest behavior.
735                        tracelimit::warn_ratelimited!(gva, ?code, "page table walk failed");
736
737                        let mut error = x86defs::PageFaultErrorCode::new();
738                        match code {
739                            TranslateCode::PAGE_NOT_PRESENT => (),
740                            TranslateCode::PRIVILEGE_VIOLATION => error.set_present(true),
741                            TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
742                                error.set_present(true);
743                                error.set_reserved(true);
744                            }
745                            _ => unreachable!(),
746                        };
747
748                        match mode {
749                            TranslateMode::Execute => error.set_fetch(true),
750                            TranslateMode::Write => error.set_write(true),
751                            _ => (),
752                        };
753
754                        if is_user_mode {
755                            error.set_user(true);
756                        }
757
758                        // Page fault
759                        let event = make_exception_event(
760                            Exception::PAGE_FAULT,
761                            Some(error.into()),
762                            Some(gva),
763                        );
764
765                        Err(Error::Translate(
766                            TranslateGvaError::PageFault(code),
767                            Some(event),
768                        ))
769                    }
770                    TranslateCode::SUCCESS => unreachable!(),
771                    _ => {
772                        tracing::trace!(
773                            "translate error: unknown translation result code {:?}",
774                            code
775                        );
776
777                        Err(Error::Translate(TranslateGvaError::UnknownCode(code), None))
778                    }
779                }
780            }
781            Err(e) => {
782                tracing::trace!("translate error {:?}", e);
783                Err(Error::Hypervisor(e))
784            }
785        }
786    }
787
788    pub fn check_vtl_access(
789        &mut self,
790        gpa: u64,
791        mode: TranslateMode,
792    ) -> Result<(), Error<T::Error>> {
793        self.support
794            .check_vtl_access(gpa, mode)
795            .map_err(|e| match e {
796                EmuCheckVtlAccessError::Hypervisor(hv_err) => Error::Hypervisor(hv_err),
797                EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags } => Error::NoVtlAccess {
798                    gpa,
799                    intercepting_vtl: vtl,
800                    denied_flags,
801                },
802            })
803    }
804}
805
806impl<T: EmulatorSupport, U: CpuIo> x86emu::Cpu for EmulatorCpu<'_, T, U> {
807    type Error = Error<T::Error>;
808
809    async fn read_memory(
810        &mut self,
811        gva: u64,
812        bytes: &mut [u8],
813        is_user_mode: bool,
814    ) -> Result<(), Self::Error> {
815        let gpa = self.translate_gva(gva, TranslateMode::Read, is_user_mode)?;
816
817        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
818            self.support.lapic_read(gpa, bytes);
819            return Ok(());
820        }
821
822        self.check_vtl_access(gpa, TranslateMode::Read)?;
823
824        if self.support.is_gpa_mapped(gpa, false) {
825            self.gm.read_at(gpa, bytes).map_err(Error::Memory)?;
826        } else {
827            self.dev
828                .read_mmio(self.support.vp_index(), gpa, bytes)
829                .await;
830        }
831        Ok(())
832    }
833
834    async fn write_memory(
835        &mut self,
836        gva: u64,
837        bytes: &[u8],
838        is_user_mode: bool,
839    ) -> Result<(), Self::Error> {
840        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;
841
842        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
843            self.support.lapic_write(gpa, bytes);
844            return Ok(());
845        }
846
847        self.check_vtl_access(gpa, TranslateMode::Write)?;
848
849        if self.support.is_gpa_mapped(gpa, true) {
850            self.gm.write_at(gpa, bytes).map_err(Error::Memory)?;
851        } else {
852            self.dev
853                .write_mmio(self.support.vp_index(), gpa, bytes)
854                .await;
855        }
856        Ok(())
857    }
858
859    async fn compare_and_write_memory(
860        &mut self,
861        gva: u64,
862        current: &[u8],
863        new: &[u8],
864        is_user_mode: bool,
865    ) -> Result<bool, Self::Error> {
866        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;
867        self.check_vtl_access(gpa, TranslateMode::Write)?;
868
869        let success = if self.support.check_monitor_write(gpa, new) {
870            true
871        } else if self.support.is_gpa_mapped(gpa, true) {
872            let buf = &mut [0; 16][..current.len()];
873            buf.copy_from_slice(current);
874            self.gm
875                .compare_exchange_bytes(gpa, buf, new)
876                .map_err(Error::Memory)?
877        } else {
878            // Ignore the comparison aspect for device MMIO.
879            self.dev.write_mmio(self.support.vp_index(), gpa, new).await;
880            true
881        };
882        Ok(success)
883    }
884
885    async fn read_io(&mut self, io_port: u16, bytes: &mut [u8]) -> Result<(), Self::Error> {
886        self.dev
887            .read_io(self.support.vp_index(), io_port, bytes)
888            .await;
889        Ok(())
890    }
891
892    async fn write_io(&mut self, io_port: u16, bytes: &[u8]) -> Result<(), Self::Error> {
893        self.dev
894            .write_io(self.support.vp_index(), io_port, bytes)
895            .await;
896        Ok(())
897    }
898
899    fn gp(&mut self, reg: RegisterIndex) -> u64 {
900        let extended_register = self.support.gp(reg.extended_index);
901        reg.apply_sizing(extended_register)
902    }
903
904    fn gp_sign_extend(&mut self, reg: RegisterIndex) -> i64 {
905        let extended_register = self.support.gp(reg.extended_index);
906        reg.apply_sizing_signed(extended_register)
907    }
908
909    fn set_gp(&mut self, reg: RegisterIndex, v: u64) {
910        let register_value = self.gp(reg);
911        let updated_register_value = reg.apply_update(register_value, v);
912        self.support
913            .set_gp(reg.extended_index, updated_register_value);
914    }
915
916    fn rip(&mut self) -> u64 {
917        self.support.rip()
918    }
919
920    fn set_rip(&mut self, v: u64) {
921        self.support.set_rip(v);
922    }
923
924    fn segment(&mut self, index: Segment) -> SegmentRegister {
925        self.support.segment(index)
926    }
927
928    fn efer(&mut self) -> u64 {
929        self.support.efer()
930    }
931
932    fn cr0(&mut self) -> u64 {
933        self.support.cr0()
934    }
935
936    fn rflags(&mut self) -> RFlags {
937        self.support.rflags()
938    }
939
940    fn set_rflags(&mut self, v: RFlags) {
941        self.support.set_rflags(v);
942    }
943
944    /// Gets the value of an XMM* register.
945    fn xmm(&mut self, reg: usize) -> u128 {
946        self.support.xmm(reg)
947    }
948
949    /// Sets the value of an XMM* register.
950    fn set_xmm(&mut self, reg: usize, value: u128) -> Result<(), Self::Error> {
951        self.support.set_xmm(reg, value).map_err(Error::Hypervisor)
952    }
953}
954
955/// Emulates an IO port instruction.
956///
957/// Just handles calling into the IO bus and updating `rax`. The caller must
958/// advance RIP and, when `!is_write`, write the updated `rax` back to the VP's register.
959///
960/// The caller is also responsible for performing any security checks to ensure
961/// the guest is allowed to execute I/O instructions. However, typically this is handled
962/// by the hardware and hypervisor automatically.
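///
/// For example, handling a one-byte port I/O intercept (an illustrative
/// sketch; `vp_index`, `rax`, and `dev` come from the caller):
///
/// ```ignore
/// // OUT 0x80, al
/// emulate_io(vp_index, true, 0x80, &mut rax, 1, &dev).await;
/// // IN al, 0x60: bits 8..31 of eax are preserved, bits 32..63 are cleared.
/// emulate_io(vp_index, false, 0x60, &mut rax, 1, &dev).await;
/// // The caller must then write `rax` back to the VP and advance RIP.
/// ```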
963pub async fn emulate_io(
964    vp_index: VpIndex,
965    is_write: bool,
966    port: u16,
967    rax: &mut u64,
968    len: u8,
969    dev: &impl CpuIo,
970) {
971    let len = len as usize;
972    if is_write {
973        dev.write_io(vp_index, port, &rax.to_ne_bytes()[..len])
974            .await;
975    } else {
976        // Preserve the high bits of eax but not of rax.
977        let mut value = (*rax as u32).to_ne_bytes();
978        dev.read_io(vp_index, port, &mut value[..len]).await;
979        *rax = u32::from_ne_bytes(value) as u64;
980    }
981}
982
983/// Injects an event into the guest if appropriate.
984///
985/// Returns true if an event was injected into the guest.
986/// If false is returned, the caller should return the
987/// appropriate error code instead.
988#[must_use]
989fn inject_memory_access_fault<T: EmulatorSupport>(
990    gva: u64,
991    result: &Error<T::Error>,
992    support: &mut T,
993) -> bool {
994    match result {
995        Error::Translate(e, event) => {
996            tracing::trace!(
997                error = e as &dyn std::error::Error,
998                "translation failed, injecting event"
999            );
1000
1001            if let Some(event_info) = event {
1002                support.inject_pending_event(*event_info);
1003
1004                // The emulation did what it was supposed to do, which is throw a fault, so the emulation is done.
1005                return true;
1006            }
1007            false
1008        }
1009        Error::NoVtlAccess {
1010            gpa,
1011            intercepting_vtl,
1012            denied_flags,
1013        } => {
1014            tracing::trace!(
1015                error = result as &dyn std::error::Error,
1016                ?gva,
1017                ?gpa,
1018                "Vtl permissions checking failed"
1019            );
1020
1021            let event = vtl_access_event(gva, *gpa, *intercepting_vtl, *denied_flags);
1022            support.inject_pending_event(event);
1023            true
1024        }
1025        Error::Hypervisor(_) | Error::Memory(_) => false,
1026    }
1027}
1028
1029/// Creates a pending event for the exception type
1030fn make_exception_event(
1031    exception: Exception,
1032    error_code: Option<u32>,
1033    exception_parameter: Option<u64>,
1034) -> hvdef::HvX64PendingEvent {
1035    let exception_event = hvdef::HvX64PendingExceptionEvent::new()
1036        .with_event_pending(true)
1037        .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
1038        .with_deliver_error_code(error_code.is_some())
1039        .with_error_code(error_code.unwrap_or(0))
1040        .with_vector(exception.0.into())
1041        .with_exception_parameter(exception_parameter.unwrap_or(0));
1042
1043    hvdef::HvX64PendingEvent::from(exception_event)
1044}
1045
1046/// Generates a general protection fault pending event
1047fn gpf_event() -> hvdef::HvX64PendingEvent {
1048    make_exception_event(Exception::GENERAL_PROTECTION_FAULT, Some(0), None)
1049}
1050
1051/// Generates the appropriate event for a VTL access error based
1052/// on the intercepting VTL
1053fn vtl_access_event(
1054    gva: u64,
1055    gpa: u64,
1056    intercepting_vtl: hvdef::Vtl,
1057    denied_access: HvMapGpaFlags,
1058) -> hvdef::HvX64PendingEvent {
1059    if intercepting_vtl != hvdef::Vtl::Vtl2 {
1060        let event_header = hvdef::HvX64PendingEventMemoryInterceptPendingEventHeader::new()
1061            .with_event_pending(true)
1062            .with_event_type(hvdef::HV_X64_PENDING_EVENT_MEMORY_INTERCEPT);
1063        let access_flags = hvdef::HvX64PendingEventMemoryInterceptAccessFlags::new()
1064            .with_guest_linear_address_valid(true)
1065            .with_caused_by_gpa_access(true);
1066
1067        let access_type = if denied_access.kernel_executable() || denied_access.user_executable() {
1068            HvInterceptAccessType::EXECUTE
1069        } else if denied_access.writable() {
1070            HvInterceptAccessType::WRITE
1071        } else {
1072            HvInterceptAccessType::READ
1073        };
1074
1075        let memory_event = hvdef::HvX64PendingEventMemoryIntercept {
1076            event_header,
1077            target_vtl: intercepting_vtl.into(),
1078            access_type,
1079            access_flags,
1080            _reserved2: 0,
1081            guest_linear_address: (gva >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
1082            guest_physical_address: (gpa >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
1083            _reserved3: 0,
1084        };
1085
1086        hvdef::HvX64PendingEvent::read_from_bytes(memory_event.as_bytes())
1087            .expect("memory event and pending event should be the same size")
1088    } else {
1089        gpf_event()
1090    }
1091}
1092
1093/// Tries to emulate monitor page writes without taking the slower, full
1094/// emulation path.
1095///
1096/// The caller must have already validated that the fault was due to a write to
1097/// a monitor page GPA.
1098///
1099/// Returns the bit number being set within the monitor page, or `None` if the write could not be handled by the fast path.
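///
/// For example, a monitor-page write intercept handler might try the fast
/// path before falling back to full emulation (an illustrative sketch):
///
/// ```ignore
/// if let Some(bit) = emulate_mnf_write_fast_path(
///     &mut support,
///     &guest_memory,
///     &dev,
///     interruption_pending,
///     tlb_lock_held,
/// )? {
///     // Map `bit` to the associated connection ID and signal it
///     // (caller-specific).
/// } else {
///     // Fall back to the full `emulate` path.
/// }
/// ```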
1100pub fn emulate_mnf_write_fast_path<T: EmulatorSupport>(
1101    support: &mut T,
1102    gm: &GuestMemory,
1103    dev: &impl CpuIo,
1104    interruption_pending: bool,
1105    tlb_lock_held: bool,
1106) -> Result<Option<u32>, VpHaltReason<T::Error>> {
1107    let mut cpu = EmulatorCpu::new(gm, dev, support);
1108    let instruction_bytes = cpu.support.instruction_bytes();
1109    if interruption_pending || !tlb_lock_held || instruction_bytes.is_empty() {
1110        return Ok(None);
1111    }
1112    let mut bytes = [0; 16];
1113    let valid_bytes;
1114    {
1115        let instruction_bytes = cpu.support.instruction_bytes();
1116        valid_bytes = instruction_bytes.len();
1117        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
1118    }
1119    let instruction_bytes = &bytes[..valid_bytes];
1120    let bit = x86emu::fast_path::emulate_fast_path_set_bit(instruction_bytes, &mut cpu);
1121    support.flush().map_err(|err| {
1122        VpHaltReason::EmulationFailure(EmulationError::<T::Error>::CacheFlushFailed(err).into())
1123    })?;
1124    Ok(bit)
1125}