virt_support_x86emu/emulate.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Wrapper around x86emu for emulating single instructions to handle VM exits.

use crate::translate::TranslateFlags;
use crate::translate::TranslatePrivilegeCheck;
use crate::translate::translate_gva_to_gpa;
use guestmem::GuestMemory;
use guestmem::GuestMemoryError;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvInterceptAccessType;
use hvdef::HvMapGpaFlags;
use thiserror::Error;
use virt::EmulatorMonitorSupport;
use virt::VpHaltReason;
use virt::io::CpuIo;
use vm_topology::processor::VpIndex;
use x86defs::Exception;
use x86defs::RFlags;
use x86defs::SegmentRegister;
use x86emu::AlignmentMode;
use x86emu::Gp;
use x86emu::RegisterIndex;
use x86emu::Segment;
use zerocopy::FromBytes;
use zerocopy::IntoBytes;

/// Support routines for the emulator.
pub trait EmulatorSupport {
    /// The current VP index.
    fn vp_index(&self) -> VpIndex;

    /// The processor vendor.
    fn vendor(&self) -> x86defs::cpuid::Vendor;

    /// Read a general-purpose (GP) register.
    fn gp(&mut self, index: Gp) -> u64;

    /// Set a GP register.
    fn set_gp(&mut self, reg: Gp, v: u64);

    /// Read the instruction pointer.
    fn rip(&mut self) -> u64;

    /// Set the instruction pointer.
    fn set_rip(&mut self, v: u64);

    /// Read a segment register.
    fn segment(&mut self, index: Segment) -> SegmentRegister;

    /// Read the efer.
    fn efer(&mut self) -> u64;

    /// Read cr0.
    fn cr0(&mut self) -> u64;

    /// Read rflags.
    fn rflags(&mut self) -> RFlags;

    /// Set rflags.
    fn set_rflags(&mut self, v: RFlags);

    /// Gets the value of an XMM* register.
    fn xmm(&mut self, reg: usize) -> u128;

    /// Sets the value of an XMM* register.
    fn set_xmm(&mut self, reg: usize, value: u128);

    /// Flush registers in the emulation cache to the backing state.
    fn flush(&mut self);

    /// The instruction bytes, if available.
    fn instruction_bytes(&self) -> &[u8];

    /// The physical address that caused the fault.
    fn physical_address(&self) -> Option<u64>;

    /// The gva translation included in the intercept message header, if valid.
    fn initial_gva_translation(&mut self) -> Option<InitialTranslation>;

    /// Whether an interruption is marked pending in the intercept message.
    fn interruption_pending(&self) -> bool;

    /// Checks that the given GPA is valid for the current VTL to access with
    /// the given access mode. Returns `Ok(())` if the access is allowed.
    fn check_vtl_access(
        &mut self,
        gpa: u64,
        mode: TranslateMode,
    ) -> Result<(), EmuCheckVtlAccessError>;

    /// Translates a GVA to a GPA.
    fn translate_gva(
        &mut self,
        gva: u64,
        mode: TranslateMode,
    ) -> Result<EmuTranslateResult, EmuTranslateError>;

    /// Generates an event (exception, guest nested page fault, etc.) in the guest.
    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent);

    /// Gets access to monitor support for the emulator, if available.
    fn monitor_support(&self) -> Option<&dyn EmulatorMonitorSupport> {
        None
    }

    /// Returns true if `gpa` is mapped for the specified permissions.
    ///
    /// If true, then the emulator will use [`GuestMemory`] to access the GPA,
    /// and any failures will be fatal to the VM.
    ///
    /// If false, then the emulator will use [`CpuIo`] to access the GPA as
    /// MMIO.
    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool;

    /// Returns the page-aligned base address of the enabled local APIC in xapic
    /// mode.
    fn lapic_base_address(&self) -> Option<u64>;

    /// Read from the current processor's local APIC memory mapped interface.
    ///
    /// This will only be called on an address in the page returned by
    /// `lapic_base_address`.
    fn lapic_read(&mut self, address: u64, data: &mut [u8]);

    /// Write to the current processor's local APIC memory mapped interface.
    ///
    /// This will only be called on an address in the page returned by
    /// `lapic_base_address`.
    fn lapic_write(&mut self, address: u64, data: &[u8]);
}

/// Support routines for emulating a page table walk with
/// [`emulate_translate_gva`].
pub trait TranslateGvaSupport {
    /// Gets the object used to access the guest memory.
    fn guest_memory(&self) -> &GuestMemory;

    /// Acquires the TLB lock for this processor.
    fn acquire_tlb_lock(&mut self);

    /// Returns the registers used to walk the page table.
    fn registers(&mut self) -> crate::translate::TranslationRegisters;
}

/// Emulates a page table walk.
///
/// This is suitable for implementing [`EmulatorSupport::translate_gva`].
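///
/// For example, an [`EmulatorSupport`] implementation whose type also
/// implements [`TranslateGvaSupport`] might forward to this function (a
/// minimal sketch; `MyVpSupport` is a hypothetical type):
///
/// ```ignore
/// impl EmulatorSupport for MyVpSupport {
///     // ...
///     fn translate_gva(
///         &mut self,
///         gva: u64,
///         mode: TranslateMode,
///     ) -> Result<EmuTranslateResult, EmuTranslateError> {
///         emulate_translate_gva(self, gva, mode)
///     }
/// }
/// ```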
pub fn emulate_translate_gva<T: TranslateGvaSupport>(
    support: &mut T,
    gva: u64,
    mode: TranslateMode,
) -> Result<EmuTranslateResult, EmuTranslateError> {
    // Always acquire the TLB lock for this path.
    support.acquire_tlb_lock();

    let flags = TranslateFlags {
        validate_execute: matches!(mode, TranslateMode::Execute),
        validate_read: matches!(mode, TranslateMode::Read | TranslateMode::Write),
        validate_write: matches!(mode, TranslateMode::Write),
        override_smap: false,
        enforce_smap: false,
        privilege_check: TranslatePrivilegeCheck::CurrentPrivilegeLevel,
        set_page_table_bits: true,
    };

    let registers = support.registers();

    match translate_gva_to_gpa(support.guest_memory(), gva, &registers, flags) {
        Ok(crate::translate::TranslateResult { gpa, cache_info: _ }) => Ok(EmuTranslateResult {
            gpa,
            overlay_page: None,
        }),
        Err(err) => Err(EmuTranslateError {
            code: err.into(),
            event_info: None,
        }),
    }
}

/// The result of [`EmulatorSupport::translate_gva`].
pub struct EmuTranslateResult {
    /// The GPA result of the translation.
    pub gpa: u64,
    /// Whether the page is an overlay page.
    /// Not all implementations report overlay pages yet, so this value is optional.
    pub overlay_page: Option<bool>,
}

/// The translation, if any, provided in the intercept message header and
/// returned by [`EmulatorSupport`].
#[derive(Debug)]
pub struct InitialTranslation {
    /// GVA for the translation.
    pub gva: u64,
    /// Translated GPA for the GVA.
    pub gpa: u64,
    /// Whether the translation was for read, write, or execute access.
    pub translate_mode: TranslateMode,
}

/// Error returned by [`EmulatorSupport::check_vtl_access`].
#[derive(Error, Debug)]
pub enum EmuCheckVtlAccessError {
    #[error("failed vtl permissions access for vtl {vtl:?} and access flags {denied_flags:?}")]
    AccessDenied {
        vtl: hvdef::Vtl,
        denied_flags: HvMapGpaFlags,
    },
}

/// Error for a failed GVA translation from [`EmulatorSupport`].
#[derive(Error, Debug)]
#[error("translate gva to gpa returned non-successful code {code:?}")]
pub struct EmuTranslateError {
    /// The [`hvdef::hypercall::TranslateGvaResultCode`] describing the failure;
    /// never `SUCCESS`.
    pub code: hvdef::hypercall::TranslateGvaResultCode,
    /// Pending event, if any, returned by the hypervisor along with the result code.
    pub event_info: Option<hvdef::HvX64PendingEvent>,
}

/// The access type for a gva translation for [`EmulatorSupport`].
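///
/// This can be derived from the access type reported in an intercept message,
/// e.g. (a minimal sketch):
///
/// ```ignore
/// let mode = TranslateMode::try_from(HvInterceptAccessType::WRITE).unwrap();
/// assert_eq!(mode, TranslateMode::Write);
/// ```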
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TranslateMode {
    /// A read operation.
    Read,
    /// A write operation.
    Write,
    /// An execute operation.
    Execute,
}

/// The requested intercept access type isn't supported.
#[derive(Debug)]
pub struct UnsupportedInterceptAccessType;

impl TryFrom<HvInterceptAccessType> for TranslateMode {
    type Error = UnsupportedInterceptAccessType;

    fn try_from(access_type: HvInterceptAccessType) -> Result<Self, Self::Error> {
        match access_type {
            HvInterceptAccessType::READ => Ok(TranslateMode::Read),
            HvInterceptAccessType::WRITE => Ok(TranslateMode::Write),
            HvInterceptAccessType::EXECUTE => Ok(TranslateMode::Execute),
            _ => Err(UnsupportedInterceptAccessType),
        }
    }
}

#[derive(Debug, Error)]
enum EmulationError {
    #[error("an interrupt caused the memory access exit")]
    InterruptionPending,
    #[error("linear IP was not within CS segment limit")]
    LinearIpPastCsLimit,
    #[error("failed to read instruction stream")]
    InstructionRead(#[source] Error),
    #[error("emulator error (instruction {bytes:02x?})")]
    Emulator {
        bytes: Vec<u8>,
        #[source]
        error: x86emu::Error<Error>,
    },
}

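/// The guest memory views used by the emulator: `gm` for data accesses, and
/// `kx_gm`/`ux_gm` for kernel-mode and user-mode instruction fetches.
///
/// A minimal construction sketch, assuming a single backing [`GuestMemory`]
/// view `gm` serves data accesses and instruction fetches alike:
///
/// ```ignore
/// let emu_mem = EmulatorMemoryAccess { gm: &gm, kx_gm: &gm, ux_gm: &gm };
/// ```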
pub struct EmulatorMemoryAccess<'a> {
    /// Guest memory for data accesses.
    pub gm: &'a GuestMemory,
    /// Guest memory for kernel-mode instruction fetches.
    pub kx_gm: &'a GuestMemory,
    /// Guest memory for user-mode instruction fetches.
    pub ux_gm: &'a GuestMemory,
}

enum EmulatorMemoryAccessType {
    ReadWrite,
    InstructionRead { is_user_mode: bool },
}

impl EmulatorMemoryAccess<'_> {
    fn gm(&self, access_type: EmulatorMemoryAccessType) -> &GuestMemory {
        match access_type {
            EmulatorMemoryAccessType::ReadWrite => self.gm,
            EmulatorMemoryAccessType::InstructionRead { is_user_mode } => {
                if is_user_mode {
                    self.ux_gm
                } else {
                    self.kx_gm
                }
            }
        }
    }
}

/// Emulates an instruction.
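///
/// A minimal calling sketch from a VP backend handling a memory or I/O
/// intercept (`support`, `emu_mem`, and `dev` are hypothetical values
/// supplied by that backend):
///
/// ```ignore
/// emulate(&mut support, &emu_mem, &dev).await?;
/// ```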
pub async fn emulate<T: EmulatorSupport>(
    support: &mut T,
    emu_mem: &EmulatorMemoryAccess<'_>,
    dev: &impl CpuIo,
) -> Result<(), VpHaltReason> {
    emulate_core(support, emu_mem, dev)
        .await
        .map_err(|e| VpHaltReason::EmulationFailure(e.into()))
}

async fn emulate_core<T: EmulatorSupport>(
    support: &mut T,
    emu_mem: &EmulatorMemoryAccess<'_>,
    dev: &impl CpuIo,
) -> Result<(), EmulationError> {
    let vendor = support.vendor();

    let mut bytes = [0; 16];
    let mut valid_bytes;
    {
        let instruction_bytes = support.instruction_bytes();
        valid_bytes = instruction_bytes.len();
        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
    }
    let instruction_bytes = &bytes[..valid_bytes];

    tracing::trace!(
        ?instruction_bytes,
        physical_address = support.physical_address(),
        "emulating"
    );

    if support.interruption_pending() {
        // This means a fault or interruption *caused* the intercept
        // (and only really applies to memory intercept handling).
        // An example of how this could happen is if the
        // interrupt vector table itself is in mmio space; taking an
        // interrupt at that point requires that the processor reads the
        // vector out of the table, which generates an mmio intercept,
        // but not one associated with any particular instruction.
        // Therefore, there is nothing to emulate.
        //
        // A fault can't be injected into the guest because that could
        // cause an infinite loop (as the processor tries to get the trap
        // vector out of the mmio-ed vector table). Just give up.

        return Err(EmulationError::InterruptionPending);
    }

    let initial_alignment_check = support.rflags().alignment_check();

    let mut cpu = EmulatorCpu::new(
        emu_mem.gm(EmulatorMemoryAccessType::ReadWrite),
        dev,
        support,
    );
    let result = loop {
        let instruction_bytes = &bytes[..valid_bytes];
        let mut emu = x86emu::Emulator::new(&mut cpu, vendor, instruction_bytes);
        let res = emu.run().await;

        if let Err(e) = &res {
            if let x86emu::Error::NotEnoughBytes = **e {
                assert!(valid_bytes < bytes.len());

                // TODO: inject #GP due to segmentation fault.
                let linear_ip = emu
                    .linear_ip(valid_bytes as u64)
                    .ok_or(EmulationError::LinearIpPastCsLimit)?;

                let is_user_mode = emu.is_user_mode();

                let translate_result =
                    cpu.translate_gva(linear_ip, TranslateMode::Execute, is_user_mode);

                let phys_ip = match translate_result {
                    Ok(ip) => ip,
                    Err(translate_error) => {
                        if inject_memory_access_fault(linear_ip, &translate_error, support) {
                            return Ok(());
                        } else {
                            return Err(EmulationError::InstructionRead(translate_error));
                        }
                    }
                };

                // TODO: fold this access check into the GuestMemory object for
                // each of the backings, if possible.
                if let Err(err) = cpu.check_vtl_access(phys_ip, TranslateMode::Execute) {
                    if inject_memory_access_fault(linear_ip, &err, support) {
                        return Ok(());
                    } else {
                        return Err(EmulationError::InstructionRead(err));
                    }
                }

                tracing::trace!(linear_ip, phys_ip, "fetching instruction bytes");

                let len = (bytes.len() - valid_bytes)
                    .min((HV_PAGE_SIZE - (phys_ip & (HV_PAGE_SIZE - 1))) as usize);

                let instruction_gm =
                    emu_mem.gm(EmulatorMemoryAccessType::InstructionRead { is_user_mode });

                if let Err(err) =
                    instruction_gm.read_at(phys_ip, &mut bytes[valid_bytes..valid_bytes + len])
                {
                    tracing::error!(error = &err as &dyn std::error::Error, "read failed");
                    support.inject_pending_event(gpf_event());
                    return Ok(());
                }

                valid_bytes += len;
                continue;
            }
        }

        break res;
    };

    cpu.support.flush();

    // If the alignment check flag is out of sync with the hypervisor because the
    // instruction emulator modified it internally, then the appropriate SMAP
    // enforcement flags need to be passed to the hypervisor during the translation
    // of gvas to gpas.
    //
    // Note: this also applies if the instruction emulator emulates instructions resulting in implicit
    // memory accesses, which is currently not done. See Intel Spec 4.6 Access Rights:
    // "Some operations implicitly access system data structures with linear addresses;
    // the resulting accesses to those data structures are supervisor-mode accesses regardless of CPL.
    // Examples of such accesses include the following: accesses to the global descriptor table (GDT)
    // or local descriptor table (LDT) to load a segment descriptor; accesses to the interrupt
    // descriptor table (IDT) when delivering an interrupt or exception; and accesses to the task-state
    // segment (TSS) as part of a task switch or change of CPL."
    assert_eq!(
        initial_alignment_check,
        cpu.support.rflags().alignment_check()
    );

    let instruction_bytes = &bytes[..valid_bytes];
    if let Err(e) = result {
        match *e {
            err @ (x86emu::Error::DecodeFailure | x86emu::Error::UnsupportedInstruction { .. }) => {
                tracelimit::error_ratelimited!(
                    error = &err as &dyn std::error::Error,
                    ?instruction_bytes,
                    physical_address = cpu.support.physical_address(),
                    "unsupported instruction"
                );

                cpu.support.inject_pending_event(make_exception_event(
                    Exception::INVALID_OPCODE,
                    None,
                    None,
                ));
            }
            err @ x86emu::Error::NonMemoryOrPortInstruction { .. } => {
                tracelimit::error_ratelimited!(
                    error = &err as &dyn std::error::Error,
                    ?instruction_bytes,
                    physical_address = cpu.support.physical_address(),
                    "given an instruction that we shouldn't have been asked to emulate - likely a bug in the caller"
                );

                return Err(EmulationError::Emulator {
                    bytes: instruction_bytes.to_vec(),
                    error: err,
                });
            }
            x86emu::Error::InstructionException(exception, error_code, cause) => {
                tracing::trace!(
                    ?exception,
                    ?error_code,
                    ?cause,
                    "emulated instruction caused exception"
                );
                cpu.support
                    .inject_pending_event(make_exception_event(exception, error_code, None));
            }
            x86emu::Error::MemoryAccess(addr, kind, err) => {
                if !inject_memory_access_fault(addr, &err, support) {
                    return Err(EmulationError::Emulator {
                        bytes: instruction_bytes.to_vec(),
                        error: x86emu::Error::MemoryAccess(addr, kind, err),
                    });
                }
            }
            err @ x86emu::Error::IoPort { .. } => {
                return Err(EmulationError::Emulator {
                    bytes: instruction_bytes.to_vec(),
                    error: err,
                });
            }
            x86emu::Error::NotEnoughBytes => unreachable!(),
        }
    }

    Ok(())
}

/// Performs a memory operation as if it had been performed by an emulated instruction.
///
/// "As if it had been performed by an emulated instruction" means that the given
/// GVA will be translated to a GPA, subject to applicable segmentation, permission,
/// and alignment checks, may be determined to be MMIO instead of RAM, etc.
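///
/// A minimal sketch reading eight bytes through the DS segment (`support`,
/// `gm`, `dev`, and `gva` are hypothetical values, and the alignment mode
/// shown is illustrative):
///
/// ```ignore
/// let mut data = [0u8; 8];
/// emulate_insn_memory_op(
///     &mut support,
///     &gm,
///     &dev,
///     gva,
///     Segment::DS,
///     AlignmentMode::Standard,
///     EmulatedMemoryOperation::Read(&mut data),
/// )
/// .await?;
/// ```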
pub async fn emulate_insn_memory_op<T: EmulatorSupport>(
    support: &mut T,
    gm: &GuestMemory,
    dev: &impl CpuIo,
    gva: u64,
    segment: Segment,
    alignment: AlignmentMode,
    op: EmulatedMemoryOperation<'_>,
) -> Result<(), VpHaltReason> {
    assert!(!support.interruption_pending());

    let vendor = support.vendor();
    let mut cpu = EmulatorCpu::new(gm, dev, support);
    let mut emu = x86emu::Emulator::new(&mut cpu, vendor, &[]);

    match op {
        EmulatedMemoryOperation::Read(data) => emu.read_memory(segment, gva, alignment, data).await,
        EmulatedMemoryOperation::Write(data) => {
            emu.write_memory(segment, gva, alignment, data).await
        }
    }
    .map_err(|e| VpHaltReason::EmulationFailure(e.into()))

    // No need to flush the cache; we have not modified any registers.
}

/// A memory operation to perform via [`emulate_insn_memory_op`].
pub enum EmulatedMemoryOperation<'a> {
    /// Read into the provided buffer.
    Read(&'a mut [u8]),
    /// Write the provided bytes.
    Write(&'a [u8]),
}

/// A cached GVA-to-GPA translation used by [`EmulatorCpu`].
struct GvaGpaCacheEntry {
    gva_page: u64,
    gpa_page: u64,
    translate_mode: TranslateMode,
}

impl GvaGpaCacheEntry {
    pub fn new(gva: u64, gpa: u64, translate_mode: TranslateMode) -> Self {
        GvaGpaCacheEntry {
            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
            gpa_page: gpa >> hvdef::HV_PAGE_SHIFT,
            translate_mode,
        }
    }
}

struct EmulatorCpu<'a, T, U> {
    gm: &'a GuestMemory,
    support: &'a mut T,
    dev: &'a U,
    // TODO: this should be able to hold at least two entries for effective use for
    // rep move instructions.
    cached_translation: Option<GvaGpaCacheEntry>,
}

#[derive(Debug, Error)]
enum Error {
    #[error("translation error")]
    Translate(
        #[source] TranslateGvaError,
        Option<hvdef::HvX64PendingEvent>,
    ),
    #[error("vtl permissions denied access for gpa {gpa}")]
    NoVtlAccess {
        gpa: u64,
        intercepting_vtl: hvdef::Vtl,
        denied_flags: HvMapGpaFlags,
    },
    #[error("failed to access mapped memory")]
    Memory(#[source] GuestMemoryError),
}

/// Error from a failed GVA translation in [`EmulatorCpu`].
#[derive(Error, Debug)]
enum TranslateGvaError {
    #[error("gpa access denied code {0:?}")]
    AccessDenied(hvdef::hypercall::TranslateGvaResultCode),
    #[error("write on overlay page")]
    OverlayPageWrite,
    #[error("translation failed with unknown code {0:?}")]
    UnknownCode(hvdef::hypercall::TranslateGvaResultCode),
    #[error("translation failed with an intercept code")]
    Intercept,
    #[error("translation failed with a page fault-related code {0:?}")]
    PageFault(hvdef::hypercall::TranslateGvaResultCode),
}

impl<T: EmulatorSupport, U> EmulatorCpu<'_, T, U> {
    pub fn new<'a>(gm: &'a GuestMemory, dev: &'a U, support: &'a mut T) -> EmulatorCpu<'a, T, U> {
        let init_cache = {
            if let Some(InitialTranslation {
                gva,
                gpa,
                translate_mode,
            }) = support.initial_gva_translation()
            {
                tracing::trace!(
                    ?gva,
                    ?gpa,
                    ?translate_mode,
                    "adding initial translation to cache"
                );
                Some(GvaGpaCacheEntry::new(gva, gpa, translate_mode))
            } else {
                None
            }
        };

        EmulatorCpu {
            gm,
            dev,
            support,
            cached_translation: init_cache,
        }
    }

    pub fn translate_gva(
        &mut self,
        gva: u64,
        mode: TranslateMode,
        is_user_mode: bool,
    ) -> Result<u64, Error> {
        type TranslateCode = hvdef::hypercall::TranslateGvaResultCode;

        if let Some(GvaGpaCacheEntry {
            gva_page: cached_gva_page,
            gpa_page: cached_gpa_page,
            translate_mode: cached_mode,
        }) = self.cached_translation
        {
            if ((gva >> hvdef::HV_PAGE_SHIFT) == cached_gva_page) && (cached_mode == mode) {
                tracing::trace!(
                    ?gva,
                    ?cached_gva_page,
                    cached_gpa_page,
                    ?cached_mode,
                    "using cached entry"
                );
                return Ok((cached_gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)));
            }
        };

        match self.support.translate_gva(gva, mode) {
            Ok(EmuTranslateResult { gpa, overlay_page }) => {
                if overlay_page == Some(true) && mode == TranslateMode::Write {
                    // We will support reads to overlay pages in order to support Win2k3
                    // crash dumps (which do direct port io to the ide for all of memory,
                    // including overlay pages). Writes, though, are right out. There is
                    // no known scenario where guests should be writing to overlay pages,
                    // and doing so would be difficult and expensive. Overlay pages are
                    // special purpose pages set up by the hypervisor (to implement hypercalls,
                    // for instance), and there should be no reason that guests should be doing
                    // io to them.
                    //
                    // At this time, there is no infrastructure to allow us to actually
                    // read the overlay page. We will instead return 0xff's for such reads.
                    // This is an emulation hole. It is felt to be acceptable at this time.
                    // But for this reason, we give up if this wasn't the intercepting instruction
                    // and let the processor run the instruction directly.
                    return Err(Error::Translate(
                        TranslateGvaError::OverlayPageWrite,
                        Some(gpf_event()),
                    ));
                }

                let new_cache_entry = GvaGpaCacheEntry::new(gva, gpa, mode);

                self.cached_translation = Some(new_cache_entry);
                Ok(gpa)
            }
            Err(EmuTranslateError { code, event_info }) => {
                match code {
                    TranslateCode::INTERCEPT => {
                        tracing::trace!("translate gva to gpa returned an intercept event");
                        Err(Error::Translate(TranslateGvaError::Intercept, event_info))
                    }
                    TranslateCode::GPA_NO_READ_ACCESS
                    | TranslateCode::GPA_NO_WRITE_ACCESS
                    | TranslateCode::GPA_UNMAPPED
                    | TranslateCode::GPA_ILLEGAL_OVERLAY_ACCESS
                    | TranslateCode::GPA_UNACCEPTED => {
                        // The page table walk failed because one of the page
                        // table entries was inaccessible in the second-level
                        // page tables.
                        //
                        // Inject a #GP.
                        tracing::trace!(
                            "translate gva to gpa returned no access to page {:?}",
                            code
                        );
                        Err(Error::Translate(
                            TranslateGvaError::AccessDenied(code),
                            Some(gpf_event()),
                        ))
                    }
                    TranslateCode::PAGE_NOT_PRESENT
                    | TranslateCode::PRIVILEGE_VIOLATION
                    | TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
                        // The page table walk failed for ordinary reasons not
                        // having to do with second-level address translation.
                        // We need to inject a page fault.
                        //
                        // It should be rare to get to this point even for a
                        // misbehaving guest, since the processor usually should
                        // have detected and injected this fault without
                        // requiring an exit.
                        //
                        // Trace since this is more likely to indicate a bug in
                        // our page table walking code, but rate limit the trace
                        // since there are still cases where this could be
                        // triggered by guest behavior.
                        tracelimit::warn_ratelimited!(gva, ?code, "page table walk failed");

                        let mut error = x86defs::PageFaultErrorCode::new();
                        match code {
                            TranslateCode::PAGE_NOT_PRESENT => (),
                            TranslateCode::PRIVILEGE_VIOLATION => error.set_present(true),
                            TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
                                error.set_present(true);
                                error.set_reserved(true);
                            }
                            _ => unreachable!(),
                        };

                        match mode {
                            TranslateMode::Execute => error.set_fetch(true),
                            TranslateMode::Write => error.set_write(true),
                            _ => (),
                        };

                        if is_user_mode {
                            error.set_user(true);
                        }

                        // Page fault
                        let event = make_exception_event(
                            Exception::PAGE_FAULT,
                            Some(error.into()),
                            Some(gva),
                        );

                        Err(Error::Translate(
                            TranslateGvaError::PageFault(code),
                            Some(event),
                        ))
                    }
                    TranslateCode::SUCCESS => unreachable!(),
                    _ => {
                        tracing::trace!(
                            "translate error: unknown translation result code {:?}",
                            code
                        );

                        Err(Error::Translate(TranslateGvaError::UnknownCode(code), None))
                    }
                }
            }
        }
    }

    pub fn check_vtl_access(&mut self, gpa: u64, mode: TranslateMode) -> Result<(), Error> {
        self.support
            .check_vtl_access(gpa, mode)
            .map_err(|e| match e {
                EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags } => Error::NoVtlAccess {
                    gpa,
                    intercepting_vtl: vtl,
                    denied_flags,
                },
            })
    }

    fn check_monitor_write(&self, gpa: u64, bytes: &[u8]) -> bool {
        if let Some(monitor_support) = self.support.monitor_support() {
            monitor_support.check_write(gpa, bytes)
        } else {
            false
        }
    }

    fn check_monitor_read(&self, gpa: u64, bytes: &mut [u8]) -> bool {
        if let Some(monitor_support) = self.support.monitor_support() {
            monitor_support.check_read(gpa, bytes)
        } else {
            false
        }
    }
}

impl<T: EmulatorSupport, U: CpuIo> x86emu::Cpu for EmulatorCpu<'_, T, U> {
    type Error = Error;

    async fn read_memory(
        &mut self,
        gva: u64,
        bytes: &mut [u8],
        is_user_mode: bool,
    ) -> Result<(), Self::Error> {
        let gpa = self.translate_gva(gva, TranslateMode::Read, is_user_mode)?;

        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
            self.support.lapic_read(gpa, bytes);
            return Ok(());
        }

        self.check_vtl_access(gpa, TranslateMode::Read)?;

        if self.check_monitor_read(gpa, bytes) {
            Ok(())
        } else if self.support.is_gpa_mapped(gpa, false) {
            self.gm.read_at(gpa, bytes).map_err(Error::Memory)
        } else {
            self.dev
                .read_mmio(self.support.vp_index(), gpa, bytes)
                .await;
            Ok(())
        }
    }

    async fn write_memory(
        &mut self,
        gva: u64,
        bytes: &[u8],
        is_user_mode: bool,
    ) -> Result<(), Self::Error> {
        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;

        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
            self.support.lapic_write(gpa, bytes);
            return Ok(());
        }

        self.check_vtl_access(gpa, TranslateMode::Write)?;

        if self.support.is_gpa_mapped(gpa, true) {
            self.gm.write_at(gpa, bytes).map_err(Error::Memory)?;
        } else {
            self.dev
                .write_mmio(self.support.vp_index(), gpa, bytes)
                .await;
        }
        Ok(())
    }

    async fn compare_and_write_memory(
        &mut self,
        gva: u64,
        current: &[u8],
        new: &[u8],
        is_user_mode: bool,
    ) -> Result<bool, Self::Error> {
        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;
        self.check_vtl_access(gpa, TranslateMode::Write)?;

        let success = if self.check_monitor_write(gpa, new) {
            true
        } else if self.support.is_gpa_mapped(gpa, true) {
            match (current.len(), new.len()) {
                (1, 1) => self
                    .gm
                    .compare_exchange(gpa, current[0], new[0])
                    .map(|r| r.is_ok()),
                (2, 2) => self
                    .gm
                    .compare_exchange(
                        gpa,
                        u16::from_ne_bytes(current.try_into().unwrap()),
                        u16::from_ne_bytes(new.try_into().unwrap()),
                    )
                    .map(|r| r.is_ok()),
                (4, 4) => self
                    .gm
                    .compare_exchange(
                        gpa,
                        u32::from_ne_bytes(current.try_into().unwrap()),
                        u32::from_ne_bytes(new.try_into().unwrap()),
                    )
                    .map(|r| r.is_ok()),
                (8, 8) => self
                    .gm
                    .compare_exchange(
                        gpa,
                        u64::from_ne_bytes(current.try_into().unwrap()),
                        u64::from_ne_bytes(new.try_into().unwrap()),
                    )
                    .map(|r| r.is_ok()),
                _ => panic!("unsupported cmpxchg size"),
            }
            .map_err(Error::Memory)?
        } else {
            // Ignore the comparison aspect for device MMIO.
            self.dev.write_mmio(self.support.vp_index(), gpa, new).await;
            true
        };
        Ok(success)
    }

    async fn read_io(&mut self, io_port: u16, bytes: &mut [u8]) -> Result<(), Self::Error> {
        self.dev
            .read_io(self.support.vp_index(), io_port, bytes)
            .await;
        Ok(())
    }

    async fn write_io(&mut self, io_port: u16, bytes: &[u8]) -> Result<(), Self::Error> {
        self.dev
            .write_io(self.support.vp_index(), io_port, bytes)
            .await;
        Ok(())
    }

    fn gp(&mut self, reg: RegisterIndex) -> u64 {
        let extended_register = self.support.gp(reg.extended_index);
        reg.apply_sizing(extended_register)
    }

    fn gp_sign_extend(&mut self, reg: RegisterIndex) -> i64 {
        let extended_register = self.support.gp(reg.extended_index);
        reg.apply_sizing_signed(extended_register)
    }

    fn set_gp(&mut self, reg: RegisterIndex, v: u64) {
        let register_value = self.gp(reg);
        let updated_register_value = reg.apply_update(register_value, v);
        self.support
            .set_gp(reg.extended_index, updated_register_value);
    }

    fn rip(&mut self) -> u64 {
        self.support.rip()
    }

    fn set_rip(&mut self, v: u64) {
        self.support.set_rip(v);
    }

    fn segment(&mut self, index: Segment) -> SegmentRegister {
        self.support.segment(index)
    }

    fn efer(&mut self) -> u64 {
        self.support.efer()
    }

    fn cr0(&mut self) -> u64 {
        self.support.cr0()
    }

    fn rflags(&mut self) -> RFlags {
        self.support.rflags()
    }

    fn set_rflags(&mut self, v: RFlags) {
        self.support.set_rflags(v);
    }

    /// Gets the value of an XMM* register.
    fn xmm(&mut self, reg: usize) -> u128 {
        self.support.xmm(reg)
    }

    /// Sets the value of an XMM* register.
    fn set_xmm(&mut self, reg: usize, value: u128) {
        self.support.set_xmm(reg, value)
    }
}

/// Emulates an IO port instruction.
///
/// Just handles calling into the IO bus and updating `rax`. The caller must
/// update RIP, and (when `!is_write`) must write the resulting `rax` value
/// back to the VP's register.
///
/// The caller is also responsible for performing any security checks to ensure
/// the guest is allowed to execute I/O instructions. However, typically this is
/// handled by the hardware and hypervisor automatically.
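///
/// A minimal sketch emulating `in al, 0x60`, a one-byte read from port 0x60
/// (`vp_index`, `rax`, and `dev` are hypothetical values from the caller):
///
/// ```ignore
/// emulate_io(vp_index, false, 0x60, &mut rax, 1, &dev).await;
/// // The caller must now write `rax` back to the VP and advance RIP.
/// ```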
pub async fn emulate_io(
    vp_index: VpIndex,
    is_write: bool,
    port: u16,
    rax: &mut u64,
    len: u8,
    dev: &impl CpuIo,
) {
    let len = len as usize;
    if is_write {
        dev.write_io(vp_index, port, &rax.to_ne_bytes()[..len])
            .await;
    } else {
        // Preserve the high bits of eax but not of rax.
        let mut value = (*rax as u32).to_ne_bytes();
        dev.read_io(vp_index, port, &mut value[..len]).await;
        *rax = u32::from_ne_bytes(value) as u64;
    }
}

/// Injects an event into the guest if appropriate.
///
/// Returns true if an event was injected into the guest. If false is
/// returned, the caller should return the appropriate error code instead.
#[must_use]
fn inject_memory_access_fault<T: EmulatorSupport>(
    gva: u64,
    result: &Error,
    support: &mut T,
) -> bool {
    match result {
        Error::Translate(e, event) => {
            tracing::trace!(
                error = e as &dyn std::error::Error,
                "translation failed, injecting event"
            );

            if let Some(event_info) = event {
                support.inject_pending_event(*event_info);

                // The emulation did what it was supposed to do, which is throw a fault, so the emulation is done.
                return true;
            }
            false
        }
        Error::NoVtlAccess {
            gpa,
            intercepting_vtl,
            denied_flags,
        } => {
            tracing::trace!(
                error = result as &dyn std::error::Error,
                ?gva,
                ?gpa,
                "vtl permissions checking failed"
            );

            let event = vtl_access_event(gva, *gpa, *intercepting_vtl, *denied_flags);
            support.inject_pending_event(event);
            true
        }
        Error::Memory(_) => false,
    }
}

/// Creates a pending event for the given exception type.
fn make_exception_event(
    exception: Exception,
    error_code: Option<u32>,
    exception_parameter: Option<u64>,
) -> hvdef::HvX64PendingEvent {
    let exception_event = hvdef::HvX64PendingExceptionEvent::new()
        .with_event_pending(true)
        .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
        .with_deliver_error_code(error_code.is_some())
        .with_error_code(error_code.unwrap_or(0))
        .with_vector(exception.0.into())
        .with_exception_parameter(exception_parameter.unwrap_or(0));

    hvdef::HvX64PendingEvent::from(exception_event)
}

/// Generates a general protection fault pending event.
fn gpf_event() -> hvdef::HvX64PendingEvent {
    make_exception_event(Exception::GENERAL_PROTECTION_FAULT, Some(0), None)
}

/// Generates the appropriate event for a VTL access error based
/// on the intercepting VTL.
fn vtl_access_event(
    gva: u64,
    gpa: u64,
    intercepting_vtl: hvdef::Vtl,
    denied_access: HvMapGpaFlags,
) -> hvdef::HvX64PendingEvent {
    if intercepting_vtl != hvdef::Vtl::Vtl2 {
        let event_header = hvdef::HvX64PendingEventMemoryInterceptPendingEventHeader::new()
            .with_event_pending(true)
            .with_event_type(hvdef::HV_X64_PENDING_EVENT_MEMORY_INTERCEPT);
        let access_flags = hvdef::HvX64PendingEventMemoryInterceptAccessFlags::new()
            .with_guest_linear_address_valid(true)
            .with_caused_by_gpa_access(true);

        let access_type = if denied_access.kernel_executable() || denied_access.user_executable() {
            HvInterceptAccessType::EXECUTE
        } else if denied_access.writable() {
            HvInterceptAccessType::WRITE
        } else {
            HvInterceptAccessType::READ
        };

        let memory_event = hvdef::HvX64PendingEventMemoryIntercept {
            event_header,
            target_vtl: intercepting_vtl.into(),
            access_type,
            access_flags,
            _reserved2: 0,
            guest_linear_address: (gva >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
            guest_physical_address: (gpa >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
            _reserved3: 0,
        };

        hvdef::HvX64PendingEvent::read_from_bytes(memory_event.as_bytes())
            .expect("memory event and pending event should be the same size")
    } else {
        gpf_event()
    }
}

/// Tries to emulate monitor page writes without taking the slower, full
/// emulation path.
///
/// The caller must have already validated that the fault was due to a write to
/// a monitor page GPA.
///
/// Returns the bit number being set within the monitor page.
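///
/// A minimal calling sketch (`support`, `gm`, `dev`, and `signal_mnf` are
/// hypothetical values and helpers supplied by the caller):
///
/// ```ignore
/// if let Some(bit) = emulate_mnf_write_fast_path(
///     &mut support,
///     &gm,
///     &dev,
///     interruption_pending,
///     tlb_lock_held,
/// ) {
///     // Signal the connection for `bit` without running the full emulator.
///     signal_mnf(bit);
/// }
/// ```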
pub fn emulate_mnf_write_fast_path<T: EmulatorSupport>(
    support: &mut T,
    gm: &GuestMemory,
    dev: &impl CpuIo,
    interruption_pending: bool,
    tlb_lock_held: bool,
) -> Option<u32> {
    let mut cpu = EmulatorCpu::new(gm, dev, support);
    let instruction_bytes = cpu.support.instruction_bytes();
    if interruption_pending || !tlb_lock_held || instruction_bytes.is_empty() {
        return None;
    }
    let mut bytes = [0; 16];
    let valid_bytes;
    {
        let instruction_bytes = cpu.support.instruction_bytes();
        valid_bytes = instruction_bytes.len();
        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
    }
    let instruction_bytes = &bytes[..valid_bytes];
    let bit = x86emu::fast_path::emulate_fast_path_set_bit(instruction_bytes, &mut cpu);
    support.flush();
    bit
}