Skip to main content

virt_support_x86emu/
emulate.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Wrapper around x86emu for emulating single instructions to handle VM exits.
5
6use crate::translate::TranslateFlags;
7use crate::translate::TranslatePrivilegeCheck;
8use crate::translate::translate_gva_to_gpa;
9use cvm_tracing::CVM_ALLOWED;
10use cvm_tracing::CVM_CONFIDENTIAL;
11use guestmem::GuestMemory;
12use guestmem::GuestMemoryError;
13use hvdef::HV_PAGE_SIZE;
14use hvdef::HvInterceptAccessType;
15use hvdef::HvMapGpaFlags;
16use thiserror::Error;
17use virt::EmulatorMonitorSupport;
18use virt::VpHaltReason;
19use virt::io::CpuIo;
20use vm_topology::processor::VpIndex;
21use x86defs::Exception;
22use x86defs::RFlags;
23use x86defs::SegmentRegister;
24use x86emu::AlignmentMode;
25use x86emu::Gp;
26use x86emu::RegisterIndex;
27use x86emu::Segment;
28use zerocopy::FromBytes;
29use zerocopy::IntoBytes;
30
/// Support routines for the emulator.
///
/// The emulator calls back into an implementation of this trait for register
/// state, details of the intercept being handled, address translation, VTL
/// access checks, event injection, and local APIC access.
pub trait EmulatorSupport {
    /// The current VP index.
    fn vp_index(&self) -> VpIndex;

    /// The processor vendor.
    fn vendor(&self) -> x86defs::cpuid::Vendor;

    /// Reads the general-purpose register `index`.
    fn gp(&mut self, index: Gp) -> u64;

    /// Sets the general-purpose register `reg` to `v`.
    fn set_gp(&mut self, reg: Gp, v: u64);

    /// Reads the instruction pointer.
    fn rip(&mut self) -> u64;

    /// Sets the instruction pointer.
    fn set_rip(&mut self, v: u64);

    /// Reads a segment register.
    fn segment(&mut self, index: Segment) -> SegmentRegister;

    /// Reads EFER.
    fn efer(&mut self) -> u64;

    /// Reads CR0.
    fn cr0(&mut self) -> u64;

    /// Reads RFLAGS.
    fn rflags(&mut self) -> RFlags;

    /// Sets RFLAGS.
    fn set_rflags(&mut self, v: RFlags);

    /// Gets the value of an XMM* register.
    fn xmm(&mut self, reg: usize) -> u128;

    /// Sets the value of an XMM* register.
    fn set_xmm(&mut self, reg: usize, value: u128);

    /// Flushes registers in the emulation cache to the backing.
    fn flush(&mut self);

    /// The instruction bytes, if available.
    ///
    /// An empty slice means no bytes are available.
    fn instruction_bytes(&self) -> &[u8];

    /// The physical address that caused the fault.
    fn physical_address(&self) -> Option<u64>;

    /// The gva translation included in the intercept message header, if valid.
    fn initial_gva_translation(&mut self) -> Option<InitialTranslation>;

    /// Whether an interruption is marked as pending in the intercept message.
    fn interruption_pending(&self) -> bool;

    /// Checks that `gpa` is valid to access by the current VTL with the given
    /// access mode.
    ///
    /// Returns `Ok(())` if the access is allowed, and
    /// [`EmuCheckVtlAccessError`] otherwise.
    fn check_vtl_access(
        &mut self,
        gpa: u64,
        mode: TranslateMode,
    ) -> Result<(), EmuCheckVtlAccessError>;

    /// Translates a GVA to a GPA.
    fn translate_gva(
        &mut self,
        gva: u64,
        mode: TranslateMode,
    ) -> Result<EmuTranslateResult, EmuTranslateError>;

    /// Generates an event (exception, guest nested page fault, etc.) in the guest.
    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent);

    /// Gets access to monitor support for the emulator, if it supports it.
    ///
    /// The default implementation returns `None`.
    fn monitor_support(&self) -> Option<&dyn EmulatorMonitorSupport> {
        None
    }

    /// Returns true if `gpa` is mapped for the specified permissions.
    ///
    /// If true, then the emulator will use [`GuestMemory`] to access the GPA,
    /// and any failures will be fatal to the VM.
    ///
    /// If false, then the emulator will use [`CpuIo`] to access the GPA as
    /// MMIO.
    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool;

    /// Returns the page-aligned base address of the enabled local APIC in xapic
    /// mode.
    fn lapic_base_address(&self) -> Option<u64>;

    /// Read from the current processor's local APIC memory mapped interface.
    ///
    /// This will only be called on an address in the page returned by
    /// `lapic_base_address`.
    fn lapic_read(&mut self, address: u64, data: &mut [u8]);

    /// Write to the current processor's local APIC memory mapped interface.
    ///
    /// This will only be called on an address in the page returned by
    /// `lapic_base_address`.
    fn lapic_write(&mut self, address: u64, data: &[u8]);
}
135
/// Support routines required by [`emulate_translate_gva`] to walk the guest
/// page tables.
pub trait TranslateGvaSupport {
    /// Gets the object used to access the guest memory.
    fn guest_memory(&self) -> &GuestMemory;

    /// Acquires the TLB lock for this processor.
    fn acquire_tlb_lock(&mut self);

    /// Returns the registers used to walk the page table.
    fn registers(&mut self) -> crate::translate::TranslationRegisters;
}
146
147/// Emulates a page table walk.
148///
149/// This is suitable for implementing [`EmulatorSupport::translate_gva`].
150pub fn emulate_translate_gva<T: TranslateGvaSupport>(
151    support: &mut T,
152    gva: u64,
153    mode: TranslateMode,
154) -> Result<EmuTranslateResult, EmuTranslateError> {
155    // Always acquire the TLB lock for this path.
156    support.acquire_tlb_lock();
157
158    let flags = TranslateFlags {
159        validate_execute: matches!(mode, TranslateMode::Execute),
160        validate_read: matches!(mode, TranslateMode::Read | TranslateMode::Write),
161        validate_write: matches!(mode, TranslateMode::Write),
162        override_smap: false,
163        enforce_smap: false,
164        privilege_check: TranslatePrivilegeCheck::CurrentPrivilegeLevel,
165        set_page_table_bits: true,
166    };
167
168    let registers = support.registers();
169
170    match translate_gva_to_gpa(support.guest_memory(), gva, &registers, flags) {
171        Ok(crate::translate::TranslateResult { gpa, cache_info: _ }) => Ok(EmuTranslateResult {
172            gpa,
173            overlay_page: None,
174        }),
175        Err(err) => Err(EmuTranslateError {
176            code: err.into(),
177            event_info: None,
178        }),
179    }
180}
181
/// The result of translate_gva on [`EmulatorSupport`].
pub struct EmuTranslateResult {
    /// The GPA result of the translation.
    pub gpa: u64,
    /// Whether the page is an overlay page.
    ///
    /// Not all implementations report overlay page information yet, so this
    /// value is optional; `None` means the information is not available.
    pub overlay_page: Option<bool>,
}
190
/// The translation, if any, provided in the intercept message and provided by [`EmulatorSupport`].
#[derive(Debug)]
pub struct InitialTranslation {
    /// GVA for the translation
    pub gva: u64,
    /// Translated gpa for the gva
    pub gpa: u64,
    /// Whether the translation has read, write, or execute permissions.
    pub translate_mode: TranslateMode,
}
201
/// Error returned by [`EmulatorSupport::check_vtl_access`].
#[derive(Error, Debug)]
pub enum EmuCheckVtlAccessError {
    /// The access was denied by VTL permissions.
    #[error("failed vtl permissions access for vtl {vtl:?} and access flags {denied_flags:?}")]
    AccessDenied {
        /// The VTL reported with the denial.
        vtl: hvdef::Vtl,
        /// The access flags that were denied.
        denied_flags: HvMapGpaFlags,
    },
}
210
/// Error for a failed gva translation from [`EmulatorSupport`].
#[derive(Error, Debug)]
#[error("translate gva to gpa returned non-successful code {code:?}")]
pub struct EmuTranslateError {
    /// Translate code of type hvdef::hypercall::TranslateGvaResultCode.
    /// Must not be the success code.
    pub code: hvdef::hypercall::TranslateGvaResultCode,
    /// Pending event, if any, returned by hypervisor to go with the translate code.
    pub event_info: Option<hvdef::HvX64PendingEvent>,
}
221
/// The access type for a gva translation for [`EmulatorSupport`].
///
/// Can be produced from an [`HvInterceptAccessType`] via `TryFrom`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TranslateMode {
    /// A read operation.
    Read,
    /// A write operation.
    Write,
    /// An execute operation.
    Execute,
}
232
/// The requested intercept access type isn't supported.
///
/// Returned when an [`HvInterceptAccessType`] cannot be converted to a
/// [`TranslateMode`].
#[derive(Debug)]
pub struct UnsupportedInterceptAccessType;
236
237impl TryFrom<HvInterceptAccessType> for TranslateMode {
238    type Error = UnsupportedInterceptAccessType;
239
240    fn try_from(access_type: HvInterceptAccessType) -> Result<Self, Self::Error> {
241        match access_type {
242            HvInterceptAccessType::READ => Ok(TranslateMode::Read),
243            HvInterceptAccessType::WRITE => Ok(TranslateMode::Write),
244            HvInterceptAccessType::EXECUTE => Ok(TranslateMode::Execute),
245            _ => Err(UnsupportedInterceptAccessType),
246        }
247    }
248}
249
/// Failures of [`emulate_core`] that could not be resolved by injecting an
/// event into the guest.
#[derive(Debug, Error)]
enum EmulationError {
    /// The intercept message reported a pending interruption, so there is no
    /// instruction to emulate.
    #[error("an interrupt caused the memory access exit")]
    InterruptionPending,
    /// The emulator could not produce a linear address for the next
    /// instruction bytes to fetch.
    #[error("linear IP was not within CS segment limit")]
    LinearIpPastCsLimit,
    /// Translating or access-checking the instruction pointer failed while
    /// fetching more instruction bytes.
    #[error("failed to read instruction stream")]
    InstructionRead(#[source] Error),
    /// The emulator itself reported an error for the given instruction bytes.
    #[error("emulator error (instruction {bytes:02x?})")]
    Emulator {
        /// The instruction bytes available when the error occurred.
        bytes: Vec<u8>,
        /// The underlying emulator error.
        #[source]
        error: x86emu::Error<Error>,
    },
}
265
/// The guest memory objects used by the emulator for different kinds of
/// access.
pub struct EmulatorMemoryAccess<'a> {
    /// Guest memory used for ordinary read/write accesses.
    pub gm: &'a GuestMemory,
    /// Guest memory used for instruction reads when not in user mode.
    pub kx_gm: &'a GuestMemory,
    /// Guest memory used for instruction reads when in user mode.
    pub ux_gm: &'a GuestMemory,
}
271
/// Selects which [`GuestMemory`] of an [`EmulatorMemoryAccess`] to use.
enum EmulatorMemoryAccessType {
    /// An ordinary data read or write.
    ReadWrite,
    /// An instruction fetch; `is_user_mode` picks between the user-mode and
    /// non-user-mode instruction memory objects.
    InstructionRead { is_user_mode: bool },
}
276
277impl EmulatorMemoryAccess<'_> {
278    fn gm(&self, access_type: EmulatorMemoryAccessType) -> &GuestMemory {
279        match access_type {
280            EmulatorMemoryAccessType::ReadWrite => self.gm,
281            EmulatorMemoryAccessType::InstructionRead { is_user_mode } => {
282                if is_user_mode {
283                    self.ux_gm
284                } else {
285                    self.kx_gm
286                }
287            }
288        }
289    }
290}
291
292/// Emulates an instruction.
293pub async fn emulate<T: EmulatorSupport>(
294    support: &mut T,
295    emu_mem: &EmulatorMemoryAccess<'_>,
296    dev: &impl CpuIo,
297) -> Result<(), VpHaltReason> {
298    emulate_core(support, emu_mem, dev).await.map_err(|e| {
299        let rip = support.rip();
300        let efer = support.efer();
301        let cr0 = support.cr0();
302        let rflags = support.rflags();
303        let vendor = support.vendor();
304        let gpa = support.physical_address();
305        let initial_translation = support.initial_gva_translation();
306        let int_pend = support.interruption_pending();
307        let gpa_mapped = gpa.map(|a| support.is_gpa_mapped(a, false));
308        tracing::warn!(
309            CVM_ALLOWED,
310            rip,
311            ?vendor,
312            efer,
313            cr0,
314            ?rflags,
315            gpa,
316            ?initial_translation,
317            int_pend,
318            gpa_mapped,
319            "emulation failed"
320        );
321        let gps = [
322            Gp::RAX,
323            Gp::RCX,
324            Gp::RDX,
325            Gp::RBX,
326            Gp::RSP,
327            Gp::RBP,
328            Gp::RSI,
329            Gp::RDI,
330            Gp::R8,
331            Gp::R9,
332            Gp::R10,
333            Gp::R11,
334            Gp::R12,
335            Gp::R13,
336            Gp::R14,
337            Gp::R15,
338        ]
339        .map(|i| support.gp(i));
340        tracing::warn!(CVM_CONFIDENTIAL, ?gps, "emulation failed");
341        dev.fatal_error(e.into())
342    })
343}
344
/// Core of [`emulate`]: runs the emulator on the intercepted instruction.
///
/// Starts from the instruction bytes supplied by `support` and, whenever the
/// emulator reports that it needs more bytes, fetches them from guest memory
/// at the translated instruction pointer and retries.
///
/// Emulator errors are turned into events injected into the guest where
/// possible (in which case `Ok(())` is returned); otherwise an
/// [`EmulationError`] is returned.
async fn emulate_core<T: EmulatorSupport>(
    support: &mut T,
    emu_mem: &EmulatorMemoryAccess<'_>,
    dev: &impl CpuIo,
) -> Result<(), EmulationError> {
    let vendor = support.vendor();

    // Local buffer for the instruction bytes; `valid_bytes` tracks how much of
    // it has been filled so far.
    let mut bytes = [0; 16];
    let mut valid_bytes;
    {
        let instruction_bytes = support.instruction_bytes();
        valid_bytes = instruction_bytes.len();
        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
    }
    let instruction_bytes = &bytes[..valid_bytes];

    tracing::trace!(
        ?instruction_bytes,
        physical_address = support.physical_address(),
        "emulating"
    );

    if support.interruption_pending() {
        // This means a fault or interruption *caused* the intercept
        // (and only really applies to memory intercept handling).
        // An example of how this could happen is if the
        // interrupt vector table itself is in mmio space; taking an
        // interrupt at that point requires that the processor reads the
        // vector out of the table, which generates an mmio intercept,
        // but not one associated with any particular instruction.
        // Therefore, there is nothing to emulate.
        //
        // A fault can't be injected into the guest because that could
        // cause an infinite loop (as the processor tries to get the trap
        // vector out of the mmio-ed vector table).  Just give up.

        return Err(EmulationError::InterruptionPending);
    }

    // Remembered so it can be compared against the flag after emulation.
    let initial_alignment_check = support.rflags().alignment_check();

    let mut cpu = EmulatorCpu::new(
        emu_mem.gm(EmulatorMemoryAccessType::ReadWrite),
        dev,
        support,
    );
    // Run the emulator, refilling the instruction buffer and retrying for as
    // long as it reports that it needs more bytes.
    let result = loop {
        let instruction_bytes = &bytes[..valid_bytes];
        let mut emu = x86emu::Emulator::new(&mut cpu, vendor, instruction_bytes);
        let res = emu.run().await;

        if let Err(e) = &res {
            if let x86emu::Error::NotEnoughBytes = **e {
                assert!(valid_bytes < bytes.len());

                // TODO: inject #GP due to segmentation fault.
                let linear_ip = emu
                    .linear_ip(valid_bytes as u64)
                    .ok_or(EmulationError::LinearIpPastCsLimit)?;

                let is_user_mode = emu.is_user_mode();

                let translate_result =
                    cpu.translate_gva(linear_ip, TranslateMode::Execute, is_user_mode);

                let phys_ip = match translate_result {
                    Ok(ip) => ip,
                    Err(translate_error) => {
                        if inject_memory_access_fault(linear_ip, &translate_error, support) {
                            return Ok(());
                        } else {
                            return Err(EmulationError::InstructionRead(translate_error));
                        }
                    }
                };

                // TODO: fold this access check into the GuestMemory object for
                // each of the backings, if possible.
                if let Err(err) = cpu.check_vtl_access(phys_ip, TranslateMode::Execute) {
                    if inject_memory_access_fault(linear_ip, &err, support) {
                        return Ok(());
                    } else {
                        return Err(EmulationError::InstructionRead(err));
                    };
                }

                tracing::trace!(linear_ip, phys_ip, "fetching instruction bytes");

                // Read at most the remaining buffer space, and never past the
                // end of the page containing `phys_ip`.
                let len = (bytes.len() - valid_bytes)
                    .min((HV_PAGE_SIZE - (phys_ip & (HV_PAGE_SIZE - 1))) as usize);

                let instruction_gm =
                    emu_mem.gm(EmulatorMemoryAccessType::InstructionRead { is_user_mode });

                if let Err(err) =
                    instruction_gm.read_at(phys_ip, &mut bytes[valid_bytes..valid_bytes + len])
                {
                    tracing::error!(error = &err as &dyn std::error::Error, "read failed");
                    support.inject_pending_event(gpf_event());
                    return Ok(());
                }

                valid_bytes += len;
                continue;
            }
        }

        break res;
    };

    cpu.support.flush();

    // If the alignment check flag is not in sync with the hypervisor because the instruction emulator
    // modifies it internally, then the appropriate SMAP enforcement flags need to be passed to the hypervisor
    // during the translation of gvas to gpa.
    //
    // Note: also applies if the instruction emulator emulates instructions resulting in implicit
    // memory accesses, which is currently not done. See Intel Spec 4.6 Access Rights:
    // "Some operations implicitly access system data structures with linear addresses;
    // the resulting accesses to those data structures are supervisor-mode accesses regardless of CPL.
    // Examples of such accesses include the following: accesses to the global descriptor table (GDT)
    // or local descriptor table (LDT) to load a segment descriptor; accesses to the interrupt
    // descriptor table (IDT) when delivering an interrupt or exception; and accesses to the task-state
    // segment (TSS) as part of a task switch or change of CPL."
    assert_eq!(
        initial_alignment_check,
        cpu.support.rflags().alignment_check()
    );

    let instruction_bytes = &bytes[..valid_bytes];
    if let Err(e) = result {
        match *e {
            // Undecodable or unsupported instruction: inject an invalid opcode
            // exception.
            err @ (x86emu::Error::DecodeFailure | x86emu::Error::UnsupportedInstruction { .. }) => {
                tracelimit::error_ratelimited!(
                    error = &err as &dyn std::error::Error,
                    ?instruction_bytes,
                    physical_address = cpu.support.physical_address(),
                    "unsupported instruction"
                );

                cpu.support.inject_pending_event(make_exception_event(
                    Exception::INVALID_OPCODE,
                    None,
                    None,
                ));
            }
            err @ x86emu::Error::NonMemoryOrPortInstruction { .. } => {
                tracelimit::error_ratelimited!(
                    error = &err as &dyn std::error::Error,
                    ?instruction_bytes,
                    physical_address = cpu.support.physical_address(),
                    "given an instruction that we shouldn't have been asked to emulate - likely a bug in the caller"
                );

                return Err(EmulationError::Emulator {
                    bytes: instruction_bytes.to_vec(),
                    error: err,
                });
            }
            // The instruction raised an exception: forward it to the guest.
            x86emu::Error::InstructionException(exception, error_code, cause) => {
                tracing::trace!(
                    ?exception,
                    ?error_code,
                    ?cause,
                    "emulated instruction caused exception"
                );
                cpu.support
                    .inject_pending_event(make_exception_event(exception, error_code, None));
            }
            // A memory access failed: the error is returned only if no fault
            // was injected for it.
            x86emu::Error::MemoryAccess(addr, kind, err) => {
                if !inject_memory_access_fault(addr, &err, support) {
                    return Err(EmulationError::Emulator {
                        bytes: instruction_bytes.to_vec(),
                        error: x86emu::Error::MemoryAccess(addr, kind, err),
                    });
                }
            }
            err @ x86emu::Error::IoPort { .. } => {
                return Err(EmulationError::Emulator {
                    bytes: instruction_bytes.to_vec(),
                    error: err,
                });
            }
            // The loop above only exits with a result other than
            // `NotEnoughBytes`.
            x86emu::Error::NotEnoughBytes => unreachable!(),
        }
    }

    Ok(())
}
534
535/// Performs a memory operation as if it had been performed by an emulated instruction.
536///
537/// "As if it had been performed by an emulated instruction" means that the given
538/// GVA will be translated to a GPA, subject to applicable segmentation, permission,
539/// and alignment checks, may be determined to be MMIO instead of RAM, etc.
540pub async fn emulate_insn_memory_op<T: EmulatorSupport>(
541    support: &mut T,
542    gm: &GuestMemory,
543    dev: &impl CpuIo,
544    gva: u64,
545    segment: Segment,
546    alignment: AlignmentMode,
547    op: EmulatedMemoryOperation<'_>,
548) -> Result<(), VpHaltReason> {
549    assert!(!support.interruption_pending());
550
551    let vendor = support.vendor();
552    let mut cpu = EmulatorCpu::new(gm, dev, support);
553    let mut emu = x86emu::Emulator::new(&mut cpu, vendor, &[]);
554
555    match op {
556        EmulatedMemoryOperation::Read(data) => emu.read_memory(segment, gva, alignment, data).await,
557        EmulatedMemoryOperation::Write(data) => {
558            emu.write_memory(segment, gva, alignment, data).await
559        }
560    }
561    .map_err(|e| dev.fatal_error(e.into()))
562
563    // No need to flush the cache, we have not modified any registers.
564}
565
/// The memory operation to perform in [`emulate_insn_memory_op`].
pub enum EmulatedMemoryOperation<'a> {
    /// Read guest memory into the given buffer.
    Read(&'a mut [u8]),
    /// Write the given bytes to guest memory.
    Write(&'a [u8]),
}
570
/// For storing gva to gpa translations in a cache in [`EmulatorCpu`]
struct GvaGpaCacheEntry {
    /// Page number of the GVA (the address shifted right by the page shift).
    gva_page: u64,
    /// Page number of the GPA the GVA page translates to.
    gpa_page: u64,
    /// The access mode this translation was made for.
    translate_mode: TranslateMode,
}
577
578impl GvaGpaCacheEntry {
579    pub fn new(gva: u64, gpa: u64, translate_mode: TranslateMode) -> Self {
580        GvaGpaCacheEntry {
581            gva_page: gva >> hvdef::HV_PAGE_SHIFT,
582            gpa_page: gpa >> hvdef::HV_PAGE_SHIFT,
583            translate_mode,
584        }
585    }
586}
587
/// The CPU state handed to the x86 emulator: guest memory, the
/// [`EmulatorSupport`] implementation, the device I/O object, and a
/// single-entry translation cache.
struct EmulatorCpu<'a, T, U> {
    /// Guest memory used for accesses to mapped GPAs.
    gm: &'a GuestMemory,
    /// The support object providing registers, translation, and so on.
    support: &'a mut T,
    /// The device I/O object, used for MMIO accesses.
    dev: &'a U,
    // TODO: this should be able to hold at least two entries for effective use for
    // rep move instructions.
    /// The most recent successful gva to gpa translation, if any.
    cached_translation: Option<GvaGpaCacheEntry>,
}
596
/// Errors produced by [`EmulatorCpu`] memory accesses and translations.
#[derive(Debug, Error)]
enum Error {
    /// The gva to gpa translation failed. Carries the event, if any, that
    /// goes with the failure.
    #[error("translation error")]
    Translate(
        #[source] TranslateGvaError,
        Option<hvdef::HvX64PendingEvent>,
    ),
    /// The VTL access check denied access to `gpa`.
    #[error("vtl permissions denied access for gpa {gpa}")]
    NoVtlAccess {
        /// The GPA whose access was denied.
        gpa: u64,
        /// The VTL reported by the access check.
        intercepting_vtl: hvdef::Vtl,
        /// The access flags that were denied.
        denied_flags: HvMapGpaFlags,
    },
    /// An access to mapped guest memory failed.
    #[error("failed to access mapped memory")]
    Memory(#[source] GuestMemoryError),
}
613
/// Reason a gva translation failed in [`EmulatorCpu`]
#[derive(Error, Debug)]
enum TranslateGvaError {
    /// The translation reported that the GPA (or a page table entry on the
    /// way to it) was not accessible.
    #[error("gpa access denied code {0:?}")]
    AccessDenied(hvdef::hypercall::TranslateGvaResultCode),
    /// The translation succeeded but targeted an overlay page for a write.
    #[error("write on overlay page")]
    OverlayPageWrite,
    /// The translation returned a code that is not handled specifically.
    #[error("translation failed with unknown code {0:?}")]
    UnknownCode(hvdef::hypercall::TranslateGvaResultCode),
    /// The translation returned an intercept code.
    #[error("translation failed with an intercept code")]
    Intercept,
    /// The translation failed with a code that maps to a guest page fault.
    #[error("translation failed with a page fault-related code {0:?}")]
    PageFault(hvdef::hypercall::TranslateGvaResultCode),
}
628
629impl<T: EmulatorSupport, U> EmulatorCpu<'_, T, U> {
630    pub fn new<'a>(gm: &'a GuestMemory, dev: &'a U, support: &'a mut T) -> EmulatorCpu<'a, T, U> {
631        let init_cache = {
632            if let Some(InitialTranslation {
633                gva,
634                gpa,
635                translate_mode,
636            }) = support.initial_gva_translation()
637            {
638                tracing::trace!(
639                    ?gva,
640                    ?gpa,
641                    ?translate_mode,
642                    "adding initial translation to cache"
643                );
644                Some(GvaGpaCacheEntry::new(gva, gpa, translate_mode))
645            } else {
646                None
647            }
648        };
649
650        EmulatorCpu {
651            gm,
652            dev,
653            support,
654            cached_translation: init_cache,
655        }
656    }
657
658    pub fn translate_gva(
659        &mut self,
660        gva: u64,
661        mode: TranslateMode,
662        is_user_mode: bool,
663    ) -> Result<u64, Error> {
664        type TranslateCode = hvdef::hypercall::TranslateGvaResultCode;
665
666        if let Some(GvaGpaCacheEntry {
667            gva_page: cached_gva_page,
668            gpa_page: cached_gpa_page,
669            translate_mode: cached_mode,
670        }) = self.cached_translation
671        {
672            if ((gva >> hvdef::HV_PAGE_SHIFT) == cached_gva_page) && (cached_mode == mode) {
673                tracing::trace!(
674                    ?gva,
675                    ?cached_gva_page,
676                    cached_gpa_page,
677                    ?cached_mode,
678                    "using cached entry"
679                );
680                return Ok((cached_gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)));
681            }
682        };
683
684        match self.support.translate_gva(gva, mode) {
685            Ok(EmuTranslateResult { gpa, overlay_page }) => {
686                if overlay_page.is_some()
687                    && overlay_page
688                        .expect("should've already checked that the overlay page has value")
689                    && (mode == TranslateMode::Write)
690                {
691                    // We will support reads to overlay pages in order to support Win2k3
692                    // crash dumps (which do direct port io to the ide for all of memory,
693                    // including overlay pages).  Writes, though, are right out.  There is
694                    // no known scenario where guests should be writing to overlay pages,
695                    // and doing so would be difficult and expensive.  Overlay pages are
696                    // special purpose pages set up by the hypervisor (to implement hypercalls,
697                    // for instance), and there should be no reason that guests should be doing
698                    // io to them.
699                    //
700                    // At this time, there is no infrastructure to allow us to actually
701                    // read the overlay page.  We will instead return 0xff's for such reads.
702                    // This is an emulation hole.  It is felt to be acceptable at this time.
703                    // But for this reason, we give up if this wasn't the intercepting instruction
704                    // and let the processor run the instruction directly.
705                    return Err(Error::Translate(
706                        TranslateGvaError::OverlayPageWrite,
707                        Some(gpf_event()),
708                    ));
709                }
710
711                let new_cache_entry = GvaGpaCacheEntry::new(gva, gpa, mode);
712
713                self.cached_translation = Some(new_cache_entry);
714                Ok(gpa)
715            }
716            Err(EmuTranslateError { code, event_info }) => {
717                match code {
718                    TranslateCode::INTERCEPT => {
719                        tracing::trace!("translate gva to gpa returned an intercept event");
720                        Err(Error::Translate(TranslateGvaError::Intercept, event_info))
721                    }
722                    TranslateCode::GPA_NO_READ_ACCESS
723                    | TranslateCode::GPA_NO_WRITE_ACCESS
724                    | TranslateCode::GPA_UNMAPPED
725                    | TranslateCode::GPA_ILLEGAL_OVERLAY_ACCESS
726                    | TranslateCode::GPA_UNACCEPTED => {
727                        // The page table walk failed because one of the page
728                        // table entries was inaccessible in the second-level
729                        // page tables.
730                        //
731                        // Inject a #GP.
732                        tracing::trace!(
733                            "translate gva to gpa returned no access to page {:?}",
734                            code
735                        );
736                        Err(Error::Translate(
737                            TranslateGvaError::AccessDenied(code),
738                            Some(gpf_event()),
739                        ))
740                    }
741                    TranslateCode::PAGE_NOT_PRESENT
742                    | TranslateCode::PRIVILEGE_VIOLATION
743                    | TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
744                        // The page table walk failed for ordinary reasons not
745                        // having to do with second-level address translation.
746                        // We need to inject a page fault.
747                        //
748                        // It should be rare to get to this point even for a
749                        // misbehaving guest, since the processor usually should
750                        // have detected and injected this fault without
751                        // requiring an exit.
752                        //
753                        // Trace since this is more likely to indicate a bug in
754                        // our page table walking code, but rate limit the trace
755                        // since there are still cases where this could be
756                        // triggered by guest behavior.
757                        tracelimit::warn_ratelimited!(gva, ?code, "page table walk failed");
758
759                        let mut error = x86defs::PageFaultErrorCode::new();
760                        match code {
761                            TranslateCode::PAGE_NOT_PRESENT => (),
762                            TranslateCode::PRIVILEGE_VIOLATION => error.set_present(true),
763                            TranslateCode::INVALID_PAGE_TABLE_FLAGS => {
764                                error.set_present(true);
765                                error.set_reserved(true);
766                            }
767                            _ => unreachable!(),
768                        };
769
770                        match mode {
771                            TranslateMode::Execute => error.set_fetch(true),
772                            TranslateMode::Write => error.set_write(true),
773                            _ => (),
774                        };
775
776                        if is_user_mode {
777                            error.set_user(true);
778                        }
779
780                        // Page fault
781                        let event = make_exception_event(
782                            Exception::PAGE_FAULT,
783                            Some(error.into()),
784                            Some(gva),
785                        );
786
787                        Err(Error::Translate(
788                            TranslateGvaError::PageFault(code),
789                            Some(event),
790                        ))
791                    }
792                    TranslateCode::SUCCESS => unreachable!(),
793                    _ => {
794                        tracing::trace!(
795                            "translate error: unknown translation result code {:?}",
796                            code
797                        );
798
799                        Err(Error::Translate(TranslateGvaError::UnknownCode(code), None))
800                    }
801                }
802            }
803        }
804    }
805
806    pub fn check_vtl_access(&mut self, gpa: u64, mode: TranslateMode) -> Result<(), Error> {
807        self.support
808            .check_vtl_access(gpa, mode)
809            .map_err(|e| match e {
810                EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags } => Error::NoVtlAccess {
811                    gpa,
812                    intercepting_vtl: vtl,
813                    denied_flags,
814                },
815            })
816    }
817
818    fn check_monitor_write(&self, gpa: u64, bytes: &[u8]) -> bool {
819        if let Some(monitor_support) = self.support.monitor_support() {
820            monitor_support.check_write(gpa, bytes)
821        } else {
822            false
823        }
824    }
825
826    fn check_monitor_read(&self, gpa: u64, bytes: &mut [u8]) -> bool {
827        if let Some(monitor_support) = self.support.monitor_support() {
828            monitor_support.check_read(gpa, bytes)
829        } else {
830            false
831        }
832    }
833}
834
835impl<T: EmulatorSupport, U: CpuIo> x86emu::Cpu for EmulatorCpu<'_, T, U> {
836    type Error = Error;
837
838    async fn read_memory(
839        &mut self,
840        gva: u64,
841        bytes: &mut [u8],
842        is_user_mode: bool,
843    ) -> Result<(), Self::Error> {
844        let gpa = self.translate_gva(gva, TranslateMode::Read, is_user_mode)?;
845
846        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
847            self.support.lapic_read(gpa, bytes);
848            return Ok(());
849        }
850
851        self.check_vtl_access(gpa, TranslateMode::Read)?;
852
853        if self.check_monitor_read(gpa, bytes) {
854            Ok(())
855        } else if self.support.is_gpa_mapped(gpa, false) {
856            self.gm.read_at(gpa, bytes).map_err(Error::Memory)
857        } else {
858            self.dev
859                .read_mmio(self.support.vp_index(), gpa, bytes)
860                .await;
861            Ok(())
862        }
863    }
864
865    async fn write_memory(
866        &mut self,
867        gva: u64,
868        bytes: &[u8],
869        is_user_mode: bool,
870    ) -> Result<(), Self::Error> {
871        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;
872
873        if Some(gpa & !0xfff) == self.support.lapic_base_address() {
874            self.support.lapic_write(gpa, bytes);
875            return Ok(());
876        }
877
878        self.check_vtl_access(gpa, TranslateMode::Write)?;
879
880        if self.support.is_gpa_mapped(gpa, true) {
881            self.gm.write_at(gpa, bytes).map_err(Error::Memory)?;
882        } else {
883            self.dev
884                .write_mmio(self.support.vp_index(), gpa, bytes)
885                .await;
886        }
887        Ok(())
888    }
889
890    async fn compare_and_write_memory(
891        &mut self,
892        gva: u64,
893        current: &[u8],
894        new: &[u8],
895        is_user_mode: bool,
896    ) -> Result<bool, Self::Error> {
897        let gpa = self.translate_gva(gva, TranslateMode::Write, is_user_mode)?;
898        self.check_vtl_access(gpa, TranslateMode::Write)?;
899
900        let success = if self.check_monitor_write(gpa, new) {
901            true
902        } else if self.support.is_gpa_mapped(gpa, true) {
903            match (current.len(), new.len()) {
904                (1, 1) => self
905                    .gm
906                    .compare_exchange(gpa, current[0], new[0])
907                    .map(|r| r.is_ok()),
908                (2, 2) => self
909                    .gm
910                    .compare_exchange(
911                        gpa,
912                        u16::from_ne_bytes(current.try_into().unwrap()),
913                        u16::from_ne_bytes(new.try_into().unwrap()),
914                    )
915                    .map(|r| r.is_ok()),
916                (4, 4) => self
917                    .gm
918                    .compare_exchange(
919                        gpa,
920                        u32::from_ne_bytes(current.try_into().unwrap()),
921                        u32::from_ne_bytes(new.try_into().unwrap()),
922                    )
923                    .map(|r| r.is_ok()),
924                (8, 8) => self
925                    .gm
926                    .compare_exchange(
927                        gpa,
928                        u64::from_ne_bytes(current.try_into().unwrap()),
929                        u64::from_ne_bytes(new.try_into().unwrap()),
930                    )
931                    .map(|r| r.is_ok()),
932                _ => panic!("unsupported cmpxchg size"),
933            }
934            .map_err(Error::Memory)?
935        } else {
936            // Ignore the comparison aspect for device MMIO.
937            self.dev.write_mmio(self.support.vp_index(), gpa, new).await;
938            true
939        };
940        Ok(success)
941    }
942
943    async fn read_io(&mut self, io_port: u16, bytes: &mut [u8]) -> Result<(), Self::Error> {
944        self.dev
945            .read_io(self.support.vp_index(), io_port, bytes)
946            .await;
947        Ok(())
948    }
949
950    async fn write_io(&mut self, io_port: u16, bytes: &[u8]) -> Result<(), Self::Error> {
951        self.dev
952            .write_io(self.support.vp_index(), io_port, bytes)
953            .await;
954        Ok(())
955    }
956
957    fn gp(&mut self, reg: RegisterIndex) -> u64 {
958        let extended_register = self.support.gp(reg.extended_index);
959        reg.apply_sizing(extended_register)
960    }
961
962    fn gp_sign_extend(&mut self, reg: RegisterIndex) -> i64 {
963        let extended_register = self.support.gp(reg.extended_index);
964        reg.apply_sizing_signed(extended_register)
965    }
966
967    fn set_gp(&mut self, reg: RegisterIndex, v: u64) {
968        let register_value = self.gp(reg);
969        let updated_register_value = reg.apply_update(register_value, v);
970        self.support
971            .set_gp(reg.extended_index, updated_register_value);
972    }
973
974    fn rip(&mut self) -> u64 {
975        self.support.rip()
976    }
977
978    fn set_rip(&mut self, v: u64) {
979        self.support.set_rip(v);
980    }
981
982    fn segment(&mut self, index: Segment) -> SegmentRegister {
983        self.support.segment(index)
984    }
985
986    fn efer(&mut self) -> u64 {
987        self.support.efer()
988    }
989
990    fn cr0(&mut self) -> u64 {
991        self.support.cr0()
992    }
993
994    fn rflags(&mut self) -> RFlags {
995        self.support.rflags()
996    }
997
998    fn set_rflags(&mut self, v: RFlags) {
999        self.support.set_rflags(v);
1000    }
1001
1002    /// Gets the value of an XMM* register.
1003    fn xmm(&mut self, reg: usize) -> u128 {
1004        self.support.xmm(reg)
1005    }
1006
1007    /// Sets the value of an XMM* register.
1008    fn set_xmm(&mut self, reg: usize, value: u128) {
1009        self.support.set_xmm(reg, value)
1010    }
1011}
1012
1013/// Emulates an IO port instruction.
1014///
1015/// Just handles calling into the IO bus and updating `rax`. The caller must
1016/// update RIP, and it must update the VP's `rax` register (when `!is_write`).
1017///
1018/// The caller is also responsible for performing any security checks to ensure
1019/// the guest is allowed to execute I/O instructions. However, typically this is handled
1020/// by the hardware and hypervisor automatically.
1021pub async fn emulate_io(
1022    vp_index: VpIndex,
1023    is_write: bool,
1024    port: u16,
1025    rax: &mut u64,
1026    len: u8,
1027    dev: &impl CpuIo,
1028) {
1029    let len = len as usize;
1030    if is_write {
1031        dev.write_io(vp_index, port, &rax.to_ne_bytes()[..len])
1032            .await;
1033    } else {
1034        // Preserve the high bits of eax but not of rax.
1035        let mut value = (*rax as u32).to_ne_bytes();
1036        dev.read_io(vp_index, port, &mut value[..len]).await;
1037        *rax = u32::from_ne_bytes(value) as u64;
1038    }
1039}
1040
1041/// Injects an event into the guest if appropriate.
1042///
1043/// Returns true if an event was injected into the guest.
1044/// In the case of false being returned, the caller can
1045/// return the appropriate error code.
1046#[must_use]
1047fn inject_memory_access_fault<T: EmulatorSupport>(
1048    gva: u64,
1049    result: &Error,
1050    support: &mut T,
1051) -> bool {
1052    match result {
1053        Error::Translate(e, event) => {
1054            tracing::trace!(
1055                error = e as &dyn std::error::Error,
1056                "translation failed, injecting event"
1057            );
1058
1059            if let Some(event_info) = event {
1060                support.inject_pending_event(*event_info);
1061
1062                // The emulation did what it was supposed to do, which is throw a fault, so the emulation is done.
1063                return true;
1064            }
1065            false
1066        }
1067        Error::NoVtlAccess {
1068            gpa,
1069            intercepting_vtl,
1070            denied_flags,
1071        } => {
1072            tracing::trace!(
1073                error = result as &dyn std::error::Error,
1074                ?gva,
1075                ?gpa,
1076                "Vtl permissions checking failed"
1077            );
1078
1079            let event = vtl_access_event(gva, *gpa, *intercepting_vtl, *denied_flags);
1080            support.inject_pending_event(event);
1081            true
1082        }
1083        Error::Memory(_) => false,
1084    }
1085}
1086
1087/// Creates a pending event for the exception type
1088fn make_exception_event(
1089    exception: Exception,
1090    error_code: Option<u32>,
1091    exception_parameter: Option<u64>,
1092) -> hvdef::HvX64PendingEvent {
1093    let exception_event = hvdef::HvX64PendingExceptionEvent::new()
1094        .with_event_pending(true)
1095        .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
1096        .with_deliver_error_code(error_code.is_some())
1097        .with_error_code(error_code.unwrap_or(0))
1098        .with_vector(exception.0.into())
1099        .with_exception_parameter(exception_parameter.unwrap_or(0));
1100
1101    hvdef::HvX64PendingEvent::from(exception_event)
1102}
1103
1104/// Generates a general protection fault pending event
1105fn gpf_event() -> hvdef::HvX64PendingEvent {
1106    make_exception_event(Exception::GENERAL_PROTECTION_FAULT, Some(0), None)
1107}
1108
1109/// Generates the appropriate event for a VTL access error based
1110/// on the intercepting VTL
1111fn vtl_access_event(
1112    gva: u64,
1113    gpa: u64,
1114    intercepting_vtl: hvdef::Vtl,
1115    denied_access: HvMapGpaFlags,
1116) -> hvdef::HvX64PendingEvent {
1117    if intercepting_vtl != hvdef::Vtl::Vtl2 {
1118        let event_header = hvdef::HvX64PendingEventMemoryInterceptPendingEventHeader::new()
1119            .with_event_pending(true)
1120            .with_event_type(hvdef::HV_X64_PENDING_EVENT_MEMORY_INTERCEPT);
1121        let access_flags = hvdef::HvX64PendingEventMemoryInterceptAccessFlags::new()
1122            .with_guest_linear_address_valid(true)
1123            .with_caused_by_gpa_access(true);
1124
1125        let access_type = if denied_access.kernel_executable() || denied_access.user_executable() {
1126            HvInterceptAccessType::EXECUTE
1127        } else if denied_access.writable() {
1128            HvInterceptAccessType::WRITE
1129        } else {
1130            HvInterceptAccessType::READ
1131        };
1132
1133        let memory_event = hvdef::HvX64PendingEventMemoryIntercept {
1134            event_header,
1135            target_vtl: intercepting_vtl.into(),
1136            access_type,
1137            access_flags,
1138            _reserved2: 0,
1139            guest_linear_address: (gva >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
1140            guest_physical_address: (gpa >> hvdef::HV_PAGE_SHIFT) << hvdef::HV_PAGE_SHIFT,
1141            _reserved3: 0,
1142        };
1143
1144        hvdef::HvX64PendingEvent::read_from_bytes(memory_event.as_bytes())
1145            .expect("memory event and pending event should be the same size")
1146    } else {
1147        gpf_event()
1148    }
1149}
1150
1151/// Tries to emulate monitor page writes without taking the slower, full
1152/// emulation path.
1153///
1154/// The caller must have already validated that the fault was due to a write to
1155/// a monitor page GPA.
1156///
1157/// Returns the bit number being set within the monitor page.
1158pub fn emulate_mnf_write_fast_path<T: EmulatorSupport>(
1159    support: &mut T,
1160    gm: &GuestMemory,
1161    dev: &impl CpuIo,
1162    interruption_pending: bool,
1163    tlb_lock_held: bool,
1164) -> Option<u32> {
1165    let mut cpu = EmulatorCpu::new(gm, dev, support);
1166    let instruction_bytes = cpu.support.instruction_bytes();
1167    if interruption_pending || !tlb_lock_held || instruction_bytes.is_empty() {
1168        return None;
1169    }
1170    let mut bytes = [0; 16];
1171    let valid_bytes;
1172    {
1173        let instruction_bytes = cpu.support.instruction_bytes();
1174        valid_bytes = instruction_bytes.len();
1175        bytes[..valid_bytes].copy_from_slice(instruction_bytes);
1176    }
1177    let instruction_bytes = &bytes[..valid_bytes];
1178    let bit = x86emu::fast_path::emulate_fast_path_set_bit(instruction_bytes, &mut cpu);
1179    support.flush();
1180    bit
1181}