virt_mshv_vtl/processor/mshv/x64.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! X64 Processor support for Microsoft hypervisor-backed partitions.
5
6#![cfg(guest_arch = "x86_64")]
7
8type VpRegisterName = HvX64RegisterName;
9
10use super::super::BackingParams;
11use super::super::BackingPrivate;
12use super::super::UhEmulationState;
13use super::super::signal_mnf;
14use super::super::vp_state;
15use super::super::vp_state::UhVpStateAccess;
16use super::MshvRunVpError;
17use super::VbsIsolatedVtl1State;
18use crate::BackingShared;
19use crate::Error;
20use crate::GuestVsmState;
21use crate::GuestVtl;
22use crate::UhPartitionNewParams;
23use crate::processor::BackingSharedParams;
24use crate::processor::SidecarExitReason;
25use crate::processor::SidecarRemoveExit;
26use crate::processor::UhHypercallHandler;
27use crate::processor::UhProcessor;
28use crate::validate_vtl_gpa_flags;
29use hcl::ioctl;
30use hcl::ioctl::ApplyVtlProtectionsError;
31use hcl::ioctl::x64::MshvX64;
32use hcl::protocol;
33use hv1_emulator::hv::ProcessorVtlHv;
34use hv1_hypercall::HvRepResult;
35use hv1_structs::VtlSet;
36use hvdef::HV_PAGE_SIZE;
37use hvdef::HvDeliverabilityNotificationsRegister;
38use hvdef::HvError;
39use hvdef::HvInterceptAccessType;
40use hvdef::HvMapGpaFlags;
41use hvdef::HvMessageType;
42use hvdef::HvRegisterValue;
43use hvdef::HvRegisterVsmPartitionConfig;
44use hvdef::HvX64InterceptMessageHeader;
45use hvdef::HvX64PendingEvent;
46use hvdef::HvX64PendingInterruptionType;
47use hvdef::HvX64RegisterName;
48use hvdef::Vtl;
49use hvdef::hypercall;
50use inspect::Inspect;
51use inspect::InspectMut;
52use inspect_counters::Counter;
53use parking_lot::RwLock;
54use std::sync::atomic::Ordering::Relaxed;
55use virt::EmulatorMonitorSupport;
56use virt::StopVp;
57use virt::VpHaltReason;
58use virt::VpIndex;
59use virt::io::CpuIo;
60use virt::state::HvRegisterState;
61use virt::state::StateElement;
62use virt::vp;
63use virt::vp::AccessVpState;
64use virt::x86::MsrError;
65use virt_support_x86emu::emulate::EmuCheckVtlAccessError;
66use virt_support_x86emu::emulate::EmuTranslateError;
67use virt_support_x86emu::emulate::EmuTranslateResult;
68use virt_support_x86emu::emulate::EmulatorSupport;
69use x86defs::RFlags;
70use x86defs::SegmentRegister;
71use x86defs::xsave::Fxsave;
72use x86defs::xsave::XFEATURE_SSE;
73use x86defs::xsave::XFEATURE_X87;
74use x86defs::xsave::XsaveHeader;
75use zerocopy::FromZeros;
76use zerocopy::Immutable;
77use zerocopy::IntoBytes;
78use zerocopy::KnownLayout;
79
80/// A backing for hypervisor-backed partitions (non-isolated and
81/// software-isolated).
82#[derive(InspectMut)]
83pub struct HypervisorBackedX86 {
84    // VTL0 only, used for synic message and extint readiness notifications.
85    // We do not currently support synic message ports or extint interrupts for VTL1.
86    #[inspect(hex, with = "|&x| u64::from(x)")]
87    deliverability_notifications: HvDeliverabilityNotificationsRegister,
88    /// Next set of deliverability notifications. See register definition for details.
89    #[inspect(hex, with = "|&x| u64::from(x)")]
90    pub(super) next_deliverability_notifications: HvDeliverabilityNotificationsRegister,
91    stats: ProcessorStatsX86,
92    /// Send an INIT to VTL0 before running the VP, to simulate setting startup
93    /// suspend. Newer hypervisors allow setting startup suspend explicitly.
94    deferred_init: bool,
95}
96
97/// Partition-wide shared data for hypervisor backed VMs.
98#[derive(Inspect)]
99pub struct HypervisorBackedX86Shared {
100    pub(crate) guest_vsm: RwLock<GuestVsmState<VbsIsolatedVtl1State>>,
101}
102
103impl HypervisorBackedX86Shared {
104    /// Creates a new partition-shared data structure for hypervisor backed VMs.
105    pub(crate) fn new(
106        _partition_params: &UhPartitionNewParams<'_>,
107        params: BackingSharedParams<'_>,
108    ) -> Result<Self, Error> {
109        Ok(Self {
110            guest_vsm: RwLock::new(GuestVsmState::from_availability(params.guest_vsm_available)),
111        })
112    }
113}
114
115#[derive(Inspect, Default)]
116struct ProcessorStatsX86 {
117    io_port: Counter,
118    mmio: Counter,
119    unaccepted_gpa: Counter,
120    hypercall: Counter,
121    synic_deliverable: Counter,
122    interrupt_deliverable: Counter,
123    cpuid: Counter,
124    msr: Counter,
125    eoi: Counter,
126    unrecoverable_exception: Counter,
127    halt: Counter,
128    exception_intercept: Counter,
129}
130
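/// Register state fetched eagerly at intercept time and cached for instruction
/// emulation. RIP, RFLAGS, and RSP may be modified during emulation and are
/// written back when the emulation state is flushed.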
131pub struct MshvEmulationCache {
132    rsp: u64,
133    es: SegmentRegister,
134    ds: SegmentRegister,
135    fs: SegmentRegister,
136    gs: SegmentRegister,
137    ss: SegmentRegister,
138    cr0: u64,
139    efer: u64,
140    rip: u64,
141    rflags: RFlags,
142}
143
144#[expect(private_interfaces)]
145impl BackingPrivate for HypervisorBackedX86 {
146    type HclBacking<'mshv> = MshvX64<'mshv>;
147    type Shared = HypervisorBackedX86Shared;
148    type EmulationCache = MshvEmulationCache;
149
150    fn shared(shared: &BackingShared) -> &Self::Shared {
151        let BackingShared::Hypervisor(shared) = shared else {
152            unreachable!()
153        };
154        shared
155    }
156
157    fn new(
158        params: BackingParams<'_, '_, Self>,
159        _shared: &HypervisorBackedX86Shared,
160    ) -> Result<Self, Error> {
161        // Initialize shared register state to architectural state. The kernel
162        // zero initializes this.
163        //
164        // When restoring, this will be overwritten, but it's not expensive
165        // enough to bother skipping.
166        let regs = vp::Registers::at_reset(&params.partition.caps, params.vp_info);
167        *params.runner.cpu_context_mut() = protocol::hcl_cpu_context_x64 {
168            gps: [
169                regs.rax, regs.rcx, regs.rdx, regs.rbx, 0, /* cr2 */
170                regs.rbp, regs.rsi, regs.rdi, regs.r8, regs.r9, regs.r10, regs.r11, regs.r12,
171                regs.r13, regs.r14, regs.r15,
172            ],
173            fx_state: vp::Xsave::at_reset(&params.partition.caps, params.vp_info).fxsave(),
174            reserved: [0; 384],
175        };
176
177        Ok(Self {
178            deliverability_notifications: Default::default(),
179            next_deliverability_notifications: Default::default(),
180            stats: Default::default(),
181            deferred_init: false,
182        })
183    }
184
185    fn init(this: &mut UhProcessor<'_, Self>) {
186        // The hypervisor initializes startup suspend to false. Set it to the
187        // architectural default.
188        if !this.vp_index().is_bsp() {
189            this.backing.deferred_init = true;
190        }
191    }
192
193    type StateAccess<'p, 'a>
194        = UhVpStateAccess<'a, 'p, Self>
195    where
196        Self: 'a + 'p,
197        'p: 'a;
198
199    fn access_vp_state<'a, 'p>(
200        this: &'a mut UhProcessor<'p, Self>,
201        vtl: GuestVtl,
202    ) -> Self::StateAccess<'p, 'a> {
203        UhVpStateAccess::new(this, vtl)
204    }
205
206    fn pre_run_vp(this: &mut UhProcessor<'_, Self>) {
207        if std::mem::take(&mut this.backing.deferred_init) {
208            tracelimit::info_ratelimited!(
209                vp = this.vp_index().index(),
210                "sending deferred INIT to set startup suspend"
211            );
212            this.partition.request_msi(
213                GuestVtl::Vtl0,
214                virt::irqcon::MsiRequest::new_x86(
215                    virt::irqcon::DeliveryMode::INIT,
216                    this.inner.vp_info.apic_id,
217                    false,
218                    0,
219                    true,
220                ),
221            );
222        }
223    }
224
225    async fn run_vp(
226        this: &mut UhProcessor<'_, Self>,
227        dev: &impl CpuIo,
228        stop: &mut StopVp<'_>,
229    ) -> Result<(), VpHaltReason> {
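        // If the requested deliverability notifications changed since the last
        // run, update the hypervisor register before entering the guest.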
230        if this.backing.deliverability_notifications
231            != this.backing.next_deliverability_notifications
232        {
233            let notifications = this.backing.next_deliverability_notifications;
234            tracing::trace!(?notifications, "setting notifications");
235            this.runner
236                .set_vp_register(
237                    // TODO GUEST VSM
238                    GuestVtl::Vtl0,
239                    VpRegisterName::DeliverabilityNotifications,
240                    u64::from(notifications).into(),
241                )
242                .expect("requesting deliverability is not a fallible operation");
243            this.backing.deliverability_notifications =
244                this.backing.next_deliverability_notifications;
245        }
246
247        let intercepted = if this.runner.is_sidecar() {
248            let mut run = this
249                .runner
250                .run_sidecar()
251                .map_err(|e| dev.fatal_error(e.into()))?;
252            match stop.until_stop(run.wait()).await {
253                Ok(r) => r,
254                Err(stop) => {
255                    run.cancel();
256                    let r = run.wait().await;
257                    if matches!(r, Ok(false)) {
258                        // No intercept, so stop the VP.
259                        return Err(stop.into());
260                    }
261                    r
262                }
263            }
264            .map_err(|e| dev.fatal_error(ioctl::Error::Sidecar(e).into()))?
265        } else {
266            this.unlock_tlb_lock(Vtl::Vtl2);
267            this.runner
268                .run()
269                .map_err(|e| dev.fatal_error(MshvRunVpError(e).into()))?
270        };
271
272        if intercepted {
273            let message_type = this.runner.exit_message().header.typ;
274
275            let mut intercept_handler =
276                InterceptHandler::new(this).map_err(|e| dev.fatal_error(e.into()))?;
277
278            let stat = match message_type {
279                HvMessageType::HvMessageTypeX64IoPortIntercept => {
280                    intercept_handler.handle_io_port_exit(dev).await?;
281                    &mut this.backing.stats.io_port
282                }
283                HvMessageType::HvMessageTypeUnmappedGpa
284                | HvMessageType::HvMessageTypeGpaIntercept => {
285                    intercept_handler.handle_mmio_exit(dev).await?;
286                    &mut this.backing.stats.mmio
287                }
288                HvMessageType::HvMessageTypeUnacceptedGpa => {
289                    intercept_handler
290                        .handle_unaccepted_gpa_intercept(dev)
291                        .await?;
292                    &mut this.backing.stats.unaccepted_gpa
293                }
294                HvMessageType::HvMessageTypeHypercallIntercept => {
295                    intercept_handler.handle_hypercall_exit(dev);
296                    &mut this.backing.stats.hypercall
297                }
298                HvMessageType::HvMessageTypeSynicSintDeliverable => {
299                    intercept_handler.handle_synic_deliverable_exit();
300                    &mut this.backing.stats.synic_deliverable
301                }
302                HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
303                    intercept_handler.handle_interrupt_deliverable_exit(dev)?;
304                    &mut this.backing.stats.interrupt_deliverable
305                }
306                HvMessageType::HvMessageTypeX64CpuidIntercept => {
307                    intercept_handler.handle_cpuid_intercept();
308                    &mut this.backing.stats.cpuid
309                }
310                HvMessageType::HvMessageTypeMsrIntercept => {
311                    intercept_handler.handle_msr_intercept();
312                    &mut this.backing.stats.msr
313                }
314                HvMessageType::HvMessageTypeX64ApicEoi => {
315                    intercept_handler.handle_eoi(dev);
316                    &mut this.backing.stats.eoi
317                }
318                HvMessageType::HvMessageTypeUnrecoverableException => {
319                    intercept_handler.handle_unrecoverable_exception()?;
320                    &mut this.backing.stats.unrecoverable_exception
321                }
322                HvMessageType::HvMessageTypeExceptionIntercept => {
323                    intercept_handler.handle_exception(dev)?;
324                    &mut this.backing.stats.exception_intercept
325                }
326                reason => unreachable!("unknown exit reason: {:#x?}", reason),
327            };
328            stat.increment();
329
330            if this.runner.is_sidecar()
331                && !this.signaled_sidecar_exit
332                && !this.partition.no_sidecar_hotplug.load(Relaxed)
333            {
334                // We got and handled an exit and this is a sidecar VP. Cancel
335                // the run so that we can move the sidecar VP over to the main
336                // kernel and handle future exits there.
337                //
338                // This is not strictly necessary--we can continue to run the VP
339                // in the sidecar kernel. But since we have received at least
340                // one exit, we can expect that we will receive more, and
341                // handling the exits remotely introduces jitter.
342                let message = this.runner.exit_message();
343                this.inner
344                    .set_sidecar_exit_reason(SidecarExitReason::Exit(parse_sidecar_exit(message)));
345                this.signaled_sidecar_exit = true;
346                return Err(VpHaltReason::Cancel);
347            }
348        }
349        Ok(())
350    }
351
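    // For hypervisor-backed partitions the hypervisor virtualizes the APIC, so
    // there is no APIC state for VTL2 to poll or interrupts to process here.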
352    fn poll_apic(_this: &mut UhProcessor<'_, Self>, _vtl: GuestVtl, _scan_irr: bool) {}
353
354    fn process_interrupts(
355        _this: &mut UhProcessor<'_, Self>,
356        _scan_irr: hv1_structs::VtlArray<bool, 2>,
357        _first_scan_irr: &mut bool,
358        _dev: &impl CpuIo,
359    ) -> bool {
360        false
361    }
362
363    fn request_extint_readiness(this: &mut UhProcessor<'_, Self>) {
364        this.backing
365            .next_deliverability_notifications
366            .set_interrupt_notification(true);
367    }
368
369    fn request_untrusted_sint_readiness(this: &mut UhProcessor<'_, Self>, sints: u16) {
370        this.backing
371            .next_deliverability_notifications
372            .set_sints(this.backing.next_deliverability_notifications.sints() | sints);
373    }
374
375    fn hv(&self, _vtl: GuestVtl) -> Option<&ProcessorVtlHv> {
376        None
377    }
378
379    fn hv_mut(&mut self, _vtl: GuestVtl) -> Option<&mut ProcessorVtlHv> {
380        None
381    }
382
383    fn handle_vp_start_enable_vtl_wake(_this: &mut UhProcessor<'_, Self>, _vtl: GuestVtl) {
384        unimplemented!()
385    }
386
387    fn vtl1_inspectable(_this: &UhProcessor<'_, Self>) -> bool {
388        // TODO: Use the VsmVpStatus register to query the hypervisor for
389        // whether VTL 1 is enabled on the vp (this can be cached).
390        false
391    }
392}
393
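/// Summarizes an intercept message as the exit reason recorded when a sidecar
/// VP is handed over to the main kernel.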
394fn parse_sidecar_exit(message: &hvdef::HvMessage) -> SidecarRemoveExit {
395    match message.header.typ {
396        HvMessageType::HvMessageTypeX64IoPortIntercept => {
397            let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
398            SidecarRemoveExit::Io {
399                port: message.port_number,
400                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
401            }
402        }
403        HvMessageType::HvMessageTypeUnmappedGpa | HvMessageType::HvMessageTypeGpaIntercept => {
404            let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
405            SidecarRemoveExit::Mmio {
406                gpa: message.guest_physical_address,
407                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
408            }
409        }
410        HvMessageType::HvMessageTypeHypercallIntercept => {
411            let message = message.as_message::<hvdef::HvX64HypercallInterceptMessage>();
412            let is_64bit = message.header.execution_state.cr0_pe()
413                && message.header.execution_state.efer_lma();
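            // The hypercall input (control) value comes from RCX for a 64-bit
            // caller and from EDX:EAX for a 32-bit caller.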
414            let control = if is_64bit {
415                message.rcx
416            } else {
417                (message.rdx << 32) | (message.rax as u32 as u64)
418            };
419            SidecarRemoveExit::Hypercall {
420                code: hvdef::HypercallCode(hypercall::Control::from(control).code()),
421            }
422        }
423        HvMessageType::HvMessageTypeX64CpuidIntercept => {
424            let message = message.as_message::<hvdef::HvX64CpuidInterceptMessage>();
425            SidecarRemoveExit::Cpuid {
426                leaf: message.rax as u32,
427                subleaf: message.rcx as u32,
428            }
429        }
430        HvMessageType::HvMessageTypeMsrIntercept => {
431            let message = message.as_message::<hvdef::HvX64MsrInterceptMessage>();
432            SidecarRemoveExit::Msr {
433                msr: message.msr_number,
434                value: (message.header.intercept_access_type == HvInterceptAccessType::WRITE)
435                    .then_some((message.rdx << 32) | message.rax as u32 as u64),
436            }
437        }
438        typ => SidecarRemoveExit::Hypervisor { message: typ },
439    }
440}
441
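/// Returns the guest RIP advanced past the intercepted instruction, using the
/// instruction length reported in the intercept message header.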
442fn next_rip(value: &HvX64InterceptMessageHeader) -> u64 {
443    value.rip.wrapping_add(value.instruction_len() as u64)
444}
445
446struct InterceptHandler<'a, 'b> {
447    vp: &'a mut UhProcessor<'b, HypervisorBackedX86>,
448    intercepted_vtl: GuestVtl,
449}
450
451#[derive(Debug, Error)]
452#[error("invalid intercepted vtl {0:?}")]
453struct InvalidInterceptedVtl(u8);
454
455#[derive(Debug, Error)]
456#[error("guest accessed unaccepted gpa {0}")]
457struct UnacceptedMemoryAccess(u64);
458
459impl<'a, 'b> InterceptHandler<'a, 'b> {
460    fn new(
461        vp: &'a mut UhProcessor<'b, HypervisorBackedX86>,
462    ) -> Result<Self, InvalidInterceptedVtl> {
463        let message_type = vp.runner.exit_message().header.typ;
464
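        // Determine which VTL was intercepted: prefer the VTL reported via the
        // register page, and fall back to the VTL in the intercept message
        // header when no register page is mapped.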
465        let intercepted_vtl = match vp.runner.reg_page_vtl() {
466            Ok(vtl) => vtl,
467            Err(ioctl::x64::RegisterPageVtlError::InvalidVtl(vtl)) => {
468                return Err(InvalidInterceptedVtl(vtl));
469            }
470            Err(ioctl::x64::RegisterPageVtlError::NoRegisterPage) => {
471                if matches!(&message_type, &HvMessageType::HvMessageTypeX64ApicEoi) {
472                    // At the moment this is only used for the ioapic, so assume
473                    // that this is targeting VTL 0 for now. TODO: fix
474                    GuestVtl::Vtl0
475                } else {
476                    let message_header = match &message_type {
477                        &HvMessageType::HvMessageTypeX64IoPortIntercept => {
478                            &vp.runner
479                                .exit_message()
480                                .as_message::<hvdef::HvX64IoPortInterceptMessage>()
481                                .header
482                        }
483                        &HvMessageType::HvMessageTypeUnmappedGpa
484                        | &HvMessageType::HvMessageTypeGpaIntercept => {
485                            &vp.runner
486                                .exit_message()
487                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
488                                .header
489                        }
490                        &HvMessageType::HvMessageTypeUnacceptedGpa => {
491                            &vp.runner
492                                .exit_message()
493                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
494                                .header
495                        }
496                        &HvMessageType::HvMessageTypeHypercallIntercept => {
497                            &vp.runner
498                                .exit_message()
499                                .as_message::<hvdef::HvX64HypercallInterceptMessage>()
500                                .header
501                        }
502                        &HvMessageType::HvMessageTypeSynicSintDeliverable => {
503                            &vp.runner
504                                .exit_message()
505                                .as_message::<hvdef::HvX64SynicSintDeliverableMessage>()
506                                .header
507                        }
508                        &HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
509                            &vp.runner
510                                .exit_message()
511                                .as_message::<hvdef::HvX64InterruptionDeliverableMessage>()
512                                .header
513                        }
514                        &HvMessageType::HvMessageTypeX64CpuidIntercept => {
515                            &vp.runner
516                                .exit_message()
517                                .as_message::<hvdef::HvX64CpuidInterceptMessage>()
518                                .header
519                        }
520                        &HvMessageType::HvMessageTypeMsrIntercept => {
521                            &vp.runner
522                                .exit_message()
523                                .as_message::<hvdef::HvX64MsrInterceptMessage>()
524                                .header
525                        }
526                        &HvMessageType::HvMessageTypeUnrecoverableException => {
527                            &vp.runner
528                                .exit_message()
529                                .as_message::<hvdef::HvX64UnrecoverableExceptionMessage>()
530                                .header
531                        }
532                        &HvMessageType::HvMessageTypeX64Halt => {
533                            &vp.runner
534                                .exit_message()
535                                .as_message::<hvdef::HvX64HaltMessage>()
536                                .header
537                        }
538                        &HvMessageType::HvMessageTypeExceptionIntercept => {
539                            &vp.runner
540                                .exit_message()
541                                .as_message::<hvdef::HvX64ExceptionInterceptMessage>()
542                                .header
543                        }
544                        reason => unreachable!("unknown exit reason: {:#x?}", reason),
545                    };
546
547                    message_header
548                        .execution_state
549                        .vtl()
550                        .try_into()
551                        .map_err(|hcl::UnsupportedGuestVtl(vtl)| InvalidInterceptedVtl(vtl))?
552                }
553            }
554        };
555
556        Ok(Self {
557            vp,
558            intercepted_vtl,
559        })
560    }
561
562    fn handle_interrupt_deliverable_exit(&mut self, bus: &impl CpuIo) -> Result<(), VpHaltReason> {
563        let message = self
564            .vp
565            .runner
566            .exit_message()
567            .as_message::<hvdef::HvX64InterruptionDeliverableMessage>();
568
569        assert_eq!(
570            message.deliverable_type,
571            HvX64PendingInterruptionType::HV_X64_PENDING_INTERRUPT
572        );
573
574        self.vp
575            .backing
576            .deliverability_notifications
577            .set_interrupt_notification(false);
578
579        self.vp
580            .backing
581            .next_deliverability_notifications
582            .set_interrupt_notification(false);
583
584        if let Some(vector) = bus.acknowledge_pic_interrupt() {
585            let event = hvdef::HvX64PendingExtIntEvent::new()
586                .with_event_pending(true)
587                .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXT_INT)
588                .with_vector(vector);
589
590            self.vp
591                .runner
592                .set_vp_register(
593                    self.intercepted_vtl,
594                    HvX64RegisterName::PendingEvent0,
595                    u128::from(event).into(),
596                )
597                .unwrap();
598        }
599
600        Ok(())
601    }
602
603    fn handle_synic_deliverable_exit(&mut self) {
604        let message = self
605            .vp
606            .runner
607            .exit_message()
608            .as_message::<hvdef::HvX64SynicSintDeliverableMessage>();
609
610        tracing::trace!(
611            deliverable_sints = message.deliverable_sints,
612            "sint deliverable"
613        );
614
615        self.vp.backing.deliverability_notifications.set_sints(
616            self.vp.backing.deliverability_notifications.sints() & !message.deliverable_sints,
617        );
618
619        // This is updated by `deliver_synic_messages` below, so clear it here.
620        self.vp
621            .backing
622            .next_deliverability_notifications
623            .set_sints(0);
624
625        // These messages are always delivered to VTL0, as VTL1 does not own any VMBUS channels.
626        self.vp
627            .deliver_synic_messages(GuestVtl::Vtl0, message.deliverable_sints);
628    }
629
630    fn handle_hypercall_exit(&mut self, bus: &impl CpuIo) {
631        let message = self
632            .vp
633            .runner
634            .exit_message()
635            .as_message::<hvdef::HvX64HypercallInterceptMessage>();
636
637        tracing::trace!(msg = %format_args!("{:x?}", message), "hypercall");
638
639        let is_64bit =
640            message.header.execution_state.cr0_pe() && message.header.execution_state.efer_lma();
641
642        let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
643        let handler = UhHypercallHandler {
644            vp: self.vp,
645            bus,
646            trusted: false,
647            intercepted_vtl: self.intercepted_vtl,
648        };
649        UhHypercallHandler::MSHV_DISPATCHER.dispatch(
650            guest_memory,
651            hv1_hypercall::X64RegisterIo::new(handler, is_64bit),
652        );
653    }
654
655    async fn handle_mmio_exit(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
656        let message = self
657            .vp
658            .runner
659            .exit_message()
660            .as_message::<hvdef::HvX64MemoryInterceptMessage>();
661
662        tracing::trace!(msg = %format_args!("{:x?}", message), "mmio");
663
664        let interruption_pending = message.header.execution_state.interruption_pending();
665
666        // Fast path for monitor page writes.
667        if Some(message.guest_physical_address & !(HV_PAGE_SIZE - 1))
668            == self.vp.partition.monitor_page.gpa()
669            && message.header.intercept_access_type == HvInterceptAccessType::WRITE
670        {
671            let tlb_lock_held = message.memory_access_info.gva_gpa_valid()
672                || message.memory_access_info.tlb_locked();
673            let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
674            let cache = self.vp.emulation_cache(self.intercepted_vtl);
675            let mut emulation_state = UhEmulationState {
676                vp: &mut *self.vp,
677                interruption_pending,
678                devices: dev,
679                vtl: self.intercepted_vtl,
680                cache,
681            };
682            if let Some(bit) = virt_support_x86emu::emulate::emulate_mnf_write_fast_path(
683                &mut emulation_state,
684                guest_memory,
685                dev,
686                interruption_pending,
687                tlb_lock_held,
688            ) {
689                if let Some(connection_id) = self.vp.partition.monitor_page.write_bit(bit) {
690                    signal_mnf(dev, connection_id);
691                }
692                return Ok(());
693            }
694        }
695
696        let cache = self.vp.emulation_cache(self.intercepted_vtl);
697        self.vp
698            .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
699            .await
700    }
701
702    async fn handle_io_port_exit(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
703        let message = self
704            .vp
705            .runner
706            .exit_message()
707            .as_message::<hvdef::HvX64IoPortInterceptMessage>();
708
709        tracing::trace!(msg = %format_args!("{:x?}", message), "io_port");
710
711        assert_eq!(message.rax, self.vp.runner.cpu_context().gps[protocol::RAX]);
712
713        let interruption_pending = message.header.execution_state.interruption_pending();
714
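        // String and rep-prefixed I/O require full instruction emulation; a
        // plain IN/OUT is satisfied directly via `emulate_io` using RAX from
        // the CPU context.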
715        if message.access_info.string_op() || message.access_info.rep_prefix() {
716            let cache = self.vp.emulation_cache(self.intercepted_vtl);
717            self.vp
718                .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
719                .await
720        } else {
721            let next_rip = next_rip(&message.header);
722            let access_size = message.access_info.access_size();
723            virt_support_x86emu::emulate::emulate_io(
724                self.vp.vp_index(),
725                message.header.intercept_access_type == HvInterceptAccessType::WRITE,
726                message.port_number,
727                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX],
728                access_size,
729                dev,
730            )
731            .await;
732            self.vp.set_rip(self.intercepted_vtl, next_rip);
733            Ok(())
734        }
735    }
736
737    async fn handle_unaccepted_gpa_intercept(
738        &mut self,
739        dev: &impl CpuIo,
740    ) -> Result<(), VpHaltReason> {
741        let gpa = self
742            .vp
743            .runner
744            .exit_message()
745            .as_message::<hvdef::HvX64MemoryInterceptMessage>()
746            .guest_physical_address;
747
748        if self.vp.partition.is_gpa_lower_vtl_ram(gpa) {
749            // The host may have moved the page to an unaccepted state, so fail
750            // here. This does not apply to VTL 2 memory - for unaccepted pages,
751            // the intercept goes to host VTL0.
752            //
753            // Note: SGX memory should be included in this check, so if SGX is
754            // no longer included in the lower_vtl_memory_layout, make sure the
755            // appropriate changes are reflected here.
756            Err(dev.fatal_error(UnacceptedMemoryAccess(gpa).into()))
757        } else {
758            self.handle_mmio_exit(dev).await
759        }
760    }
761
762    fn handle_cpuid_intercept(&mut self) {
763        let message = self
764            .vp
765            .runner
766            .exit_message()
767            .as_message::<hvdef::HvX64CpuidInterceptMessage>();
768
769        let default_result = [
770            message.default_result_rax as u32,
771            message.default_result_rbx as u32,
772            message.default_result_rcx as u32,
773            message.default_result_rdx as u32,
774        ];
775
776        tracing::trace!(msg = %format_args!("{:x?}", message), "cpuid");
777
778        let [eax, ebx, ecx, edx] =
779            self.vp
780                .partition
781                .cpuid
782                .result(message.rax as u32, message.rcx as u32, &default_result);
783
784        let next_rip = next_rip(&message.header);
785        self.vp.runner.cpu_context_mut().gps[protocol::RAX] = eax.into();
786        self.vp.runner.cpu_context_mut().gps[protocol::RBX] = ebx.into();
787        self.vp.runner.cpu_context_mut().gps[protocol::RCX] = ecx.into();
788        self.vp.runner.cpu_context_mut().gps[protocol::RDX] = edx.into();
789
790        self.vp.set_rip(self.intercepted_vtl, next_rip);
791    }
792
793    fn handle_msr_intercept(&mut self) {
794        let message = self
795            .vp
796            .runner
797            .exit_message()
798            .as_message::<hvdef::HvX64MsrInterceptMessage>();
799        let rip = next_rip(&message.header);
800
801        tracing::trace!(msg = %format_args!("{:x?}", message), "msr");
802
803        let msr = message.msr_number;
804        match message.header.intercept_access_type {
805            HvInterceptAccessType::READ => {
806                // Only supported MSRs are the crash MSRs.
807                let value = match self.vp.read_crash_msr(msr, self.intercepted_vtl) {
808                    Ok(v) => v,
809                    Err(MsrError::Unknown) => {
810                        tracing::trace!(msr, "unknown msr read");
811                        0
812                    }
813                    Err(MsrError::InvalidAccess) => {
814                        self.vp.inject_gpf(self.intercepted_vtl);
815                        // Do not advance RIP.
816                        return;
817                    }
818                };
819
820                self.vp.runner.cpu_context_mut().gps[protocol::RAX] = value & 0xffff_ffff;
821                self.vp.runner.cpu_context_mut().gps[protocol::RDX] = value >> 32;
822            }
823            HvInterceptAccessType::WRITE => {
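                // MSR writes pass the 64-bit value split across EDX:EAX.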
824                let value = (message.rax & 0xffff_ffff) | (message.rdx << 32);
825                // Only supported MSRs are the crash MSRs.
826                match self.vp.write_crash_msr(msr, value, self.intercepted_vtl) {
827                    Ok(()) => {}
828                    Err(MsrError::Unknown) => {
829                        tracing::trace!(msr, value, "unknown msr write");
830                    }
831                    Err(MsrError::InvalidAccess) => {
832                        self.vp.inject_gpf(self.intercepted_vtl);
833                        // Do not advance RIP.
834                        return;
835                    }
836                }
837            }
838            _ => unreachable!(),
839        }
840
841        self.vp.set_rip(self.intercepted_vtl, rip);
842    }
843
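    // EOI intercepts are currently only used for the I/O APIC; forward the
    // vector to the device layer to complete the EOI.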
844    fn handle_eoi(&self, dev: &impl CpuIo) {
845        let message = self
846            .vp
847            .runner
848            .exit_message()
849            .as_message::<hvdef::HvX64ApicEoiMessage>();
850
851        tracing::trace!(msg = %format_args!("{:x?}", message), "eoi");
852
853        dev.handle_eoi(message.interrupt_vector);
854    }
855
856    fn handle_unrecoverable_exception(&self) -> Result<(), VpHaltReason> {
857        Err(VpHaltReason::TripleFault {
858            vtl: self.intercepted_vtl.into(),
859        })
860    }
861
862    fn handle_exception(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
863        let message = self
864            .vp
865            .runner
866            .exit_message()
867            .as_message::<hvdef::HvX64ExceptionInterceptMessage>();
868
869        match x86defs::Exception(message.vector as u8) {
870            x86defs::Exception::DEBUG if cfg!(feature = "gdb") => {
871                self.vp.handle_debug_exception(dev, self.intercepted_vtl)?
872            }
873            _ => tracing::error!("unexpected exception type {:#x?}", message.vector),
874        }
875        Ok(())
876    }
877}
878
879impl UhProcessor<'_, HypervisorBackedX86> {
880    fn set_rip(&mut self, vtl: GuestVtl, rip: u64) {
881        self.runner
882            .set_vp_register(vtl, HvX64RegisterName::Rip, rip.into())
883            .unwrap();
884    }
885
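    // Queue a #GP(0) as a pending exception event for the given VTL.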
886    fn inject_gpf(&mut self, vtl: GuestVtl) {
887        let exception_event = hvdef::HvX64PendingExceptionEvent::new()
888            .with_event_pending(true)
889            .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
890            .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0.into())
891            .with_deliver_error_code(true)
892            .with_error_code(0);
893
894        self.runner
895            .set_vp_register(
896                vtl,
897                HvX64RegisterName::PendingEvent0,
898                u128::from(exception_event).into(),
899            )
900            .expect("set_vp_register should succeed for pending event");
901    }
902
903    fn set_vsm_partition_config(
904        &mut self,
905        vtl: GuestVtl,
906        value: HvRegisterVsmPartitionConfig,
907    ) -> Result<(), HvError> {
908        if vtl != GuestVtl::Vtl1 {
909            return Err(HvError::InvalidParameter);
910        }
911
912        assert!(self.partition.isolation.is_isolated());
913
914        let status = self
915            .partition
916            .vsm_status()
917            .expect("cannot fail to query vsm status");
918
919        let vtl1_enabled = VtlSet::from(status.enabled_vtl_set()).is_set(GuestVtl::Vtl1);
920        if !vtl1_enabled {
921            return Err(HvError::InvalidVtlState);
922        }
923
924        let mut guest_vsm_lock = self.shared.guest_vsm.write();
925
926        // Initialize partition.guest_vsm state if necessary.
927        match *guest_vsm_lock {
928            GuestVsmState::NotPlatformSupported => {
929                return Err(HvError::AccessDenied);
930            }
931            GuestVsmState::NotGuestEnabled => {
932                // TODO: check status
933                *guest_vsm_lock = GuestVsmState::Enabled {
934                    vtl1: Default::default(),
935                };
936            }
937            GuestVsmState::Enabled { .. } => {}
938        }
939
940        let GuestVsmState::Enabled { vtl1 } = &mut *guest_vsm_lock else {
941            unreachable!()
942        };
943        let protections = HvMapGpaFlags::from(value.default_vtl_protection_mask() as u32);
944
945        if value.reserved() != 0 {
946            return Err(HvError::InvalidRegisterValue);
947        }
948
949        // VTL protection cannot be disabled once enabled.
950        //
951        // The hypervisor should intercept only the case where the lower VTL is
952        // setting the enable_vtl_protection bit when it was previously
953        // disabled; other cases are handled directly by the hypervisor.
954        if !value.enable_vtl_protection() {
955            if vtl1.enable_vtl_protection {
956                // A malicious guest could change its hypercall parameters in
957                // memory while the intercept is being handled; this case
958                // explicitly handles that situation.
959                return Err(HvError::InvalidRegisterValue);
960            } else {
961                panic!("unexpected SetVpRegisters intercept");
962            }
963        }
964
965        // For VBS-isolated VMs, protections apply to VTLs lower than the one specified when
966        // setting VsmPartitionConfig.
967        let mbec_enabled = VtlSet::from(status.mbec_enabled_vtl_set()).is_set(GuestVtl::Vtl0);
968        let shadow_supervisor_stack_enabled =
969            VtlSet::from(status.supervisor_shadow_stack_enabled_vtl_set() as u16)
970                .is_set(GuestVtl::Vtl0);
971
972        if !validate_vtl_gpa_flags(protections, mbec_enabled, shadow_supervisor_stack_enabled) {
973            return Err(HvError::InvalidRegisterValue);
974        }
975
976        // Default VTL protection mask must include read and write.
977        if !(protections.readable() && protections.writable()) {
978            return Err(HvError::InvalidRegisterValue);
979        }
980
981        // Don't allow changing existing protections once set.
982        if let Some(current_protections) = vtl1.default_vtl_protections {
983            if protections != current_protections {
984                return Err(HvError::InvalidRegisterValue);
985            }
986        }
987        vtl1.default_vtl_protections = Some(protections);
988
989        for ram_range in self.partition.lower_vtl_memory_layout.ram().iter() {
990            self.partition
991                .hcl
992                .modify_vtl_protection_mask(ram_range.range, protections, vtl.into())
993                .map_err(|e| match e {
994                    ApplyVtlProtectionsError::Hypervisor {
995                        range: _,
996                        output: _,
997                        hv_error,
998                        vtl: _,
999                    } => hv_error,
1000                    _ => unreachable!(),
1001                })?;
1002        }
1003
1004        let hc_regs = [(HvX64RegisterName::VsmPartitionConfig, u64::from(value))];
1005        self.runner.set_vp_registers_hvcall(vtl.into(), hc_regs)?;
1006        vtl1.enable_vtl_protection = true;
1007
1008        Ok(())
1009    }
1010
1011    /// Eagerly load registers for emulation.
1012    /// Typically we load expensive registers lazily; however, these registers are always used,
1013    /// and the underlying ioctl supports batching multiple register retrievals into a single call.
1014    fn emulation_cache(&mut self, vtl: GuestVtl) -> MshvEmulationCache {
1015        const NAMES: &[HvX64RegisterName] = &[
1016            HvX64RegisterName::Rsp,
1017            HvX64RegisterName::Es,
1018            HvX64RegisterName::Ds,
1019            HvX64RegisterName::Fs,
1020            HvX64RegisterName::Gs,
1021            HvX64RegisterName::Ss,
1022            HvX64RegisterName::Cr0,
1023            HvX64RegisterName::Efer,
1024        ];
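        // get_vp_registers fills the output slice in the same order as NAMES;
        // the destructuring below relies on that ordering.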
1025        let mut values = [FromZeros::new_zeroed(); NAMES.len()];
1026        self.runner
1027            .get_vp_registers(vtl, NAMES, &mut values)
1028            .expect("register query should not fail");
1029
1030        let [rsp, es, ds, fs, gs, ss, cr0, efer] = values;
1031
1032        let header = self
1033            .runner
1034            .exit_message()
1035            .as_message::<HvX64InterceptMessageHeader>();
1036
1037        MshvEmulationCache {
1038            rsp: rsp.as_u64(),
1039            es: from_seg(es.into()),
1040            ds: from_seg(ds.into()),
1041            fs: from_seg(fs.into()),
1042            gs: from_seg(gs.into()),
1043            ss: from_seg(ss.into()),
1044            cr0: cr0.as_u64(),
1045            efer: efer.as_u64(),
1046            rip: header.rip,
1047            rflags: header.rflags.into(),
1048        }
1049    }
1050}
1051
1052fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
1053    SegmentRegister {
1054        base: reg.base,
1055        limit: reg.limit,
1056        selector: reg.selector,
1057        attributes: reg.attributes.into(),
1058    }
1059}
1060
1061impl<T: CpuIo> EmulatorSupport for UhEmulationState<'_, '_, T, HypervisorBackedX86> {
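    // Write back the cached registers that emulation may have modified (RIP,
    // RFLAGS, RSP) in a single batched register set.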
1062    fn flush(&mut self) {
1063        self.vp
1064            .runner
1065            .set_vp_registers(
1066                self.vtl,
1067                [
1068                    (HvX64RegisterName::Rip, self.cache.rip),
1069                    (HvX64RegisterName::Rflags, self.cache.rflags.into()),
1070                    (HvX64RegisterName::Rsp, self.cache.rsp),
1071                ],
1072            )
1073            .unwrap();
1074    }
1075
1076    fn vp_index(&self) -> VpIndex {
1077        self.vp.vp_index()
1078    }
1079
1080    fn vendor(&self) -> x86defs::cpuid::Vendor {
1081        self.vp.partition.caps.vendor
1082    }
1083
1084    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
1085        match reg {
1086            x86emu::Gp::RSP => self.cache.rsp,
1087            _ => self.vp.runner.cpu_context().gps[reg as usize],
1088        }
1089    }
1090
1091    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
1092        if reg == x86emu::Gp::RSP {
1093            self.cache.rsp = v;
1094        }
1095        self.vp.runner.cpu_context_mut().gps[reg as usize] = v;
1096    }
1097
1098    fn xmm(&mut self, index: usize) -> u128 {
1099        u128::from_le_bytes(self.vp.runner.cpu_context().fx_state.xmm[index])
1100    }
1101
1102    fn set_xmm(&mut self, index: usize, v: u128) {
1103        self.vp.runner.cpu_context_mut().fx_state.xmm[index] = v.to_le_bytes();
1104    }
1105
1106    fn rip(&mut self) -> u64 {
1107        self.cache.rip
1108    }
1109
1110    fn set_rip(&mut self, v: u64) {
1111        self.cache.rip = v;
1112    }
1113
1114    fn segment(&mut self, index: x86emu::Segment) -> SegmentRegister {
1115        match index {
1116            x86emu::Segment::CS => {
1117                let header = self
1118                    .vp
1119                    .runner
1120                    .exit_message()
1121                    .as_message::<HvX64InterceptMessageHeader>();
1122                from_seg(header.cs_segment)
1123            }
1124            x86emu::Segment::ES => self.cache.es,
1125            x86emu::Segment::SS => self.cache.ss,
1126            x86emu::Segment::DS => self.cache.ds,
1127            x86emu::Segment::FS => self.cache.fs,
1128            x86emu::Segment::GS => self.cache.gs,
1129        }
1130    }
1131
1132    fn efer(&mut self) -> u64 {
1133        self.cache.efer
1134    }
1135
1136    fn cr0(&mut self) -> u64 {
1137        self.cache.cr0
1138    }
1139
1140    fn rflags(&mut self) -> RFlags {
1141        self.cache.rflags
1142    }
1143
1144    fn set_rflags(&mut self, v: RFlags) {
1145        self.cache.rflags = v;
1146    }
1147
1148    fn instruction_bytes(&self) -> &[u8] {
1149        let message = self.vp.runner.exit_message();
1150        match message.header.typ {
1151            HvMessageType::HvMessageTypeGpaIntercept
1152            | HvMessageType::HvMessageTypeUnmappedGpa
1153            | HvMessageType::HvMessageTypeUnacceptedGpa => {
1154                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
1155                &message.instruction_bytes[..message.instruction_byte_count as usize]
1156            }
1157            HvMessageType::HvMessageTypeX64IoPortIntercept => {
1158                let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
1159                &message.instruction_bytes[..message.instruction_byte_count as usize]
1160            }
1161            _ => unreachable!(),
1162        }
1163    }
1164
1165    fn physical_address(&self) -> Option<u64> {
1166        let message = self.vp.runner.exit_message();
1167        match message.header.typ {
1168            HvMessageType::HvMessageTypeGpaIntercept
1169            | HvMessageType::HvMessageTypeUnmappedGpa
1170            | HvMessageType::HvMessageTypeUnacceptedGpa => {
1171                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
1172                Some(message.guest_physical_address)
1173            }
1174            _ => None,
1175        }
1176    }
1177
1178    fn initial_gva_translation(
1179        &mut self,
1180    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
1181        if (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeGpaIntercept)
1182            && (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeUnmappedGpa)
1183            && (self.vp.runner.exit_message().header.typ
1184                != HvMessageType::HvMessageTypeUnacceptedGpa)
1185        {
1186            return None;
1187        }
1188
1189        let message = self
1190            .vp
1191            .runner
1192            .exit_message()
1193            .as_message::<hvdef::HvX64MemoryInterceptMessage>();
1194
1195        if !message.memory_access_info.gva_gpa_valid() {
1196            tracing::trace!(?message.guest_virtual_address, ?message.guest_physical_address, "gva gpa not valid {:?}", self.vp.runner.exit_message().payload());
1197            return None;
1198        }
1199
1200        let translate_mode = virt_support_x86emu::emulate::TranslateMode::try_from(
1201            message.header.intercept_access_type,
1202        )
1203        .expect("unexpected intercept access type");
1204
1205        let translation = virt_support_x86emu::emulate::InitialTranslation {
1206            gva: message.guest_virtual_address,
1207            gpa: message.guest_physical_address,
1208            translate_mode,
1209        };
1210
1211        tracing::trace!(?translation, "initial translation");
1212
1213        // If we have a valid translation, the hypervisor must have set the TLB lock
1214        // so the translation remains valid for the duration of this exit.
1215        // Update our local cache appropriately.
1216        self.vp.mark_tlb_locked(Vtl::Vtl2, self.vtl);
1217
1218        Some(translation)
1219    }
1220
1221    fn interruption_pending(&self) -> bool {
1222        self.interruption_pending
1223    }
1224
1225    fn check_vtl_access(
1226        &mut self,
1227        gpa: u64,
1228        mode: virt_support_x86emu::emulate::TranslateMode,
1229    ) -> Result<(), EmuCheckVtlAccessError> {
1230        // Underhill currently doesn't set VTL 2 protections against execute exclusively; it removes
1231        // all permissions from a page. So for VTL 1 there is no need to check permissions: if VTL 1
1232        // doesn't have permissions to a page, Underhill should appropriately fail when it tries
1233        // to read or write to that page on VTL 1's behalf.
1234        //
1235        // For VTL 0, the alias map guards read and write permissions, so only check VTL execute
1236        // permissions. Because VTL 2 will not restrict execute exclusively, only VTL 1 execute
1237        // permissions need to be checked, and therefore permissions are checked only if VTL 1 is allowed.
1238        //
1239        // Note: the restriction to VTL 1 support also means that for WHP, which doesn't support VTL 1,
1240        // the HvCheckSparseGpaPageVtlAccess hypercall--which is unimplemented in WHP--will never be made.
1241        if mode == virt_support_x86emu::emulate::TranslateMode::Execute
1242            && self.vtl == GuestVtl::Vtl0
1243            && !matches!(
1244                *self.vp.shared.guest_vsm.read(),
1245                GuestVsmState::NotPlatformSupported,
1246            )
1247        {
1248            // Should always be called after translate gva with the tlb lock flag
1249            // or with an initial translation.
1250            debug_assert!(self.vp.is_tlb_locked(Vtl::Vtl2, self.vtl));
1251
1252            let mbec_user_execute = self
1253                .vp
1254                .runner
1255                .get_vp_register(self.vtl, HvX64RegisterName::InstructionEmulationHints)
1256                .unwrap();
1257
1258            let flags =
1259                if hvdef::HvInstructionEmulatorHintsRegister::from(mbec_user_execute.as_u64())
1260                    .mbec_user_execute_control()
1261                {
1262                    HvMapGpaFlags::new().with_user_executable(true)
1263                } else {
1264                    HvMapGpaFlags::new().with_kernel_executable(true)
1265                };
1266
1267            let access_result = self
1268                .vp
1269                .partition
1270                .hcl
1271                .check_vtl_access(gpa, self.vtl, flags)
1272                .unwrap();
1273
1274            if let Some(ioctl::CheckVtlAccessResult { vtl, denied_flags }) = access_result {
1275                return Err(EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags });
1276            };
1277        }
1278
1279        Ok(())
1280    }
1281
1282    fn translate_gva(
1283        &mut self,
1284        gva: u64,
1285        mode: virt_support_x86emu::emulate::TranslateMode,
1286    ) -> Result<EmuTranslateResult, EmuTranslateError> {
1287        let mut control_flags = hypercall::TranslateGvaControlFlagsX64::new();
1288        match mode {
1289            virt_support_x86emu::emulate::TranslateMode::Read => {
1290                control_flags.set_validate_read(true)
1291            }
1292            virt_support_x86emu::emulate::TranslateMode::Write => {
1293                control_flags.set_validate_read(true);
1294                control_flags.set_validate_write(true);
1295            }
1296            virt_support_x86emu::emulate::TranslateMode::Execute => {
1297                control_flags.set_validate_execute(true)
1298            }
1299        };
1300
1301        // The translation will be used, so set the appropriate page table bits
1302        // (the access/dirty bit).
1303        //
1304        // Prevent flushes in order to make sure that translation of this GVA
1305        // remains usable until the VP is resumed back to direct execution.
1306        control_flags.set_set_page_table_bits(true);
1307        control_flags.set_tlb_flush_inhibit(true);
1308
1309        // In case we're not running in ring 0, check privileges against the
1310        // VP state as of when the original intercept came in. Since the
1311        // emulator doesn't support instructions that change the ring level,
1312        // the ring level is still the same as it was at the time of the
1313        // intercept. The privilege exempt flag should
1314        // not be set.
1315        assert!(!control_flags.privilege_exempt());
1316
1317        // Do the translation using the current VTL.
1318        control_flags.set_input_vtl(self.vtl.into());
1319
1320        match self
1321            .vp
1322            .runner
1323            .translate_gva_to_gpa(gva, control_flags)
1324            .unwrap()
1325        {
1326            Ok(ioctl::TranslateResult {
1327                gpa_page,
1328                overlay_page,
1329            }) => {
1330                self.vp.mark_tlb_locked(Vtl::Vtl2, self.vtl);
1331                Ok(EmuTranslateResult {
1332                    gpa: (gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)),
1333                    overlay_page: Some(overlay_page),
1334                })
1335            }
1336            Err(ioctl::x64::TranslateErrorX64 { code, event_info }) => Err(EmuTranslateError {
1337                code: hypercall::TranslateGvaResultCode(code),
1338                event_info: Some(event_info),
1339            }),
1340        }
1341    }
1342
1343    fn inject_pending_event(&mut self, event_info: HvX64PendingEvent) {
1344        let regs = [
1345            (
1346                HvX64RegisterName::PendingEvent0,
1347                u128::from(event_info.reg_0),
1348            ),
1349            (
1350                HvX64RegisterName::PendingEvent1,
1351                u128::from(event_info.reg_1),
1352            ),
1353        ];
1354
1355        self.vp
1356            .runner
1357            .set_vp_registers_hvcall(self.vtl.into(), regs)
1358            .expect("set_vp_registers hypercall for setting pending event should not fail");
1359    }
1360
1361    fn monitor_support(&self) -> Option<&dyn EmulatorMonitorSupport> {
1362        Some(self)
1363    }
1364
1365    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
1366        self.vp.partition.is_gpa_mapped(gpa, write)
1367    }
1368
1369    fn lapic_base_address(&self) -> Option<u64> {
1370        None
1371    }
1372
1373    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
1374        unimplemented!()
1375    }
1376
1377    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
1378        unimplemented!()
1379    }
1380}
1381
1382impl<T: CpuIo> UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
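    // Hypercalls dispatched by this handler when the hypervisor forwards a
    // hypercall intercept; see `handle_hypercall_exit`.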
1383    const MSHV_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
1384        Self,
1385        [
1386            hv1_hypercall::HvPostMessage,
1387            hv1_hypercall::HvSignalEvent,
1388            hv1_hypercall::HvRetargetDeviceInterrupt,
1389            hv1_hypercall::HvGetVpIndexFromApicId,
1390            hv1_hypercall::HvSetVpRegisters,
1391            hv1_hypercall::HvModifyVtlProtectionMask
1392        ]
1393    );
1394}
1395
1396impl<T> hv1_hypercall::X64RegisterState for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1397    fn rip(&mut self) -> u64 {
1398        self.vp
1399            .runner
1400            .exit_message()
1401            .as_message::<HvX64InterceptMessageHeader>()
1402            .rip
1403    }
1404
1405    fn set_rip(&mut self, rip: u64) {
1406        self.vp.set_rip(self.intercepted_vtl, rip)
1407    }
1408
1409    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
1410        match n {
1411            hv1_hypercall::X64HypercallRegister::Rax => {
1412                self.vp.runner.cpu_context().gps[protocol::RAX]
1413            }
1414            hv1_hypercall::X64HypercallRegister::Rcx => {
1415                self.vp.runner.cpu_context().gps[protocol::RCX]
1416            }
1417            hv1_hypercall::X64HypercallRegister::Rdx => {
1418                self.vp.runner.cpu_context().gps[protocol::RDX]
1419            }
1420            hv1_hypercall::X64HypercallRegister::Rbx => {
1421                self.vp.runner.cpu_context().gps[protocol::RBX]
1422            }
1423            hv1_hypercall::X64HypercallRegister::Rsi => {
1424                self.vp.runner.cpu_context().gps[protocol::RSI]
1425            }
1426            hv1_hypercall::X64HypercallRegister::Rdi => {
1427                self.vp.runner.cpu_context().gps[protocol::RDI]
1428            }
1429            hv1_hypercall::X64HypercallRegister::R8 => {
1430                self.vp.runner.cpu_context().gps[protocol::R8]
1431            }
1432        }
1433    }
1434
1435    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
1436        *match n {
1437            hv1_hypercall::X64HypercallRegister::Rax => {
1438                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX]
1439            }
1440            hv1_hypercall::X64HypercallRegister::Rcx => {
1441                &mut self.vp.runner.cpu_context_mut().gps[protocol::RCX]
1442            }
1443            hv1_hypercall::X64HypercallRegister::Rdx => {
1444                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDX]
1445            }
1446            hv1_hypercall::X64HypercallRegister::Rbx => {
1447                &mut self.vp.runner.cpu_context_mut().gps[protocol::RBX]
1448            }
1449            hv1_hypercall::X64HypercallRegister::Rsi => {
1450                &mut self.vp.runner.cpu_context_mut().gps[protocol::RSI]
1451            }
1452            hv1_hypercall::X64HypercallRegister::Rdi => {
1453                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDI]
1454            }
1455            hv1_hypercall::X64HypercallRegister::R8 => {
1456                &mut self.vp.runner.cpu_context_mut().gps[protocol::R8]
1457            }
1458        } = value;
1459    }
1460
1461    fn xmm(&mut self, n: usize) -> u128 {
1462        u128::from_ne_bytes(self.vp.runner.cpu_context().fx_state.xmm[n])
1463    }
1464
1465    fn set_xmm(&mut self, n: usize, value: u128) {
1466        self.vp.runner.cpu_context_mut().fx_state.xmm[n] = value.to_ne_bytes();
1467    }
1468}
1469
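/// Conversion into the [`VpRegisterName`] alias used when reading and writing
/// VP registers for this backing.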
1470trait ToVpRegisterName: 'static + Copy + std::fmt::Debug {
1471    fn to_vp_reg_name(self) -> VpRegisterName;
1472}
1473
1474impl ToVpRegisterName for VpRegisterName {
1475    fn to_vp_reg_name(self) -> VpRegisterName {
1476        self
1477    }
1478}
1479
1480impl UhVpStateAccess<'_, '_, HypervisorBackedX86> {
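    /// Writes the register group described by `T` with a single
    /// `set_vp_registers` call, mapping each name through [`ToVpRegisterName`].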
1481    fn set_register_state<T, R: ToVpRegisterName, const N: usize>(
1482        &mut self,
1483        regs: &T,
1484    ) -> Result<(), vp_state::Error>
1485    where
1486        T: HvRegisterState<R, N>,
1487    {
1488        let names = regs.names().map(|r| r.to_vp_reg_name());
1489        let mut values = [HvRegisterValue::new_zeroed(); N];
1490        regs.get_values(values.iter_mut());
1491        self.vp
1492            .runner
1493            .set_vp_registers(self.vtl, names.iter().copied().zip(values))
1494            .map_err(vp_state::Error::SetRegisters)?;
1495        Ok(())
1496    }
1497
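    /// Reads the register group described by `T` with a single
    /// `get_vp_registers` call and packs the returned values back into `T`.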
1498    fn get_register_state<T, R: ToVpRegisterName, const N: usize>(
1499        &mut self,
1500    ) -> Result<T, vp_state::Error>
1501    where
1502        T: HvRegisterState<R, N>,
1503    {
1504        let mut regs = T::default();
1505        let names = regs.names().map(|r| r.to_vp_reg_name());
1506        let mut values = [HvRegisterValue::new_zeroed(); N];
1507        self.vp
1508            .runner
1509            .get_vp_registers(self.vtl, &names, &mut values)
1510            .map_err(vp_state::Error::GetRegisters)?;
1511
1512        regs.set_values(values.into_iter());
1513        Ok(regs)
1514    }
1515}
1516
1517impl AccessVpState for UhVpStateAccess<'_, '_, HypervisorBackedX86> {
1518    type Error = vp_state::Error;
1519
1520    fn caps(&self) -> &virt::x86::X86PartitionCapabilities {
1521        &self.vp.partition.caps
1522    }
1523
1524    fn commit(&mut self) -> Result<(), Self::Error> {
1525        Ok(())
1526    }
1527
1528    fn registers(&mut self) -> Result<vp::Registers, Self::Error> {
1529        self.get_register_state()
1530    }
1531
1532    fn set_registers(&mut self, value: &vp::Registers) -> Result<(), Self::Error> {
1533        self.set_register_state(value)
1534    }
1535
1536    fn activity(&mut self) -> Result<vp::Activity, Self::Error> {
1537        let activity: vp::Activity = self.get_register_state()?;
1538
1539        // TODO: Get the NMI pending bit from the APIC.
1540        // let apic = self.vp.whp(self.vtl).get_apic()?;
1541        // activity.nmi_pending = hv_apic_nmi_pending(&apic);
1542        Ok(activity)
1543    }
1544
1545    fn set_activity(&mut self, value: &vp::Activity) -> Result<(), Self::Error> {
1546        self.set_register_state(value)?;
1547
1548        // TODO: Set the NMI pending bit via the APIC.
1549        // let mut apic = self.vp.whp(self.vtl).get_apic()?;
1550        // set_hv_apic_nmi_pending(&mut apic, value.nmi_pending);
1551        // self.vp.whp(self.vtl).set_apic(&apic)?;
1552        Ok(())
1553    }
1554
1555    fn xsave(&mut self) -> Result<vp::Xsave, Self::Error> {
1556        // TODO: get the rest of the xsave state, not just the legacy FP state.
1557        //
1558        // This is just used for debugging, so this should not be a problem.
1559        #[repr(C)]
1560        #[derive(IntoBytes, Immutable, KnownLayout)]
1561        struct XsaveStandard {
1562            fxsave: Fxsave,
1563            xsave_header: XsaveHeader,
1564        }
1565        let state = XsaveStandard {
1566            fxsave: self.vp.runner.cpu_context().fx_state.clone(),
1567            xsave_header: XsaveHeader {
1568                xstate_bv: XFEATURE_X87 | XFEATURE_SSE,
1569                ..FromZeros::new_zeroed()
1570            },
1571        };
1572        Ok(vp::Xsave::from_standard(state.as_bytes(), self.caps()))
1573    }
1574
1575    fn set_xsave(&mut self, _value: &vp::Xsave) -> Result<(), Self::Error> {
1576        Err(vp_state::Error::Unimplemented("xsave"))
1577    }
1578
1579    fn apic(&mut self) -> Result<vp::Apic, Self::Error> {
1580        Err(vp_state::Error::Unimplemented("apic"))
1581    }
1582
1583    fn set_apic(&mut self, _value: &vp::Apic) -> Result<(), Self::Error> {
1584        Err(vp_state::Error::Unimplemented("apic"))
1585    }
1586
1587    fn xcr(&mut self) -> Result<vp::Xcr0, Self::Error> {
1588        self.get_register_state()
1589    }
1590
1591    fn set_xcr(&mut self, value: &vp::Xcr0) -> Result<(), Self::Error> {
1592        self.set_register_state(value)
1593    }
1594
1595    fn xss(&mut self) -> Result<vp::Xss, Self::Error> {
1596        self.get_register_state()
1597    }
1598
1599    fn set_xss(&mut self, value: &vp::Xss) -> Result<(), Self::Error> {
1600        self.set_register_state(value)
1601    }
1602
1603    fn mtrrs(&mut self) -> Result<vp::Mtrrs, Self::Error> {
1604        self.get_register_state()
1605    }
1606
1607    fn set_mtrrs(&mut self, cc: &vp::Mtrrs) -> Result<(), Self::Error> {
1608        self.set_register_state(cc)
1609    }
1610
1611    fn pat(&mut self) -> Result<vp::Pat, Self::Error> {
1612        self.get_register_state()
1613    }
1614
1615    fn set_pat(&mut self, value: &vp::Pat) -> Result<(), Self::Error> {
1616        self.set_register_state(value)
1617    }
1618
1619    fn virtual_msrs(&mut self) -> Result<vp::VirtualMsrs, Self::Error> {
1620        self.get_register_state()
1621    }
1622
1623    fn set_virtual_msrs(&mut self, msrs: &vp::VirtualMsrs) -> Result<(), Self::Error> {
1624        self.set_register_state(msrs)
1625    }
1626
1627    fn debug_regs(&mut self) -> Result<vp::DebugRegisters, Self::Error> {
1628        self.get_register_state()
1629    }
1630
1631    fn set_debug_regs(&mut self, value: &vp::DebugRegisters) -> Result<(), Self::Error> {
1632        self.set_register_state(value)
1633    }
1634
1635    fn tsc(&mut self) -> Result<vp::Tsc, Self::Error> {
1636        self.get_register_state()
1637    }
1638
1639    fn set_tsc(&mut self, value: &vp::Tsc) -> Result<(), Self::Error> {
1640        self.set_register_state(value)
1641    }
1642
1643    fn cet(&mut self) -> Result<vp::Cet, Self::Error> {
1644        self.get_register_state()
1645    }
1646
1647    fn set_cet(&mut self, value: &vp::Cet) -> Result<(), Self::Error> {
1648        self.set_register_state(value)
1649    }
1650
1651    fn cet_ss(&mut self) -> Result<vp::CetSs, Self::Error> {
1652        self.get_register_state()
1653    }
1654
1655    fn set_cet_ss(&mut self, value: &vp::CetSs) -> Result<(), Self::Error> {
1656        self.set_register_state(value)
1657    }
1658
1659    fn tsc_aux(&mut self) -> Result<vp::TscAux, Self::Error> {
1660        self.get_register_state()
1661    }
1662
1663    fn set_tsc_aux(&mut self, value: &vp::TscAux) -> Result<(), Self::Error> {
1664        self.set_register_state(value)
1665    }
1666
1667    fn synic_msrs(&mut self) -> Result<vp::SyntheticMsrs, Self::Error> {
1668        self.get_register_state()
1669    }
1670
1671    fn set_synic_msrs(&mut self, value: &vp::SyntheticMsrs) -> Result<(), Self::Error> {
1672        self.set_register_state(value)
1673    }
1674
1675    fn synic_timers(&mut self) -> Result<vp::SynicTimers, Self::Error> {
1676        Err(vp_state::Error::Unimplemented("synic_timers"))
1677    }
1678
1679    fn set_synic_timers(&mut self, _value: &vp::SynicTimers) -> Result<(), Self::Error> {
1680        Err(vp_state::Error::Unimplemented("synic_timers"))
1681    }
1682
1683    fn synic_message_queues(&mut self) -> Result<vp::SynicMessageQueues, Self::Error> {
1684        Ok(self.vp.inner.message_queues[self.vtl].save())
1685    }
1686
1687    fn set_synic_message_queues(
1688        &mut self,
1689        value: &vp::SynicMessageQueues,
1690    ) -> Result<(), Self::Error> {
1691        self.vp.inner.message_queues[self.vtl].restore(value);
1692        Ok(())
1693    }
1694
1695    fn synic_message_page(&mut self) -> Result<vp::SynicMessagePage, Self::Error> {
1696        Err(vp_state::Error::Unimplemented("synic_message_page"))
1697    }
1698
1699    fn set_synic_message_page(&mut self, _value: &vp::SynicMessagePage) -> Result<(), Self::Error> {
1700        Err(vp_state::Error::Unimplemented("synic_message_page"))
1701    }
1702
1703    fn synic_event_flags_page(&mut self) -> Result<vp::SynicEventFlagsPage, Self::Error> {
1704        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1705    }
1706
1707    fn set_synic_event_flags_page(
1708        &mut self,
1709        _value: &vp::SynicEventFlagsPage,
1710    ) -> Result<(), Self::Error> {
1711        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1712    }
1713}
1714
1715impl<T: CpuIo> hv1_hypercall::RetargetDeviceInterrupt
1716    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1717{
1718    fn retarget_interrupt(
1719        &mut self,
1720        device_id: u64,
1721        address: u64,
1722        data: u32,
1723        params: hv1_hypercall::HvInterruptParameters<'_>,
1724    ) -> hvdef::HvResult<()> {
1725        self.retarget_virtual_interrupt(
1726            device_id,
1727            address,
1728            data,
1729            params.vector,
1730            params.multicast,
1731            params.target_processors,
1732        )
1733    }
1734}
1735
1736impl<T> hv1_hypercall::SetVpRegisters for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1737    fn set_vp_registers(
1738        &mut self,
1739        partition_id: u64,
1740        vp_index: u32,
1741        vtl: Option<Vtl>,
1742        registers: &[hypercall::HvRegisterAssoc],
1743    ) -> HvRepResult {
1744        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1745            return Err((HvError::AccessDenied, 0));
1746        }
1747
1748        if vp_index != hvdef::HV_VP_INDEX_SELF && vp_index != self.vp.vp_index().index() {
1749            return Err((HvError::InvalidVpIndex, 0));
1750        }
1751
1752        let target_vtl = self
1753            .target_vtl_no_higher(vtl.unwrap_or(self.intercepted_vtl.into()))
1754            .map_err(|e| (e, 0))?;
1755
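        // Only VsmPartitionConfig is handled here; any other register fails the
        // hypercall with InvalidParameter at that element's index.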
1756        for (i, reg) in registers.iter().enumerate() {
1757            if reg.name == HvX64RegisterName::VsmPartitionConfig.into() {
1758                let value = HvRegisterVsmPartitionConfig::from(reg.value.as_u64());
1759                self.vp
1760                    .set_vsm_partition_config(target_vtl, value)
1761                    .map_err(|e| (e, i))?;
1762            } else {
1763                return Err((HvError::InvalidParameter, i));
1764            }
1765        }
1766
1767        Ok(())
1768    }
1769}
1770
1771impl<T> hv1_hypercall::ModifyVtlProtectionMask
1772    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1773{
1774    fn modify_vtl_protection_mask(
1775        &mut self,
1776        partition_id: u64,
1777        _map_flags: HvMapGpaFlags,
1778        target_vtl: Option<Vtl>,
1779        gpa_pages: &[u64],
1780    ) -> HvRepResult {
1781        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1782            return Err((HvError::AccessDenied, 0));
1783        }
1784
1785        let target_vtl = self
1786            .target_vtl_no_higher(target_vtl.unwrap_or(self.intercepted_vtl.into()))
1787            .map_err(|e| (e, 0))?;
1788        if target_vtl == GuestVtl::Vtl0 {
1789            return Err((HvError::InvalidParameter, 0));
1790        }
1791
1792        // A VTL cannot change its own VTL permissions until it has enabled VTL protection and
1793        // configured default permissions. Higher VTLs are not under this restriction (as they may
1794        // need to apply default permissions before VTL protection is enabled).
1795        if target_vtl == self.intercepted_vtl
1796            && !matches!(
1797                *self.vp.shared.guest_vsm.read(),
1798                GuestVsmState::Enabled {
1799                    vtl1: VbsIsolatedVtl1State {
1800                        enable_vtl_protection: true,
1801                        default_vtl_protections: Some(_),
1802                    },
1803                }
1804            )
1805        {
1806            return Err((HvError::AccessDenied, 0));
1807        }
1808
1809        // TODO VBS GUEST VSM: verify this logic is correct
1810        // TODO VBS GUEST VSM: validation on map_flags, similar to default
1811        // protections mask changes
1812        // An intercept can be received here when adjusting permissions and,
1813        // for isolated VMs, when the page is unaccepted.
1814        if self.vp.partition.isolation.is_isolated() {
1815            return Err((HvError::OperationDenied, 0));
1816        } else {
1817            if !gpa_pages.is_empty() {
1818                if !self.vp.partition.is_gpa_lower_vtl_ram(gpa_pages[0]) {
1819                    return Err((HvError::OperationDenied, 0));
1820                } else {
1821                    panic!("Should not be handling this hypercall for guest ram");
1822                }
1823            }
1824        }
1825
1826        Ok(())
1827    }
1828}
1829
1830mod save_restore {
1831    use super::HypervisorBackedX86;
1832    use super::UhProcessor;
1833    use anyhow::Context;
1834    use hcl::GuestVtl;
1835    use hvdef::HV_X64_MSR_GUEST_CRASH_CTL;
1836    use hvdef::HvInternalActivityRegister;
1837    use hvdef::HvX64RegisterName;
1838    use hvdef::Vtl;
1839    use virt::Processor;
1840    use virt::vp::AccessVpState;
1841    use virt::vp::Mtrrs;
1842    use vmcore::save_restore::RestoreError;
1843    use vmcore::save_restore::SaveError;
1844    use vmcore::save_restore::SaveRestore;
1845    use zerocopy::FromZeros;
1846    use zerocopy::IntoBytes;
1847
1848    mod state {
1849        use mesh::payload::Protobuf;
1850        use vmcore::save_restore::SavedStateRoot;
1851
1852        #[derive(Protobuf, SavedStateRoot)]
1853        #[mesh(package = "underhill.partition")]
1854        pub struct ProcessorSavedState {
1855            #[mesh(1)]
1856            pub(super) rax: u64,
1857            #[mesh(2)]
1858            pub(super) rcx: u64,
1859            #[mesh(3)]
1860            pub(super) rdx: u64,
1861            #[mesh(4)]
1862            pub(super) rbx: u64,
1863            #[mesh(5)]
1864            pub(super) cr2: u64,
1865            #[mesh(6)]
1866            pub(super) rbp: u64,
1867            #[mesh(7)]
1868            pub(super) rsi: u64,
1869            #[mesh(8)]
1870            pub(super) rdi: u64,
1871            #[mesh(9)]
1872            pub(super) r8: u64,
1873            #[mesh(10)]
1874            pub(super) r9: u64,
1875            #[mesh(11)]
1876            pub(super) r10: u64,
1877            #[mesh(12)]
1878            pub(super) r11: u64,
1879            #[mesh(13)]
1880            pub(super) r12: u64,
1881            #[mesh(14)]
1882            pub(super) r13: u64,
1883            #[mesh(15)]
1884            pub(super) r14: u64,
1885            #[mesh(16)]
1886            pub(super) r15: u64,
1887            #[mesh(17)]
1888            pub(super) fx_state: Vec<u8>,
1889            #[mesh(18)]
1890            pub(super) dr0: u64,
1891            #[mesh(19)]
1892            pub(super) dr1: u64,
1893            #[mesh(20)]
1894            pub(super) dr2: u64,
1895            #[mesh(21)]
1896            pub(super) dr3: u64,
1897
1898            /// Only set when the DR6_SHARED capability is present
1899            #[mesh(22)]
1900            pub(super) dr6: Option<u64>,
1901
1902            /// Whether VTL0 should be in the startup suspend state. Older Underhill
1903            /// versions do not save this property, so maintain the old, buggy
1904            /// behavior for cases where it is not present in the saved state.
1905            #[mesh(23)]
1906            pub(super) startup_suspend: Option<bool>,
1907
1908            #[mesh(24)]
1909            pub(super) crash_reg: Option<[u64; 5]>,
1910
1911            /// This value is ignored going forward, but may still be read by downlevel
1912            /// versions.
1913            #[mesh(25)]
1914            pub(super) crash_control: u64,
1915
1916            #[mesh(26)]
1917            pub(super) msr_mtrr_def_type: u64,
1918            #[mesh(27)]
1919            pub(super) fixed_mtrrs: Option<[u64; 11]>,
1920            #[mesh(28)]
1921            pub(super) variable_mtrrs: Option<[u64; 16]>,
1922            #[mesh(29)]
1923            pub(super) per_vtl: Vec<ProcessorVtlSavedState>,
1924        }
1925
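        /// Per-VTL processor state, saved in VTL order (VTL0 then VTL1).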
1926        #[derive(Protobuf, SavedStateRoot)]
1927        #[mesh(package = "underhill.partition")]
1928        pub struct ProcessorVtlSavedState {
1929            #[mesh(1)]
1930            pub(super) message_queue: virt::vp::SynicMessageQueues,
1931        }
1932    }
1933
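    /// Debug registers are shared across VTLs, so they are saved and restored
    /// directly rather than as per-VTL state. Dr6 must stay last so it can be
    /// sliced off when the DR6_SHARED capability is not present.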
1934    const SHARED_REGISTERS: &[HvX64RegisterName] = &[
1935        HvX64RegisterName::Dr0,
1936        HvX64RegisterName::Dr1,
1937        HvX64RegisterName::Dr2,
1938        HvX64RegisterName::Dr3,
1939        HvX64RegisterName::Dr6, // must be last
1940    ];
1941
1942    impl SaveRestore for UhProcessor<'_, HypervisorBackedX86> {
1943        type SavedState = state::ProcessorSavedState;
1944
1945        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
1946            // Ensure all async requests are reflected in the saved state.
1947            self.flush_async_requests();
1948
1949            let dr6_shared = self.partition.hcl.dr6_shared();
1950            let mut values = [FromZeros::new_zeroed(); SHARED_REGISTERS.len()];
1951            let len = if dr6_shared {
1952                SHARED_REGISTERS.len()
1953            } else {
1954                SHARED_REGISTERS.len() - 1
1955            };
1956
1957            self.runner
1958                // All these registers are shared, so the VTL we ask for doesn't matter
1959                .get_vp_registers(GuestVtl::Vtl0, &SHARED_REGISTERS[..len], &mut values[..len])
1960                .context("failed to get shared registers")
1961                .map_err(SaveError::Other)?;
1962
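            // The cpu_context gp array uses the fixed protocol layout
            // (RAX, RCX, RDX, RBX, CR2, RBP, RSI, RDI, R8-R15), so destructure
            // it positionally into the individual saved-state fields.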
1963            let [
1964                rax,
1965                rcx,
1966                rdx,
1967                rbx,
1968                cr2,
1969                rbp,
1970                rsi,
1971                rdi,
1972                r8,
1973                r9,
1974                r10,
1975                r11,
1976                r12,
1977                r13,
1978                r14,
1979                r15,
1980            ] = self.runner.cpu_context().gps;
1981
1982            // We are responsible for saving shared MSRs too, but other than
1983            // the MTRRs, all shared MSRs are read-only, so this is all we need to save.
1984            let Mtrrs {
1985                msr_mtrr_def_type,
1986                fixed: fixed_mtrrs,
1987                variable: variable_mtrrs,
1988            } = self
1989                // MTRRs are shared, so it doesn't matter which VTL we ask for.
1990                .access_state(Vtl::Vtl0)
1991                .mtrrs()
1992                .context("failed to get MTRRs")
1993                .map_err(SaveError::Other)?;
1994
1995            // This value is ignored during restore, but may still be read by downlevel
1996            // versions. Set it to the correct hardcoded read value as a best effort for them.
1997            let crash_control = self
1998                .read_crash_msr(HV_X64_MSR_GUEST_CRASH_CTL, GuestVtl::Vtl0)
1999                .unwrap();
2000
2001            let UhProcessor {
2002                _not_send,
2003                inner:
2004                    crate::UhVpInner {
2005                        // Saved
2006                        message_queues,
2007                        // Sidecar state is reset during servicing
2008                        sidecar_exit_reason: _,
2009                        // Will be cleared by flush_async_requests above
2010                        wake_reasons: _,
2011                        // Runtime glue
2012                        waker: _,
2013                        // Topology information
2014                        vp_info: _,
2015                        cpu_index: _,
2016                    },
2017                // Saved
2018                crash_reg,
2019                // Runtime glue
2020                partition: _,
2021                idle_control: _,
2022                vmtime: _,
2023                timer: _,
2024                // This field is only used in dev/test scenarios
2025                force_exit_sidecar: _,
2026                signaled_sidecar_exit: _,
2027                // Just caching the hypervisor value, let it handle saving
2028                vtls_tlb_locked: _,
2029                // Statistic that should reset to 0 on restore
2030                kernel_returns: _,
2031                // Shared state should be handled by the backing
2032                shared: _,
2033                // The runner doesn't hold anything needing saving
2034                runner: _,
2035                backing:
2036                    HypervisorBackedX86 {
2037                        deliverability_notifications: _,
2038                        next_deliverability_notifications: _,
2039                        stats: _,
2040                        deferred_init,
2041                    },
2042                // Currently only meaningful for CVMs
2043                exit_activities: _,
2044            } = *self;
2045
2046            // Non-VTL0 VPs should never be in startup suspend, so we only need to check VTL0.
2047            // The hypervisor handles halt and idle for us.
2048            let startup_suspend = if deferred_init {
2049                Some(true)
2050            } else {
2051                let internal_activity = self
2052                    .runner
2053                    .get_vp_register(GuestVtl::Vtl0, HvX64RegisterName::InternalActivityState)
2054                    .inspect_err(|e| {
2055                        // The ioctl get_vp_register path does not tell us
2056                        // hv_status directly, so just log if it failed for any
2057                        // reason.
2058                        tracing::warn!(
2059                            error = e as &dyn std::error::Error,
2060                            "unable to query startup suspend; VTL0 startup suspend state will not be saved"
2061                        );
2062                    })
2063                    .ok();
2064
2065                internal_activity
2066                    .map(|a| HvInternalActivityRegister::from(a.as_u64()).startup_suspend())
2067            };
2068
2069            let per_vtl = [GuestVtl::Vtl0, GuestVtl::Vtl1]
2070                .map(|vtl| state::ProcessorVtlSavedState {
2071                    message_queue: message_queues[vtl].save(),
2072                })
2073                .into();
2074
2075            let state = state::ProcessorSavedState {
2076                rax,
2077                rcx,
2078                rdx,
2079                rbx,
2080                cr2,
2081                rbp,
2082                rsi,
2083                rdi,
2084                r8,
2085                r9,
2086                r10,
2087                r11,
2088                r12,
2089                r13,
2090                r14,
2091                r15,
2092                fx_state: self.runner.cpu_context().fx_state.as_bytes().to_vec(),
2093                dr0: values[0].as_u64(),
2094                dr1: values[1].as_u64(),
2095                dr2: values[2].as_u64(),
2096                dr3: values[3].as_u64(),
2097                dr6: dr6_shared.then(|| values[4].as_u64()),
2098                startup_suspend,
2099                crash_reg: Some(crash_reg),
2100                crash_control,
2101                msr_mtrr_def_type,
2102                fixed_mtrrs: Some(fixed_mtrrs),
2103                variable_mtrrs: Some(variable_mtrrs),
2104                per_vtl,
2105            };
2106
2107            Ok(state)
2108        }
2109
2110        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
2111            let state::ProcessorSavedState {
2112                rax,
2113                rcx,
2114                rdx,
2115                rbx,
2116                cr2,
2117                rbp,
2118                rsi,
2119                rdi,
2120                r8,
2121                r9,
2122                r10,
2123                r11,
2124                r12,
2125                r13,
2126                r14,
2127                r15,
2128                fx_state,
2129                dr0,
2130                dr1,
2131                dr2,
2132                dr3,
2133                dr6,
2134                startup_suspend,
2135                crash_reg,
2136                crash_control: _crash_control,
2137                msr_mtrr_def_type,
2138                fixed_mtrrs,
2139                variable_mtrrs,
2140                per_vtl,
2141            } = state;
2142
2143            let dr6_shared = self.partition.hcl.dr6_shared();
2144            self.runner.cpu_context_mut().gps = [
2145                rax, rcx, rdx, rbx, cr2, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
2146            ];
2147            if fx_state.len() != self.runner.cpu_context().fx_state.as_bytes().len() {
2148                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2149                    "invalid fpu state"
2150                )));
2151            }
2152            if dr6_shared != dr6.is_some() {
2153                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2154                    "dr6 state mismatch"
2155                )));
2156            }
2157
2158            let len = if dr6_shared {
2159                SHARED_REGISTERS.len()
2160            } else {
2161                SHARED_REGISTERS.len() - 1
2162            };
2163
2164            let values = [dr0, dr1, dr2, dr3, dr6.unwrap_or(0)];
2165            self.runner
2166                .set_vp_registers(
2167                    GuestVtl::Vtl0,
2168                    SHARED_REGISTERS[..len].iter().copied().zip(values),
2169                )
2170                .context("failed to set shared registers")
2171                .map_err(RestoreError::Other)?;
2172
2173            self.runner
2174                .cpu_context_mut()
2175                .fx_state
2176                .as_mut_bytes()
2177                .copy_from_slice(&fx_state);
2178
2179            self.crash_reg = crash_reg.unwrap_or_default();
2180
2181            // Previous versions of Underhill did not save the MTRRs. If they are
2182            // missing from the restore state, assume they were never saved and
2183            // leave whatever the system already has rather than zeroing it out.
2184            if let (Some(fixed), Some(variable)) = (fixed_mtrrs, variable_mtrrs) {
2185                let mut access = self.access_state(Vtl::Vtl0);
2186                access
2187                    .set_mtrrs(&Mtrrs {
2188                        msr_mtrr_def_type,
2189                        fixed,
2190                        variable,
2191                    })
2192                    .context("failed to set MTRRs")
2193                    .map_err(RestoreError::Other)?;
2194            }
2195
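            // per_vtl entries were saved in VTL order, so restore each VTL's
            // synic message queues by index.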
2196            for (per, vtl) in per_vtl.into_iter().zip(0u8..) {
2197                let vtl = GuestVtl::try_from(vtl)
2198                    .context("too many vtls")
2199                    .map_err(RestoreError::Other)?;
2200                self.inner.message_queues[vtl].restore(&per.message_queue);
2201            }
2202
2203            let startup_suspend = match startup_suspend {
2204                Some(true) => {
2205                    // When Underhill brings up APs via hypercall during a
2206                    // servicing update, the hypercall clears the VTL0 startup
2207                    // suspend state and makes the VP runnable. As on the cold
2208                    // boot path, put the AP back into the startup suspend state
2209                    // so that the VP does not incorrectly start running.
2210                    true
2211                }
2212                None if !self.vp_index().is_bsp() => {
2213                    // Previous versions of Underhill did not save this value,
2214                    // which means the VM could be in a bad state if it is being
2215                    // serviced before VTL0 brings up the APs. Log the relevant
2216                    // register state so that situation can be diagnosed.
2217                    const NAMES: [HvX64RegisterName; 4] = [
2218                        HvX64RegisterName::Rip,
2219                        HvX64RegisterName::Rflags,
2220                        HvX64RegisterName::Cr0,
2221                        HvX64RegisterName::Efer,
2222                    ];
2223                    let mut values = [FromZeros::new_zeroed(); NAMES.len()];
2224                    self.runner
2225                        // Non-VTL0 VPs should never be in startup suspend, so we only need to handle VTL0.
2226                        .get_vp_registers(GuestVtl::Vtl0, &NAMES, &mut values)
2227                        .context("failed to get VP registers for startup suspend log")
2228                        .map_err(RestoreError::Other)?;
2229                    let [rip, rflags, cr0, efer] = values.map(|reg| reg.as_u64());
2230
2231                    tracing::error!(
2232                        vp_index = self.vp_index().index(),
2233                        rip,
2234                        rflags,
2235                        cr0,
2236                        efer,
2237                        "previous version of underhill did not save startup_suspend state"
2238                    );
2239
2240                    false
2241                }
2242                Some(false) | None => false,
2243            };
2244
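            // Try to set VTL0 startup suspend directly; if that fails, record
            // the request as deferred init so it can still be applied before
            // the VP runs.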
2245            self.backing.deferred_init = match self.set_vtl0_startup_suspend(startup_suspend) {
2246                Ok(()) => false,
2247                Err(e) => {
2248                    if startup_suspend {
2249                        tracing::warn!(
2250                            error = &e as &dyn std::error::Error,
2251                            "unable to set internal activity register, falling back to deferred init"
2252                        );
2253                    }
2254                    startup_suspend
2255                }
2256            };
2257            Ok(())
2258        }
2259    }
2260}