virt_mshv_vtl/processor/mshv/x64.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! X64 Processor support for Microsoft hypervisor-backed partitions.
5
6#![cfg(guest_arch = "x86_64")]
7
8type VpRegisterName = HvX64RegisterName;
9
10use super::super::BackingParams;
11use super::super::BackingPrivate;
12use super::super::UhEmulationState;
13use super::super::signal_mnf;
14use super::super::vp_state;
15use super::super::vp_state::UhVpStateAccess;
16use super::MshvRunVpError;
17use super::VbsIsolatedVtl1State;
18use crate::BackingShared;
19use crate::Error;
20use crate::GuestVsmState;
21use crate::GuestVtl;
22use crate::UhPartitionNewParams;
23use crate::processor::BackingSharedParams;
24use crate::processor::SidecarExitReason;
25use crate::processor::SidecarRemoveExit;
26use crate::processor::UhHypercallHandler;
27use crate::processor::UhProcessor;
28use crate::validate_vtl_gpa_flags;
29use hcl::ioctl;
30use hcl::ioctl::ApplyVtlProtectionsError;
31use hcl::ioctl::x64::MshvX64;
32use hcl::protocol;
33use hv1_emulator::hv::ProcessorVtlHv;
34use hv1_hypercall::HvRepResult;
35use hv1_structs::VtlSet;
36use hvdef::HV_PAGE_SIZE;
37use hvdef::HvDeliverabilityNotificationsRegister;
38use hvdef::HvError;
39use hvdef::HvInterceptAccessType;
40use hvdef::HvMapGpaFlags;
41use hvdef::HvMessageType;
42use hvdef::HvRegisterValue;
43use hvdef::HvRegisterVsmPartitionConfig;
44use hvdef::HvX64InterceptMessageHeader;
45use hvdef::HvX64PendingEvent;
46use hvdef::HvX64PendingInterruptionType;
47use hvdef::HvX64RegisterName;
48use hvdef::Vtl;
49use hvdef::hypercall;
50use inspect::Inspect;
51use inspect::InspectMut;
52use inspect_counters::Counter;
53use parking_lot::RwLock;
54use std::sync::atomic::Ordering::Relaxed;
55use virt::EmulatorMonitorSupport;
56use virt::StopVp;
57use virt::VpHaltReason;
58use virt::VpIndex;
59use virt::io::CpuIo;
60use virt::state::HvRegisterState;
61use virt::state::StateElement;
62use virt::vp;
63use virt::vp::AccessVpState;
64use virt::x86::MsrError;
65use virt_support_x86emu::emulate::EmuCheckVtlAccessError;
66use virt_support_x86emu::emulate::EmuTranslateError;
67use virt_support_x86emu::emulate::EmuTranslateResult;
68use virt_support_x86emu::emulate::EmulatorSupport;
69use x86defs::RFlags;
70use x86defs::SegmentRegister;
71use x86defs::xsave::Fxsave;
72use x86defs::xsave::XFEATURE_SSE;
73use x86defs::xsave::XFEATURE_X87;
74use x86defs::xsave::XsaveHeader;
75use zerocopy::FromZeros;
76use zerocopy::Immutable;
77use zerocopy::IntoBytes;
78use zerocopy::KnownLayout;
79
80/// A backing for hypervisor-backed partitions (non-isolated and
81/// software-isolated).
82#[derive(InspectMut)]
83pub struct HypervisorBackedX86 {
84    // VTL0 only, used for synic message and extint readiness notifications.
85    // We do not currently support synic message ports or extint interrupts for VTL1.
86    #[inspect(hex, with = "|&x| u64::from(x)")]
87    deliverability_notifications: HvDeliverabilityNotificationsRegister,
88    /// Next set of deliverability notifications. See register definition for details.
89    #[inspect(hex, with = "|&x| u64::from(x)")]
90    pub(super) next_deliverability_notifications: HvDeliverabilityNotificationsRegister,
91    stats: ProcessorStatsX86,
92    /// Send an INIT to VTL0 before running the VP, to simulate setting startup
93    /// suspend. Newer hypervisors allow setting startup suspend explicitly.
94    deferred_init: bool,
95}
96
/// Partition-wide shared data for hypervisor-backed VMs.
98#[derive(Inspect)]
99pub struct HypervisorBackedX86Shared {
100    pub(crate) guest_vsm: RwLock<GuestVsmState<VbsIsolatedVtl1State>>,
101}
102
103impl HypervisorBackedX86Shared {
    /// Creates a new partition-shared data structure for hypervisor-backed VMs.
105    pub(crate) fn new(
106        _partition_params: &UhPartitionNewParams<'_>,
107        params: BackingSharedParams<'_>,
108    ) -> Result<Self, Error> {
109        Ok(Self {
110            guest_vsm: RwLock::new(GuestVsmState::from_availability(params.guest_vsm_available)),
111        })
112    }
113}
114
115#[derive(Inspect, Default)]
116struct ProcessorStatsX86 {
117    io_port: Counter,
118    mmio: Counter,
119    unaccepted_gpa: Counter,
120    hypercall: Counter,
121    synic_deliverable: Counter,
122    interrupt_deliverable: Counter,
123    cpuid: Counter,
124    msr: Counter,
125    eoi: Counter,
126    unrecoverable_exception: Counter,
127    halt: Counter,
128    exception_intercept: Counter,
129}
130
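/// Cache of registers loaded eagerly at the start of instruction emulation.
/// These are always needed by the emulator and can be fetched in a single
/// batched register query (see `UhProcessor::emulation_cache`).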
131pub struct MshvEmulationCache {
132    rsp: u64,
133    es: SegmentRegister,
134    ds: SegmentRegister,
135    fs: SegmentRegister,
136    gs: SegmentRegister,
137    ss: SegmentRegister,
138    cr0: u64,
139    efer: u64,
140    rip: u64,
141    rflags: RFlags,
142}
143
144#[expect(private_interfaces)]
145impl BackingPrivate for HypervisorBackedX86 {
146    type HclBacking<'mshv> = MshvX64<'mshv>;
147    type Shared = HypervisorBackedX86Shared;
148    type EmulationCache = MshvEmulationCache;
149
150    fn shared(shared: &BackingShared) -> &Self::Shared {
151        let BackingShared::Hypervisor(shared) = shared else {
152            unreachable!()
153        };
154        shared
155    }
156
157    fn new(
158        params: BackingParams<'_, '_, Self>,
159        _shared: &HypervisorBackedX86Shared,
160    ) -> Result<Self, Error> {
161        // Initialize shared register state to architectural state. The kernel
162        // zero initializes this.
163        //
164        // When restoring, this will be overwritten, but it's not expensive
165        // enough to bother skipping.
166        let regs = vp::Registers::at_reset(&params.partition.caps, params.vp_info);
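        // The shared CPU context's GP array follows the protocol register
        // ordering (protocol::RAX, RCX, RDX, RBX, CR2, RBP, RSI, RDI,
        // R8..R15); note that slot 4 holds CR2 rather than RSP.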
167        *params.runner.cpu_context_mut() = protocol::hcl_cpu_context_x64 {
168            gps: [
169                regs.rax, regs.rcx, regs.rdx, regs.rbx, 0, /* cr2 */
170                regs.rbp, regs.rsi, regs.rdi, regs.r8, regs.r9, regs.r10, regs.r11, regs.r12,
171                regs.r13, regs.r14, regs.r15,
172            ],
173            fx_state: vp::Xsave::at_reset(&params.partition.caps, params.vp_info).fxsave(),
174            reserved: [0; 384],
175        };
176
177        Ok(Self {
178            deliverability_notifications: Default::default(),
179            next_deliverability_notifications: Default::default(),
180            stats: Default::default(),
181            deferred_init: false,
182        })
183    }
184
185    fn init(this: &mut UhProcessor<'_, Self>) {
186        // The hypervisor initializes startup suspend to false. Set it to the
187        // architectural default.
188        if !this.vp_index().is_bsp() {
189            this.backing.deferred_init = true;
190        }
191    }
192
193    type StateAccess<'p, 'a>
194        = UhVpStateAccess<'a, 'p, Self>
195    where
196        Self: 'a + 'p,
197        'p: 'a;
198
199    fn access_vp_state<'a, 'p>(
200        this: &'a mut UhProcessor<'p, Self>,
201        vtl: GuestVtl,
202    ) -> Self::StateAccess<'p, 'a> {
203        UhVpStateAccess::new(this, vtl)
204    }
205
206    fn pre_run_vp(this: &mut UhProcessor<'_, Self>) {
207        if std::mem::take(&mut this.backing.deferred_init) {
208            tracelimit::info_ratelimited!(
209                vp = this.vp_index().index(),
210                "sending deferred INIT to set startup suspend"
211            );
212            this.partition.request_msi(
213                GuestVtl::Vtl0,
214                virt::irqcon::MsiRequest::new_x86(
215                    virt::irqcon::DeliveryMode::INIT,
216                    this.inner.vp_info.apic_id,
217                    false,
218                    0,
219                    true,
220                ),
221            );
222        }
223    }
224
225    async fn run_vp(
226        this: &mut UhProcessor<'_, Self>,
227        dev: &impl CpuIo,
228        stop: &mut StopVp<'_>,
229    ) -> Result<(), VpHaltReason> {
230        if this.backing.deliverability_notifications
231            != this.backing.next_deliverability_notifications
232        {
233            let notifications = this.backing.next_deliverability_notifications;
234            tracing::trace!(?notifications, "setting notifications");
235            this.runner
236                .set_vp_register(
237                    // TODO GUEST VSM
238                    GuestVtl::Vtl0,
239                    VpRegisterName::DeliverabilityNotifications,
240                    u64::from(notifications).into(),
241                )
                .expect("requesting deliverability is not a fallible operation");
243            this.backing.deliverability_notifications =
244                this.backing.next_deliverability_notifications;
245        }
246
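        // Run the VP until an intercept or cancellation. Sidecar VPs run on
        // the sidecar kernel; all other VPs run via the regular run ioctl.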
247        let intercepted = if this.runner.is_sidecar() {
248            let mut run = this
249                .runner
250                .run_sidecar()
251                .map_err(|e| VpHaltReason::InvalidVmState(e.into()))?;
252            match stop.until_stop(run.wait()).await {
253                Ok(r) => r,
254                Err(stop) => {
255                    run.cancel();
256                    let r = run.wait().await;
257                    if matches!(r, Ok(false)) {
258                        // No intercept, so stop the VP.
259                        return Err(stop.into());
260                    }
261                    r
262                }
263            }
264            .map_err(|e| VpHaltReason::InvalidVmState(ioctl::Error::Sidecar(e).into()))?
265        } else {
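            // Release the VTL 2 TLB lock (if held from a previous exit's
            // emulation) before resuming the guest.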
266            this.unlock_tlb_lock(Vtl::Vtl2);
267            this.runner
268                .run()
269                .map_err(|e| VpHaltReason::Hypervisor(MshvRunVpError(e).into()))?
270        };
271
272        if intercepted {
273            let message_type = this.runner.exit_message().header.typ;
274
275            let mut intercept_handler =
276                InterceptHandler::new(this).map_err(|e| VpHaltReason::InvalidVmState(e.into()))?;
277
278            let stat = match message_type {
279                HvMessageType::HvMessageTypeX64IoPortIntercept => {
280                    intercept_handler.handle_io_port_exit(dev).await?;
281                    &mut this.backing.stats.io_port
282                }
283                HvMessageType::HvMessageTypeUnmappedGpa
284                | HvMessageType::HvMessageTypeGpaIntercept => {
285                    intercept_handler.handle_mmio_exit(dev).await?;
286                    &mut this.backing.stats.mmio
287                }
288                HvMessageType::HvMessageTypeUnacceptedGpa => {
289                    intercept_handler
290                        .handle_unaccepted_gpa_intercept(dev)
291                        .await?;
292                    &mut this.backing.stats.unaccepted_gpa
293                }
294                HvMessageType::HvMessageTypeHypercallIntercept => {
295                    intercept_handler.handle_hypercall_exit(dev);
296                    &mut this.backing.stats.hypercall
297                }
298                HvMessageType::HvMessageTypeSynicSintDeliverable => {
299                    intercept_handler.handle_synic_deliverable_exit();
300                    &mut this.backing.stats.synic_deliverable
301                }
302                HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
303                    intercept_handler.handle_interrupt_deliverable_exit(dev)?;
304                    &mut this.backing.stats.interrupt_deliverable
305                }
306                HvMessageType::HvMessageTypeX64CpuidIntercept => {
307                    intercept_handler.handle_cpuid_intercept();
308                    &mut this.backing.stats.cpuid
309                }
310                HvMessageType::HvMessageTypeMsrIntercept => {
311                    intercept_handler.handle_msr_intercept();
312                    &mut this.backing.stats.msr
313                }
314                HvMessageType::HvMessageTypeX64ApicEoi => {
315                    intercept_handler.handle_eoi(dev);
316                    &mut this.backing.stats.eoi
317                }
318                HvMessageType::HvMessageTypeUnrecoverableException => {
319                    intercept_handler.handle_unrecoverable_exception()?;
320                    &mut this.backing.stats.unrecoverable_exception
321                }
322                HvMessageType::HvMessageTypeExceptionIntercept => {
323                    intercept_handler.handle_exception()?;
324                    &mut this.backing.stats.exception_intercept
325                }
326                reason => unreachable!("unknown exit reason: {:#x?}", reason),
327            };
328            stat.increment();
329
330            if this.runner.is_sidecar()
331                && !this.signaled_sidecar_exit
332                && !this.partition.no_sidecar_hotplug.load(Relaxed)
333            {
334                // We got and handled an exit and this is a sidecar VP. Cancel
335                // the run so that we can move the sidecar VP over to the main
336                // kernel and handle future exits there.
337                //
338                // This is not strictly necessary--we can continue to run the VP
339                // in the sidecar kernel. But since we have received at least
340                // one exit, we can expect that we will receive more, and
341                // handling the exits remotely introduces jitter.
342                let message = this.runner.exit_message();
343                this.inner
344                    .set_sidecar_exit_reason(SidecarExitReason::Exit(parse_sidecar_exit(message)));
345                this.signaled_sidecar_exit = true;
346                return Err(VpHaltReason::Cancel);
347            }
348        }
349        Ok(())
350    }
351
352    fn poll_apic(_this: &mut UhProcessor<'_, Self>, _vtl: GuestVtl, _scan_irr: bool) {}
353
354    fn process_interrupts(
355        _this: &mut UhProcessor<'_, Self>,
356        _scan_irr: hv1_structs::VtlArray<bool, 2>,
357        _first_scan_irr: &mut bool,
358        _dev: &impl CpuIo,
359    ) -> bool {
360        false
361    }
362
363    fn request_extint_readiness(this: &mut UhProcessor<'_, Self>) {
364        this.backing
365            .next_deliverability_notifications
366            .set_interrupt_notification(true);
367    }
368
369    fn request_untrusted_sint_readiness(this: &mut UhProcessor<'_, Self>, sints: u16) {
370        this.backing
371            .next_deliverability_notifications
372            .set_sints(this.backing.next_deliverability_notifications.sints() | sints);
373    }
374
375    fn hv(&self, _vtl: GuestVtl) -> Option<&ProcessorVtlHv> {
376        None
377    }
378
379    fn hv_mut(&mut self, _vtl: GuestVtl) -> Option<&mut ProcessorVtlHv> {
380        None
381    }
382
383    fn handle_vp_start_enable_vtl_wake(_this: &mut UhProcessor<'_, Self>, _vtl: GuestVtl) {
384        unimplemented!()
385    }
386
387    fn vtl1_inspectable(_this: &UhProcessor<'_, Self>) -> bool {
388        // TODO: Use the VsmVpStatus register to query the hypervisor for
389        // whether VTL 1 is enabled on the vp (this can be cached).
390        false
391    }
392}
393
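/// Parses the exit message into a `SidecarRemoveExit` describing why the VP is
/// being moved off the sidecar kernel.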
394fn parse_sidecar_exit(message: &hvdef::HvMessage) -> SidecarRemoveExit {
395    match message.header.typ {
396        HvMessageType::HvMessageTypeX64IoPortIntercept => {
397            let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
398            SidecarRemoveExit::Io {
399                port: message.port_number,
400                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
401            }
402        }
403        HvMessageType::HvMessageTypeUnmappedGpa | HvMessageType::HvMessageTypeGpaIntercept => {
404            let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
405            SidecarRemoveExit::Mmio {
406                gpa: message.guest_physical_address,
407                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
408            }
409        }
410        HvMessageType::HvMessageTypeHypercallIntercept => {
411            let message = message.as_message::<hvdef::HvX64HypercallInterceptMessage>();
412            let is_64bit = message.header.execution_state.cr0_pe()
413                && message.header.execution_state.efer_lma();
414            let control = if is_64bit {
415                message.rcx
416            } else {
417                (message.rdx << 32) | (message.rax as u32 as u64)
418            };
419            SidecarRemoveExit::Hypercall {
420                code: hvdef::HypercallCode(hypercall::Control::from(control).code()),
421            }
422        }
423        HvMessageType::HvMessageTypeX64CpuidIntercept => {
424            let message = message.as_message::<hvdef::HvX64CpuidInterceptMessage>();
425            SidecarRemoveExit::Cpuid {
426                leaf: message.rax as u32,
427                subleaf: message.rcx as u32,
428            }
429        }
430        HvMessageType::HvMessageTypeMsrIntercept => {
431            let message = message.as_message::<hvdef::HvX64MsrInterceptMessage>();
432            SidecarRemoveExit::Msr {
433                msr: message.msr_number,
434                value: (message.header.intercept_access_type == HvInterceptAccessType::WRITE)
435                    .then_some((message.rdx << 32) | message.rax as u32 as u64),
436            }
437        }
438        typ => SidecarRemoveExit::Hypervisor { message: typ },
439    }
440}
441
442fn next_rip(value: &HvX64InterceptMessageHeader) -> u64 {
443    value.rip.wrapping_add(value.instruction_len() as u64)
444}
445
446struct InterceptHandler<'a, 'b> {
447    vp: &'a mut UhProcessor<'b, HypervisorBackedX86>,
448    intercepted_vtl: GuestVtl,
449}
450
451#[derive(Debug, Error)]
452#[error("invalid intercepted vtl {0:?}")]
453struct InvalidInterceptedVtl(u8);
454
455#[derive(Debug, Error)]
456#[error("guest accessed unaccepted gpa {0}")]
457struct UnacceptedMemoryAccess(u64);
458
459impl<'a, 'b> InterceptHandler<'a, 'b> {
460    fn new(
461        vp: &'a mut UhProcessor<'b, HypervisorBackedX86>,
462    ) -> Result<Self, InvalidInterceptedVtl> {
463        let message_type = vp.runner.exit_message().header.typ;
464
465        let intercepted_vtl = match vp.runner.reg_page_vtl() {
466            Ok(vtl) => vtl,
467            Err(ioctl::x64::RegisterPageVtlError::InvalidVtl(vtl)) => {
468                return Err(InvalidInterceptedVtl(vtl));
469            }
470            Err(ioctl::x64::RegisterPageVtlError::NoRegisterPage) => {
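                // No register page is available, so determine the intercepted
                // VTL from the exit message instead.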
471                if matches!(&message_type, &HvMessageType::HvMessageTypeX64ApicEoi) {
472                    // At the moment this is only used for the ioapic, so assume
473                    // that this is targeting VTL 0 for now. TODO: fix
474                    GuestVtl::Vtl0
475                } else {
476                    let message_header = match &message_type {
477                        &HvMessageType::HvMessageTypeX64IoPortIntercept => {
478                            &vp.runner
479                                .exit_message()
480                                .as_message::<hvdef::HvX64IoPortInterceptMessage>()
481                                .header
482                        }
483                        &HvMessageType::HvMessageTypeUnmappedGpa
484                        | &HvMessageType::HvMessageTypeGpaIntercept => {
485                            &vp.runner
486                                .exit_message()
487                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
488                                .header
489                        }
490                        &HvMessageType::HvMessageTypeUnacceptedGpa => {
491                            &vp.runner
492                                .exit_message()
493                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
494                                .header
495                        }
496                        &HvMessageType::HvMessageTypeHypercallIntercept => {
497                            &vp.runner
498                                .exit_message()
499                                .as_message::<hvdef::HvX64HypercallInterceptMessage>()
500                                .header
501                        }
502                        &HvMessageType::HvMessageTypeSynicSintDeliverable => {
503                            &vp.runner
504                                .exit_message()
505                                .as_message::<hvdef::HvX64SynicSintDeliverableMessage>()
506                                .header
507                        }
508                        &HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
509                            &vp.runner
510                                .exit_message()
511                                .as_message::<hvdef::HvX64InterruptionDeliverableMessage>()
512                                .header
513                        }
514                        &HvMessageType::HvMessageTypeX64CpuidIntercept => {
515                            &vp.runner
516                                .exit_message()
517                                .as_message::<hvdef::HvX64CpuidInterceptMessage>()
518                                .header
519                        }
520                        &HvMessageType::HvMessageTypeMsrIntercept => {
521                            &vp.runner
522                                .exit_message()
523                                .as_message::<hvdef::HvX64MsrInterceptMessage>()
524                                .header
525                        }
526                        &HvMessageType::HvMessageTypeUnrecoverableException => {
527                            &vp.runner
528                                .exit_message()
529                                .as_message::<hvdef::HvX64UnrecoverableExceptionMessage>()
530                                .header
531                        }
532                        &HvMessageType::HvMessageTypeX64Halt => {
533                            &vp.runner
534                                .exit_message()
535                                .as_message::<hvdef::HvX64HaltMessage>()
536                                .header
537                        }
538                        &HvMessageType::HvMessageTypeExceptionIntercept => {
539                            &vp.runner
540                                .exit_message()
541                                .as_message::<hvdef::HvX64ExceptionInterceptMessage>()
542                                .header
543                        }
544                        reason => unreachable!("unknown exit reason: {:#x?}", reason),
545                    };
546
547                    message_header
548                        .execution_state
549                        .vtl()
550                        .try_into()
551                        .map_err(|hcl::UnsupportedGuestVtl(vtl)| InvalidInterceptedVtl(vtl))?
552                }
553            }
554        };
555
556        Ok(Self {
557            vp,
558            intercepted_vtl,
559        })
560    }
561
562    fn handle_interrupt_deliverable_exit(&mut self, bus: &impl CpuIo) -> Result<(), VpHaltReason> {
563        let message = self
564            .vp
565            .runner
566            .exit_message()
567            .as_message::<hvdef::HvX64InterruptionDeliverableMessage>();
568
569        assert_eq!(
570            message.deliverable_type,
571            HvX64PendingInterruptionType::HV_X64_PENDING_INTERRUPT
572        );
573
574        self.vp
575            .backing
576            .deliverability_notifications
577            .set_interrupt_notification(false);
578
579        self.vp
580            .backing
581            .next_deliverability_notifications
582            .set_interrupt_notification(false);
583
584        if let Some(vector) = bus.acknowledge_pic_interrupt() {
585            let event = hvdef::HvX64PendingExtIntEvent::new()
586                .with_event_pending(true)
587                .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXT_INT)
588                .with_vector(vector);
589
590            self.vp
591                .runner
592                .set_vp_register(
593                    self.intercepted_vtl,
594                    HvX64RegisterName::PendingEvent0,
595                    u128::from(event).into(),
596                )
597                .unwrap();
598        }
599
600        Ok(())
601    }
602
603    fn handle_synic_deliverable_exit(&mut self) {
604        let message = self
605            .vp
606            .runner
607            .exit_message()
608            .as_message::<hvdef::HvX64SynicSintDeliverableMessage>();
609
610        tracing::trace!(
611            deliverable_sints = message.deliverable_sints,
612            "sint deliverable"
613        );
614
615        self.vp.backing.deliverability_notifications.set_sints(
616            self.vp.backing.deliverability_notifications.sints() & !message.deliverable_sints,
617        );
618
        // This is updated by `deliver_synic_messages` below, so clear it here.
620        self.vp
621            .backing
622            .next_deliverability_notifications
623            .set_sints(0);
624
625        // These messages are always delivered to VTL0, as VTL1 does not own any VMBUS channels.
626        self.vp
627            .deliver_synic_messages(GuestVtl::Vtl0, message.deliverable_sints);
628    }
629
630    fn handle_hypercall_exit(&mut self, bus: &impl CpuIo) {
631        let message = self
632            .vp
633            .runner
634            .exit_message()
635            .as_message::<hvdef::HvX64HypercallInterceptMessage>();
636
637        tracing::trace!(msg = %format_args!("{:x?}", message), "hypercall");
638
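        // The hypercall register ABI differs between 64-bit (long mode) and
        // 32-bit callers, so derive the mode from the intercepted execution
        // state.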
639        let is_64bit =
640            message.header.execution_state.cr0_pe() && message.header.execution_state.efer_lma();
641
642        let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
643        let handler = UhHypercallHandler {
644            vp: self.vp,
645            bus,
646            trusted: false,
647            intercepted_vtl: self.intercepted_vtl,
648        };
649        UhHypercallHandler::MSHV_DISPATCHER.dispatch(
650            guest_memory,
651            hv1_hypercall::X64RegisterIo::new(handler, is_64bit),
652        );
653    }
654
655    async fn handle_mmio_exit(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
656        let message = self
657            .vp
658            .runner
659            .exit_message()
660            .as_message::<hvdef::HvX64MemoryInterceptMessage>();
661
662        tracing::trace!(msg = %format_args!("{:x?}", message), "mmio");
663
664        let interruption_pending = message.header.execution_state.interruption_pending();
665
666        // Fast path for monitor page writes.
667        if Some(message.guest_physical_address & !(HV_PAGE_SIZE - 1))
668            == self.vp.partition.monitor_page.gpa()
669            && message.header.intercept_access_type == HvInterceptAccessType::WRITE
670        {
671            let tlb_lock_held = message.memory_access_info.gva_gpa_valid()
672                || message.memory_access_info.tlb_locked();
673            let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
674            let cache = self.vp.emulation_cache(self.intercepted_vtl);
675            let mut emulation_state = UhEmulationState {
676                vp: &mut *self.vp,
677                interruption_pending,
678                devices: dev,
679                vtl: self.intercepted_vtl,
680                cache,
681            };
682            if let Some(bit) = virt_support_x86emu::emulate::emulate_mnf_write_fast_path(
683                &mut emulation_state,
684                guest_memory,
685                dev,
686                interruption_pending,
687                tlb_lock_held,
688            ) {
689                if let Some(connection_id) = self.vp.partition.monitor_page.write_bit(bit) {
690                    signal_mnf(dev, connection_id);
691                }
692                return Ok(());
693            }
694        }
695
696        let cache = self.vp.emulation_cache(self.intercepted_vtl);
697        self.vp
698            .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
699            .await
700    }
701
702    async fn handle_io_port_exit(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
703        let message = self
704            .vp
705            .runner
706            .exit_message()
707            .as_message::<hvdef::HvX64IoPortInterceptMessage>();
708
709        tracing::trace!(msg = %format_args!("{:x?}", message), "io_port");
710
711        assert_eq!(message.rax, self.vp.runner.cpu_context().gps[protocol::RAX]);
712
713        let interruption_pending = message.header.execution_state.interruption_pending();
714
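        // String and rep I/O require full instruction emulation; a simple
        // in/out can be satisfied directly from the intercept message.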
715        if message.access_info.string_op() || message.access_info.rep_prefix() {
716            let cache = self.vp.emulation_cache(self.intercepted_vtl);
717            self.vp
718                .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
719                .await
720        } else {
721            let next_rip = next_rip(&message.header);
722            let access_size = message.access_info.access_size();
723            virt_support_x86emu::emulate::emulate_io(
724                self.vp.vp_index(),
725                message.header.intercept_access_type == HvInterceptAccessType::WRITE,
726                message.port_number,
727                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX],
728                access_size,
729                dev,
730            )
731            .await;
732            self.vp.set_rip(self.intercepted_vtl, next_rip);
733            Ok(())
734        }
735    }
736
737    async fn handle_unaccepted_gpa_intercept(
738        &mut self,
739        dev: &impl CpuIo,
740    ) -> Result<(), VpHaltReason> {
741        let gpa = self
742            .vp
743            .runner
744            .exit_message()
745            .as_message::<hvdef::HvX64MemoryInterceptMessage>()
746            .guest_physical_address;
747
748        if self.vp.partition.is_gpa_lower_vtl_ram(gpa) {
749            // The host may have moved the page to an unaccepted state, so fail
750            // here. This does not apply to VTL 2 memory - for unaccepted pages,
751            // the intercept goes to host VTL0.
752            //
753            // Note: SGX memory should be included in this check, so if SGX is
754            // no longer included in the lower_vtl_memory_layout, make sure the
755            // appropriate changes are reflected here.
756            Err(VpHaltReason::InvalidVmState(
757                UnacceptedMemoryAccess(gpa).into(),
758            ))
759        } else {
760            self.handle_mmio_exit(dev).await
761        }
762    }
763
764    fn handle_cpuid_intercept(&mut self) {
765        let message = self
766            .vp
767            .runner
768            .exit_message()
769            .as_message::<hvdef::HvX64CpuidInterceptMessage>();
770
771        let default_result = [
772            message.default_result_rax as u32,
773            message.default_result_rbx as u32,
774            message.default_result_rcx as u32,
775            message.default_result_rdx as u32,
776        ];
777
778        tracing::trace!(msg = %format_args!("{:x?}", message), "cpuid");
779
780        let [eax, ebx, ecx, edx] =
781            self.vp
782                .partition
783                .cpuid_result(message.rax as u32, message.rcx as u32, &default_result);
784
785        let next_rip = next_rip(&message.header);
786        self.vp.runner.cpu_context_mut().gps[protocol::RAX] = eax.into();
787        self.vp.runner.cpu_context_mut().gps[protocol::RBX] = ebx.into();
788        self.vp.runner.cpu_context_mut().gps[protocol::RCX] = ecx.into();
789        self.vp.runner.cpu_context_mut().gps[protocol::RDX] = edx.into();
790
791        self.vp.set_rip(self.intercepted_vtl, next_rip);
792    }
793
794    fn handle_msr_intercept(&mut self) {
795        let message = self
796            .vp
797            .runner
798            .exit_message()
799            .as_message::<hvdef::HvX64MsrInterceptMessage>();
800        let rip = next_rip(&message.header);
801
802        tracing::trace!(msg = %format_args!("{:x?}", message), "msr");
803
804        let msr = message.msr_number;
805        match message.header.intercept_access_type {
806            HvInterceptAccessType::READ => {
807                // Only supported MSRs are the crash MSRs.
808                let value = match self.vp.read_crash_msr(msr, self.intercepted_vtl) {
809                    Ok(v) => v,
810                    Err(MsrError::Unknown) => {
811                        tracing::trace!(msr, "unknown msr read");
812                        0
813                    }
814                    Err(MsrError::InvalidAccess) => {
815                        self.vp.inject_gpf(self.intercepted_vtl);
816                        // Do not advance RIP.
817                        return;
818                    }
819                };
820
821                self.vp.runner.cpu_context_mut().gps[protocol::RAX] = value & 0xffff_ffff;
822                self.vp.runner.cpu_context_mut().gps[protocol::RDX] = value >> 32;
823            }
824            HvInterceptAccessType::WRITE => {
825                let value = (message.rax & 0xffff_ffff) | (message.rdx << 32);
826                // Only supported MSRs are the crash MSRs.
827                match self.vp.write_crash_msr(msr, value, self.intercepted_vtl) {
828                    Ok(()) => {}
829                    Err(MsrError::Unknown) => {
830                        tracing::trace!(msr, value, "unknown msr write");
831                    }
832                    Err(MsrError::InvalidAccess) => {
833                        self.vp.inject_gpf(self.intercepted_vtl);
834                        // Do not advance RIP.
835                        return;
836                    }
837                }
838            }
839            _ => unreachable!(),
840        }
841
842        self.vp.set_rip(self.intercepted_vtl, rip);
843    }
844
845    fn handle_eoi(&self, dev: &impl CpuIo) {
846        let message = self
847            .vp
848            .runner
849            .exit_message()
850            .as_message::<hvdef::HvX64ApicEoiMessage>();
851
852        tracing::trace!(msg = %format_args!("{:x?}", message), "eoi");
853
854        dev.handle_eoi(message.interrupt_vector);
855    }
856
857    fn handle_unrecoverable_exception(&self) -> Result<(), VpHaltReason> {
858        Err(VpHaltReason::TripleFault {
859            vtl: self.intercepted_vtl.into(),
860        })
861    }
862
863    fn handle_exception(&mut self) -> Result<(), VpHaltReason> {
864        let message = self
865            .vp
866            .runner
867            .exit_message()
868            .as_message::<hvdef::HvX64ExceptionInterceptMessage>();
869
870        match x86defs::Exception(message.vector as u8) {
871            x86defs::Exception::DEBUG if cfg!(feature = "gdb") => {
872                self.vp.handle_debug_exception(self.intercepted_vtl)?
873            }
874            _ => tracing::error!("unexpected exception type {:#x?}", message.vector),
875        }
876        Ok(())
877    }
878}
879
880impl UhProcessor<'_, HypervisorBackedX86> {
881    fn set_rip(&mut self, vtl: GuestVtl, rip: u64) {
882        self.runner
883            .set_vp_register(vtl, HvX64RegisterName::Rip, rip.into())
884            .unwrap();
885    }
886
887    fn inject_gpf(&mut self, vtl: GuestVtl) {
888        let exception_event = hvdef::HvX64PendingExceptionEvent::new()
889            .with_event_pending(true)
890            .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
891            .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0.into())
892            .with_deliver_error_code(true)
893            .with_error_code(0);
894
895        self.runner
896            .set_vp_register(
897                vtl,
898                HvX64RegisterName::PendingEvent0,
899                u128::from(exception_event).into(),
900            )
901            .expect("set_vp_register should succeed for pending event");
902    }
903
904    fn set_vsm_partition_config(
905        &mut self,
906        vtl: GuestVtl,
907        value: HvRegisterVsmPartitionConfig,
908    ) -> Result<(), HvError> {
909        if vtl != GuestVtl::Vtl1 {
910            return Err(HvError::InvalidParameter);
911        }
912
913        assert!(self.partition.isolation.is_isolated());
914
915        let status = self
916            .partition
917            .vsm_status()
918            .expect("cannot fail to query vsm status");
919
920        let vtl1_enabled = VtlSet::from(status.enabled_vtl_set()).is_set(GuestVtl::Vtl1);
921        if !vtl1_enabled {
922            return Err(HvError::InvalidVtlState);
923        }
924
925        let mut guest_vsm_lock = self.shared.guest_vsm.write();
926
927        // Initialize partition.guest_vsm state if necessary.
928        match *guest_vsm_lock {
929            GuestVsmState::NotPlatformSupported => {
930                return Err(HvError::AccessDenied);
931            }
932            GuestVsmState::NotGuestEnabled => {
933                // TODO: check status
934                *guest_vsm_lock = GuestVsmState::Enabled {
935                    vtl1: Default::default(),
936                };
937            }
938            GuestVsmState::Enabled { .. } => {}
939        }
940
941        let GuestVsmState::Enabled { vtl1 } = &mut *guest_vsm_lock else {
942            unreachable!()
943        };
944        let protections = HvMapGpaFlags::from(value.default_vtl_protection_mask() as u32);
945
946        if value.reserved() != 0 {
947            return Err(HvError::InvalidRegisterValue);
948        }
949
950        // VTL protection cannot be disabled once enabled.
951        //
952        // The hypervisor should intercept only the case where the lower VTL is
953        // setting the enable_vtl_protection bit when it was previously
954        // disabled; other cases are handled directly by the hypervisor.
955        if !value.enable_vtl_protection() {
956            if vtl1.enable_vtl_protection {
                // A malicious guest could have changed its hypercall
                // parameters in memory while the intercept was being handled;
                // this branch explicitly handles that situation.
960                return Err(HvError::InvalidRegisterValue);
961            } else {
962                panic!("unexpected SetVpRegisters intercept");
963            }
964        }
965
966        // For VBS-isolated VMs, protections apply to VTLs lower than the one specified when
967        // setting VsmPartitionConfig.
968        let mbec_enabled = VtlSet::from(status.mbec_enabled_vtl_set()).is_set(GuestVtl::Vtl0);
969        let shadow_supervisor_stack_enabled =
970            VtlSet::from(status.supervisor_shadow_stack_enabled_vtl_set() as u16)
971                .is_set(GuestVtl::Vtl0);
972
973        if !validate_vtl_gpa_flags(protections, mbec_enabled, shadow_supervisor_stack_enabled) {
974            return Err(HvError::InvalidRegisterValue);
975        }
976
977        // Default VTL protection mask must include read and write.
978        if !(protections.readable() && protections.writable()) {
979            return Err(HvError::InvalidRegisterValue);
980        }
981
982        // Don't allow changing existing protections once set.
983        if let Some(current_protections) = vtl1.default_vtl_protections {
984            if protections != current_protections {
985                return Err(HvError::InvalidRegisterValue);
986            }
987        }
988        vtl1.default_vtl_protections = Some(protections);
989
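        // Apply the default protections to all lower-VTL RAM ranges.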
990        for ram_range in self.partition.lower_vtl_memory_layout.ram().iter() {
991            self.partition
992                .hcl
993                .modify_vtl_protection_mask(ram_range.range, protections, vtl.into())
994                .map_err(|e| match e {
995                    ApplyVtlProtectionsError::Hypervisor {
996                        range: _,
997                        output: _,
998                        hv_error,
999                        vtl: _,
1000                    } => hv_error,
1001                    _ => unreachable!(),
1002                })?;
1003        }
1004
1005        let hc_regs = [(HvX64RegisterName::VsmPartitionConfig, u64::from(value))];
1006        self.runner.set_vp_registers_hvcall(vtl.into(), hc_regs)?;
1007        vtl1.enable_vtl_protection = true;
1008
1009        Ok(())
1010    }
1011
    /// Eagerly loads the registers needed for emulation.
    ///
    /// Expensive registers are typically loaded lazily, but these registers
    /// are always used, and the underlying ioctl supports batching multiple
    /// register retrievals into a single call.
1015    fn emulation_cache(&mut self, vtl: GuestVtl) -> MshvEmulationCache {
1016        const NAMES: &[HvX64RegisterName] = &[
1017            HvX64RegisterName::Rsp,
1018            HvX64RegisterName::Es,
1019            HvX64RegisterName::Ds,
1020            HvX64RegisterName::Fs,
1021            HvX64RegisterName::Gs,
1022            HvX64RegisterName::Ss,
1023            HvX64RegisterName::Cr0,
1024            HvX64RegisterName::Efer,
1025        ];
1026        let mut values = [FromZeros::new_zeroed(); NAMES.len()];
1027        self.runner
1028            .get_vp_registers(vtl, NAMES, &mut values)
1029            .expect("register query should not fail");
1030
1031        let [rsp, es, ds, fs, gs, ss, cr0, efer] = values;
1032
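        // RIP and RFLAGS are available directly from the intercept message
        // header, so they don't need a separate register query.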
1033        let header = self
1034            .runner
1035            .exit_message()
1036            .as_message::<HvX64InterceptMessageHeader>();
1037
1038        MshvEmulationCache {
1039            rsp: rsp.as_u64(),
1040            es: from_seg(es.into()),
1041            ds: from_seg(ds.into()),
1042            fs: from_seg(fs.into()),
1043            gs: from_seg(gs.into()),
1044            ss: from_seg(ss.into()),
1045            cr0: cr0.as_u64(),
1046            efer: efer.as_u64(),
1047            rip: header.rip,
1048            rflags: header.rflags.into(),
1049        }
1050    }
1051}
1052
1053fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
1054    SegmentRegister {
1055        base: reg.base,
1056        limit: reg.limit,
1057        selector: reg.selector,
1058        attributes: reg.attributes.into(),
1059    }
1060}
1061
1062impl<T: CpuIo> EmulatorSupport for UhEmulationState<'_, '_, T, HypervisorBackedX86> {
1063    fn flush(&mut self) {
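        // Write back the registers that the emulator tracks locally in the
        // cache (RIP, RFLAGS, RSP); general-purpose and XMM state is updated
        // directly in the shared CPU context.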
1064        self.vp
1065            .runner
1066            .set_vp_registers(
1067                self.vtl,
1068                [
1069                    (HvX64RegisterName::Rip, self.cache.rip),
1070                    (HvX64RegisterName::Rflags, self.cache.rflags.into()),
1071                    (HvX64RegisterName::Rsp, self.cache.rsp),
1072                ],
1073            )
1074            .unwrap();
1075    }
1076
1077    fn vp_index(&self) -> VpIndex {
1078        self.vp.vp_index()
1079    }
1080
1081    fn vendor(&self) -> x86defs::cpuid::Vendor {
1082        self.vp.partition.caps.vendor
1083    }
1084
1085    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
1086        match reg {
1087            x86emu::Gp::RSP => self.cache.rsp,
1088            _ => self.vp.runner.cpu_context().gps[reg as usize],
1089        }
1090    }
1091
1092    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
1093        if reg == x86emu::Gp::RSP {
1094            self.cache.rsp = v;
1095        }
1096        self.vp.runner.cpu_context_mut().gps[reg as usize] = v;
1097    }
1098
1099    fn xmm(&mut self, index: usize) -> u128 {
1100        u128::from_le_bytes(self.vp.runner.cpu_context().fx_state.xmm[index])
1101    }
1102
1103    fn set_xmm(&mut self, index: usize, v: u128) {
1104        self.vp.runner.cpu_context_mut().fx_state.xmm[index] = v.to_le_bytes();
1105    }
1106
1107    fn rip(&mut self) -> u64 {
1108        self.cache.rip
1109    }
1110
1111    fn set_rip(&mut self, v: u64) {
1112        self.cache.rip = v;
1113    }
1114
1115    fn segment(&mut self, index: x86emu::Segment) -> SegmentRegister {
1116        match index {
1117            x86emu::Segment::CS => {
1118                let header = self
1119                    .vp
1120                    .runner
1121                    .exit_message()
1122                    .as_message::<HvX64InterceptMessageHeader>();
1123                from_seg(header.cs_segment)
1124            }
1125            x86emu::Segment::ES => self.cache.es,
1126            x86emu::Segment::SS => self.cache.ss,
1127            x86emu::Segment::DS => self.cache.ds,
1128            x86emu::Segment::FS => self.cache.fs,
1129            x86emu::Segment::GS => self.cache.gs,
1130        }
1131    }
1132
1133    fn efer(&mut self) -> u64 {
1134        self.cache.efer
1135    }
1136
1137    fn cr0(&mut self) -> u64 {
1138        self.cache.cr0
1139    }
1140
1141    fn rflags(&mut self) -> RFlags {
1142        self.cache.rflags
1143    }
1144
1145    fn set_rflags(&mut self, v: RFlags) {
1146        self.cache.rflags = v;
1147    }
1148
1149    fn instruction_bytes(&self) -> &[u8] {
1150        let message = self.vp.runner.exit_message();
1151        match message.header.typ {
1152            HvMessageType::HvMessageTypeGpaIntercept
1153            | HvMessageType::HvMessageTypeUnmappedGpa
1154            | HvMessageType::HvMessageTypeUnacceptedGpa => {
1155                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
1156                &message.instruction_bytes[..message.instruction_byte_count as usize]
1157            }
1158            HvMessageType::HvMessageTypeX64IoPortIntercept => {
1159                let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
1160                &message.instruction_bytes[..message.instruction_byte_count as usize]
1161            }
1162            _ => unreachable!(),
1163        }
1164    }
1165
1166    fn physical_address(&self) -> Option<u64> {
1167        let message = self.vp.runner.exit_message();
1168        match message.header.typ {
1169            HvMessageType::HvMessageTypeGpaIntercept
1170            | HvMessageType::HvMessageTypeUnmappedGpa
1171            | HvMessageType::HvMessageTypeUnacceptedGpa => {
1172                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
1173                Some(message.guest_physical_address)
1174            }
1175            _ => None,
1176        }
1177    }
1178
1179    fn initial_gva_translation(
1180        &mut self,
1181    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
1182        if (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeGpaIntercept)
1183            && (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeUnmappedGpa)
1184            && (self.vp.runner.exit_message().header.typ
1185                != HvMessageType::HvMessageTypeUnacceptedGpa)
1186        {
1187            return None;
1188        }
1189
1190        let message = self
1191            .vp
1192            .runner
1193            .exit_message()
1194            .as_message::<hvdef::HvX64MemoryInterceptMessage>();
1195
1196        if !message.memory_access_info.gva_gpa_valid() {
1197            tracing::trace!(?message.guest_virtual_address, ?message.guest_physical_address, "gva gpa not valid {:?}", self.vp.runner.exit_message().payload());
1198            return None;
1199        }
1200
1201        let translate_mode = virt_support_x86emu::emulate::TranslateMode::try_from(
1202            message.header.intercept_access_type,
1203        )
1204        .expect("unexpected intercept access type");
1205
1206        let translation = virt_support_x86emu::emulate::InitialTranslation {
1207            gva: message.guest_virtual_address,
1208            gpa: message.guest_physical_address,
1209            translate_mode,
1210        };
1211
1212        tracing::trace!(?translation, "initial translation");
1213
1214        // If we have a valid translation, the hypervisor must have set the TLB lock
1215        // so the translation remains valid for the duration of this exit.
1216        // Update our local cache appropriately.
1217        self.vp.mark_tlb_locked(Vtl::Vtl2, self.vtl);
1218
1219        Some(translation)
1220    }
1221
1222    fn interruption_pending(&self) -> bool {
1223        self.interruption_pending
1224    }
1225
1226    fn check_vtl_access(
1227        &mut self,
1228        gpa: u64,
1229        mode: virt_support_x86emu::emulate::TranslateMode,
1230    ) -> Result<(), EmuCheckVtlAccessError> {
        // Underhill currently doesn't set VTL 2 protections against execute
        // exclusively; it removes all permissions from a page. So for VTL 1
        // there is no need to check permissions: if VTL 1 doesn't have
        // permissions to a page, Underhill will fail appropriately when it
        // tries to read or write that page on VTL 1's behalf.
        //
        // For VTL 0, the alias map guards read and write permissions, so only
        // check VTL execute permissions. Because VTL 2 will not restrict
        // execute exclusively, only VTL 1 execute permissions need to be
        // checked, and therefore permissions are checked only when VTL 1 is
        // allowed.
        //
        // Note: the restriction to VTL 1 support also means that for WHP,
        // which doesn't support VTL 1, the HvCheckSparseGpaPageVtlAccess
        // hypercall (which is unimplemented in WHP) will never be made.
1242        if mode == virt_support_x86emu::emulate::TranslateMode::Execute
1243            && self.vtl == GuestVtl::Vtl0
1244            && !matches!(
1245                *self.vp.shared.guest_vsm.read(),
1246                GuestVsmState::NotPlatformSupported,
1247            )
1248        {
            // This should always be called after a translate_gva with the TLB
            // lock flag set, or with an initial translation.
1251            debug_assert!(self.vp.is_tlb_locked(Vtl::Vtl2, self.vtl));
1252
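            // Check user or kernel execute permissions depending on the
            // hypervisor's instruction emulation hints (MBEC user execute
            // control).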
1253            let mbec_user_execute = self
1254                .vp
1255                .runner
1256                .get_vp_register(self.vtl, HvX64RegisterName::InstructionEmulationHints)
1257                .unwrap();
1258
1259            let flags =
1260                if hvdef::HvInstructionEmulatorHintsRegister::from(mbec_user_execute.as_u64())
1261                    .mbec_user_execute_control()
1262                {
1263                    HvMapGpaFlags::new().with_user_executable(true)
1264                } else {
1265                    HvMapGpaFlags::new().with_kernel_executable(true)
1266                };
1267
1268            let access_result = self
1269                .vp
1270                .partition
1271                .hcl
1272                .check_vtl_access(gpa, self.vtl, flags)
1273                .unwrap();
1274
1275            if let Some(ioctl::CheckVtlAccessResult { vtl, denied_flags }) = access_result {
1276                return Err(EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags });
1277            };
1278        }
1279
1280        Ok(())
1281    }
1282
1283    fn translate_gva(
1284        &mut self,
1285        gva: u64,
1286        mode: virt_support_x86emu::emulate::TranslateMode,
1287    ) -> Result<EmuTranslateResult, EmuTranslateError> {
1288        let mut control_flags = hypercall::TranslateGvaControlFlagsX64::new();
1289        match mode {
1290            virt_support_x86emu::emulate::TranslateMode::Read => {
1291                control_flags.set_validate_read(true)
1292            }
1293            virt_support_x86emu::emulate::TranslateMode::Write => {
1294                control_flags.set_validate_read(true);
1295                control_flags.set_validate_write(true);
1296            }
1297            virt_support_x86emu::emulate::TranslateMode::Execute => {
1298                control_flags.set_validate_execute(true)
1299            }
1300        };
1301
1302        // The translation will be used, so set the appropriate page table bits
1303        // (the access/dirty bit).
1304        //
1305        // Prevent flushes in order to make sure that translation of this GVA
1306        // remains usable until the VP is resumed back to direct execution.
1307        control_flags.set_set_page_table_bits(true);
1308        control_flags.set_tlb_flush_inhibit(true);
1309
        // If the guest is not running in ring 0, privileges are checked
        // against the VP state as of when the original intercept came in.
        // Since the emulator doesn't support instructions that change the
        // ring level, the ring level is still what it was at intercept time,
        // so the privilege exempt flag should not be set.
1316        assert!(!control_flags.privilege_exempt());
1317
1318        // Do the translation using the current VTL.
1319        control_flags.set_input_vtl(self.vtl.into());
1320
1321        match self
1322            .vp
1323            .runner
1324            .translate_gva_to_gpa(gva, control_flags)
1325            .unwrap()
1326        {
1327            Ok(ioctl::TranslateResult {
1328                gpa_page,
1329                overlay_page,
1330            }) => {
1331                self.vp.mark_tlb_locked(Vtl::Vtl2, self.vtl);
1332                Ok(EmuTranslateResult {
1333                    gpa: (gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)),
1334                    overlay_page: Some(overlay_page),
1335                })
1336            }
1337            Err(ioctl::x64::TranslateErrorX64 { code, event_info }) => Err(EmuTranslateError {
1338                code: hypercall::TranslateGvaResultCode(code),
1339                event_info: Some(event_info),
1340            }),
1341        }
1342    }
1343
1344    fn inject_pending_event(&mut self, event_info: HvX64PendingEvent) {
1345        let regs = [
1346            (
1347                HvX64RegisterName::PendingEvent0,
1348                u128::from(event_info.reg_0),
1349            ),
1350            (
1351                HvX64RegisterName::PendingEvent1,
1352                u128::from(event_info.reg_1),
1353            ),
1354        ];
1355
1356        self.vp
1357            .runner
1358            .set_vp_registers_hvcall(self.vtl.into(), regs)
1359            .expect("set_vp_registers hypercall for setting pending event should not fail");
1360    }
1361
1362    fn monitor_support(&self) -> Option<&dyn EmulatorMonitorSupport> {
1363        Some(self)
1364    }
1365
1366    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
1367        self.vp.partition.is_gpa_mapped(gpa, write)
1368    }
1369
1370    fn lapic_base_address(&self) -> Option<u64> {
1371        None
1372    }
1373
1374    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
1375        unimplemented!()
1376    }
1377
1378    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
1379        unimplemented!()
1380    }
1381}
1382
1383impl<T: CpuIo> UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1384    const MSHV_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
1385        Self,
1386        [
1387            hv1_hypercall::HvPostMessage,
1388            hv1_hypercall::HvSignalEvent,
1389            hv1_hypercall::HvRetargetDeviceInterrupt,
1390            hv1_hypercall::HvGetVpIndexFromApicId,
1391            hv1_hypercall::HvSetVpRegisters,
1392            hv1_hypercall::HvModifyVtlProtectionMask
1393        ]
1394    );
1395}
1396
1397impl<T> hv1_hypercall::X64RegisterState for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1398    fn rip(&mut self) -> u64 {
1399        self.vp
1400            .runner
1401            .exit_message()
1402            .as_message::<HvX64InterceptMessageHeader>()
1403            .rip
1404    }
1405
1406    fn set_rip(&mut self, rip: u64) {
1407        self.vp.set_rip(self.intercepted_vtl, rip)
1408    }
1409
1410    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
1411        match n {
1412            hv1_hypercall::X64HypercallRegister::Rax => {
1413                self.vp.runner.cpu_context().gps[protocol::RAX]
1414            }
1415            hv1_hypercall::X64HypercallRegister::Rcx => {
1416                self.vp.runner.cpu_context().gps[protocol::RCX]
1417            }
1418            hv1_hypercall::X64HypercallRegister::Rdx => {
1419                self.vp.runner.cpu_context().gps[protocol::RDX]
1420            }
1421            hv1_hypercall::X64HypercallRegister::Rbx => {
1422                self.vp.runner.cpu_context().gps[protocol::RBX]
1423            }
1424            hv1_hypercall::X64HypercallRegister::Rsi => {
1425                self.vp.runner.cpu_context().gps[protocol::RSI]
1426            }
1427            hv1_hypercall::X64HypercallRegister::Rdi => {
1428                self.vp.runner.cpu_context().gps[protocol::RDI]
1429            }
1430            hv1_hypercall::X64HypercallRegister::R8 => {
1431                self.vp.runner.cpu_context().gps[protocol::R8]
1432            }
1433        }
1434    }
1435
1436    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
1437        *match n {
1438            hv1_hypercall::X64HypercallRegister::Rax => {
1439                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX]
1440            }
1441            hv1_hypercall::X64HypercallRegister::Rcx => {
1442                &mut self.vp.runner.cpu_context_mut().gps[protocol::RCX]
1443            }
1444            hv1_hypercall::X64HypercallRegister::Rdx => {
1445                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDX]
1446            }
1447            hv1_hypercall::X64HypercallRegister::Rbx => {
1448                &mut self.vp.runner.cpu_context_mut().gps[protocol::RBX]
1449            }
1450            hv1_hypercall::X64HypercallRegister::Rsi => {
1451                &mut self.vp.runner.cpu_context_mut().gps[protocol::RSI]
1452            }
1453            hv1_hypercall::X64HypercallRegister::Rdi => {
1454                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDI]
1455            }
1456            hv1_hypercall::X64HypercallRegister::R8 => {
1457                &mut self.vp.runner.cpu_context_mut().gps[protocol::R8]
1458            }
1459        } = value;
1460    }
1461
1462    fn xmm(&mut self, n: usize) -> u128 {
1463        u128::from_ne_bytes(self.vp.runner.cpu_context().fx_state.xmm[n])
1464    }
1465
1466    fn set_xmm(&mut self, n: usize, value: u128) {
1467        self.vp.runner.cpu_context_mut().fx_state.xmm[n] = value.to_ne_bytes();
1468    }
1469}
1470
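// Converts the register name types used by the generic `HvRegisterState`
// implementations into the hypervisor register names used by the runner's
// register ioctls.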
1471trait ToVpRegisterName: 'static + Copy + std::fmt::Debug {
1472    fn to_vp_reg_name(self) -> VpRegisterName;
1473}
1474
1475impl ToVpRegisterName for VpRegisterName {
1476    fn to_vp_reg_name(self) -> VpRegisterName {
1477        self
1478    }
1479}
1480
1481impl UhVpStateAccess<'_, '_, HypervisorBackedX86> {
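    // Writes a group of VP registers in a single call by mapping the group's
    // register names and packed values onto the runner's set_vp_registers
    // interface for this VTL.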
1482    fn set_register_state<T, R: ToVpRegisterName, const N: usize>(
1483        &mut self,
1484        regs: &T,
1485    ) -> Result<(), vp_state::Error>
1486    where
1487        T: HvRegisterState<R, N>,
1488    {
1489        let names = regs.names().map(|r| r.to_vp_reg_name());
1490        let mut values = [HvRegisterValue::new_zeroed(); N];
1491        regs.get_values(values.iter_mut());
1492        self.vp
1493            .runner
1494            .set_vp_registers(self.vtl, names.iter().copied().zip(values))
1495            .map_err(vp_state::Error::SetRegisters)?;
1496        Ok(())
1497    }
1498
1499    fn get_register_state<T, R: ToVpRegisterName, const N: usize>(
1500        &mut self,
1501    ) -> Result<T, vp_state::Error>
1502    where
1503        T: HvRegisterState<R, N>,
1504    {
1505        let mut regs = T::default();
1506        let names = regs.names().map(|r| r.to_vp_reg_name());
1507        let mut values = [HvRegisterValue::new_zeroed(); N];
1508        self.vp
1509            .runner
1510            .get_vp_registers(self.vtl, &names, &mut values)
1511            .map_err(vp_state::Error::GetRegisters)?;
1512
1513        regs.set_values(values.into_iter());
1514        Ok(regs)
1515    }
1516}
1517
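// VP state accessors backed by hypervisor register reads and writes. Most
// state groups map directly onto get/set_register_state; the APIC, synic
// timers, and synic pages are not implemented here, since the hypervisor owns
// that state for these partitions.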
1518impl AccessVpState for UhVpStateAccess<'_, '_, HypervisorBackedX86> {
1519    type Error = vp_state::Error;
1520
1521    fn caps(&self) -> &virt::x86::X86PartitionCapabilities {
1522        &self.vp.partition.caps
1523    }
1524
1525    fn commit(&mut self) -> Result<(), Self::Error> {
1526        Ok(())
1527    }
1528
1529    fn registers(&mut self) -> Result<vp::Registers, Self::Error> {
1530        self.get_register_state()
1531    }
1532
1533    fn set_registers(&mut self, value: &vp::Registers) -> Result<(), Self::Error> {
1534        self.set_register_state(value)
1535    }
1536
1537    fn activity(&mut self) -> Result<vp::Activity, Self::Error> {
1538        let activity: vp::Activity = self.get_register_state()?;
1539
1540        // TODO: Get the NMI pending bit from the APIC.
1541        // let apic = self.vp.whp(self.vtl).get_apic()?;
1542        // activity.nmi_pending = hv_apic_nmi_pending(&apic);
1543        Ok(activity)
1544    }
1545
1546    fn set_activity(&mut self, value: &vp::Activity) -> Result<(), Self::Error> {
1547        self.set_register_state(value)?;
1548
1549        // TODO: Set the NMI pending bit via the APIC.
1550        // let mut apic = self.vp.whp(self.vtl).get_apic()?;
1551        // set_hv_apic_nmi_pending(&mut apic, value.nmi_pending);
1552        // self.vp.whp(self.vtl).set_apic(&apic)?;
1553        Ok(())
1554    }
1555
1556    fn xsave(&mut self) -> Result<vp::Xsave, Self::Error> {
1557        // TODO: get the rest of the xsave state, not just the legacy FP state.
1558        //
1559        // This is only used for debugging, so missing the extended state is acceptable.
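        // Standard (non-compacted) XSAVE layout: the legacy fxsave area
        // followed by the xsave header, with only the x87 and SSE features
        // marked present.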
1560        #[repr(C)]
1561        #[derive(IntoBytes, Immutable, KnownLayout)]
1562        struct XsaveStandard {
1563            fxsave: Fxsave,
1564            xsave_header: XsaveHeader,
1565        }
1566        let state = XsaveStandard {
1567            fxsave: self.vp.runner.cpu_context().fx_state.clone(),
1568            xsave_header: XsaveHeader {
1569                xstate_bv: XFEATURE_X87 | XFEATURE_SSE,
1570                ..FromZeros::new_zeroed()
1571            },
1572        };
1573        Ok(vp::Xsave::from_standard(state.as_bytes(), self.caps()))
1574    }
1575
1576    fn set_xsave(&mut self, _value: &vp::Xsave) -> Result<(), Self::Error> {
1577        Err(vp_state::Error::Unimplemented("xsave"))
1578    }
1579
1580    fn apic(&mut self) -> Result<vp::Apic, Self::Error> {
1581        Err(vp_state::Error::Unimplemented("apic"))
1582    }
1583
1584    fn set_apic(&mut self, _value: &vp::Apic) -> Result<(), Self::Error> {
1585        Err(vp_state::Error::Unimplemented("apic"))
1586    }
1587
1588    fn xcr(&mut self) -> Result<vp::Xcr0, Self::Error> {
1589        self.get_register_state()
1590    }
1591
1592    fn set_xcr(&mut self, value: &vp::Xcr0) -> Result<(), Self::Error> {
1593        self.set_register_state(value)
1594    }
1595
1596    fn xss(&mut self) -> Result<vp::Xss, Self::Error> {
1597        self.get_register_state()
1598    }
1599
1600    fn set_xss(&mut self, value: &vp::Xss) -> Result<(), Self::Error> {
1601        self.set_register_state(value)
1602    }
1603
1604    fn mtrrs(&mut self) -> Result<vp::Mtrrs, Self::Error> {
1605        self.get_register_state()
1606    }
1607
1608    fn set_mtrrs(&mut self, cc: &vp::Mtrrs) -> Result<(), Self::Error> {
1609        self.set_register_state(cc)
1610    }
1611
1612    fn pat(&mut self) -> Result<vp::Pat, Self::Error> {
1613        self.get_register_state()
1614    }
1615
1616    fn set_pat(&mut self, value: &vp::Pat) -> Result<(), Self::Error> {
1617        self.set_register_state(value)
1618    }
1619
1620    fn virtual_msrs(&mut self) -> Result<vp::VirtualMsrs, Self::Error> {
1621        self.get_register_state()
1622    }
1623
1624    fn set_virtual_msrs(&mut self, msrs: &vp::VirtualMsrs) -> Result<(), Self::Error> {
1625        self.set_register_state(msrs)
1626    }
1627
1628    fn debug_regs(&mut self) -> Result<vp::DebugRegisters, Self::Error> {
1629        self.get_register_state()
1630    }
1631
1632    fn set_debug_regs(&mut self, value: &vp::DebugRegisters) -> Result<(), Self::Error> {
1633        self.set_register_state(value)
1634    }
1635
1636    fn tsc(&mut self) -> Result<vp::Tsc, Self::Error> {
1637        self.get_register_state()
1638    }
1639
1640    fn set_tsc(&mut self, value: &vp::Tsc) -> Result<(), Self::Error> {
1641        self.set_register_state(value)
1642    }
1643
1644    fn cet(&mut self) -> Result<vp::Cet, Self::Error> {
1645        self.get_register_state()
1646    }
1647
1648    fn set_cet(&mut self, value: &vp::Cet) -> Result<(), Self::Error> {
1649        self.set_register_state(value)
1650    }
1651
1652    fn cet_ss(&mut self) -> Result<vp::CetSs, Self::Error> {
1653        self.get_register_state()
1654    }
1655
1656    fn set_cet_ss(&mut self, value: &vp::CetSs) -> Result<(), Self::Error> {
1657        self.set_register_state(value)
1658    }
1659
1660    fn tsc_aux(&mut self) -> Result<vp::TscAux, Self::Error> {
1661        self.get_register_state()
1662    }
1663
1664    fn set_tsc_aux(&mut self, value: &vp::TscAux) -> Result<(), Self::Error> {
1665        self.set_register_state(value)
1666    }
1667
1668    fn synic_msrs(&mut self) -> Result<vp::SyntheticMsrs, Self::Error> {
1669        self.get_register_state()
1670    }
1671
1672    fn set_synic_msrs(&mut self, value: &vp::SyntheticMsrs) -> Result<(), Self::Error> {
1673        self.set_register_state(value)
1674    }
1675
1676    fn synic_timers(&mut self) -> Result<vp::SynicTimers, Self::Error> {
1677        Err(vp_state::Error::Unimplemented("synic_timers"))
1678    }
1679
1680    fn set_synic_timers(&mut self, _value: &vp::SynicTimers) -> Result<(), Self::Error> {
1681        Err(vp_state::Error::Unimplemented("synic_timers"))
1682    }
1683
1684    fn synic_message_queues(&mut self) -> Result<vp::SynicMessageQueues, Self::Error> {
1685        Ok(self.vp.inner.message_queues[self.vtl].save())
1686    }
1687
1688    fn set_synic_message_queues(
1689        &mut self,
1690        value: &vp::SynicMessageQueues,
1691    ) -> Result<(), Self::Error> {
1692        self.vp.inner.message_queues[self.vtl].restore(value);
1693        Ok(())
1694    }
1695
1696    fn synic_message_page(&mut self) -> Result<vp::SynicMessagePage, Self::Error> {
1697        Err(vp_state::Error::Unimplemented("synic_message_page"))
1698    }
1699
1700    fn set_synic_message_page(&mut self, _value: &vp::SynicMessagePage) -> Result<(), Self::Error> {
1701        Err(vp_state::Error::Unimplemented("synic_message_page"))
1702    }
1703
1704    fn synic_event_flags_page(&mut self) -> Result<vp::SynicEventFlagsPage, Self::Error> {
1705        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1706    }
1707
1708    fn set_synic_event_flags_page(
1709        &mut self,
1710        _value: &vp::SynicEventFlagsPage,
1711    ) -> Result<(), Self::Error> {
1712        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1713    }
1714}
1715
1716impl<T: CpuIo> hv1_hypercall::RetargetDeviceInterrupt
1717    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1718{
1719    fn retarget_interrupt(
1720        &mut self,
1721        device_id: u64,
1722        address: u64,
1723        data: u32,
1724        params: hv1_hypercall::HvInterruptParameters<'_>,
1725    ) -> hvdef::HvResult<()> {
1726        self.retarget_virtual_interrupt(
1727            device_id,
1728            address,
1729            data,
1730            params.vector,
1731            params.multicast,
1732            params.target_processors,
1733        )
1734    }
1735}
1736
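// Handles HvSetVpRegisters from the guest. Only the VsmPartitionConfig
// register is supported, and only for the calling partition and VP.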
1737impl<T> hv1_hypercall::SetVpRegisters for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1738    fn set_vp_registers(
1739        &mut self,
1740        partition_id: u64,
1741        vp_index: u32,
1742        vtl: Option<Vtl>,
1743        registers: &[hypercall::HvRegisterAssoc],
1744    ) -> HvRepResult {
1745        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1746            return Err((HvError::AccessDenied, 0));
1747        }
1748
1749        if vp_index != hvdef::HV_VP_INDEX_SELF && vp_index != self.vp.vp_index().index() {
1750            return Err((HvError::InvalidVpIndex, 0));
1751        }
1752
1753        let target_vtl = self
1754            .target_vtl_no_higher(vtl.unwrap_or(self.intercepted_vtl.into()))
1755            .map_err(|e| (e, 0))?;
1756
1757        for (i, reg) in registers.iter().enumerate() {
1758            if reg.name == HvX64RegisterName::VsmPartitionConfig.into() {
1759                let value = HvRegisterVsmPartitionConfig::from(reg.value.as_u64());
1760                self.vp
1761                    .set_vsm_partition_config(target_vtl, value)
1762                    .map_err(|e| (e, i))?;
1763            } else {
1764                return Err((HvError::InvalidParameter, i));
1765            }
1766        }
1767
1768        Ok(())
1769    }
1770}
1771
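// Handles HvModifyVtlProtectionMask intercepts. This handler only validates
// the request: protections on guest RAM are expected to be applied by the
// hypervisor directly, hence the panic below for that case.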
1772impl<T> hv1_hypercall::ModifyVtlProtectionMask
1773    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1774{
1775    fn modify_vtl_protection_mask(
1776        &mut self,
1777        partition_id: u64,
1778        _map_flags: HvMapGpaFlags,
1779        target_vtl: Option<Vtl>,
1780        gpa_pages: &[u64],
1781    ) -> HvRepResult {
1782        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1783            return Err((HvError::AccessDenied, 0));
1784        }
1785
1786        let target_vtl = self
1787            .target_vtl_no_higher(target_vtl.unwrap_or(self.intercepted_vtl.into()))
1788            .map_err(|e| (e, 0))?;
1789        if target_vtl == GuestVtl::Vtl0 {
1790            return Err((HvError::InvalidParameter, 0));
1791        }
1792
1793        // A VTL cannot change its own VTL permissions until it has enabled VTL protection and
1794        // configured default permissions. Higher VTLs are not under this restriction (as they may
1795        // need to apply default permissions before VTL protection is enabled).
1796        if target_vtl == self.intercepted_vtl
1797            && !matches!(
1798                *self.vp.shared.guest_vsm.read(),
1799                GuestVsmState::Enabled {
1800                    vtl1: VbsIsolatedVtl1State {
1801                        enable_vtl_protection: true,
1802                        default_vtl_protections: Some(_),
1803                    },
1804                }
1805            )
1806        {
1807            return Err((HvError::AccessDenied, 0));
1808        }
1809
1810        // TODO VBS GUEST VSM: verify this logic is correct
1811        // TODO VBS GUEST VSM: validate map_flags, similar to how changes to
1812        // the default protections mask are validated
1813        // An intercept can arrive when permissions are adjusted and, for
1814        // isolated VMs, when the page is unaccepted.
1815        if self.vp.partition.isolation.is_isolated() {
1816            return Err((HvError::OperationDenied, 0));
1817        } else {
1818            if !gpa_pages.is_empty() {
1819                if !self.vp.partition.is_gpa_lower_vtl_ram(gpa_pages[0]) {
1820                    return Err((HvError::OperationDenied, 0));
1821                } else {
1822                    panic!("Should not be handling this hypercall for guest ram");
1823                }
1824            }
1825        }
1826
1827        Ok(())
1828    }
1829}
1830
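// Save/restore support for servicing. The saved state covers the shared CPU
// context (GPRs and FX state), the shared debug registers, the MTRRs, the
// crash registers, the VTL0 startup-suspend state, and the per-VTL synic
// message queues.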
1831mod save_restore {
1832    use super::HypervisorBackedX86;
1833    use super::UhProcessor;
1834    use anyhow::Context;
1835    use hcl::GuestVtl;
1836    use hvdef::HV_X64_MSR_GUEST_CRASH_CTL;
1837    use hvdef::HvInternalActivityRegister;
1838    use hvdef::HvX64RegisterName;
1839    use hvdef::Vtl;
1840    use virt::Processor;
1841    use virt::vp::AccessVpState;
1842    use virt::vp::Mtrrs;
1843    use vmcore::save_restore::RestoreError;
1844    use vmcore::save_restore::SaveError;
1845    use vmcore::save_restore::SaveRestore;
1846    use zerocopy::FromZeros;
1847    use zerocopy::IntoBytes;
1848
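    // Wire format of the per-processor saved state. The mesh field numbers
    // form the encoding and should remain stable across releases so that
    // older saved states can still be decoded.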
1849    mod state {
1850        use mesh::payload::Protobuf;
1851        use vmcore::save_restore::SavedStateRoot;
1852
1853        #[derive(Protobuf, SavedStateRoot)]
1854        #[mesh(package = "underhill.partition")]
1855        pub struct ProcessorSavedState {
1856            #[mesh(1)]
1857            pub(super) rax: u64,
1858            #[mesh(2)]
1859            pub(super) rcx: u64,
1860            #[mesh(3)]
1861            pub(super) rdx: u64,
1862            #[mesh(4)]
1863            pub(super) rbx: u64,
1864            #[mesh(5)]
1865            pub(super) cr2: u64,
1866            #[mesh(6)]
1867            pub(super) rbp: u64,
1868            #[mesh(7)]
1869            pub(super) rsi: u64,
1870            #[mesh(8)]
1871            pub(super) rdi: u64,
1872            #[mesh(9)]
1873            pub(super) r8: u64,
1874            #[mesh(10)]
1875            pub(super) r9: u64,
1876            #[mesh(11)]
1877            pub(super) r10: u64,
1878            #[mesh(12)]
1879            pub(super) r11: u64,
1880            #[mesh(13)]
1881            pub(super) r12: u64,
1882            #[mesh(14)]
1883            pub(super) r13: u64,
1884            #[mesh(15)]
1885            pub(super) r14: u64,
1886            #[mesh(16)]
1887            pub(super) r15: u64,
1888            #[mesh(17)]
1889            pub(super) fx_state: Vec<u8>,
1890            #[mesh(18)]
1891            pub(super) dr0: u64,
1892            #[mesh(19)]
1893            pub(super) dr1: u64,
1894            #[mesh(20)]
1895            pub(super) dr2: u64,
1896            #[mesh(21)]
1897            pub(super) dr3: u64,
1898
1899            /// Only set when the DR6_SHARED capability is present
1900            #[mesh(22)]
1901            pub(super) dr6: Option<u64>,
1902
1903            /// If VTL0 should be in the startup suspend state. Older Underhill
1904            /// versions do not save this property, so maintain the old buggy
1905            /// behavior for the cases where it is not present in the saved state.
1906            #[mesh(23)]
1907            pub(super) startup_suspend: Option<bool>,
1908
1909            #[mesh(24)]
1910            pub(super) crash_reg: Option<[u64; 5]>,
1911
1912            /// This value is ignored going forward, but may still be read by downlevel
1913            /// versions.
1914            #[mesh(25)]
1915            pub(super) crash_control: u64,
1916
1917            #[mesh(26)]
1918            pub(super) msr_mtrr_def_type: u64,
1919            #[mesh(27)]
1920            pub(super) fixed_mtrrs: Option<[u64; 11]>,
1921            #[mesh(28)]
1922            pub(super) variable_mtrrs: Option<[u64; 16]>,
1923            #[mesh(29)]
1924            pub(super) per_vtl: Vec<ProcessorVtlSavedState>,
1925        }
1926
1927        #[derive(Protobuf, SavedStateRoot)]
1928        #[mesh(package = "underhill.partition")]
1929        pub struct ProcessorVtlSavedState {
1930            #[mesh(1)]
1931            pub(super) message_queue: virt::vp::SynicMessageQueues,
1932        }
1933    }
1934
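    // Debug registers are shared across VTLs. DR6 is only shared when the
    // hypervisor reports the DR6_SHARED capability, which is why it must come
    // last: the slice is shortened by one entry when the capability is absent.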
1935    const SHARED_REGISTERS: &[HvX64RegisterName] = &[
1936        HvX64RegisterName::Dr0,
1937        HvX64RegisterName::Dr1,
1938        HvX64RegisterName::Dr2,
1939        HvX64RegisterName::Dr3,
1940        HvX64RegisterName::Dr6, // must be last
1941    ];
1942
1943    impl SaveRestore for UhProcessor<'_, HypervisorBackedX86> {
1944        type SavedState = state::ProcessorSavedState;
1945
1946        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
1947            // Ensure all async requests are reflected in the saved state.
1948            self.flush_async_requests();
1949
1950            let dr6_shared = self.partition.hcl.dr6_shared();
1951            let mut values = [FromZeros::new_zeroed(); SHARED_REGISTERS.len()];
1952            let len = if dr6_shared {
1953                SHARED_REGISTERS.len()
1954            } else {
1955                SHARED_REGISTERS.len() - 1
1956            };
1957
1958            self.runner
1959                // All these registers are shared, so the VTL we ask for doesn't matter
1960                .get_vp_registers(GuestVtl::Vtl0, &SHARED_REGISTERS[..len], &mut values[..len])
1961                .context("failed to get shared registers")
1962                .map_err(SaveError::Other)?;
1963
1964            let [
1965                rax,
1966                rcx,
1967                rdx,
1968                rbx,
1969                cr2,
1970                rbp,
1971                rsi,
1972                rdi,
1973                r8,
1974                r9,
1975                r10,
1976                r11,
1977                r12,
1978                r13,
1979                r14,
1980                r15,
1981            ] = self.runner.cpu_context().gps;
1982
1983            // We are responsible for saving shared MSRs too, but other than
1984            // the MTRRs all shared MSRs are read-only. So this is all we need.
1985            let Mtrrs {
1986                msr_mtrr_def_type,
1987                fixed: fixed_mtrrs,
1988                variable: variable_mtrrs,
1989            } = self
1990                // MTRRs are shared, so it doesn't matter which VTL we ask for.
1991                .access_state(Vtl::Vtl0)
1992                .mtrrs()
1993                .context("failed to get MTRRs")
1994                .map_err(SaveError::Other)?;
1995
1996            // This value is ignored during restore, but may still be read by downlevel
1997            // versions. Set it to the correct hardcoded read value as a best effort for them.
1998            let crash_control = self
1999                .read_crash_msr(HV_X64_MSR_GUEST_CRASH_CTL, GuestVtl::Vtl0)
2000                .unwrap();
2001
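            // Exhaustively destructure the processor so that adding a new
            // field forces an explicit decision about whether it needs to be
            // saved.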
2002            let UhProcessor {
2003                _not_send,
2004                inner:
2005                    crate::UhVpInner {
2006                        // Saved
2007                        message_queues,
2008                        // Sidecar state is reset during servicing
2009                        sidecar_exit_reason: _,
2010                        // Will be cleared by flush_async_requests above
2011                        wake_reasons: _,
2012                        // Runtime glue
2013                        waker: _,
2014                        // Topology information
2015                        vp_info: _,
2016                        cpu_index: _,
2017                    },
2018                // Saved
2019                crash_reg,
2020                // Runtime glue
2021                partition: _,
2022                idle_control: _,
2023                vmtime: _,
2024                timer: _,
2025                // This field is only used in dev/test scenarios
2026                force_exit_sidecar: _,
2027                signaled_sidecar_exit: _,
2028                // Just caching the hypervisor value, let it handle saving
2029                vtls_tlb_locked: _,
2030                // Statistic that should reset to 0 on restore
2031                kernel_returns: _,
2032                // Shared state should be handled by the backing
2033                shared: _,
2034                // The runner doesn't hold anything needing saving
2035                runner: _,
2036                backing:
2037                    HypervisorBackedX86 {
2038                        deliverability_notifications: _,
2039                        next_deliverability_notifications: _,
2040                        stats: _,
2041                        deferred_init,
2042                    },
2043                // Currently only meaningful for CVMs
2044                exit_activities: _,
2045            } = *self;
2046
2047            // Non-VTL0 VPs should never be in startup suspend, so we only need to check VTL0.
2048            // The hypervisor handles halt and idle for us.
2049            let startup_suspend = if deferred_init {
2050                Some(true)
2051            } else {
2052                let internal_activity = self
2053                    .runner
2054                    .get_vp_register(GuestVtl::Vtl0, HvX64RegisterName::InternalActivityState)
2055                    .inspect_err(|e| {
2056                        // The ioctl get_vp_register path does not tell us
2057                        // hv_status directly, so just log if it failed for any
2058                        // reason.
2059                        tracing::warn!(
2060                            error = e as &dyn std::error::Error,
2061                            "unable to query startup suspend; VTL0 startup suspend state will not be saved"
2062                        );
2063                    })
2064                    .ok();
2065
2066                internal_activity
2067                    .map(|a| HvInternalActivityRegister::from(a.as_u64()).startup_suspend())
2068            };
2069
2070            let per_vtl = [GuestVtl::Vtl0, GuestVtl::Vtl1]
2071                .map(|vtl| state::ProcessorVtlSavedState {
2072                    message_queue: message_queues[vtl].save(),
2073                })
2074                .into();
2075
2076            let state = state::ProcessorSavedState {
2077                rax,
2078                rcx,
2079                rdx,
2080                rbx,
2081                cr2,
2082                rbp,
2083                rsi,
2084                rdi,
2085                r8,
2086                r9,
2087                r10,
2088                r11,
2089                r12,
2090                r13,
2091                r14,
2092                r15,
2093                fx_state: self.runner.cpu_context().fx_state.as_bytes().to_vec(),
2094                dr0: values[0].as_u64(),
2095                dr1: values[1].as_u64(),
2096                dr2: values[2].as_u64(),
2097                dr3: values[3].as_u64(),
2098                dr6: dr6_shared.then(|| values[4].as_u64()),
2099                startup_suspend,
2100                crash_reg: Some(crash_reg),
2101                crash_control,
2102                msr_mtrr_def_type,
2103                fixed_mtrrs: Some(fixed_mtrrs),
2104                variable_mtrrs: Some(variable_mtrrs),
2105                per_vtl,
2106            };
2107
2108            Ok(state)
2109        }
2110
2111        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
2112            let state::ProcessorSavedState {
2113                rax,
2114                rcx,
2115                rdx,
2116                rbx,
2117                cr2,
2118                rbp,
2119                rsi,
2120                rdi,
2121                r8,
2122                r9,
2123                r10,
2124                r11,
2125                r12,
2126                r13,
2127                r14,
2128                r15,
2129                fx_state,
2130                dr0,
2131                dr1,
2132                dr2,
2133                dr3,
2134                dr6,
2135                startup_suspend,
2136                crash_reg,
2137                crash_control: _crash_control,
2138                msr_mtrr_def_type,
2139                fixed_mtrrs,
2140                variable_mtrrs,
2141                per_vtl,
2142            } = state;
2143
2144            let dr6_shared = self.partition.hcl.dr6_shared();
2145            self.runner.cpu_context_mut().gps = [
2146                rax, rcx, rdx, rbx, cr2, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
2147            ];
2148            if fx_state.len() != self.runner.cpu_context_mut().fx_state.as_bytes().len() {
2149                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2150                    "invalid fpu state"
2151                )));
2152            }
2153            if dr6_shared != state.dr6.is_some() {
2154                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2155                    "dr6 state mismatch"
2156                )));
2157            }
2158
2159            let len = if dr6_shared {
2160                SHARED_REGISTERS.len()
2161            } else {
2162                SHARED_REGISTERS.len() - 1
2163            };
2164
2165            let values = [dr0, dr1, dr2, dr3, dr6.unwrap_or(0)];
2166            self.runner
2167                .set_vp_registers(
2168                    GuestVtl::Vtl0,
2169                    SHARED_REGISTERS[..len].iter().copied().zip(values),
2170                )
2171                .context("failed to set shared registers")
2172                .map_err(RestoreError::Other)?;
2173
2174            self.runner
2175                .cpu_context_mut()
2176                .fx_state
2177                .as_mut_bytes()
2178                .copy_from_slice(&fx_state);
2179
2180            self.crash_reg = crash_reg.unwrap_or_default();
2181
2182            // Previous versions of Underhill did not save the MTRRs. If they
2183            // are missing from the saved state, assume they were never saved
2184            // and leave whatever the system already has untouched.
2185            if let (Some(fixed), Some(variable)) = (fixed_mtrrs, variable_mtrrs) {
2186                let mut access = self.access_state(Vtl::Vtl0);
2187                access
2188                    .set_mtrrs(&Mtrrs {
2189                        msr_mtrr_def_type,
2190                        fixed,
2191                        variable,
2192                    })
2193                    .context("failed to set MTRRs")
2194                    .map_err(RestoreError::Other)?;
2195            }
2196
2197            for (per, vtl) in per_vtl.into_iter().zip(0u8..) {
2198                let vtl = GuestVtl::try_from(vtl)
2199                    .context("too many vtls")
2200                    .map_err(RestoreError::Other)?;
2201                self.inner.message_queues[vtl].restore(&per.message_queue);
2202            }
2203
2204            let startup_suspend = match startup_suspend {
2205                Some(true) => {
2206                    // When Underhill brings up an AP via hypercall during a
2207                    // servicing update, that clears the VTL0 startup suspend
2208                    // state and makes the VP runnable. Like the cold boot path,
2209                    // we need to put the AP back into the startup suspend state
2210                    // so that the VP does not start running prematurely.
2211                    true
2212                }
2213                None if !self.vp_index().is_bsp() => {
2214                    // Previous versions of Underhill did not save this value,
2215                    // which means the VM could be in a bad state if it is
2216                    // serviced before VTL0 brings up the APs. Log some of the
2217                    // VP's state to make that easier to diagnose.
2218                    const NAMES: [HvX64RegisterName; 4] = [
2219                        HvX64RegisterName::Rip,
2220                        HvX64RegisterName::Rflags,
2221                        HvX64RegisterName::Cr0,
2222                        HvX64RegisterName::Efer,
2223                    ];
2224                    let mut values = [FromZeros::new_zeroed(); NAMES.len()];
2225                    self.runner
2226                        // Non-VTL0 VPs should never be in startup suspend, so we only need to handle VTL0.
2227                        .get_vp_registers(GuestVtl::Vtl0, &NAMES, &mut values)
2228                        .context("failed to get VP registers for startup suspend log")
2229                        .map_err(RestoreError::Other)?;
2230                    let [rip, rflags, cr0, efer] = values.map(|reg| reg.as_u64());
2231
2232                    tracing::error!(
2233                        vp_index = self.vp_index().index(),
2234                        rip,
2235                        rflags,
2236                        cr0,
2237                        efer,
2238                        "previous version of underhill did not save startup_suspend state"
2239                    );
2240
2241                    false
2242                }
2243                Some(false) | None => false,
2244            };
2245
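            // Prefer setting the startup suspend state directly; if the
            // hypervisor does not support that, fall back to the deferred
            // init path.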
2246            self.backing.deferred_init = match self.set_vtl0_startup_suspend(startup_suspend) {
2247                Ok(()) => false,
2248                Err(e) => {
2249                    if startup_suspend {
2250                        tracing::warn!(
2251                            error = &e as &dyn std::error::Error,
2252                            "unable to set internal activity register, falling back to deferred init"
2253                        );
2254                    }
2255                    startup_suspend
2256                }
2257            };
2258            Ok(())
2259        }
2260    }
2261}