virt_mshv_vtl/processor/mshv/x64.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! X64 Processor support for Microsoft hypervisor-backed partitions.

#![cfg(guest_arch = "x86_64")]

type VpRegisterName = HvX64RegisterName;

use super::super::BackingParams;
use super::super::BackingPrivate;
use super::super::UhEmulationState;
use super::super::UhRunVpError;
use super::super::signal_mnf;
use super::super::vp_state;
use super::super::vp_state::UhVpStateAccess;
use super::VbsIsolatedVtl1State;
use crate::BackingShared;
use crate::Error;
use crate::GuestVsmState;
use crate::GuestVtl;
use crate::UhPartitionNewParams;
use crate::processor::BackingSharedParams;
use crate::processor::SidecarExitReason;
use crate::processor::SidecarRemoveExit;
use crate::processor::UhHypercallHandler;
use crate::processor::UhProcessor;
use crate::validate_vtl_gpa_flags;
use hcl::ioctl;
use hcl::ioctl::ApplyVtlProtectionsError;
use hcl::ioctl::x64::MshvX64;
use hcl::protocol;
use hv1_emulator::hv::ProcessorVtlHv;
use hv1_hypercall::HvRepResult;
use hv1_structs::VtlSet;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvDeliverabilityNotificationsRegister;
use hvdef::HvError;
use hvdef::HvInterceptAccessType;
use hvdef::HvMapGpaFlags;
use hvdef::HvMessageType;
use hvdef::HvRegisterValue;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvX64InterceptMessageHeader;
use hvdef::HvX64PendingEvent;
use hvdef::HvX64PendingInterruptionType;
use hvdef::HvX64RegisterName;
use hvdef::Vtl;
use hvdef::hypercall;
use inspect::Inspect;
use inspect::InspectMut;
use inspect_counters::Counter;
use parking_lot::RwLock;
use std::sync::atomic::Ordering::Relaxed;
use virt::StopVp;
use virt::VpHaltReason;
use virt::VpIndex;
use virt::io::CpuIo;
use virt::state::HvRegisterState;
use virt::state::StateElement;
use virt::vp;
use virt::vp::AccessVpState;
use virt::x86::MsrError;
use virt_support_x86emu::emulate::EmuCheckVtlAccessError;
use virt_support_x86emu::emulate::EmuTranslateError;
use virt_support_x86emu::emulate::EmuTranslateResult;
use virt_support_x86emu::emulate::EmulatorSupport;
use x86defs::RFlags;
use x86defs::SegmentRegister;
use x86defs::xsave::Fxsave;
use x86defs::xsave::XFEATURE_SSE;
use x86defs::xsave::XFEATURE_X87;
use x86defs::xsave::XsaveHeader;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

/// A backing for hypervisor-backed partitions (non-isolated and
/// software-isolated).
#[derive(InspectMut)]
pub struct HypervisorBackedX86 {
    // VTL0 only, used for synic message and extint readiness notifications.
    // We do not currently support synic message ports or extint interrupts for VTL1.
    #[inspect(hex, with = "|&x| u64::from(x)")]
    deliverability_notifications: HvDeliverabilityNotificationsRegister,
    /// Next set of deliverability notifications. See register definition for details.
    #[inspect(hex, with = "|&x| u64::from(x)")]
    pub(super) next_deliverability_notifications: HvDeliverabilityNotificationsRegister,
    stats: ProcessorStatsX86,
}

/// Partition-wide shared data for hypervisor backed VMs.
#[derive(Inspect)]
pub struct HypervisorBackedX86Shared {
    pub(crate) guest_vsm: RwLock<GuestVsmState<VbsIsolatedVtl1State>>,
}

impl HypervisorBackedX86Shared {
    /// Creates a new partition-shared data structure for hypervisor backed VMs.
    pub(crate) fn new(
        _partition_params: &UhPartitionNewParams<'_>,
        params: BackingSharedParams<'_>,
    ) -> Result<Self, Error> {
        Ok(Self {
            guest_vsm: RwLock::new(GuestVsmState::from_availability(params.guest_vsm_available)),
        })
    }
}

#[derive(Inspect, Default)]
struct ProcessorStatsX86 {
    io_port: Counter,
    mmio: Counter,
    unaccepted_gpa: Counter,
    hypercall: Counter,
    synic_deliverable: Counter,
    interrupt_deliverable: Counter,
    cpuid: Counter,
    msr: Counter,
    eoi: Counter,
    unrecoverable_exception: Counter,
    halt: Counter,
    exception_intercept: Counter,
}

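/// Registers that are loaded eagerly when an intercept arrives and cached for
/// use by the instruction emulator (see `emulation_cache` below).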
pub struct MshvEmulationCache {
    rsp: u64,
    es: SegmentRegister,
    ds: SegmentRegister,
    fs: SegmentRegister,
    gs: SegmentRegister,
    ss: SegmentRegister,
    cr0: u64,
    efer: u64,
    rip: u64,
    rflags: RFlags,
}

#[expect(private_interfaces)]
impl BackingPrivate for HypervisorBackedX86 {
    type HclBacking<'mshv> = MshvX64<'mshv>;
    type Shared = HypervisorBackedX86Shared;
    type EmulationCache = MshvEmulationCache;

    fn shared(shared: &BackingShared) -> &Self::Shared {
        let BackingShared::Hypervisor(shared) = shared else {
            unreachable!()
        };
        shared
    }

    fn new(
        params: BackingParams<'_, '_, Self>,
        _shared: &HypervisorBackedX86Shared,
    ) -> Result<Self, Error> {
        // Initialize shared register state to architectural state. The kernel
        // zero initializes this.
        //
        // When restoring, this will be overwritten, but it's not expensive
        // enough to bother skipping.
        let regs = vp::Registers::at_reset(&params.partition.caps, params.vp_info);
        *params.runner.cpu_context_mut() = protocol::hcl_cpu_context_x64 {
            gps: [
                regs.rax, regs.rcx, regs.rdx, regs.rbx, 0, /* cr2 */
                regs.rbp, regs.rsi, regs.rdi, regs.r8, regs.r9, regs.r10, regs.r11, regs.r12,
                regs.r13, regs.r14, regs.r15,
            ],
            fx_state: vp::Xsave::at_reset(&params.partition.caps, params.vp_info).fxsave(),
            reserved: [0; 384],
        };

        Ok(Self {
            deliverability_notifications: Default::default(),
            next_deliverability_notifications: Default::default(),
            stats: Default::default(),
        })
    }

    fn init(_this: &mut UhProcessor<'_, Self>) {}

    type StateAccess<'p, 'a>
        = UhVpStateAccess<'a, 'p, Self>
    where
        Self: 'a + 'p,
        'p: 'a;

    fn access_vp_state<'a, 'p>(
        this: &'a mut UhProcessor<'p, Self>,
        vtl: GuestVtl,
    ) -> Self::StateAccess<'p, 'a> {
        UhVpStateAccess::new(this, vtl)
    }

    async fn run_vp(
        this: &mut UhProcessor<'_, Self>,
        dev: &impl CpuIo,
        stop: &mut StopVp<'_>,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        if this.backing.deliverability_notifications
            != this.backing.next_deliverability_notifications
        {
            let notifications = this.backing.next_deliverability_notifications;
            tracing::trace!(?notifications, "setting notifications");
            this.runner
                .set_vp_register(
                    // TODO GUEST VSM
                    GuestVtl::Vtl0,
                    VpRegisterName::DeliverabilityNotifications,
                    u64::from(notifications).into(),
                )
                .expect("requesting deliverability is not a fallible operation");
            this.backing.deliverability_notifications =
                this.backing.next_deliverability_notifications;
        }

        let intercepted = if this.runner.is_sidecar() {
            let mut run = this
                .runner
                .run_sidecar()
                .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::Run(e)))?;
            match stop.until_stop(run.wait()).await {
                Ok(r) => r,
                Err(stop) => {
                    run.cancel();
                    let r = run.wait().await;
                    if matches!(r, Ok(false)) {
                        // No intercept, so stop the VP.
                        return Err(stop.into());
                    }
                    r
                }
            }
            .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::Sidecar(e)))?
        } else {
            this.unlock_tlb_lock(Vtl::Vtl2);
            this.runner
                .run()
                .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::Run(e)))?
        };

        if intercepted {
            let message_type = this.runner.exit_message().header.typ;

            let mut intercept_handler =
                InterceptHandler::new(this).map_err(VpHaltReason::InvalidVmState)?;

            let stat = match message_type {
                HvMessageType::HvMessageTypeX64IoPortIntercept => {
                    intercept_handler.handle_io_port_exit(dev).await?;
                    &mut this.backing.stats.io_port
                }
                HvMessageType::HvMessageTypeUnmappedGpa
                | HvMessageType::HvMessageTypeGpaIntercept => {
                    intercept_handler.handle_mmio_exit(dev).await?;
                    &mut this.backing.stats.mmio
                }
                HvMessageType::HvMessageTypeUnacceptedGpa => {
                    intercept_handler
                        .handle_unaccepted_gpa_intercept(dev)
                        .await?;
                    &mut this.backing.stats.unaccepted_gpa
                }
                HvMessageType::HvMessageTypeHypercallIntercept => {
                    intercept_handler.handle_hypercall_exit(dev)?;
                    &mut this.backing.stats.hypercall
                }
                HvMessageType::HvMessageTypeSynicSintDeliverable => {
                    intercept_handler.handle_synic_deliverable_exit();
                    &mut this.backing.stats.synic_deliverable
                }
                HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
                    intercept_handler.handle_interrupt_deliverable_exit(dev)?;
                    &mut this.backing.stats.interrupt_deliverable
                }
                HvMessageType::HvMessageTypeX64CpuidIntercept => {
                    intercept_handler.handle_cpuid_intercept()?;
                    &mut this.backing.stats.cpuid
                }
                HvMessageType::HvMessageTypeMsrIntercept => {
                    intercept_handler.handle_msr_intercept()?;
                    &mut this.backing.stats.msr
                }
                HvMessageType::HvMessageTypeX64ApicEoi => {
                    intercept_handler.handle_eoi(dev)?;
                    &mut this.backing.stats.eoi
                }
                HvMessageType::HvMessageTypeUnrecoverableException => {
                    intercept_handler.handle_unrecoverable_exception()?;
                    &mut this.backing.stats.unrecoverable_exception
                }
                HvMessageType::HvMessageTypeExceptionIntercept => {
                    intercept_handler.handle_exception()?;
                    &mut this.backing.stats.exception_intercept
                }
                reason => unreachable!("unknown exit reason: {:#x?}", reason),
            };
            stat.increment();

            if this.runner.is_sidecar()
                && !this.signaled_sidecar_exit
                && !this.partition.no_sidecar_hotplug.load(Relaxed)
            {
                // We got and handled an exit and this is a sidecar VP. Cancel
                // the run so that we can move the sidecar VP over to the main
                // kernel and handle future exits there.
                //
                // This is not strictly necessary--we can continue to run the VP
                // in the sidecar kernel. But since we have received at least
                // one exit, we can expect that we will receive more, and
                // handling the exits remotely introduces jitter.
                let message = this.runner.exit_message();
                this.inner
                    .set_sidecar_exit_reason(SidecarExitReason::Exit(parse_sidecar_exit(message)));
                this.signaled_sidecar_exit = true;
                return Err(VpHaltReason::Cancel);
            }
        }
        Ok(())
    }

    fn poll_apic(
        _this: &mut UhProcessor<'_, Self>,
        _vtl: GuestVtl,
        _scan_irr: bool,
    ) -> Result<(), UhRunVpError> {
        Ok(())
    }

    fn process_interrupts(
        _this: &mut UhProcessor<'_, Self>,
        _scan_irr: hv1_structs::VtlArray<bool, 2>,
        _first_scan_irr: &mut bool,
        _dev: &impl CpuIo,
    ) -> Result<bool, VpHaltReason<UhRunVpError>> {
        Ok(false)
    }

    fn request_extint_readiness(this: &mut UhProcessor<'_, Self>) {
        this.backing
            .next_deliverability_notifications
            .set_interrupt_notification(true);
    }

    fn request_untrusted_sint_readiness(this: &mut UhProcessor<'_, Self>, sints: u16) {
        this.backing
            .next_deliverability_notifications
            .set_sints(this.backing.next_deliverability_notifications.sints() | sints);
    }

    fn hv(&self, _vtl: GuestVtl) -> Option<&ProcessorVtlHv> {
        None
    }

    fn hv_mut(&mut self, _vtl: GuestVtl) -> Option<&mut ProcessorVtlHv> {
        None
    }

    fn handle_vp_start_enable_vtl_wake(
        _this: &mut UhProcessor<'_, Self>,
        _vtl: GuestVtl,
    ) -> Result<(), UhRunVpError> {
        unimplemented!()
    }

    fn vtl1_inspectable(_this: &UhProcessor<'_, Self>) -> bool {
        // TODO: Use the VsmVpStatus register to query the hypervisor for
        // whether VTL 1 is enabled on the vp (this can be cached).
        false
    }
}

fn parse_sidecar_exit(message: &hvdef::HvMessage) -> SidecarRemoveExit {
    match message.header.typ {
        HvMessageType::HvMessageTypeX64IoPortIntercept => {
            let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
            SidecarRemoveExit::Io {
                port: message.port_number,
                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
            }
        }
        HvMessageType::HvMessageTypeUnmappedGpa | HvMessageType::HvMessageTypeGpaIntercept => {
            let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
            SidecarRemoveExit::Mmio {
                gpa: message.guest_physical_address,
                write: message.header.intercept_access_type == HvInterceptAccessType::WRITE,
            }
        }
        HvMessageType::HvMessageTypeHypercallIntercept => {
            let message = message.as_message::<hvdef::HvX64HypercallInterceptMessage>();
            let is_64bit = message.header.execution_state.cr0_pe()
                && message.header.execution_state.efer_lma();
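            // The hypercall input (control) value is passed in RCX for 64-bit
            // callers and split across EDX:EAX for 32-bit callers.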
            let control = if is_64bit {
                message.rcx
            } else {
                (message.rdx << 32) | (message.rax as u32 as u64)
            };
            SidecarRemoveExit::Hypercall {
                code: hvdef::HypercallCode(hypercall::Control::from(control).code()),
            }
        }
        HvMessageType::HvMessageTypeX64CpuidIntercept => {
            let message = message.as_message::<hvdef::HvX64CpuidInterceptMessage>();
            SidecarRemoveExit::Cpuid {
                leaf: message.rax as u32,
                subleaf: message.rcx as u32,
            }
        }
        HvMessageType::HvMessageTypeMsrIntercept => {
            let message = message.as_message::<hvdef::HvX64MsrInterceptMessage>();
            SidecarRemoveExit::Msr {
                msr: message.msr_number,
                value: (message.header.intercept_access_type == HvInterceptAccessType::WRITE)
                    .then_some((message.rdx << 32) | message.rax as u32 as u64),
            }
        }
        typ => SidecarRemoveExit::Hypervisor { message: typ },
    }
}

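/// Returns the guest RIP advanced past the intercepted instruction.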
fn next_rip(value: &HvX64InterceptMessageHeader) -> u64 {
    value.rip.wrapping_add(value.instruction_len() as u64)
}

struct InterceptHandler<'a, 'b> {
    vp: &'a mut UhProcessor<'b, HypervisorBackedX86>,
    intercepted_vtl: GuestVtl,
}

impl<'a, 'b> InterceptHandler<'a, 'b> {
    fn new(vp: &'a mut UhProcessor<'b, HypervisorBackedX86>) -> Result<Self, UhRunVpError> {
        let message_type = vp.runner.exit_message().header.typ;

        let intercepted_vtl = match vp.runner.reg_page_vtl() {
            Ok(vtl) => vtl,
            Err(ioctl::x64::RegisterPageVtlError::InvalidVtl(vtl)) => {
                return Err(UhRunVpError::InvalidInterceptedVtl(vtl));
            }
            Err(ioctl::x64::RegisterPageVtlError::NoRegisterPage) => {
                if matches!(&message_type, &HvMessageType::HvMessageTypeX64ApicEoi) {
                    // At the moment this is only used for the ioapic, so assume
                    // that this is targeting VTL 0 for now. TODO: fix
                    GuestVtl::Vtl0
                } else {
                    let message_header = match &message_type {
                        &HvMessageType::HvMessageTypeX64IoPortIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64IoPortInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeUnmappedGpa
                        | &HvMessageType::HvMessageTypeGpaIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeUnacceptedGpa => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64MemoryInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeHypercallIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64HypercallInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeSynicSintDeliverable => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64SynicSintDeliverableMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeX64InterruptionDeliverable => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64InterruptionDeliverableMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeX64CpuidIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64CpuidInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeMsrIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64MsrInterceptMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeUnrecoverableException => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64UnrecoverableExceptionMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeX64Halt => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64HaltMessage>()
                                .header
                        }
                        &HvMessageType::HvMessageTypeExceptionIntercept => {
                            &vp.runner
                                .exit_message()
                                .as_message::<hvdef::HvX64ExceptionInterceptMessage>()
                                .header
                        }
                        reason => unreachable!("unknown exit reason: {:#x?}", reason),
                    };

                    message_header.execution_state.vtl().try_into().map_err(
                        |hcl::UnsupportedGuestVtl(vtl)| UhRunVpError::InvalidInterceptedVtl(vtl),
                    )?
                }
            }
        };

        Ok(Self {
            vp,
            intercepted_vtl,
        })
    }

    fn handle_interrupt_deliverable_exit(
        &mut self,
        bus: &impl CpuIo,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64InterruptionDeliverableMessage>();

        assert_eq!(
            message.deliverable_type,
            HvX64PendingInterruptionType::HV_X64_PENDING_INTERRUPT
        );

        self.vp
            .backing
            .deliverability_notifications
            .set_interrupt_notification(false);

        self.vp
            .backing
            .next_deliverability_notifications
            .set_interrupt_notification(false);

        if let Some(vector) = bus.acknowledge_pic_interrupt() {
            let event = hvdef::HvX64PendingExtIntEvent::new()
                .with_event_pending(true)
                .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXT_INT)
                .with_vector(vector);

            self.vp
                .runner
                .set_vp_register(
                    self.intercepted_vtl,
                    HvX64RegisterName::PendingEvent0,
                    u128::from(event).into(),
                )
                .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::Event(e)))?;
        }

        Ok(())
    }

    fn handle_synic_deliverable_exit(&mut self) {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64SynicSintDeliverableMessage>();

        tracing::trace!(
            deliverable_sints = message.deliverable_sints,
            "sint deliverable"
        );

        self.vp.backing.deliverability_notifications.set_sints(
            self.vp.backing.deliverability_notifications.sints() & !message.deliverable_sints,
        );

        // This is updated by `deliver_synic_messages` below, so clear it here.
        self.vp
            .backing
            .next_deliverability_notifications
            .set_sints(0);

        // These messages are always delivered to VTL0, as VTL1 does not own any VMBUS channels.
        self.vp
            .deliver_synic_messages(GuestVtl::Vtl0, message.deliverable_sints);
    }

    fn handle_hypercall_exit(
        &mut self,
        bus: &impl CpuIo,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64HypercallInterceptMessage>();

        tracing::trace!(msg = %format_args!("{:x?}", message), "hypercall");

        let is_64bit =
            message.header.execution_state.cr0_pe() && message.header.execution_state.efer_lma();

        let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
        let handler = UhHypercallHandler {
            vp: self.vp,
            bus,
            trusted: false,
            intercepted_vtl: self.intercepted_vtl,
        };
        UhHypercallHandler::MSHV_DISPATCHER.dispatch(
            guest_memory,
            hv1_hypercall::X64RegisterIo::new(handler, is_64bit),
        );

        Ok(())
    }

    async fn handle_mmio_exit(
        &mut self,
        dev: &impl CpuIo,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64MemoryInterceptMessage>();

        tracing::trace!(msg = %format_args!("{:x?}", message), "mmio");

        let interruption_pending = message.header.execution_state.interruption_pending();

        // Fast path for monitor page writes.
        if Some(message.guest_physical_address & !(HV_PAGE_SIZE - 1))
            == self.vp.partition.monitor_page.gpa()
            && message.header.intercept_access_type == HvInterceptAccessType::WRITE
        {
            let tlb_lock_held = message.memory_access_info.gva_gpa_valid()
                || message.memory_access_info.tlb_locked();
            let guest_memory = &self.vp.partition.gm[self.intercepted_vtl];
            let cache = self.vp.emulation_cache(self.intercepted_vtl);
            let mut emulation_state = UhEmulationState {
                vp: &mut *self.vp,
                interruption_pending,
                devices: dev,
                vtl: self.intercepted_vtl,
                cache,
            };
            if let Some(bit) = virt_support_x86emu::emulate::emulate_mnf_write_fast_path(
                &mut emulation_state,
                guest_memory,
                dev,
                interruption_pending,
                tlb_lock_held,
            )? {
                if let Some(connection_id) = self.vp.partition.monitor_page.write_bit(bit) {
                    signal_mnf(dev, connection_id);
                }
                return Ok(());
            }
        }

        let cache = self.vp.emulation_cache(self.intercepted_vtl);
        self.vp
            .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
            .await?;
        Ok(())
    }

    async fn handle_io_port_exit(
        &mut self,
        dev: &impl CpuIo,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64IoPortInterceptMessage>();

        tracing::trace!(msg = %format_args!("{:x?}", message), "io_port");

        assert_eq!(message.rax, self.vp.runner.cpu_context().gps[protocol::RAX]);

        let interruption_pending = message.header.execution_state.interruption_pending();

        if message.access_info.string_op() || message.access_info.rep_prefix() {
            let cache = self.vp.emulation_cache(self.intercepted_vtl);
            self.vp
                .emulate(dev, interruption_pending, self.intercepted_vtl, cache)
                .await
        } else {
            let next_rip = next_rip(&message.header);
            let access_size = message.access_info.access_size();
            virt_support_x86emu::emulate::emulate_io(
                self.vp.vp_index(),
                message.header.intercept_access_type == HvInterceptAccessType::WRITE,
                message.port_number,
                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX],
                access_size,
                dev,
            )
            .await;
            self.vp.set_rip(self.intercepted_vtl, next_rip)
        }
    }

    async fn handle_unaccepted_gpa_intercept(
        &mut self,
        dev: &impl CpuIo,
    ) -> Result<(), VpHaltReason<UhRunVpError>> {
        let gpa = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64MemoryInterceptMessage>()
            .guest_physical_address;

        if self.vp.partition.is_gpa_lower_vtl_ram(gpa) {
            // The host may have moved the page to an unaccepted state, so fail
            // here. This does not apply to VTL 2 memory - for unaccepted pages,
            // the intercept goes to host VTL0.
            //
            // Note: SGX memory should be included in this check, so if SGX is
            // no longer included in the lower_vtl_memory_layout, make sure the
            // appropriate changes are reflected here.
            Err(VpHaltReason::InvalidVmState(
                UhRunVpError::UnacceptedMemoryAccess(gpa),
            ))
        } else {
            // TODO SNP: for hardware isolation, if the intercept is due to a guest
            // error, inject a machine check
            self.handle_mmio_exit(dev).await?;
            Ok(())
        }
    }

    fn handle_cpuid_intercept(&mut self) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64CpuidInterceptMessage>();

        let default_result = [
            message.default_result_rax as u32,
            message.default_result_rbx as u32,
            message.default_result_rcx as u32,
            message.default_result_rdx as u32,
        ];

        tracing::trace!(msg = %format_args!("{:x?}", message), "cpuid");

        let [eax, ebx, ecx, edx] =
            self.vp
                .partition
                .cpuid_result(message.rax as u32, message.rcx as u32, &default_result);

        let next_rip = next_rip(&message.header);
        self.vp.runner.cpu_context_mut().gps[protocol::RAX] = eax.into();
        self.vp.runner.cpu_context_mut().gps[protocol::RBX] = ebx.into();
        self.vp.runner.cpu_context_mut().gps[protocol::RCX] = ecx.into();
        self.vp.runner.cpu_context_mut().gps[protocol::RDX] = edx.into();

        self.vp.set_rip(self.intercepted_vtl, next_rip)
    }

    fn handle_msr_intercept(&mut self) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64MsrInterceptMessage>();
        let rip = next_rip(&message.header);

        tracing::trace!(msg = %format_args!("{:x?}", message), "msr");

        let msr = message.msr_number;
        match message.header.intercept_access_type {
            HvInterceptAccessType::READ => {
                // Only supported MSRs are the crash MSRs.
                let value = match self.vp.read_crash_msr(msr, self.intercepted_vtl) {
                    Ok(v) => v,
                    Err(MsrError::Unknown) => {
                        tracing::trace!(msr, "unknown msr read");
                        0
                    }
                    Err(MsrError::InvalidAccess) => {
                        self.vp.inject_gpf(self.intercepted_vtl);
                        // Do not advance RIP.
                        return Ok(());
                    }
                };

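                // RDMSR returns its result split across EDX:EAX.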
                self.vp.runner.cpu_context_mut().gps[protocol::RAX] = value & 0xffff_ffff;
                self.vp.runner.cpu_context_mut().gps[protocol::RDX] = value >> 32;
            }
            HvInterceptAccessType::WRITE => {
                let value = (message.rax & 0xffff_ffff) | (message.rdx << 32);
                // Only supported MSRs are the crash MSRs.
                match self.vp.write_crash_msr(msr, value, self.intercepted_vtl) {
                    Ok(()) => {}
                    Err(MsrError::Unknown) => {
                        tracing::trace!(msr, value, "unknown msr write");
                    }
                    Err(MsrError::InvalidAccess) => {
                        self.vp.inject_gpf(self.intercepted_vtl);
                        // Do not advance RIP.
                        return Ok(());
                    }
                }
            }
            _ => unreachable!(),
        }

        self.vp.set_rip(self.intercepted_vtl, rip)
    }

    fn handle_eoi(&self, dev: &impl CpuIo) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64ApicEoiMessage>();

        tracing::trace!(msg = %format_args!("{:x?}", message), "eoi");

        dev.handle_eoi(message.interrupt_vector);
        Ok(())
    }

    fn handle_unrecoverable_exception(&self) -> Result<(), VpHaltReason<UhRunVpError>> {
        Err(VpHaltReason::TripleFault {
            vtl: self.intercepted_vtl.into(),
        })
    }

    fn handle_exception(&mut self) -> Result<(), VpHaltReason<UhRunVpError>> {
        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64ExceptionInterceptMessage>();

        match x86defs::Exception(message.vector as u8) {
            x86defs::Exception::DEBUG if cfg!(feature = "gdb") => {
                self.vp.handle_debug_exception(self.intercepted_vtl)?
            }
            _ => tracing::error!("unexpected exception type {:#x?}", message.vector),
        }
        Ok(())
    }
}

impl UhProcessor<'_, HypervisorBackedX86> {
    fn set_rip(&mut self, vtl: GuestVtl, rip: u64) -> Result<(), VpHaltReason<UhRunVpError>> {
        self.runner
            .set_vp_register(vtl, HvX64RegisterName::Rip, rip.into())
            .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::AdvanceRip(e)))?;

        Ok(())
    }

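    /// Injects a #GP(0) into the guest by queuing a pending exception event.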
    fn inject_gpf(&mut self, vtl: GuestVtl) {
        let exception_event = hvdef::HvX64PendingExceptionEvent::new()
            .with_event_pending(true)
            .with_event_type(hvdef::HV_X64_PENDING_EVENT_EXCEPTION)
            .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0.into())
            .with_deliver_error_code(true)
            .with_error_code(0);

        self.runner
            .set_vp_register(
                vtl,
                HvX64RegisterName::PendingEvent0,
                u128::from(exception_event).into(),
            )
            .expect("set_vp_register should succeed for pending event");
    }

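    /// Handles a guest write to the VsmPartitionConfig register, validating the
    /// requested protections and applying them to lower-VTL memory.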
    fn set_vsm_partition_config(
        &mut self,
        vtl: GuestVtl,
        value: HvRegisterVsmPartitionConfig,
    ) -> Result<(), HvError> {
        if vtl != GuestVtl::Vtl1 {
            return Err(HvError::InvalidParameter);
        }

        assert!(self.partition.isolation.is_isolated());

        let status = self
            .partition
            .vsm_status()
            .expect("cannot fail to query vsm status");

        let vtl1_enabled = VtlSet::from(status.enabled_vtl_set()).is_set(GuestVtl::Vtl1);
        if !vtl1_enabled {
            return Err(HvError::InvalidVtlState);
        }

        let mut guest_vsm_lock = self.shared.guest_vsm.write();

        // Initialize partition.guest_vsm state if necessary.
        match *guest_vsm_lock {
            GuestVsmState::NotPlatformSupported => {
                return Err(HvError::AccessDenied);
            }
            GuestVsmState::NotGuestEnabled => {
                // TODO: check status
                *guest_vsm_lock = GuestVsmState::Enabled {
                    vtl1: Default::default(),
                };
            }
            GuestVsmState::Enabled { .. } => {}
        }

        let GuestVsmState::Enabled { vtl1 } = &mut *guest_vsm_lock else {
            unreachable!()
        };
        let protections = HvMapGpaFlags::from(value.default_vtl_protection_mask() as u32);

        if value.reserved() != 0 {
            return Err(HvError::InvalidRegisterValue);
        }

        // VTL protection cannot be disabled once enabled.
        //
        // The hypervisor should intercept only the case where the lower VTL is
        // setting the enable_vtl_protection bit when it was previously
        // disabled; other cases are handled directly by the hypervisor.
        if !value.enable_vtl_protection() {
            if vtl1.enable_vtl_protection {
                // A malicious guest could change its hypercall parameters in
                // memory while the intercept is being handled; this case
                // explicitly handles that situation.
                return Err(HvError::InvalidRegisterValue);
            } else {
                panic!("unexpected SetVpRegisters intercept");
            }
        }

        // For VBS-isolated VMs, protections apply to VTLs lower than the one specified when
        // setting VsmPartitionConfig.
        let mbec_enabled = VtlSet::from(status.mbec_enabled_vtl_set()).is_set(GuestVtl::Vtl0);
        let shadow_supervisor_stack_enabled =
            VtlSet::from(status.supervisor_shadow_stack_enabled_vtl_set() as u16)
                .is_set(GuestVtl::Vtl0);

        if !validate_vtl_gpa_flags(protections, mbec_enabled, shadow_supervisor_stack_enabled) {
            return Err(HvError::InvalidRegisterValue);
        }

        // Default VTL protection mask must include read and write.
        if !(protections.readable() && protections.writable()) {
            return Err(HvError::InvalidRegisterValue);
        }

        // Don't allow changing existing protections once set.
        if let Some(current_protections) = vtl1.default_vtl_protections {
            if protections != current_protections {
                return Err(HvError::InvalidRegisterValue);
            }
        }
        vtl1.default_vtl_protections = Some(protections);

        for ram_range in self.partition.lower_vtl_memory_layout.ram().iter() {
            self.partition
                .hcl
                .modify_vtl_protection_mask(ram_range.range, protections, vtl.into())
                .map_err(|e| match e {
                    ApplyVtlProtectionsError::Hypervisor {
                        range: _,
                        output: _,
                        hv_error,
                        vtl: _,
                    } => hv_error,
                    _ => unreachable!(),
                })?;
        }

        let hc_regs = [(HvX64RegisterName::VsmPartitionConfig, u64::from(value))];
        self.runner.set_vp_registers_hvcall(vtl.into(), hc_regs)?;
        vtl1.enable_vtl_protection = true;

        Ok(())
    }

    /// Eagerly loads registers for emulation.
    ///
    /// Typically we load expensive registers lazily; however, some registers are
    /// always needed, and the underlying ioctl supports batching multiple
    /// register retrievals into a single call.
    fn emulation_cache(&mut self, vtl: GuestVtl) -> MshvEmulationCache {
        const NAMES: &[HvX64RegisterName] = &[
            HvX64RegisterName::Rsp,
            HvX64RegisterName::Es,
            HvX64RegisterName::Ds,
            HvX64RegisterName::Fs,
            HvX64RegisterName::Gs,
            HvX64RegisterName::Ss,
            HvX64RegisterName::Cr0,
            HvX64RegisterName::Efer,
        ];
        let mut values = [FromZeros::new_zeroed(); NAMES.len()];
        self.runner
            .get_vp_registers(vtl, NAMES, &mut values)
            .expect("register query should not fail");

        let [rsp, es, ds, fs, gs, ss, cr0, efer] = values;

        let header = self
            .runner
            .exit_message()
            .as_message::<HvX64InterceptMessageHeader>();

        MshvEmulationCache {
            rsp: rsp.as_u64(),
            es: from_seg(es.into()),
            ds: from_seg(ds.into()),
            fs: from_seg(fs.into()),
            gs: from_seg(gs.into()),
            ss: from_seg(ss.into()),
            cr0: cr0.as_u64(),
            efer: efer.as_u64(),
            rip: header.rip,
            rflags: header.rflags.into(),
        }
    }
}

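/// Converts a hypervisor segment register into the emulator's representation.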
fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
    SegmentRegister {
        base: reg.base,
        limit: reg.limit,
        selector: reg.selector,
        attributes: reg.attributes.into(),
    }
}

impl<T: CpuIo> EmulatorSupport for UhEmulationState<'_, '_, T, HypervisorBackedX86> {
    type Error = UhRunVpError;

    fn flush(&mut self) -> Result<(), Self::Error> {
        self.vp
            .runner
            .set_vp_registers(
                self.vtl,
                [
                    (HvX64RegisterName::Rip, self.cache.rip),
                    (HvX64RegisterName::Rflags, self.cache.rflags.into()),
                    (HvX64RegisterName::Rsp, self.cache.rsp),
                ],
            )
            .unwrap();
        Ok(())
    }

    fn vp_index(&self) -> VpIndex {
        self.vp.vp_index()
    }

    fn vendor(&self) -> x86defs::cpuid::Vendor {
        self.vp.partition.caps.vendor
    }

    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
        match reg {
            x86emu::Gp::RSP => self.cache.rsp,
            _ => self.vp.runner.cpu_context().gps[reg as usize],
        }
    }

    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
        if reg == x86emu::Gp::RSP {
            self.cache.rsp = v;
        }
        self.vp.runner.cpu_context_mut().gps[reg as usize] = v;
    }

    fn xmm(&mut self, index: usize) -> u128 {
        u128::from_le_bytes(self.vp.runner.cpu_context().fx_state.xmm[index])
    }

    fn set_xmm(&mut self, index: usize, v: u128) -> Result<(), Self::Error> {
        self.vp.runner.cpu_context_mut().fx_state.xmm[index] = v.to_le_bytes();
        Ok(())
    }

    fn rip(&mut self) -> u64 {
        self.cache.rip
    }

    fn set_rip(&mut self, v: u64) {
        self.cache.rip = v;
    }

    fn segment(&mut self, index: x86emu::Segment) -> SegmentRegister {
        match index {
            x86emu::Segment::CS => {
                let header = self
                    .vp
                    .runner
                    .exit_message()
                    .as_message::<HvX64InterceptMessageHeader>();
                from_seg(header.cs_segment)
            }
            x86emu::Segment::ES => self.cache.es,
            x86emu::Segment::SS => self.cache.ss,
            x86emu::Segment::DS => self.cache.ds,
            x86emu::Segment::FS => self.cache.fs,
            x86emu::Segment::GS => self.cache.gs,
        }
    }

    fn efer(&mut self) -> u64 {
        self.cache.efer
    }

    fn cr0(&mut self) -> u64 {
        self.cache.cr0
    }

    fn rflags(&mut self) -> RFlags {
        self.cache.rflags
    }

    fn set_rflags(&mut self, v: RFlags) {
        self.cache.rflags = v;
    }

    fn instruction_bytes(&self) -> &[u8] {
        let message = self.vp.runner.exit_message();
        match message.header.typ {
            HvMessageType::HvMessageTypeGpaIntercept
            | HvMessageType::HvMessageTypeUnmappedGpa
            | HvMessageType::HvMessageTypeUnacceptedGpa => {
                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
                &message.instruction_bytes[..message.instruction_byte_count as usize]
            }
            HvMessageType::HvMessageTypeX64IoPortIntercept => {
                let message = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
                &message.instruction_bytes[..message.instruction_byte_count as usize]
            }
            _ => unreachable!(),
        }
    }

    fn physical_address(&self) -> Option<u64> {
        let message = self.vp.runner.exit_message();
        match message.header.typ {
            HvMessageType::HvMessageTypeGpaIntercept
            | HvMessageType::HvMessageTypeUnmappedGpa
            | HvMessageType::HvMessageTypeUnacceptedGpa => {
                let message = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
                Some(message.guest_physical_address)
            }
            _ => None,
        }
    }

    fn initial_gva_translation(
        &mut self,
    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
        if (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeGpaIntercept)
            && (self.vp.runner.exit_message().header.typ != HvMessageType::HvMessageTypeUnmappedGpa)
            && (self.vp.runner.exit_message().header.typ
                != HvMessageType::HvMessageTypeUnacceptedGpa)
        {
            return None;
        }

        let message = self
            .vp
            .runner
            .exit_message()
            .as_message::<hvdef::HvX64MemoryInterceptMessage>();

        if !message.memory_access_info.gva_gpa_valid() {
            tracing::trace!(?message.guest_virtual_address, ?message.guest_physical_address, "gva gpa not valid {:?}", self.vp.runner.exit_message().payload());
            return None;
        }

        let translate_mode = virt_support_x86emu::emulate::TranslateMode::try_from(
            message.header.intercept_access_type,
        )
        .expect("unexpected intercept access type");

        let translation = virt_support_x86emu::emulate::InitialTranslation {
            gva: message.guest_virtual_address,
            gpa: message.guest_physical_address,
            translate_mode,
        };

        tracing::trace!(?translation, "initial translation");

        // If we have a valid translation, the hypervisor must have set the TLB lock
        // so the translation remains valid for the duration of this exit.
        // Update our local cache appropriately.
        self.vp.mark_tlb_locked(Vtl::Vtl2, self.vtl);

        Some(translation)
    }

    fn interruption_pending(&self) -> bool {
        self.interruption_pending
    }

    fn check_vtl_access(
        &mut self,
        gpa: u64,
        mode: virt_support_x86emu::emulate::TranslateMode,
    ) -> Result<(), EmuCheckVtlAccessError<Self::Error>> {
        // Underhill currently doesn't set VTL 2 protections against execute exclusively; it removes
        // all permissions from a page. So for VTL 1, there is no need to check the permissions; if
        // VTL 1 doesn't have permissions to a page, Underhill should appropriately fail when it
        // tries to read or write to that page on VTL 1's behalf.
        //
        // For VTL 0, the alias map guards the read and write permissions, so only check VTL execute
        // permissions. Because VTL 2 will not restrict execute exclusively, only VTL 1 execute
        // permissions need to be checked, and therefore permissions are only checked if VTL 1 is allowed.
        //
        // Note: the restriction to VTL 1 support also means that for WHP, which doesn't support VTL 1,
        // the HvCheckSparseGpaPageVtlAccess hypercall--which is unimplemented in WHP--will never be made.
        if mode == virt_support_x86emu::emulate::TranslateMode::Execute
            && self.vtl == GuestVtl::Vtl0
            && !matches!(
                *self.vp.shared.guest_vsm.read(),
                GuestVsmState::NotPlatformSupported,
            )
        {
            // Should always be called after translate gva with the tlb lock flag
            // or with an initial translation.
            debug_assert!(self.vp.is_tlb_locked(Vtl::Vtl2, self.vtl));

            let mbec_user_execute = self
                .vp
                .runner
                .get_vp_register(self.vtl, HvX64RegisterName::InstructionEmulationHints)
                .map_err(UhRunVpError::EmulationState)?;

            let flags =
                if hvdef::HvInstructionEmulatorHintsRegister::from(mbec_user_execute.as_u64())
                    .mbec_user_execute_control()
                {
                    HvMapGpaFlags::new().with_user_executable(true)
                } else {
                    HvMapGpaFlags::new().with_kernel_executable(true)
                };

            let access_result = self
                .vp
                .partition
                .hcl
                .check_vtl_access(gpa, self.vtl, flags)
                .map_err(|e| EmuCheckVtlAccessError::Hypervisor(UhRunVpError::VtlAccess(e)))?;

            if let Some(ioctl::CheckVtlAccessResult { vtl, denied_flags }) = access_result {
                return Err(EmuCheckVtlAccessError::AccessDenied { vtl, denied_flags });
            };
        }

        Ok(())
    }

    fn translate_gva(
        &mut self,
        gva: u64,
        mode: virt_support_x86emu::emulate::TranslateMode,
    ) -> Result<Result<EmuTranslateResult, EmuTranslateError>, Self::Error> {
        let mut control_flags = hypercall::TranslateGvaControlFlagsX64::new();
        match mode {
            virt_support_x86emu::emulate::TranslateMode::Read => {
                control_flags.set_validate_read(true)
            }
            virt_support_x86emu::emulate::TranslateMode::Write => {
                control_flags.set_validate_read(true);
                control_flags.set_validate_write(true);
            }
            virt_support_x86emu::emulate::TranslateMode::Execute => {
                control_flags.set_validate_execute(true)
            }
        };

        let target_vtl = self.vtl;

        // The translation will be used, so set the appropriate page table bits
        // (the access/dirty bit).
        //
        // Prevent flushes in order to make sure that translation of this GVA
        // remains usable until the VP is resumed back to direct execution.
        control_flags.set_set_page_table_bits(true);
        control_flags.set_tlb_flush_inhibit(true);

        // In case we're not running in ring 0, check privileges against the VP
        // state as of when the original intercept came in. Since the emulator
        // doesn't support instructions that change the ring level, the ring
        // level remains the same as it was at intercept time. The privilege
        // exempt flag should not be set.
1309        assert!(!control_flags.privilege_exempt());
1310
1311        // Do the translation using the current VTL.
1312        control_flags.set_input_vtl(target_vtl.into());
1313
1314        match self
1315            .vp
1316            .runner
1317            .translate_gva_to_gpa(gva, control_flags)
1318            .map_err(|e| UhRunVpError::TranslateGva(ioctl::Error::TranslateGvaToGpa(e)))?
1319        {
1320            Ok(ioctl::TranslateResult {
1321                gpa_page,
1322                overlay_page,
1323            }) => {
1324                self.vp.mark_tlb_locked(Vtl::Vtl2, GuestVtl::Vtl0);
1325                Ok(Ok(EmuTranslateResult {
1326                    gpa: (gpa_page << hvdef::HV_PAGE_SHIFT) + (gva & (HV_PAGE_SIZE - 1)),
1327                    overlay_page: Some(overlay_page),
1328                }))
1329            }
1330            Err(ioctl::x64::TranslateErrorX64 { code, event_info }) => Ok(Err(EmuTranslateError {
1331                code: hypercall::TranslateGvaResultCode(code),
1332                event_info: Some(event_info),
1333            })),
1334        }
1335    }
1336
1337    fn inject_pending_event(&mut self, event_info: HvX64PendingEvent) {
1338        let regs = [
1339            (
1340                HvX64RegisterName::PendingEvent0,
1341                u128::from(event_info.reg_0),
1342            ),
1343            (
1344                HvX64RegisterName::PendingEvent1,
1345                u128::from(event_info.reg_1),
1346            ),
1347        ];
1348
1349        self.vp
1350            .runner
1351            .set_vp_registers_hvcall(self.vtl.into(), regs)
1352            .expect("set_vp_registers hypercall for setting pending event should not fail");
1353    }
1354
1355    fn check_monitor_write(&self, gpa: u64, bytes: &[u8]) -> bool {
1356        self.vp
1357            .partition
1358            .monitor_page
1359            .check_write(gpa, bytes, |connection_id| {
1360                signal_mnf(self.devices, connection_id)
1361            })
1362    }
1363
1364    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
1365        self.vp.partition.is_gpa_mapped(gpa, write)
1366    }
1367
1368    fn lapic_base_address(&self) -> Option<u64> {
1369        None
1370    }
1371
1372    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
1373        unimplemented!()
1374    }
1375
1376    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
1377        unimplemented!()
1378    }
1379}
1380
1381impl<T: CpuIo> UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1382    const MSHV_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
1383        Self,
1384        [
1385            hv1_hypercall::HvPostMessage,
1386            hv1_hypercall::HvSignalEvent,
1387            hv1_hypercall::HvRetargetDeviceInterrupt,
1388            hv1_hypercall::HvGetVpIndexFromApicId,
1389            hv1_hypercall::HvSetVpRegisters,
1390            hv1_hypercall::HvModifyVtlProtectionMask
1391        ]
1392    );
1393}
1394
1395impl<T> hv1_hypercall::X64RegisterState for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1396    fn rip(&mut self) -> u64 {
1397        self.vp
1398            .runner
1399            .exit_message()
1400            .as_message::<HvX64InterceptMessageHeader>()
1401            .rip
1402    }
1403
1404    fn set_rip(&mut self, rip: u64) {
1405        self.vp.set_rip(self.intercepted_vtl, rip).unwrap()
1406    }
1407
1408    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
1409        match n {
1410            hv1_hypercall::X64HypercallRegister::Rax => {
1411                self.vp.runner.cpu_context().gps[protocol::RAX]
1412            }
1413            hv1_hypercall::X64HypercallRegister::Rcx => {
1414                self.vp.runner.cpu_context().gps[protocol::RCX]
1415            }
1416            hv1_hypercall::X64HypercallRegister::Rdx => {
1417                self.vp.runner.cpu_context().gps[protocol::RDX]
1418            }
1419            hv1_hypercall::X64HypercallRegister::Rbx => {
1420                self.vp.runner.cpu_context().gps[protocol::RBX]
1421            }
1422            hv1_hypercall::X64HypercallRegister::Rsi => {
1423                self.vp.runner.cpu_context().gps[protocol::RSI]
1424            }
1425            hv1_hypercall::X64HypercallRegister::Rdi => {
1426                self.vp.runner.cpu_context().gps[protocol::RDI]
1427            }
1428            hv1_hypercall::X64HypercallRegister::R8 => {
1429                self.vp.runner.cpu_context().gps[protocol::R8]
1430            }
1431        }
1432    }
1433
1434    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
1435        *match n {
1436            hv1_hypercall::X64HypercallRegister::Rax => {
1437                &mut self.vp.runner.cpu_context_mut().gps[protocol::RAX]
1438            }
1439            hv1_hypercall::X64HypercallRegister::Rcx => {
1440                &mut self.vp.runner.cpu_context_mut().gps[protocol::RCX]
1441            }
1442            hv1_hypercall::X64HypercallRegister::Rdx => {
1443                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDX]
1444            }
1445            hv1_hypercall::X64HypercallRegister::Rbx => {
1446                &mut self.vp.runner.cpu_context_mut().gps[protocol::RBX]
1447            }
1448            hv1_hypercall::X64HypercallRegister::Rsi => {
1449                &mut self.vp.runner.cpu_context_mut().gps[protocol::RSI]
1450            }
1451            hv1_hypercall::X64HypercallRegister::Rdi => {
1452                &mut self.vp.runner.cpu_context_mut().gps[protocol::RDI]
1453            }
1454            hv1_hypercall::X64HypercallRegister::R8 => {
1455                &mut self.vp.runner.cpu_context_mut().gps[protocol::R8]
1456            }
1457        } = value;
1458    }
1459
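    // XMM registers, used for fast hypercall input and output, are stored as
    // native-endian byte arrays in the runner's FX state.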
1460    fn xmm(&mut self, n: usize) -> u128 {
1461        u128::from_ne_bytes(self.vp.runner.cpu_context().fx_state.xmm[n])
1462    }
1463
1464    fn set_xmm(&mut self, n: usize, value: u128) {
1465        self.vp.runner.cpu_context_mut().fx_state.xmm[n] = value.to_ne_bytes();
1466    }
1467}
1468
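/// Converts a register-name type used by [`HvRegisterState`] implementations
/// into the hypervisor register name used by the runner.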
1469trait ToVpRegisterName: 'static + Copy + std::fmt::Debug {
1470    fn to_vp_reg_name(self) -> VpRegisterName;
1471}
1472
1473impl ToVpRegisterName for VpRegisterName {
1474    fn to_vp_reg_name(self) -> VpRegisterName {
1475        self
1476    }
1477}
1478
1479impl UhVpStateAccess<'_, '_, HypervisorBackedX86> {
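    /// Writes the batch of registers described by an `HvRegisterState`
    /// implementation to the hypervisor for this VTL.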
1480    fn set_register_state<T, R: ToVpRegisterName, const N: usize>(
1481        &mut self,
1482        regs: &T,
1483    ) -> Result<(), vp_state::Error>
1484    where
1485        T: HvRegisterState<R, N>,
1486    {
1487        let names = regs.names().map(|r| r.to_vp_reg_name());
1488        let mut values = [HvRegisterValue::new_zeroed(); N];
1489        regs.get_values(values.iter_mut());
1490        self.vp
1491            .runner
1492            .set_vp_registers(self.vtl, names.iter().copied().zip(values))
1493            .map_err(vp_state::Error::SetRegisters)?;
1494        Ok(())
1495    }
1496
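    /// Reads the batch of registers described by an `HvRegisterState`
    /// implementation from the hypervisor for this VTL.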
1497    fn get_register_state<T, R: ToVpRegisterName, const N: usize>(
1498        &mut self,
1499    ) -> Result<T, vp_state::Error>
1500    where
1501        T: HvRegisterState<R, N>,
1502    {
1503        let mut regs = T::default();
1504        let names = regs.names().map(|r| r.to_vp_reg_name());
1505        let mut values = [HvRegisterValue::new_zeroed(); N];
1506        self.vp
1507            .runner
1508            .get_vp_registers(self.vtl, &names, &mut values)
1509            .map_err(vp_state::Error::GetRegisters)?;
1510
1511        regs.set_values(values.into_iter());
1512        Ok(regs)
1513    }
1514}
1515
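// VP state access for hypervisor-backed partitions. Most state is read and
// written as batches of hypervisor registers; accessors that are not yet
// supported return `vp_state::Error::Unimplemented`.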
1516impl AccessVpState for UhVpStateAccess<'_, '_, HypervisorBackedX86> {
1517    type Error = vp_state::Error;
1518
1519    fn caps(&self) -> &virt::x86::X86PartitionCapabilities {
1520        &self.vp.partition.caps
1521    }
1522
1523    fn commit(&mut self) -> Result<(), Self::Error> {
1524        Ok(())
1525    }
1526
1527    fn registers(&mut self) -> Result<vp::Registers, Self::Error> {
1528        self.get_register_state()
1529    }
1530
1531    fn set_registers(&mut self, value: &vp::Registers) -> Result<(), Self::Error> {
1532        self.set_register_state(value)
1533    }
1534
1535    fn activity(&mut self) -> Result<vp::Activity, Self::Error> {
1536        let activity: vp::Activity = self.get_register_state()?;
1537
1538        // TODO: Get the NMI pending bit from the APIC.
1539        // let apic = self.vp.whp(self.vtl).get_apic()?;
1540        // activity.nmi_pending = hv_apic_nmi_pending(&apic);
1541        Ok(activity)
1542    }
1543
1544    fn set_activity(&mut self, value: &vp::Activity) -> Result<(), Self::Error> {
1545        self.set_register_state(value)?;
1546
1547        // TODO: Set the NMI pending bit via the APIC.
1548        // let mut apic = self.vp.whp(self.vtl).get_apic()?;
1549        // set_hv_apic_nmi_pending(&mut apic, value.nmi_pending);
1550        // self.vp.whp(self.vtl).set_apic(&apic)?;
1551        Ok(())
1552    }
1553
1554    fn xsave(&mut self) -> Result<vp::Xsave, Self::Error> {
1555        // TODO: get the rest of the xsave state, not just the legacy FP state.
1556        //
1557        // This is just used for debugging, so this should not be a problem.
1558        #[repr(C)]
1559        #[derive(IntoBytes, Immutable, KnownLayout)]
1560        struct XsaveStandard {
1561            fxsave: Fxsave,
1562            xsave_header: XsaveHeader,
1563        }
1564        let state = XsaveStandard {
1565            fxsave: self.vp.runner.cpu_context().fx_state.clone(),
1566            xsave_header: XsaveHeader {
1567                xstate_bv: XFEATURE_X87 | XFEATURE_SSE,
1568                ..FromZeros::new_zeroed()
1569            },
1570        };
1571        Ok(vp::Xsave::from_standard(state.as_bytes(), self.caps()))
1572    }
1573
1574    fn set_xsave(&mut self, _value: &vp::Xsave) -> Result<(), Self::Error> {
1575        Err(vp_state::Error::Unimplemented("xsave"))
1576    }
1577
1578    fn apic(&mut self) -> Result<vp::Apic, Self::Error> {
1579        Err(vp_state::Error::Unimplemented("apic"))
1580    }
1581
1582    fn set_apic(&mut self, _value: &vp::Apic) -> Result<(), Self::Error> {
1583        Err(vp_state::Error::Unimplemented("apic"))
1584    }
1585
1586    fn xcr(&mut self) -> Result<vp::Xcr0, Self::Error> {
1587        self.get_register_state()
1588    }
1589
1590    fn set_xcr(&mut self, value: &vp::Xcr0) -> Result<(), Self::Error> {
1591        self.set_register_state(value)
1592    }
1593
1594    fn xss(&mut self) -> Result<vp::Xss, Self::Error> {
1595        self.get_register_state()
1596    }
1597
1598    fn set_xss(&mut self, value: &vp::Xss) -> Result<(), Self::Error> {
1599        self.set_register_state(value)
1600    }
1601
1602    fn mtrrs(&mut self) -> Result<vp::Mtrrs, Self::Error> {
1603        self.get_register_state()
1604    }
1605
1606    fn set_mtrrs(&mut self, value: &vp::Mtrrs) -> Result<(), Self::Error> {
1607        self.set_register_state(value)
1608    }
1609
1610    fn pat(&mut self) -> Result<vp::Pat, Self::Error> {
1611        self.get_register_state()
1612    }
1613
1614    fn set_pat(&mut self, value: &vp::Pat) -> Result<(), Self::Error> {
1615        self.set_register_state(value)
1616    }
1617
1618    fn virtual_msrs(&mut self) -> Result<vp::VirtualMsrs, Self::Error> {
1619        self.get_register_state()
1620    }
1621
1622    fn set_virtual_msrs(&mut self, msrs: &vp::VirtualMsrs) -> Result<(), Self::Error> {
1623        self.set_register_state(msrs)
1624    }
1625
1626    fn debug_regs(&mut self) -> Result<vp::DebugRegisters, Self::Error> {
1627        self.get_register_state()
1628    }
1629
1630    fn set_debug_regs(&mut self, value: &vp::DebugRegisters) -> Result<(), Self::Error> {
1631        self.set_register_state(value)
1632    }
1633
1634    fn tsc(&mut self) -> Result<vp::Tsc, Self::Error> {
1635        self.get_register_state()
1636    }
1637
1638    fn set_tsc(&mut self, value: &vp::Tsc) -> Result<(), Self::Error> {
1639        self.set_register_state(value)
1640    }
1641
1642    fn cet(&mut self) -> Result<vp::Cet, Self::Error> {
1643        self.get_register_state()
1644    }
1645
1646    fn set_cet(&mut self, value: &vp::Cet) -> Result<(), Self::Error> {
1647        self.set_register_state(value)
1648    }
1649
1650    fn cet_ss(&mut self) -> Result<vp::CetSs, Self::Error> {
1651        self.get_register_state()
1652    }
1653
1654    fn set_cet_ss(&mut self, value: &vp::CetSs) -> Result<(), Self::Error> {
1655        self.set_register_state(value)
1656    }
1657
1658    fn tsc_aux(&mut self) -> Result<vp::TscAux, Self::Error> {
1659        self.get_register_state()
1660    }
1661
1662    fn set_tsc_aux(&mut self, value: &vp::TscAux) -> Result<(), Self::Error> {
1663        self.set_register_state(value)
1664    }
1665
1666    fn synic_msrs(&mut self) -> Result<vp::SyntheticMsrs, Self::Error> {
1667        self.get_register_state()
1668    }
1669
1670    fn set_synic_msrs(&mut self, value: &vp::SyntheticMsrs) -> Result<(), Self::Error> {
1671        self.set_register_state(value)
1672    }
1673
1674    fn synic_timers(&mut self) -> Result<vp::SynicTimers, Self::Error> {
1675        Err(vp_state::Error::Unimplemented("synic_timers"))
1676    }
1677
1678    fn set_synic_timers(&mut self, _value: &vp::SynicTimers) -> Result<(), Self::Error> {
1679        Err(vp_state::Error::Unimplemented("synic_timers"))
1680    }
1681
1682    fn synic_message_queues(&mut self) -> Result<vp::SynicMessageQueues, Self::Error> {
1683        Ok(self.vp.inner.message_queues[self.vtl].save())
1684    }
1685
1686    fn set_synic_message_queues(
1687        &mut self,
1688        value: &vp::SynicMessageQueues,
1689    ) -> Result<(), Self::Error> {
1690        self.vp.inner.message_queues[self.vtl].restore(value);
1691        Ok(())
1692    }
1693
1694    fn synic_message_page(&mut self) -> Result<vp::SynicMessagePage, Self::Error> {
1695        Err(vp_state::Error::Unimplemented("synic_message_page"))
1696    }
1697
1698    fn set_synic_message_page(&mut self, _value: &vp::SynicMessagePage) -> Result<(), Self::Error> {
1699        Err(vp_state::Error::Unimplemented("synic_message_page"))
1700    }
1701
1702    fn synic_event_flags_page(&mut self) -> Result<vp::SynicEventFlagsPage, Self::Error> {
1703        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1704    }
1705
1706    fn set_synic_event_flags_page(
1707        &mut self,
1708        _value: &vp::SynicEventFlagsPage,
1709    ) -> Result<(), Self::Error> {
1710        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
1711    }
1712}
1713
1714impl<T: CpuIo> hv1_hypercall::RetargetDeviceInterrupt
1715    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1716{
1717    fn retarget_interrupt(
1718        &mut self,
1719        device_id: u64,
1720        address: u64,
1721        data: u32,
1722        params: hv1_hypercall::HvInterruptParameters<'_>,
1723    ) -> hvdef::HvResult<()> {
1724        self.retarget_virtual_interrupt(
1725            device_id,
1726            address,
1727            data,
1728            params.vector,
1729            params.multicast,
1730            params.target_processors,
1731        )
1732    }
1733}
1734
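// HvSetVpRegisters is only supported for the current partition and VP, and only
// for the VsmPartitionConfig register.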
1735impl<T> hv1_hypercall::SetVpRegisters for UhHypercallHandler<'_, '_, T, HypervisorBackedX86> {
1736    fn set_vp_registers(
1737        &mut self,
1738        partition_id: u64,
1739        vp_index: u32,
1740        vtl: Option<Vtl>,
1741        registers: &[hypercall::HvRegisterAssoc],
1742    ) -> HvRepResult {
1743        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1744            return Err((HvError::AccessDenied, 0));
1745        }
1746
1747        if vp_index != hvdef::HV_VP_INDEX_SELF && vp_index != self.vp.vp_index().index() {
1748            return Err((HvError::InvalidVpIndex, 0));
1749        }
1750
1751        let target_vtl = self
1752            .target_vtl_no_higher(vtl.unwrap_or(self.intercepted_vtl.into()))
1753            .map_err(|e| (e, 0))?;
1754
1755        for (i, reg) in registers.iter().enumerate() {
1756            if reg.name == HvX64RegisterName::VsmPartitionConfig.into() {
1757                let value = HvRegisterVsmPartitionConfig::from(reg.value.as_u64());
1758                self.vp
1759                    .set_vsm_partition_config(target_vtl, value)
1760                    .map_err(|e| (e, i))?;
1761            } else {
1762                return Err((HvError::InvalidParameter, i));
1763            }
1764        }
1765
1766        Ok(())
1767    }
1768}
1769
1770impl<T> hv1_hypercall::ModifyVtlProtectionMask
1771    for UhHypercallHandler<'_, '_, T, HypervisorBackedX86>
1772{
1773    fn modify_vtl_protection_mask(
1774        &mut self,
1775        partition_id: u64,
1776        _map_flags: HvMapGpaFlags,
1777        target_vtl: Option<Vtl>,
1778        gpa_pages: &[u64],
1779    ) -> HvRepResult {
1780        if partition_id != hvdef::HV_PARTITION_ID_SELF {
1781            return Err((HvError::AccessDenied, 0));
1782        }
1783
1784        let target_vtl = self
1785            .target_vtl_no_higher(target_vtl.unwrap_or(self.intercepted_vtl.into()))
1786            .map_err(|e| (e, 0))?;
1787        if target_vtl == GuestVtl::Vtl0 {
1788            return Err((HvError::InvalidParameter, 0));
1789        }
1790
1791        // A VTL cannot change its own VTL permissions until it has enabled VTL protection and
1792        // configured default permissions. Higher VTLs are not under this restriction (as they may
1793        // need to apply default permissions before VTL protection is enabled).
1794        if target_vtl == self.intercepted_vtl
1795            && !matches!(
1796                *self.vp.shared.guest_vsm.read(),
1797                GuestVsmState::Enabled {
1798                    vtl1: VbsIsolatedVtl1State {
1799                        enable_vtl_protection: true,
1800                        default_vtl_protections: Some(_),
1801                    },
1802                }
1803            )
1804        {
1805            return Err((HvError::AccessDenied, 0));
1806        }
1807
1808        // TODO VBS GUEST VSM: verify this logic is correct
1809        // TODO VBS GUEST VSM: validation on map_flags, similar to default
1810        // protections mask changes
1811        // An intercept can be received when adjusting permissions, and for
1812        // isolated VMs when the page is unaccepted.
1813        if self.vp.partition.isolation.is_isolated() {
1814            return Err((HvError::OperationDenied, 0));
1815        } else {
1816            if !gpa_pages.is_empty() {
1817                if !self.vp.partition.is_gpa_lower_vtl_ram(gpa_pages[0]) {
1818                    return Err((HvError::OperationDenied, 0));
1819                } else {
1820                    panic!("Should not be handling this hypercall for guest ram");
1821                }
1822            }
1823        }
1824
1825        Ok(())
1826    }
1827}
1828
1829mod save_restore {
1830    use super::HypervisorBackedX86;
1831    use super::UhProcessor;
1832    use anyhow::Context;
1833    use hcl::GuestVtl;
1834    use hvdef::HV_X64_MSR_GUEST_CRASH_CTL;
1835    use hvdef::HvInternalActivityRegister;
1836    use hvdef::HvX64RegisterName;
1837    use hvdef::Vtl;
1838    use virt::Processor;
1839    use virt::irqcon::MsiRequest;
1840    use virt::vp::AccessVpState;
1841    use virt::vp::Mtrrs;
1842    use vmcore::save_restore::RestoreError;
1843    use vmcore::save_restore::SaveError;
1844    use vmcore::save_restore::SaveRestore;
1845    use zerocopy::FromZeros;
1846    use zerocopy::IntoBytes;
1847
1848    mod state {
1849        use mesh::payload::Protobuf;
1850        use vmcore::save_restore::SavedStateRoot;
1851
1852        #[derive(Protobuf, SavedStateRoot)]
1853        #[mesh(package = "underhill.partition")]
1854        pub struct ProcessorSavedState {
1855            #[mesh(1)]
1856            pub(super) rax: u64,
1857            #[mesh(2)]
1858            pub(super) rcx: u64,
1859            #[mesh(3)]
1860            pub(super) rdx: u64,
1861            #[mesh(4)]
1862            pub(super) rbx: u64,
1863            #[mesh(5)]
1864            pub(super) cr2: u64,
1865            #[mesh(6)]
1866            pub(super) rbp: u64,
1867            #[mesh(7)]
1868            pub(super) rsi: u64,
1869            #[mesh(8)]
1870            pub(super) rdi: u64,
1871            #[mesh(9)]
1872            pub(super) r8: u64,
1873            #[mesh(10)]
1874            pub(super) r9: u64,
1875            #[mesh(11)]
1876            pub(super) r10: u64,
1877            #[mesh(12)]
1878            pub(super) r11: u64,
1879            #[mesh(13)]
1880            pub(super) r12: u64,
1881            #[mesh(14)]
1882            pub(super) r13: u64,
1883            #[mesh(15)]
1884            pub(super) r14: u64,
1885            #[mesh(16)]
1886            pub(super) r15: u64,
1887            #[mesh(17)]
1888            pub(super) fx_state: Vec<u8>,
1889            #[mesh(18)]
1890            pub(super) dr0: u64,
1891            #[mesh(19)]
1892            pub(super) dr1: u64,
1893            #[mesh(20)]
1894            pub(super) dr2: u64,
1895            #[mesh(21)]
1896            pub(super) dr3: u64,
1897
1898            /// Only set when the DR6_SHARED capability is present
1899            #[mesh(22)]
1900            pub(super) dr6: Option<u64>,
1901
1902            /// Whether VTL0 should be in the startup suspend state. Older Underhill
1903            /// versions do not save this property, so maintain the old buggy
1904            /// behavior for the cases where it's not present in the saved state.
1905            #[mesh(23)]
1906            pub(super) startup_suspend: Option<bool>,
1907
1908            #[mesh(24)]
1909            pub(super) crash_reg: Option<[u64; 5]>,
1910
1911            /// This value is ignored going forward, but may still be read by downlevel
1912            /// versions.
1913            #[mesh(25)]
1914            pub(super) crash_control: u64,
1915
1916            #[mesh(26)]
1917            pub(super) msr_mtrr_def_type: u64,
1918            #[mesh(27)]
1919            pub(super) fixed_mtrrs: Option<[u64; 11]>,
1920            #[mesh(28)]
1921            pub(super) variable_mtrrs: Option<[u64; 16]>,
1922            #[mesh(29)]
1923            pub(super) per_vtl: Vec<ProcessorVtlSavedState>,
1924        }
1925
1926        #[derive(Protobuf, SavedStateRoot)]
1927        #[mesh(package = "underhill.partition")]
1928        pub struct ProcessorVtlSavedState {
1929            #[mesh(1)]
1930            pub(super) message_queue: virt::vp::SynicMessageQueues,
1931        }
1932    }
1933
1934    const SHARED_REGISTERS: &[HvX64RegisterName] = &[
1935        HvX64RegisterName::Dr0,
1936        HvX64RegisterName::Dr1,
1937        HvX64RegisterName::Dr2,
1938        HvX64RegisterName::Dr3,
1939        HvX64RegisterName::Dr6, // must be last so it can be dropped when DR6 is not shared
1940    ];
1941
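    // Save/restore for a hypervisor-backed VP. Only state owned by the paravisor
    // (the CPU context, shared debug registers, MTRRs, crash registers, synic
    // message queues, and the VTL0 startup-suspend flag) is saved here; state
    // owned by the hypervisor is left for it to preserve (see the field comments
    // in `save`).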
1942    impl SaveRestore for UhProcessor<'_, HypervisorBackedX86> {
1943        type SavedState = state::ProcessorSavedState;
1944
1945        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
1946            // Ensure all async requests are reflected in the saved state.
1947            self.flush_async_requests()
1948                .context("failed to flush async requests")
1949                .map_err(SaveError::Other)?;
1950
1951            let dr6_shared = self.partition.hcl.dr6_shared();
1952            let mut values = [FromZeros::new_zeroed(); SHARED_REGISTERS.len()];
1953            let len = if dr6_shared {
1954                SHARED_REGISTERS.len()
1955            } else {
1956                SHARED_REGISTERS.len() - 1
1957            };
1958
1959            self.runner
1960                // All these registers are shared, so the VTL we ask for doesn't matter
1961                .get_vp_registers(GuestVtl::Vtl0, &SHARED_REGISTERS[..len], &mut values[..len])
1962                .context("failed to get shared registers")
1963                .map_err(SaveError::Other)?;
1964
1965            // Non-VTL0 VPs should never be in startup suspend, so we only need to check VTL0.
1966            // The hypervisor handles halt and idle for us.
1967            let internal_activity = self
1968                .runner
1969                .get_vp_register(GuestVtl::Vtl0, HvX64RegisterName::InternalActivityState)
1970                .inspect_err(|e| {
1971                    // The ioctl get_vp_register path does not tell us
1972                    // hv_status directly, so just log if it failed for any
1973                    // reason.
1974                    tracing::warn!(
1975                        error = e as &dyn std::error::Error,
1976                        "unable to query startup suspend, so the VTL0 startup suspend state will not be saved"
1977                    );
1978                })
1979                .ok();
1980            let startup_suspend = internal_activity
1981                .map(|a| HvInternalActivityRegister::from(a.as_u64()).startup_suspend());
1982
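            // The general-purpose registers (and CR2) are kept in a fixed order in
            // the runner's CPU context.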
1983            let [
1984                rax,
1985                rcx,
1986                rdx,
1987                rbx,
1988                cr2,
1989                rbp,
1990                rsi,
1991                rdi,
1992                r8,
1993                r9,
1994                r10,
1995                r11,
1996                r12,
1997                r13,
1998                r14,
1999                r15,
2000            ] = self.runner.cpu_context().gps;
2001
2002            // We are responsible for saving shared MSRs too, but other than
2003            // the MTRRs all shared MSRs are read-only. So this is all we need.
2004            let Mtrrs {
2005                msr_mtrr_def_type,
2006                fixed: fixed_mtrrs,
2007                variable: variable_mtrrs,
2008            } = self
2009                // MTRRs are shared, so it doesn't matter which VTL we ask for.
2010                .access_state(Vtl::Vtl0)
2011                .mtrrs()
2012                .context("failed to get MTRRs")
2013                .map_err(SaveError::Other)?;
2014
2015            // This value is ignored during restore, but may still be read by downlevel
2016            // versions. Set it to the correct hardcoded read value as a best effort for them.
2017            let crash_control = self
2018                .read_crash_msr(HV_X64_MSR_GUEST_CRASH_CTL, GuestVtl::Vtl0)
2019                .unwrap();
2020
2021            let UhProcessor {
2022                _not_send,
2023                inner:
2024                    crate::UhVpInner {
2025                        // Saved
2026                        message_queues,
2027                        // Sidecar state is reset during servicing
2028                        sidecar_exit_reason: _,
2029                        // Will be cleared by flush_async_requests above
2030                        wake_reasons: _,
2031                        // Runtime glue
2032                        waker: _,
2033                        // Topology information
2034                        vp_info: _,
2035                        cpu_index: _,
2036                    },
2037                // Saved
2038                crash_reg,
2039                // Runtime glue
2040                partition: _,
2041                idle_control: _,
2042                vmtime: _,
2043                timer: _,
2044                // This field is only used in dev/test scenarios
2045                force_exit_sidecar: _,
2046                signaled_sidecar_exit: _,
2047                // Just caching the hypervisor value, let it handle saving
2048                vtls_tlb_locked: _,
2049                // Statistic that should reset to 0 on restore
2050                kernel_returns: _,
2051                // Shared state should be handled by the backing
2052                shared: _,
2053                // The runner doesn't hold anything needing saving
2054                runner: _,
2055                // TODO CVM Servicing: The hypervisor backing doesn't need to save anything, but CVMs will.
2056                backing: _,
2057                // Currently only meaningful for CVMs
2058                exit_activities: _,
2059            } = self;
2060
2061            let per_vtl = [GuestVtl::Vtl0, GuestVtl::Vtl1]
2062                .map(|vtl| state::ProcessorVtlSavedState {
2063                    message_queue: message_queues[vtl].save(),
2064                })
2065                .into();
2066
2067            let state = state::ProcessorSavedState {
2068                rax,
2069                rcx,
2070                rdx,
2071                rbx,
2072                cr2,
2073                rbp,
2074                rsi,
2075                rdi,
2076                r8,
2077                r9,
2078                r10,
2079                r11,
2080                r12,
2081                r13,
2082                r14,
2083                r15,
2084                fx_state: self.runner.cpu_context().fx_state.as_bytes().to_vec(),
2085                dr0: values[0].as_u64(),
2086                dr1: values[1].as_u64(),
2087                dr2: values[2].as_u64(),
2088                dr3: values[3].as_u64(),
2089                dr6: dr6_shared.then(|| values[4].as_u64()),
2090                startup_suspend,
2091                crash_reg: Some(*crash_reg),
2092                crash_control,
2093                msr_mtrr_def_type,
2094                fixed_mtrrs: Some(fixed_mtrrs),
2095                variable_mtrrs: Some(variable_mtrrs),
2096                per_vtl,
2097            };
2098
2099            Ok(state)
2100        }
2101
2102        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
2103            let state::ProcessorSavedState {
2104                rax,
2105                rcx,
2106                rdx,
2107                rbx,
2108                cr2,
2109                rbp,
2110                rsi,
2111                rdi,
2112                r8,
2113                r9,
2114                r10,
2115                r11,
2116                r12,
2117                r13,
2118                r14,
2119                r15,
2120                fx_state,
2121                dr0,
2122                dr1,
2123                dr2,
2124                dr3,
2125                dr6,
2126                startup_suspend,
2127                crash_reg,
2128                crash_control: _crash_control,
2129                msr_mtrr_def_type,
2130                fixed_mtrrs,
2131                variable_mtrrs,
2132                per_vtl,
2133            } = state;
2134
2135            let dr6_shared = self.partition.hcl.dr6_shared();
2136            self.runner.cpu_context_mut().gps = [
2137                rax, rcx, rdx, rbx, cr2, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
2138            ];
2139            if fx_state.len() != self.runner.cpu_context_mut().fx_state.as_bytes().len() {
2140                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2141                    "invalid fpu state"
2142                )));
2143            }
2144            if dr6_shared != dr6.is_some() {
2145                return Err(RestoreError::InvalidSavedState(anyhow::anyhow!(
2146                    "dr6 state mismatch"
2147                )));
2148            }
2149
2150            let len = if dr6_shared {
2151                SHARED_REGISTERS.len()
2152            } else {
2153                SHARED_REGISTERS.len() - 1
2154            };
2155
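            // DR6 is only restored when the DR6_SHARED capability is present;
            // otherwise the trailing entry is dropped by the slice below.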
2156            let values = [dr0, dr1, dr2, dr3, dr6.unwrap_or(0)];
2157            self.runner
2158                .set_vp_registers(
2159                    GuestVtl::Vtl0,
2160                    SHARED_REGISTERS[..len].iter().copied().zip(values),
2161                )
2162                .context("failed to set shared registers")
2163                .map_err(RestoreError::Other)?;
2164
2165            self.runner
2166                .cpu_context_mut()
2167                .fx_state
2168                .as_mut_bytes()
2169                .copy_from_slice(&fx_state);
2170
2171            self.crash_reg = crash_reg.unwrap_or_default();
2172
2173            // Previous versions of Underhill did not save the MTRRs. If they are
2174            // missing from the saved state, assume they simply weren't saved and
2175            // leave whatever the system already has rather than zeroing it out.
2176            if let (Some(fixed), Some(variable)) = (fixed_mtrrs, variable_mtrrs) {
2177                let mut access = self.access_state(Vtl::Vtl0);
2178                access
2179                    .set_mtrrs(&Mtrrs {
2180                        msr_mtrr_def_type,
2181                        fixed,
2182                        variable,
2183                    })
2184                    .context("failed to set MTRRs")
2185                    .map_err(RestoreError::Other)?;
2186            }
2187
2188            for (per, vtl) in per_vtl.into_iter().zip(0u8..) {
2189                let vtl = GuestVtl::try_from(vtl)
2190                    .context("too many vtls")
2191                    .map_err(RestoreError::Other)?;
2192                self.inner.message_queues[vtl].restore(&per.message_queue);
2193            }
2194
2195            let inject_startup_suspend = match startup_suspend {
2196                Some(true) => {
2197                    // When Underhill brings up APs via hypercall during a
2198                    // servicing update, that hypercall clears the VTL0 startup
2199                    // suspend state and makes the VP runnable. As on the cold
2200                    // boot path, we need to put the AP back into startup suspend
2201                    // so that the VP does not start running when it should not.
2202                    true
2203                }
2204                None if !self.vp_index().is_bsp() => {
2205                    // Previous versions of Underhill did not save this value,
2206                    // which means the VM could be in a bad state if it's being
2207                    // serviced before VTL0 brings up APs. Log this state to
2208                    // note that.
2209                    const NAMES: [HvX64RegisterName; 4] = [
2210                        HvX64RegisterName::Rip,
2211                        HvX64RegisterName::Rflags,
2212                        HvX64RegisterName::Cr0,
2213                        HvX64RegisterName::Efer,
2214                    ];
2215                    let mut values = [FromZeros::new_zeroed(); NAMES.len()];
2216                    self.runner
2217                        // Non-VTL0 VPs should never be in startup suspend, so we only need to handle VTL0.
2218                        .get_vp_registers(GuestVtl::Vtl0, &NAMES, &mut values)
2219                        .context("failed to get VP registers for startup suspend log")
2220                        .map_err(RestoreError::Other)?;
2221                    let [rip, rflags, cr0, efer] = values.map(|reg| reg.as_u64());
2222
2223                    tracing::error!(
2224                        vp_index = self.vp_index().index(),
2225                        rip,
2226                        rflags,
2227                        cr0,
2228                        efer,
2229                        "previous version of underhill did not save startup_suspend state"
2230                    );
2231
2232                    false
2233                }
2234                Some(false) | None => false,
2235            };
2236
2237            if inject_startup_suspend {
2238                let reg = u64::from(HvInternalActivityRegister::new().with_startup_suspend(true));
2239                // Non-VTL0 VPs should never be in startup suspend, so we only need to handle VTL0.
2240                let result = self.runner.set_vp_registers(
2241                    GuestVtl::Vtl0,
2242                    [(HvX64RegisterName::InternalActivityState, reg)],
2243                );
2244
2245                if let Err(e) = result {
2246                    // The ioctl set_vp_register path does not tell us hv_status
2247                    // directly, so just log if it failed for any reason.
2248                    tracing::warn!(
2249                        error = &e as &dyn std::error::Error,
2250                        "unable to set internal activity register, falling back to init"
2251                    );
2252
2253                    self.partition.request_msi(
2254                        GuestVtl::Vtl0,
2255                        MsiRequest::new_x86(
2256                            virt::irqcon::DeliveryMode::INIT,
2257                            self.inner.vp_info.apic_id,
2258                            false,
2259                            0,
2260                            true,
2261                        ),
2262                    );
2263                }
2264            }
2265
2266            Ok(())
2267        }
2268    }
2269}