Skip to main content

virt_mshv/x86_64/
mod.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! x86_64-specific implementation of the mshv hypervisor backend.
5
6mod vm_state;
7mod vp_state;
8
9use crate::Error;
10use crate::ErrorInner;
11use crate::KernelError;
12use crate::LinuxMshv;
13use crate::MshvPartition;
14use crate::MshvPartitionInner;
15use crate::MshvProcessor;
16use crate::MshvProcessorBinder;
17use crate::MshvProtoPartition;
18use crate::MshvVpRunner;
19use crate::VcpuFdExt;
20use crate::common_synthetic_features;
21use crate::create_vm_with_retry;
22
23use guestmem::DoorbellRegistration;
24use guestmem::GuestMemory;
25use hv1_hypercall::X64RegisterIo;
26use hvdef::HvDeliverabilityNotificationsRegister;
27use hvdef::HvMessage;
28use hvdef::HvMessageType;
29use hvdef::HvPartitionPropertyCode;
30use hvdef::HvProcessorVendor;
31use hvdef::HvX64RegisterName;
32use hvdef::HvX64RegisterPage;
33use hvdef::Vtl;
34use hvdef::hypercall::HvRegisterAssoc;
35use mshv_ioctls::InterruptRequest;
36use mshv_ioctls::VcpuFd;
37use pal::unix::pthread::Pthread;
38use parking_lot::Mutex;
39use pci_core::msi::SignalMsi;
40use std::sync::Arc;
41use virt::Hv1;
42use virt::PartitionAccessState;
43use virt::PartitionConfig;
44use virt::ProtoPartition;
45use virt::ProtoPartitionConfig;
46use virt::VpHaltReason;
47use virt::VpIndex;
48use virt::io::CpuIo;
49use virt::irqcon::MsiRequest;
50use virt::state::StateElement as _;
51use virt::x86::apic_software_device::ApicSoftwareDevice;
52use virt::x86::apic_software_device::ApicSoftwareDevices;
53use virt_support_x86emu::emulate::EmuTranslateError;
54use virt_support_x86emu::emulate::EmuTranslateResult;
55use virt_support_x86emu::emulate::EmulatorSupport;
56use virt_support_x86emu::emulate::TranslateGvaSupport;
57use virt_support_x86emu::emulate::TranslateMode;
58use virt_support_x86emu::emulate::emulate_translate_gva;
59use virt_support_x86emu::translate::TranslationRegisters;
60use vmcore::reference_time::ReferenceTimeSource;
61use x86defs::RFlags;
62use x86defs::SegmentRegister;
63
impl virt::Hypervisor for LinuxMshv {
    type ProtoPartition<'a> = MshvProtoPartition<'a>;
    type Partition = MshvPartition;
    type Error = Error;

    fn platform_info(&self) -> virt::PlatformInfo {
        virt::PlatformInfo {}
    }

    /// Creates the kernel-side mshv partition and wraps it in a
    /// proto-partition, which the caller later finalizes via
    /// [`virt::ProtoPartition::build`].
    fn new_partition<'a>(
        &mut self,
        config: ProtoPartitionConfig<'a>,
    ) -> Result<MshvProtoPartition<'a>, Self::Error> {
        // Isolated guests are not supported by this backend.
        if config.isolation.is_isolated() {
            return Err(ErrorInner::IsolationNotSupported.into());
        }

        // Build partition creation flags. LAPIC is always enabled (the
        // hypervisor emulates the local APIC). X2APIC is only enabled when
        // the topology requests it.
        let mut pt_flags: u64 = 1 << mshv_bindings::MSHV_PT_BIT_LAPIC
            | 1 << mshv_bindings::MSHV_PT_BIT_GPA_SUPER_PAGES
            | 1 << mshv_bindings::MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES;

        match config.processor_topology.apic_mode() {
            vm_topology::processor::x86::ApicMode::X2ApicSupported
            | vm_topology::processor::x86::ApicMode::X2ApicEnabled => {
                pt_flags |= 1 << mshv_bindings::MSHV_PT_BIT_X2APIC;
            }
            vm_topology::processor::x86::ApicMode::XApic => {}
        }

        if config.processor_topology.smt_enabled() {
            pt_flags |= 1 << mshv_bindings::MSHV_PT_BIT_SMT_ENABLED_GUEST;
        }

        // The feature-bank and xsave fields take *disable* masks (note the
        // `pt_disabled_xsave` name), so pass the complement of the supported
        // feature sets.
        let create_args = mshv_bindings::mshv_create_partition_v2 {
            pt_flags,
            pt_isolation: mshv_bindings::MSHV_PT_ISOLATION_NONE as u64,
            pt_num_cpu_fbanks: mshv_bindings::MSHV_NUM_CPU_FEATURES_BANKS as u16,
            pt_cpu_fbanks: [
                !u64::from(supported_processor_features()),
                !u64::from(supported_processor_features1()),
            ],
            pt_disabled_xsave: !u64::from(supported_xsave_features()),
            ..Default::default()
        };

        let vmfd = create_vm_with_retry(&self.mshv, &create_args)?;

        // Set synthetic processor features before initialization when the
        // guest interface is configured.
        if config.hv_config.is_some() {
            let synthetic_features = common_synthetic_features()
                .with_access_partition_reference_tsc(true)
                .with_access_guest_idle_reg(true)
                .with_access_frequency_regs(true)
                .with_enable_extended_gva_ranges_for_flush_virtual_address_list(true);

            vmfd.set_partition_property(
                HvPartitionPropertyCode::SyntheticProcFeatures.0,
                u64::from(synthetic_features),
            )
            .map_err(|e| ErrorInner::SetPartitionProperty(e.into()))?;
        }

        vmfd.initialize()
            .map_err(|e| ErrorInner::CreateVMInitFailed(e.into()))?;

        // Tell the hypervisor how many VPs are in each socket.
        vmfd.set_partition_property(
            HvPartitionPropertyCode::ProcessorsPerSocket.0,
            config.processor_topology.reserved_vps_per_socket() as u64,
        )
        .map_err(|e| ErrorInner::SetPartitionProperty(e.into()))?;

        MshvProtoPartition::new(config, vmfd)
    }
}
143
impl MshvProtoPartition<'_> {
    /// Build partition capabilities from partition properties instead of
    /// CPUID.
    ///
    /// This is the fallback path used when querying CPUID through the BSP
    /// vcpu fails.
    fn caps_from_properties(&self) -> Result<virt::x86::X86PartitionCapabilities, Error> {
        use virt::x86::X86PartitionCapabilities;
        use virt::x86::XsaveCapabilities;
        use x86defs::cpuid::Vendor;
        use x86defs::xsave::XSAVE_VARIABLE_OFFSET;

        // Map the hypervisor's vendor enumeration onto the CPUID vendor;
        // any vendor not explicitly recognized is an error.
        let vendor_id = self
            .vmfd
            .get_partition_property(HvPartitionPropertyCode::ProcessorVendor.0)
            .map_err(|e| ErrorInner::GetPartitionProperty(e.into()))?;

        let vendor = match HvProcessorVendor(vendor_id as u32) {
            HvProcessorVendor::AMD => Vendor::AMD,
            HvProcessorVendor::INTEL => Vendor::INTEL,
            HvProcessorVendor::HYGON => Vendor::HYGON,
            v => return Err(ErrorInner::UnsupportedProcessorVendor(v).into()),
        };

        let xsave_states = self
            .vmfd
            .get_partition_property(HvPartitionPropertyCode::XsaveStates.0)
            .map_err(|e| ErrorInner::GetPartitionProperty(e.into()))?;

        let max_xsave_data_size = self
            .vmfd
            .get_partition_property(HvPartitionPropertyCode::MaxXsaveDataSize.0)
            .map_err(|e| ErrorInner::GetPartitionProperty(e.into()))?;

        // Read the BSP's RDX to capture its architectural reset value.
        let reset_rdx = {
            let mut assoc = [HvRegisterAssoc::from((HvX64RegisterName::Rdx, 0u64))];
            self.bsp
                .get_hvdef_regs(&mut assoc)
                .map_err(ErrorInner::Register)?;
            assoc[0].value.as_u64()
        };

        // x2apic: the topology allows x2APIC; x2apic_enabled: it is enabled
        // at boot (Enabled implies both).
        let x2apic = matches!(
            self.config.processor_topology.apic_mode(),
            vm_topology::processor::x86::ApicMode::X2ApicSupported
                | vm_topology::processor::x86::ApicMode::X2ApicEnabled
        );
        let x2apic_enabled = matches!(
            self.config.processor_topology.apic_mode(),
            vm_topology::processor::x86::ApicMode::X2ApicEnabled
        );

        Ok(X86PartitionCapabilities {
            vendor,
            hv1: self.config.hv_config.is_some(),
            hv1_reference_tsc_page: self.config.hv_config.is_some(),
            xsave: XsaveCapabilities {
                features: xsave_states,
                // NOTE(review): supervisor xsave features and per-feature
                // info are not derivable from these properties, so they are
                // left empty/default here — confirm downstream consumers
                // tolerate that.
                supervisor_features: 0,
                standard_len: XSAVE_VARIABLE_OFFSET as u32,
                compact_len: max_xsave_data_size as u32,
                feature_info: [Default::default(); 63],
            },
            x2apic,
            x2apic_enabled,
            reset_rdx,
            // Conservative defaults for features this path cannot probe.
            cet: false,
            cet_ss: false,
            sgx: false,
            tsc_aux: false,
            vtom: None,
            physical_address_width: self.max_physical_address_size(),
            can_freeze_time: false,
            xsaves_state_bv_broken: false,
            dr6_tsx_broken: false,
            nxe_forced_on: false,
        })
    }

    /// Queries the guest physical address width from the hypervisor.
    ///
    /// Panics if the property query fails, since there is no sensible
    /// fallback value.
    fn max_physical_address_size(&self) -> u8 {
        self.vmfd
            .get_partition_property(HvPartitionPropertyCode::PhysicalAddressWidth.0)
            .expect("failed to get physical address width") as u8
    }
}
226
impl ProtoPartition for MshvProtoPartition<'_> {
    type Partition = MshvPartition;
    type ProcessorBinder = MshvProcessorBinder;
    type Error = Error;

    fn max_physical_address_size(&self) -> u8 {
        self.max_physical_address_size()
    }

    /// Finalizes the partition: registers CPUID overrides, computes
    /// capabilities, and produces the partition plus one processor binder
    /// per VP in the topology.
    fn build(
        self,
        config: PartitionConfig<'_>,
    ) -> Result<(Self::Partition, Vec<Self::ProcessorBinder>), Self::Error> {
        let cpuid = virt::CpuidLeafSet::new(config.cpuid.to_vec());

        // Apply CPUID overrides partition-wide.
        //
        // Each leaf is registered with the hypervisor via the
        // HvCallRegisterInterceptResult hypercall (issued through the
        // kernel's passthrough ioctl) so intercept results apply to every
        // VP (HV_ANY_VP).
        for leaf in cpuid.leaves().iter() {
            let input = hvdef::hypercall::RegisterInterceptResultCpuid {
                partition_id: 0,
                vp_index: hvdef::HV_ANY_VP,
                intercept_type: hvdef::hypercall::HvInterceptType::HvInterceptTypeX64Cpuid,
                parameters: hvdef::hypercall::HvRegisterX64CpuidResultParameters {
                    input: hvdef::hypercall::HvRegisterX64CpuidResultParametersInput {
                        eax: leaf.function,
                        // A leaf without an index matches any subleaf.
                        ecx: leaf.index.unwrap_or(0),
                        subleaf_specific: u8::from(leaf.index.is_some()),
                        always_override: 1,
                        padding: 0,
                    },
                    result: hvdef::hypercall::HvRegisterX64CpuidResultParametersOutput {
                        eax: leaf.result[0],
                        eax_mask: leaf.mask[0],
                        ebx: leaf.result[1],
                        ebx_mask: leaf.mask[1],
                        ecx: leaf.result[2],
                        ecx_mask: leaf.mask[2],
                        edx: leaf.result[3],
                        edx_mask: leaf.mask[3],
                    },
                },
                _reserved: 0,
            };
            let mut args = mshv_bindings::mshv_root_hvcall {
                code: hvdef::HypercallCode::HvCallRegisterInterceptResult.0,
                in_sz: size_of_val(&input) as u16,
                in_ptr: std::ptr::addr_of!(input) as u64,
                ..Default::default()
            };
            self.vmfd
                .hvcall(&mut args)
                .map_err(|e| ErrorInner::RegisterCpuid(e.into()))?;
        }

        let caps = {
            // Probe whether the BSP supports the CPUID query ioctl with a
            // throwaway leaf; if so, derive capabilities from live CPUID,
            // otherwise fall back to partition properties.
            let mut caps = match self.bsp.get_cpuid_values(0, 0, 0, 0) {
                Ok(_) => virt::PartitionCapabilities::from_cpuid(
                    self.config.processor_topology,
                    &mut |function, index| {
                        self.bsp
                            .get_cpuid_values(function, index, 0, 0)
                            .map_err(KernelError::from)
                            // The probe above succeeded, so further queries
                            // failing would be a kernel bug.
                            .expect("cpuid should not fail")
                    },
                )
                .map_err(ErrorInner::Capabilities)?,
                Err(_) => {
                    tracing::warn!(
                        "failed to query CPUID, falling back to partition properties, some features may be unavailable"
                    );
                    self.caps_from_properties()?
                }
            };
            // Backend-specific quirks applied regardless of how the caps
            // were derived.
            caps.xsaves_state_bv_broken = true;
            caps.can_freeze_time = true;
            caps
        };

        // APIC IDs indexed by VP index, for the software APIC devices.
        let apic_id_map = self
            .config
            .processor_topology
            .vps_arch()
            .map(|vp| vp.apic_id)
            .collect();

        let inner = Arc::new(MshvPartitionInner {
            vmfd: self.vmfd,
            bsp_vcpufd: self.bsp,
            memory: Default::default(),
            gm: config.guest_memory.clone(),
            mem_layout: config.mem_layout.clone(),
            vps: self.vps,
            irq_routes: Default::default(),
            // All GSIs start unallocated; boxed because the table is large.
            gsi_states: Mutex::new(Box::new(
                [crate::irqfd::GsiState::Unallocated; crate::irqfd::NUM_GSIS],
            )),
            caps,
            synic_ports: Default::default(),
            cpuid,
            software_devices: ApicSoftwareDevices::new(apic_id_map),
            time_frozen: Mutex::new(false),
        });

        let partition = MshvPartition {
            synic_ports: Arc::new(virt::synic::SynicPorts::new(inner.clone())),
            inner,
        };

        // One binder per VP; non-BSP vcpu fds are created lazily in
        // `bind()`.
        let vps = self
            .config
            .processor_topology
            .vps()
            .map(|vp| MshvProcessorBinder {
                partition: partition.inner.clone(),
                vpindex: vp.vp_index,
                vcpufd: None,
            })
            .collect();

        Ok((partition, vps))
    }
}
348
349// ---------------------------------------------------------------------------
350// Partition trait impls
351// ---------------------------------------------------------------------------
352
impl virt::Partition for MshvPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Error>> {
        Some(self)
    }

    // The minimum VTL is ignored: only VTL0 is supported by this backend.
    fn doorbell_registration(
        self: &Arc<Self>,
        _minimum_vtl: Vtl,
    ) -> Option<Arc<dyn DoorbellRegistration>> {
        Some(self.clone())
    }

    fn caps(&self) -> &virt::PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, _vtl: Vtl, request: MsiRequest) {
        self.inner.request_msi(request)
    }

    fn as_signal_msi(&self, _vtl: Vtl) -> Option<Arc<dyn SignalMsi>> {
        Some(self.inner.clone())
    }

    fn irqfd(&self) -> Option<Arc<dyn virt::irqfd::IrqFd>> {
        Some(Arc::new(crate::irqfd::MshvIrqFd::new(self.inner.clone())))
    }

    /// Asks the VP's run loop to yield by signaling its thread.
    fn request_yield(&self, vp_index: VpIndex) {
        let vp = self.inner.vp(vp_index);
        // `request_yield()` returns whether a wakeup is needed — presumably
        // false when one is already pending; TODO confirm against its impl.
        if vp.needs_yield.request_yield() {
            let thread = vp.thread.read();
            if let Some(thread) = *thread {
                // No need to interrupt our own thread.
                if thread != Pthread::current() {
                    thread
                        .signal(libc::SIGRTMIN())
                        .expect("thread cancel signal failed");
                }
            }
        }
    }
}
395
impl virt::X86Partition for MshvPartition {
    fn ioapic_routing(&self) -> Arc<dyn virt::irqcon::IoApicRouting> {
        self.inner.clone()
    }

    /// LINT pulses are not yet implemented; the request is logged
    /// (rate-limited) and dropped.
    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        // TODO
        tracelimit::warn_ratelimited!(?vp_index, ?vtl, lint, "ignored lint pulse");
    }
}
406
impl virt::ResetPartition for MshvPartition {
    type Error = Error;

    /// Resets the partition: clears all IO-APIC routes, freezes guest
    /// time, then resets all VTL0 state to the BSP's reset values.
    fn reset(&self) -> Result<(), Error> {
        use virt::x86::vm::AccessVmState;

        // Drop every IO-APIC line's MSI route.
        for irq in 0..virt::irqcon::IRQ_LINES as u8 {
            self.inner.irq_routes.set_irq_route(irq, None);
        }

        // NOTE(review): time is frozen here but not visibly unfrozen in
        // this function — presumably a later resume path unfreezes it;
        // confirm.
        self.inner.freeze_time()?;

        let bsp_vp_info = &self.inner.vps[0].vp_info;
        self.access_state(Vtl::Vtl0)
            .reset_all(bsp_vp_info)
            .map_err(|e| ErrorInner::ResetState(Box::new(e)))?;

        Ok(())
    }
}
427
impl Hv1 for MshvPartition {
    type Error = Error;
    type Device = ApicSoftwareDevice;

    /// The partition itself serves as the reference time source.
    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(ReferenceTimeSource::from(self.inner.clone() as Arc<_>))
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        Some(self)
    }

    fn synic(&self) -> Arc<dyn vmcore::synic::SynicPortAccess> {
        self.synic_ports.clone()
    }
}
446
447impl virt::DeviceBuilder for MshvPartition {
448    fn build(&self, _vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
449        Ok(self
450            .inner
451            .software_devices
452            .new_device(self.inner.clone(), device_id)
453            .map_err(ErrorInner::NewDevice)?)
454    }
455}
456
impl MshvPartitionInner {
    /// Translates an x86 MSI (address/data pair) into a hypervisor virtual
    /// interrupt request and delivers it.
    ///
    /// Delivery is best-effort: failures are logged (rate-limited) and not
    /// propagated, since MSI delivery has no error path back to devices.
    fn request_msi(&self, request: MsiRequest) {
        let (address, data) = request.as_x86();
        let control = request.hv_x86_interrupt_control();
        let mshv_req = InterruptRequest {
            interrupt_type: control.interrupt_type().0,
            apic_id: address.virt_destination().into(),
            vector: data.vector().into(),
            level_triggered: control.x86_level_triggered(),
            logical_destination_mode: control.x86_logical_destination_mode(),
            long_mode: false,
        };

        if let Err(err) = self.vmfd.request_virtual_interrupt(&mshv_req) {
            tracelimit::warn_ratelimited!(
                address = request.address,
                data = request.data,
                error = &err as &dyn std::error::Error,
                "failed to request msi"
            );
        }
    }
}
480
481impl SignalMsi for MshvPartitionInner {
482    fn signal_msi(&self, _rid: u32, address: u64, data: u32) {
483        self.request_msi(MsiRequest { address, data });
484    }
485}
486
487impl virt::irqcon::IoApicRouting for MshvPartitionInner {
488    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
489        self.irq_routes.set_irq_route(irq, request)
490    }
491
492    fn assert_irq(&self, irq: u8) {
493        self.irq_routes
494            .assert_irq(irq, |request| self.request_msi(request))
495    }
496}
497
498// ---------------------------------------------------------------------------
499// Processor binding and run loop
500// ---------------------------------------------------------------------------
501
impl virt::BindProcessor for MshvProcessorBinder {
    type Processor<'a>
        = MshvProcessor<'a>
    where
        Self: 'a;
    type Error = Error;

    /// Binds this VP to the current thread, creating the kernel vcpu on
    /// first use and setting up its register page and initial APIC state.
    fn bind(&mut self) -> Result<Self::Processor<'_>, Self::Error> {
        let inner = &self.partition.vps[self.vpindex.index() as usize];

        // The BSP's vcpu fd was created with the partition; all other VPs
        // get theirs lazily here, cached for subsequent binds.
        let vcpufd = if self.vpindex.is_bsp() {
            &self.partition.bsp_vcpufd
        } else {
            if self.vcpufd.is_none() {
                let vcpufd = self
                    .partition
                    .vmfd
                    // NOTE(review): "validated above" — the u8 bound is not
                    // visibly validated in this function; presumably the VP
                    // count is checked at partition creation. Confirm.
                    .create_vcpu(u8::try_from(self.vpindex.index()).expect("validated above"))
                    .map_err(|e| ErrorInner::CreateVcpu(e.into()))?;
                self.vcpufd = Some(vcpufd);
            }
            self.vcpufd.as_ref().unwrap()
        };

        // Map the shared register page used for fast register access.
        let reg_page_ptr = vcpufd
            .get_vp_reg_page()
            .expect("register page must be mapped")
            .0
            .cast::<HvX64RegisterPage>();

        let runner = MshvVpRunner {
            vcpufd,
            reg_page: reg_page_ptr,
        };

        let this = MshvProcessor {
            partition: &self.partition,
            inner,
            vpindex: self.vpindex,
            runner,
            deliverability_notifications: HvDeliverabilityNotificationsRegister::new(),
        };

        // Set the APIC state.
        let apic_base =
            virt::vp::Apic::at_reset(&this.partition.caps, &this.inner.vp_info).apic_base;

        let regs = &[
            HvRegisterAssoc::from((
                HvX64RegisterName::InitialApicId,
                u64::from(inner.vp_info.apic_id),
            )),
            HvRegisterAssoc::from((HvX64RegisterName::ApicBase, apic_base)),
            HvRegisterAssoc::from((HvX64RegisterName::ApicId, u64::from(inner.vp_info.apic_id))),
        ];

        // When x2APIC is available, skip the trailing ApicId register —
        // presumably it is derived from InitialApicId in that mode; TODO
        // confirm against hypervisor behavior.
        let reg_count = if this.partition.caps.x2apic { 2 } else { 3 };

        vcpufd
            .set_hvdef_regs(&regs[..reg_count])
            .map_err(ErrorInner::Register)?;

        Ok(this)
    }
}
567
impl MshvProcessor<'_> {
    /// Runs the x86 instruction emulator for the intercepted instruction
    /// described by `message`.
    async fn emulate(
        &mut self,
        message: &HvMessage,
        devices: &impl CpuIo,
        interruption_pending: bool,
    ) -> Result<(), VpHaltReason> {
        // All three access classes (general, kernel-execute, user-execute)
        // go through the same guest memory object.
        let emu_mem = virt_support_x86emu::emulate::EmulatorMemoryAccess {
            gm: &self.partition.gm,
            kx_gm: &self.partition.gm,
            ux_gm: &self.partition.gm,
        };

        let mut support = MshvEmulationState {
            partition: self.partition,
            vcpufd: self.runner.vcpufd,
            reg_page: self.runner.reg_page(),
            vp_index: self.vpindex,
            message,
            interruption_pending,
        };
        virt_support_x86emu::emulate::emulate(&mut support, &emu_mem, devices).await
    }

    /// Dispatches a single VP exit message to the appropriate handler.
    ///
    /// Panics on exit types this backend does not handle, since running
    /// past an unknown intercept would corrupt guest state.
    pub(crate) async fn handle_exit(
        &mut self,
        exit: &HvMessage,
        dev: &impl CpuIo,
    ) -> Result<(), VpHaltReason> {
        match exit.header.typ {
            HvMessageType::HvMessageTypeUnrecoverableException => {
                return Err(VpHaltReason::TripleFault { vtl: Vtl::Vtl0 });
            }
            HvMessageType::HvMessageTypeX64IoPortIntercept => {
                self.handle_io_port_intercept(exit, dev).await?;
            }
            HvMessageType::HvMessageTypeUnmappedGpa | HvMessageType::HvMessageTypeGpaIntercept => {
                self.handle_mmio_intercept(exit, dev).await?;
            }
            HvMessageType::HvMessageTypeSynicSintDeliverable => {
                tracing::trace!("SYNIC_SINT_DELIVERABLE");
                let info = exit.as_message::<hvdef::HvX64SynicSintDeliverableMessage>();
                self.handle_sint_deliverable(info.deliverable_sints);
            }
            HvMessageType::HvMessageTypeHypercallIntercept => {
                tracing::trace!("HYPERCALL_INTERCEPT");
                self.handle_hypercall_intercept(exit, dev);
            }
            HvMessageType::HvMessageTypeX64ApicEoi => {
                let msg = exit.as_message::<hvdef::HvX64ApicEoiMessage>();
                dev.handle_eoi(msg.interrupt_vector);
            }
            exit_type => {
                panic!("Unhandled vcpu exit code {exit_type:?}");
            }
        }
        Ok(())
    }

    /// Handles an I/O port intercept: simple IN/OUT is completed directly
    /// via the register page; string/rep forms go through full emulation.
    async fn handle_io_port_intercept(
        &mut self,
        message: &HvMessage,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason> {
        let info = message.as_message::<hvdef::HvX64IoPortInterceptMessage>();
        let access_info = info.access_info;

        if access_info.string_op() || access_info.rep_prefix() {
            // INS/OUTS and REP-prefixed ops need the instruction emulator.
            let interruption_pending = info.header.execution_state.interruption_pending();
            self.emulate(message, devices, interruption_pending).await?
        } else {
            // Plain IN/OUT: perform the port access, then complete the
            // instruction by writing RAX and advancing RIP through the
            // register page (marking the corresponding dirty bits).
            let mut ret_rax = info.rax;
            virt_support_x86emu::emulate::emulate_io(
                self.vpindex,
                info.header.intercept_access_type == hvdef::HvInterceptAccessType::WRITE,
                info.port_number,
                &mut ret_rax,
                access_info.access_size(),
                devices,
            )
            .await;

            let insn_len = info.header.instruction_len() as u64;

            let rp = self.runner.reg_page();
            rp.gp_registers[x86emu::Gp::RAX as usize] = ret_rax;
            rp.rip = info.header.rip + insn_len;
            rp.dirty.set_general_purpose(true);
            rp.dirty.set_instruction_pointer(true);
        }

        Ok(())
    }

    /// Handles a memory intercept (unmapped GPA or GPA intercept) by
    /// emulating the faulting instruction against device MMIO.
    async fn handle_mmio_intercept(
        &mut self,
        message: &HvMessage,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason> {
        let info = message.as_message::<hvdef::HvX64MemoryInterceptMessage>();
        let interruption_pending = info.header.execution_state.interruption_pending();
        self.emulate(message, devices, interruption_pending).await
    }

    /// Handles an intercepted guest hypercall by dispatching it to the
    /// userspace hypercall handler.
    fn handle_hypercall_intercept(&mut self, message: &HvMessage, _devices: &impl CpuIo) {
        let info = message.as_message::<hvdef::HvX64HypercallInterceptMessage>();
        // 64-bit calling convention requires protected mode + long mode
        // active.
        let is_64bit =
            info.header.execution_state.cr0_pe() && info.header.execution_state.efer_lma();

        let mut handler = MshvHypercallHandler {
            partition: self.partition,
            reg_page: self.runner.reg_page(),
        };

        MshvHypercallHandler::DISPATCHER.dispatch(
            &self.partition.gm,
            X64RegisterIo::new(&mut handler, is_64bit),
        );
    }
}
688
689// ---------------------------------------------------------------------------
690// x86 emulation support
691// ---------------------------------------------------------------------------
692
/// Per-intercept context handed to the x86 instruction emulator.
struct MshvEmulationState<'a> {
    partition: &'a MshvPartitionInner,
    // Used for registers not backed by the register page (e.g. high XMM
    // registers, pending events).
    vcpufd: &'a VcpuFd,
    // Mapped register page for fast register reads/writes; writes must set
    // the matching dirty bits.
    reg_page: &'a mut HvX64RegisterPage,
    vp_index: VpIndex,
    // The intercept message that triggered emulation.
    message: &'a HvMessage,
    interruption_pending: bool,
}
701
702impl EmulatorSupport for MshvEmulationState<'_> {
703    fn vp_index(&self) -> VpIndex {
704        self.vp_index
705    }
706
707    fn vendor(&self) -> x86defs::cpuid::Vendor {
708        self.partition.caps.vendor
709    }
710
711    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
712        self.reg_page.gp_registers[reg as usize]
713    }
714
715    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
716        self.reg_page.gp_registers[reg as usize] = v;
717        self.reg_page.dirty.set_general_purpose(true);
718    }
719
720    fn rip(&mut self) -> u64 {
721        self.reg_page.rip
722    }
723
724    fn set_rip(&mut self, v: u64) {
725        self.reg_page.rip = v;
726        self.reg_page.dirty.set_instruction_pointer(true);
727    }
728
729    fn segment(&mut self, reg: x86emu::Segment) -> SegmentRegister {
730        virt::x86::SegmentRegister::from(self.reg_page.segment[reg as usize]).into()
731    }
732
733    fn efer(&mut self) -> u64 {
734        self.reg_page.efer
735    }
736
737    fn cr0(&mut self) -> u64 {
738        self.reg_page.cr0
739    }
740
741    fn rflags(&mut self) -> RFlags {
742        RFlags::from(self.reg_page.rflags)
743    }
744
745    fn set_rflags(&mut self, v: RFlags) {
746        self.reg_page.rflags = v.into();
747        self.reg_page.dirty.set_flags(true);
748    }
749
750    fn xmm(&mut self, reg: usize) -> u128 {
751        assert!(reg < 16);
752        if reg < 6 {
753            self.reg_page.xmm[reg]
754        } else {
755            let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
756            let mut assoc = [HvRegisterAssoc::from((name, 0u128))];
757            let _ = self.vcpufd.get_hvdef_regs(&mut assoc);
758            assoc[0].value.as_u128()
759        }
760    }
761
762    fn set_xmm(&mut self, reg: usize, value: u128) {
763        assert!(reg < 16);
764        if reg < 6 {
765            self.reg_page.xmm[reg] = value;
766            self.reg_page.dirty.set_xmm(true);
767        } else {
768            let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
769            let assoc = [HvRegisterAssoc::from((name, value))];
770            self.vcpufd.set_hvdef_regs(&assoc).unwrap();
771        }
772    }
773
774    fn flush(&mut self) {}
775
776    fn instruction_bytes(&self) -> &[u8] {
777        match self.message.header.typ {
778            HvMessageType::HvMessageTypeGpaIntercept
779            | HvMessageType::HvMessageTypeUnmappedGpa
780            | HvMessageType::HvMessageTypeUnacceptedGpa => {
781                let info = self
782                    .message
783                    .as_message::<hvdef::HvX64MemoryInterceptMessage>();
784                &info.instruction_bytes[..info.instruction_byte_count as usize]
785            }
786            HvMessageType::HvMessageTypeX64IoPortIntercept => {
787                let info = self
788                    .message
789                    .as_message::<hvdef::HvX64IoPortInterceptMessage>();
790                &info.instruction_bytes[..info.instruction_byte_count as usize]
791            }
792            _ => unreachable!(),
793        }
794    }
795
796    fn physical_address(&self) -> Option<u64> {
797        match self.message.header.typ {
798            HvMessageType::HvMessageTypeGpaIntercept
799            | HvMessageType::HvMessageTypeUnmappedGpa
800            | HvMessageType::HvMessageTypeUnacceptedGpa => {
801                let info = self
802                    .message
803                    .as_message::<hvdef::HvX64MemoryInterceptMessage>();
804                Some(info.guest_physical_address)
805            }
806            _ => None,
807        }
808    }
809
810    fn initial_gva_translation(
811        &mut self,
812    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
813        match self.message.header.typ {
814            HvMessageType::HvMessageTypeGpaIntercept
815            | HvMessageType::HvMessageTypeUnmappedGpa
816            | HvMessageType::HvMessageTypeUnacceptedGpa => {}
817            _ => return None,
818        }
819
820        let message = self
821            .message
822            .as_message::<hvdef::HvX64MemoryInterceptMessage>();
823
824        if !message.memory_access_info.gva_gpa_valid() {
825            return None;
826        }
827
828        if let Ok(translate_mode) = TranslateMode::try_from(message.header.intercept_access_type) {
829            Some(virt_support_x86emu::emulate::InitialTranslation {
830                gva: message.guest_virtual_address,
831                gpa: message.guest_physical_address,
832                translate_mode,
833            })
834        } else {
835            None
836        }
837    }
838
839    fn interruption_pending(&self) -> bool {
840        self.interruption_pending
841    }
842
843    fn check_vtl_access(
844        &mut self,
845        _gpa: u64,
846        _mode: TranslateMode,
847    ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError> {
848        Ok(())
849    }
850
851    fn translate_gva(
852        &mut self,
853        gva: u64,
854        mode: TranslateMode,
855    ) -> Result<EmuTranslateResult, EmuTranslateError> {
856        emulate_translate_gva(self, gva, mode)
857    }
858
859    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
860        self.vcpufd
861            .set_hvdef_regs(&[
862                HvRegisterAssoc::from((
863                    HvX64RegisterName::PendingEvent0,
864                    u128::from(event_info.reg_0),
865                )),
866                HvRegisterAssoc::from((
867                    HvX64RegisterName::PendingEvent1,
868                    u128::from(event_info.reg_1),
869                )),
870            ])
871            .unwrap();
872    }
873
874    fn is_gpa_mapped(&self, gpa: u64, _write: bool) -> bool {
875        self.partition
876            .mem_layout
877            .ram()
878            .iter()
879            .any(|r| r.range.contains_addr(gpa))
880    }
881
    /// No user-space APIC MMIO page is exposed by this backend, so there is
    /// no local APIC base address for the emulator to intercept.
    fn lapic_base_address(&self) -> Option<u64> {
        None
    }
885
    /// Never called: `lapic_base_address` returns `None`, so the emulator
    /// never routes reads here.
    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
        unreachable!()
    }
889
    /// Never called: `lapic_base_address` returns `None`, so the emulator
    /// never routes writes here.
    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
        unreachable!()
    }
893}
894
895impl TranslateGvaSupport for MshvEmulationState<'_> {
896    fn guest_memory(&self) -> &GuestMemory {
897        &self.partition.gm
898    }
899
900    fn acquire_tlb_lock(&mut self) {}
901
902    fn registers(&mut self) -> TranslationRegisters {
903        TranslationRegisters {
904            cr0: self.reg_page.cr0,
905            cr4: self.reg_page.cr4,
906            efer: self.reg_page.efer,
907            cr3: self.reg_page.cr3,
908            rflags: self.reg_page.rflags,
909            ss: virt::x86::SegmentRegister::from(
910                self.reg_page.segment[x86emu::Segment::SS as usize],
911            )
912            .into(),
913            encryption_mode: virt_support_x86emu::translate::EncryptionMode::None,
914        }
915    }
916}
917
918// ---------------------------------------------------------------------------
919// Hypercall handler
920// ---------------------------------------------------------------------------
921
// Register access for the hypercall dispatcher. All reads and writes go
// through the shared register page; each setter records the change in the
// page's dirty mask so the modified register class is written back.
impl hv1_hypercall::X64RegisterState for MshvHypercallHandler<'_> {
    fn rip(&mut self) -> u64 {
        self.reg_page.rip
    }

    fn set_rip(&mut self, rip: u64) {
        self.reg_page.rip = rip;
        self.reg_page.dirty.set_instruction_pointer(true);
    }

    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
        self.reg_page.gp_registers[n as usize]
    }

    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
        self.reg_page.gp_registers[n as usize] = value;
        self.reg_page.dirty.set_general_purpose(true);
    }

    fn xmm(&mut self, n: usize) -> u128 {
        self.reg_page.xmm[n]
    }

    fn set_xmm(&mut self, n: usize, value: u128) {
        self.reg_page.xmm[n] = value;
        self.reg_page.dirty.set_xmm(true);
    }
}
950
/// Per-exit context for dispatching guest hypercalls handled in user space.
pub(crate) struct MshvHypercallHandler<'a> {
    // The partition the issuing VP belongs to.
    pub(crate) partition: &'a MshvPartitionInner,
    // The VP's shared register page; hypercall register I/O goes through it.
    pub(crate) reg_page: &'a mut HvX64RegisterPage,
}
955
impl MshvHypercallHandler<'_> {
    /// The set of hypercalls this user-space handler dispatches: synic post
    /// message, synic signal event, and device interrupt retargeting.
    const DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
        Self,
        [
            hv1_hypercall::HvPostMessage,
            hv1_hypercall::HvSignalEvent,
            hv1_hypercall::HvRetargetDeviceInterrupt,
        ],
    );
}
966
967impl hv1_hypercall::RetargetDeviceInterrupt for MshvHypercallHandler<'_> {
968    fn retarget_interrupt(
969        &mut self,
970        device_id: u64,
971        address: u64,
972        data: u32,
973        params: hv1_hypercall::HvInterruptParameters<'_>,
974    ) -> hvdef::HvResult<()> {
975        let target_processors = Vec::from_iter(params.target_processors);
976        let vpci_params = vmcore::vpci_msi::VpciInterruptParameters {
977            vector: params.vector,
978            multicast: params.multicast,
979            target_processors: &target_processors,
980        };
981
982        self.partition
983            .software_devices
984            .retarget_interrupt(device_id, address, data, &vpci_params)
985    }
986}
987
988// ---------------------------------------------------------------------------
989// CPU feature lists
990// ---------------------------------------------------------------------------
991
/// Processor features (bank 0) that we support exposing to guests.
///
/// Each `with_*` call marks one feature bit of
/// `hvdef::HvX64PartitionProcessorFeatures` as supported by this backend.
/// Presumably the hypervisor intersects this set with what the host CPU
/// actually provides — confirm before relying on a bit being visible.
fn supported_processor_features() -> hvdef::HvX64PartitionProcessorFeatures {
    hvdef::HvX64PartitionProcessorFeatures::new()
        .with_sse3_support(true)
        .with_lahf_sahf_support(true)
        .with_ssse3_support(true)
        .with_sse4_1_support(true)
        .with_sse4_2_support(true)
        .with_sse4a_support(true)
        .with_xop_support(true)
        .with_pop_cnt_support(true)
        .with_cmpxchg16b_support(true)
        .with_altmovcr8_support(true)
        .with_lzcnt_support(true)
        .with_mis_align_sse_support(true)
        .with_mmx_ext_support(true)
        .with_amd3d_now_support(true)
        .with_extended_amd3d_now_support(true)
        .with_page_1gb_support(true)
        .with_aes_support(true)
        .with_pclmulqdq_support(true)
        .with_pcid_support(true)
        .with_fma4_support(true)
        .with_f16c_support(true)
        .with_rd_rand_support(true)
        .with_rd_wr_fs_gs_support(true)
        .with_smep_support(true)
        .with_enhanced_fast_string_support(true)
        .with_bmi1_support(true)
        .with_bmi2_support(true)
        .with_movbe_support(true)
        .with_npiep1_support(true)
        .with_dep_x87_fpu_save_support(true)
        .with_rd_seed_support(true)
        .with_adx_support(true)
        .with_intel_prefetch_support(true)
        .with_smap_support(true)
        .with_hle_support(true)
        .with_rtm_support(true)
        .with_rdtscp_support(true)
        .with_clflushopt_support(true)
        .with_clwb_support(true)
        .with_sha_support(true)
        .with_x87_pointers_saved_support(true)
        .with_invpcid_support(true)
        // Side-channel mitigation controls (IBRS/STIBP/IBPB) and the
        // "not affected" enumeration bits follow.
        .with_ibrs_support(true)
        .with_stibp_support(true)
        .with_ibpb_support(true)
        .with_unrestricted_guest_support(true)
        .with_mdd_support(true)
        .with_fast_short_rep_mov_support(true)
        .with_rdcl_no_support(true)
        .with_ibrs_all_support(true)
        .with_ssb_no_support(true)
        .with_rsb_a_no_support(true)
        .with_rd_pid_support(true)
        .with_umip_support(true)
        .with_mbs_no_support(true)
        .with_mb_clear_support(true)
        .with_taa_no_support(true)
        .with_tsx_ctrl_support(true)
}
1054
/// Processor features (bank 1) that we support exposing to guests.
///
/// Each `with_*` call marks one feature bit of
/// `hvdef::HvX64PartitionProcessorFeatures1` as supported by this backend;
/// this is the second feature bank, extending `supported_processor_features`.
fn supported_processor_features1() -> hvdef::HvX64PartitionProcessorFeatures1 {
    hvdef::HvX64PartitionProcessorFeatures1::new()
        .with_a_count_m_count_support(true)
        .with_tsc_invariant_support(true)
        .with_cl_zero_support(true)
        .with_rdpru_support(true)
        .with_la57_support(true)
        .with_mbec_support(true)
        .with_nested_virt_support(true)
        .with_psfd_support(true)
        .with_cet_ss_support(true)
        .with_cet_ibt_support(true)
        .with_vmx_exception_inject_support(true)
        .with_umwait_tpause_support(true)
        .with_movdiri_support(true)
        .with_movdir64b_support(true)
        .with_cldemote_support(true)
        .with_serialize_support(true)
        .with_tsc_deadline_tmr_support(true)
        .with_tsc_adjust_support(true)
        .with_fz_l_rep_movsb(true)
        .with_fs_rep_stosb(true)
        .with_fs_rep_cmpsb(true)
        .with_tsx_ld_trk_support(true)
        .with_vmx_ins_outs_exit_info_support(true)
        .with_sbdr_ssdp_no_support(true)
        .with_fbsdp_no_support(true)
        .with_psdp_no_support(true)
        .with_fb_clear_support(true)
        .with_btc_no_support(true)
        .with_ibpb_rsb_flush_support(true)
        .with_stibp_always_on_support(true)
        .with_perf_global_ctrl_support(true)
        // Nested-paging (NPT) capabilities.
        .with_npt_execute_only_support(true)
        .with_npt_ad_flags_support(true)
        .with_npt_1gb_page_support(true)
        .with_cmpccxadd_support(true)
        .with_prefetch_i_support(true)
        .with_sha512_support(true)
        .with_rfds_no_support(true)
        .with_rfds_clear_support(true)
        .with_sm3_support(true)
        .with_sm4_support(true)
}
1100
1101/// XSAVE features that we support exposing to guests.
1102fn supported_xsave_features() -> hvdef::HvX64PartitionProcessorXsaveFeatures {
1103    hvdef::HvX64PartitionProcessorXsaveFeatures::new()
1104        .with_xsave_support(true)
1105        .with_xsaveopt_support(true)
1106        .with_avx_support(true)
1107        .with_avx2_support(true)
1108        .with_fma_support(true)
1109        .with_mpx_support(true)
1110        .with_avx512_support(true)
1111        .with_avx512_dq_support(true)
1112        .with_avx512_cd_support(true)
1113        .with_avx512_bw_support(true)
1114        .with_avx512_vl_support(true)
1115        .with_xsave_comp_support(true)
1116        .with_xsave_supervisor_support(true)
1117        .with_xcr1_support(true)
1118        .with_avx512_bitalg_support(true)
1119        .with_avx512_ifma_support(true)
1120        .with_avx512_vbmi_support(true)
1121        .with_avx512_vbmi2_support(true)
1122        .with_avx512_vnni_support(true)
1123        .with_gfni_support(true)
1124        .with_vaes_support(true)
1125        .with_avx512_vpopcntdq_support(true)
1126        .with_vpclmulqdq_support(true)
1127        .with_avx512_bf16_support(true)
1128        .with_avx512_vp2_intersect_support(true)
1129        .with_avx512_fp16_support(true)
1130        .with_xfd_support(true)
1131        .with_amx_tile_support(true)
1132        .with_amx_bf16_support(true)
1133        .with_amx_int8_support(true)
1134        .with_avx_vnni_support(true)
1135        .with_avx_ifma_support(true)
1136        .with_avx_ne_convert_support(true)
1137        .with_avx_vnni_int8_support(true)
1138        .with_avx_vnni_int16_support(true)
1139        .with_avx10_1_256_support(true)
1140        .with_avx10_1_512_support(true)
1141        .with_amx_fp16_support(true)
1142}