virt_mshv/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Linux /dev/mshv implementation of the virt::generic interfaces.

#![cfg(all(target_os = "linux", guest_is_native, guest_arch = "x86_64"))]
#![expect(missing_docs)]
// UNSAFETY: Calling HV APIs and manually managing memory.
#![expect(unsafe_code)]

mod vm_state;
mod vp_state;

use arrayvec::ArrayVec;
use guestmem::DoorbellRegistration;
use guestmem::GuestMemory;
use hv1_emulator::message_queues::MessageQueues;
use hv1_hypercall::X64RegisterIo;
use hvdef::HvDeliverabilityNotificationsRegister;
use hvdef::HvError;
use hvdef::HvMessage;
use hvdef::HvMessageType;
use hvdef::HvX64RegisterName;
use hvdef::HvX64VpExecutionState;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HvRegisterAssoc;
use inspect::Inspect;
use inspect::InspectMut;
use mshv_bindings::MSHV_SET_MEM_BIT_EXECUTABLE;
use mshv_bindings::MSHV_SET_MEM_BIT_WRITABLE;
use mshv_bindings::hv_message;
use mshv_bindings::hv_register_assoc;
use mshv_bindings::hv_register_value;
use mshv_bindings::hv_u128;
use mshv_bindings::hv_x64_io_port_intercept_message;
use mshv_bindings::hv_x64_memory_intercept_message;
use mshv_bindings::hv_x64_segment_register;
use mshv_bindings::mshv_install_intercept;
use mshv_bindings::mshv_user_mem_region;
use mshv_ioctls::InterruptRequest;
use mshv_ioctls::Mshv;
use mshv_ioctls::MshvError;
use mshv_ioctls::VcpuFd;
use mshv_ioctls::VmFd;
use mshv_ioctls::set_bits;
use mshv_ioctls::set_registers_64;
use pal::unix::pthread::*;
use pal_event::Event;
use parking_lot::Mutex;
use parking_lot::RwLock;
use std::convert::Infallible;
use std::io;
use std::sync::Arc;
use std::sync::Once;
use std::sync::Weak;
use thiserror::Error;
use virt::Hv1;
use virt::NeedsYield;
use virt::PartitionAccessState;
use virt::PartitionConfig;
use virt::ProtoPartition;
use virt::ProtoPartitionConfig;
use virt::StopVp;
use virt::VpHaltReason;
use virt::VpIndex;
use virt::io::CpuIo;
use virt::irqcon::MsiRequest;
use virt::x86::max_physical_address_size_from_cpuid;
use virt_support_x86emu::emulate::EmuTranslateError;
use virt_support_x86emu::emulate::EmuTranslateResult;
use virt_support_x86emu::emulate::EmulatorSupport;
use virt_support_x86emu::emulate::TranslateGvaSupport;
use virt_support_x86emu::emulate::TranslateMode;
use virt_support_x86emu::emulate::emulate_translate_gva;
use virt_support_x86emu::translate::TranslationRegisters;
use vmcore::interrupt::Interrupt;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::synic::GuestEventPort;
use x86defs::RFlags;
use x86defs::SegmentRegister;
use zerocopy::IntoBytes;

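/// Hypervisor backend for the Linux /dev/mshv driver.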
#[derive(Debug)]
pub struct LinuxMshv;

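/// Register state cached for instruction emulation, captured once per
/// intercept via `emulation_cache`.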
struct MshvEmuCache {
    /// GP registers, in the canonical order (as defined by `RAX`, etc.).
    gps: [u64; 16],
    /// Segment registers, in the canonical order (as defined by `ES`, etc.).
    segs: [SegmentRegister; 6],
    rip: u64,
    rflags: RFlags,

    cr0: u64,
    efer: u64,
}

impl virt::Hypervisor for LinuxMshv {
    type ProtoPartition<'a> = MshvProtoPartition<'a>;
    type Partition = MshvPartition;
    type Error = Error;

    fn new_partition<'a>(
        &mut self,
        config: ProtoPartitionConfig<'a>,
    ) -> Result<MshvProtoPartition<'a>, Self::Error> {
        if config.isolation.is_isolated() {
            return Err(Error::IsolationNotSupported);
        }

        // Open /dev/mshv.
        let mshv = Mshv::new().map_err(Error::OpenMshv)?;

        // Create VM.
        //
        // TODO: really need to pass some partition properties here (e.g., for
        // APIC configuration), but the underlying crate just hardcodes
        // everything.
        let vmfd: VmFd;
        loop {
            match mshv.create_vm() {
                Ok(fd) => vmfd = fd,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // EINTR means the ioctl was interrupted; retry, since
                        // this is not a real failure.
                        continue;
                    } else {
                        return Err(Error::CreateVMFailed);
                    }
                }
            }
            break;
        }

        // Create virtual CPUs.
        let mut vps: Vec<MshvVpInner> = Vec::new();
        for vp in config.processor_topology.vps_arch() {
            if vp.base.vp_index.index() != vp.apic_id {
                // TODO
                return Err(Error::NotSupported);
            }

            let vcpufd = vmfd
                .create_vcpu(vp.base.vp_index.index() as u8)
                .map_err(Error::CreateVcpu)?;

            vps.push(MshvVpInner {
                vcpufd,
                thread: RwLock::new(None),
                needs_yield: NeedsYield::new(),
                message_queues: MessageQueues::new(),
                deliverability_notifications: Mutex::new(
                    HvDeliverabilityNotificationsRegister::new(),
                ),
            });
        }

        // Install required intercepts.
        let intercept_args = mshv_install_intercept {
            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
            intercept_type: hvdef::hypercall::HvInterceptType::HvInterceptTypeHypercall.0,
            intercept_parameter: Default::default(),
        };
        vmfd.install_intercept(intercept_args)
            .map_err(Error::InstallIntercept)?;

        // Set up a signal for forcing the vcpufd.run() ioctl to exit.
        static SIGNAL_HANDLER_INIT: Once = Once::new();
        // SAFETY: the signal handler performs no actions at all, so it is
        // trivially async-signal-safe.
        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
            signal_hook::low_level::register(libc::SIGRTMIN(), || {
                // The handler does nothing; it exists only so that the
                // vcpufd.run() ioctl returns with EINTR when the signal is
                // sent to the thread running it.
            })
            .unwrap();
        });

        if let Some(hv_config) = &config.hv_config {
            if hv_config.vtl2.is_some() {
                return Err(Error::Vtl2NotSupported);
            }
        }

        Ok(MshvProtoPartition { config, vmfd, vps })
    }

    fn is_available(&self) -> Result<bool, Self::Error> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(Error::AvailableCheck(err)),
        }
    }
}

/// Prototype partition.
pub struct MshvProtoPartition<'a> {
    config: ProtoPartitionConfig<'a>,
    vmfd: VmFd,
    vps: Vec<MshvVpInner>,
}

impl ProtoPartition for MshvProtoPartition<'_> {
    type Partition = MshvPartition;
    type ProcessorBinder = MshvProcessorBinder;
    type Error = Infallible;

    fn cpuid(&self, eax: u32, ecx: u32) -> [u32; 4] {
        // This call should never fail unless there is a kernel or hypervisor
        // bug.
        self.vps[0]
            .vcpufd
            .get_cpuid_values(eax, ecx, 0, 0)
            .expect("cpuid should not fail")
    }

    fn max_physical_address_size(&self) -> u8 {
        max_physical_address_size_from_cpuid(&|eax, ecx| self.cpuid(eax, ecx))
    }

    fn build(
        self,
        config: PartitionConfig<'_>,
    ) -> Result<(Self::Partition, Vec<Self::ProcessorBinder>), Self::Error> {
        // TODO: do something with cpuid.

        // Get caps via cpuid.
        let caps = virt::PartitionCapabilities::from_cpuid(
            self.config.processor_topology,
            &mut |function, index| {
                self.vps[0]
                    .vcpufd
                    .get_cpuid_values(function, index, 0, 0)
                    .expect("cpuid should not fail")
            },
        );

        // Attach all the resources created above to a Partition object.
        let partition = MshvPartition {
            inner: Arc::new(MshvPartitionInner {
                vmfd: self.vmfd,
                memory: Default::default(),
                gm: config.guest_memory.clone(),
                vps: self.vps,
                irq_routes: Default::default(),
                caps,
            }),
        };

        let vps = self
            .config
            .processor_topology
            .vps()
            .map(|vp| MshvProcessorBinder {
                partition: partition.inner.clone(),
                vpindex: vp.vp_index,
            })
            .collect();

        Ok((partition, vps))
    }
}

// TODO: remove these workarounds when the mshv-ioctls crate implements the Debug trait.
#[derive(Debug)]
pub struct MshvPartition {
    inner: Arc<MshvPartitionInner>,
}

#[derive(Debug)]
struct MshvPartitionInner {
    vmfd: VmFd,
    memory: Mutex<MshvMemoryRangeState>,
    gm: GuestMemory,
    vps: Vec<MshvVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: virt::PartitionCapabilities,
}

#[derive(Debug)]
struct MshvVpInner {
    vcpufd: VcpuFd,
    thread: RwLock<Option<Pthread>>,
    needs_yield: NeedsYield,
    message_queues: MessageQueues,
    deliverability_notifications: Mutex<HvDeliverabilityNotificationsRegister>,
}

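/// Guard that clears the stored VP thread handle when the run loop exits.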
struct MshvVpInnerCleaner<'a> {
    vpinner: &'a MshvVpInner,
}

impl Drop for MshvVpInnerCleaner<'_> {
    fn drop(&mut self) {
        self.vpinner.thread.write().take();
    }
}

impl virt::Partition for MshvPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Error>> {
        None
    }

    fn doorbell_registration(
        self: &Arc<Self>,
        _minimum_vtl: Vtl,
    ) -> Option<Arc<dyn DoorbellRegistration>> {
        // TODO: implementation

        Some(self.clone())
    }

    fn caps(&self) -> &virt::PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, _vtl: Vtl, request: MsiRequest) {
        self.inner.request_msi(request)
    }

    fn request_yield(&self, vp_index: VpIndex) {
        let vp = self.inner.vp(vp_index);
        if vp.needs_yield.request_yield() {
            // Send a signal to the thread that called vcpufd.run() to force an exit.
            let thread = vp.thread.read();
            if let Some(thread) = *thread {
                if thread != Pthread::current() {
                    thread
                        .signal(libc::SIGRTMIN())
                        .expect("thread cancel signal failed");
                }
            }
        }
    }
}

impl virt::X86Partition for MshvPartition {
    fn ioapic_routing(&self) -> Arc<dyn virt::irqcon::IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        // TODO
        tracing::warn!(?vp_index, ?vtl, lint, "ignored lint pulse");
    }
}

impl PartitionAccessState for MshvPartition {
    type StateAccess<'a> = &'a MshvPartition;

    fn access_state(&self, vtl: Vtl) -> Self::StateAccess<'_> {
        assert_eq!(vtl, Vtl::Vtl0);

        self
    }
}

impl Hv1 for MshvPartition {
    type Error = Error;
    type Device = virt::UnimplementedDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(ReferenceTimeSource::from(self.inner.clone() as Arc<_>))
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        None
    }
}

impl GetReferenceTime for MshvPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        let mut regs = [hv_register_assoc {
            name: hvdef::HvAllArchRegisterName::TimeRefCount.0,
            value: hv_register_value { reg64: 0 },
            ..Default::default()
        }];
        self.vp(VpIndex::BSP).vcpufd.get_reg(&mut regs).unwrap();
        // SAFETY: the value has been written by the kernel.
        let ref_time = unsafe { regs[0].value.reg64 };
        ReferenceTimeResult {
            ref_time,
            system_time: None,
        }
    }
}

impl MshvPartitionInner {
    fn vp(&self, vp_index: VpIndex) -> &MshvVpInner {
        &self.vps[vp_index.index() as usize]
    }

    fn post_message(&self, vp_index: VpIndex, sint: u8, message: &HvMessage) {
        let request_notification = self
            .vp(vp_index)
            .message_queues
            .enqueue_message(sint, message);

        if request_notification {
            self.request_sint_notifications(vp_index, 1 << sint);
        }
    }

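    /// Asks the hypervisor for a `SynicSintDeliverable` exit message when any
    /// of the given SINTs becomes deliverable.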
    fn request_sint_notifications(&self, vp_index: VpIndex, sints: u16) {
        let mut notifications = self.vp(vp_index).deliverability_notifications.lock();
        if notifications.sints() != sints {
            notifications.set_sints(sints);
            self.vmfd
                .register_deliverabilty_notifications(vp_index.index(), (*notifications).into())
                .expect("requesting deliverability is not a fallible operation");
        }
    }
}

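/// Binds a virtual processor to the thread that will run it.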
pub struct MshvProcessorBinder {
    partition: Arc<MshvPartitionInner>,
    vpindex: VpIndex,
}

impl virt::BindProcessor for MshvProcessorBinder {
    type Processor<'a>
        = MshvProcessor<'a>
    where
        Self: 'a;
    type Error = Error;

    fn bind(&mut self) -> Result<Self::Processor<'_>, Self::Error> {
        Ok(MshvProcessor {
            partition: &self.partition,
            inner: &self.partition.vps[self.vpindex.index() as usize],
            vpindex: self.vpindex,
        })
    }
}

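/// A virtual processor bound via [`MshvProcessorBinder`].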
pub struct MshvProcessor<'a> {
    partition: &'a MshvPartitionInner,
    inner: &'a MshvVpInner,
    vpindex: VpIndex,
}

impl MshvProcessor<'_> {
    async fn emulate(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
        interruption_pending: bool,
    ) -> Result<(), VpHaltReason<MshvError>> {
        let cache = self.emulation_cache().map_err(VpHaltReason::Hypervisor)?;
        let emu_mem = virt_support_x86emu::emulate::EmulatorMemoryAccess {
            gm: &self.partition.gm,
            kx_gm: &self.partition.gm,
            ux_gm: &self.partition.gm,
        };

        let mut support = MshvEmulationState {
            partition: self.partition,
            processor: self.inner,
            vp_index: self.vpindex,
            message,
            interruption_pending,
            cache,
        };
        virt_support_x86emu::emulate::emulate(&mut support, &emu_mem, devices).await
    }

    async fn handle_io_port_intercept(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason<MshvError>> {
        let info = message.to_ioport_info().unwrap();
        let access_info = info.access_info;
        // SAFETY: This union only contains one field.
        let port_access_info = unsafe { access_info.__bindgen_anon_1 };

        if port_access_info.string_op() != 0 || port_access_info.rep_prefix() != 0 {
            let execution_state = info.header.execution_state;
            // SAFETY: This union only contains one field.
            let io_execution_state = unsafe { execution_state.__bindgen_anon_1 };
            let interruption_pending = io_execution_state.interruption_pending() != 0;

            self.emulate(message, devices, interruption_pending).await?
        } else {
            let mut ret_rax = info.rax;
            virt_support_x86emu::emulate::emulate_io(
                self.vpindex,
                info.header.intercept_access_type == 1,
                info.port_number,
                &mut ret_rax,
                port_access_info.access_size(),
                devices,
            )
            .await;

            let insn_len = info.header.instruction_length() as u64;

            // Advance RIP and update RAX.
            let arr_reg_name_value = [
                (
                    mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                    info.header.rip + insn_len,
                ),
                (mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
            ];

            set_registers_64!(self.inner.vcpufd, arr_reg_name_value)
                .map_err(VpHaltReason::Hypervisor)?;
        }

        Ok(())
    }

    async fn handle_mmio_intercept(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason<MshvError>> {
        let execution_state = message.to_memory_info().unwrap().header.execution_state;
        // SAFETY: This union only contains one field.
        let mmio_execution_state = unsafe { execution_state.__bindgen_anon_1 };
        let interruption_pending = mmio_execution_state.interruption_pending() != 0;

        self.emulate(message, devices, interruption_pending).await?;

        Ok(())
    }

    fn handle_synic_deliverable_exit(
        &self,
        message: &hv_message,
        _devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason<MshvError>> {
        let info = message.to_sint_deliverable_info().unwrap();

        self.flush_messages(info.deliverable_sints);
        Ok(())
    }

    fn handle_hypercall_intercept(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason<MshvError>> {
        let info = message.to_hypercall_intercept_info().unwrap();
        let execution_state = info.header.execution_state;
        // SAFETY: Accessing the raw field of this union is always safe.
        let vp_state = unsafe { HvX64VpExecutionState::from(execution_state.as_uint16) };
        let is_64bit = vp_state.cr0_pe() && vp_state.efer_lma();
        let mut hpc_context = MshvHypercallContext {
            rax: info.rax,
            rbx: info.rbx,
            rcx: info.rcx,
            rdx: info.rdx,
            r8: info.r8,
            rsi: info.rsi,
            rdi: info.rdi,
            xmm: info.xmmregisters,
        };
        let mut handler = MshvHypercallHandler {
            bus: devices,
            context: &mut hpc_context,
            rip: info.header.rip,
            rip_dirty: false,
            xmm_dirty: false,
            gp_dirty: false,
        };

        MshvHypercallHandler::DISPATCHER.dispatch(
            &self.partition.gm,
            X64RegisterIo::new(&mut handler, is_64bit),
        );

        let mut dirty_regs = ArrayVec::<hv_register_assoc, 14>::new();

        if handler.gp_dirty {
            dirty_regs.extend([
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
                    value: hv_register_value {
                        reg64: handler.context.rax,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
                    value: hv_register_value {
                        reg64: handler.context.rbx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
                    value: hv_register_value {
                        reg64: handler.context.rcx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
                    value: hv_register_value {
                        reg64: handler.context.rdx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
                    value: hv_register_value {
                        reg64: handler.context.r8,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
                    value: hv_register_value {
                        reg64: handler.context.rsi,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
                    value: hv_register_value {
                        reg64: handler.context.rdi,
                    },
                    ..Default::default()
                },
            ]);
        }

        if handler.xmm_dirty {
            // Write back all six hypercall XMM registers (XMM0-XMM5).
            dirty_regs.extend((0..6).map(|i| hv_register_assoc {
                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_XMM0 + i,
                value: hv_register_value {
                    reg128: handler.context.xmm[i as usize],
                },
                ..Default::default()
            }));
        }

        if handler.rip_dirty {
            dirty_regs.push(hv_register_assoc {
                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                value: hv_register_value { reg64: handler.rip },
                ..Default::default()
            });
        }

        if !dirty_regs.is_empty() {
            self.inner
                .vcpufd
                .set_reg(&dirty_regs)
                .expect("setting registers is not a fallible operation");
        }

        Ok(())
    }

    fn flush_messages(&self, deliverable_sints: u16) {
        let nonempty_sints =
            self.inner
                .message_queues
                .post_pending_messages(deliverable_sints, |sint, message| {
                    match self.partition.vmfd.post_message_direct(
                        self.vpindex.index(),
                        sint,
                        message.as_bytes(),
                    ) {
                        Ok(()) => {
                            tracing::trace!(sint, "sint message posted successfully");
                            Ok(())
                        }
                        Err(e) => {
                            // TODO: handle errors appropriately
                            tracing::trace!(error = %e, "dropping sint message");
                            Err(HvError::ObjectInUse)
                        }
                    }
                });

        {
            // To avoid an additional get_reg hypercall, clear with the
            // deliverable sints mask.
            let mut notifications = self.inner.deliverability_notifications.lock();
            let remaining_sints = notifications.sints() & !deliverable_sints;
            notifications.set_sints(remaining_sints);
        }

        if nonempty_sints != 0 {
            self.partition
                .request_sint_notifications(self.vpindex, nonempty_sints);
        }
    }

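    /// Snapshots the registers the emulator needs: GPs, segments, RIP,
    /// RFLAGS, CR0, and EFER.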
    fn emulation_cache(&self) -> Result<MshvEmuCache, MshvError> {
        let regs = self.inner.vcpufd.get_regs()?;
        let gps = [
            regs.rax, regs.rcx, regs.rdx, regs.rbx, regs.rsp, regs.rbp, regs.rsi, regs.rdi,
            regs.r8, regs.r9, regs.r10, regs.r11, regs.r12, regs.r13, regs.r14, regs.r15,
        ];
        let rip = regs.rip;
        let rflags = regs.rflags;

        let sregs = self.inner.vcpufd.get_sregs()?;
        let segs = [
            x86emu_sreg_from_mshv_sreg(sregs.es),
            x86emu_sreg_from_mshv_sreg(sregs.cs),
            x86emu_sreg_from_mshv_sreg(sregs.ss),
            x86emu_sreg_from_mshv_sreg(sregs.ds),
            x86emu_sreg_from_mshv_sreg(sregs.fs),
            x86emu_sreg_from_mshv_sreg(sregs.gs),
        ];
        let cr0 = sregs.cr0;
        let efer = sregs.efer;

        Ok(MshvEmuCache {
            gps,
            segs,
            rip,
            rflags: rflags.into(),
            cr0,
            efer,
        })
    }
}

struct MshvEmulationState<'a> {
    partition: &'a MshvPartitionInner,
    processor: &'a MshvVpInner,
    vp_index: VpIndex,
    message: &'a hv_message,
    interruption_pending: bool,
    cache: MshvEmuCache,
}

impl EmulatorSupport for MshvEmulationState<'_> {
    type Error = MshvError;

    fn vp_index(&self) -> VpIndex {
        self.vp_index
    }

    fn vendor(&self) -> x86defs::cpuid::Vendor {
        self.partition.caps.vendor
    }

    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
        self.cache.gps[reg as usize]
    }

    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
        self.cache.gps[reg as usize] = v;
    }

    fn rip(&mut self) -> u64 {
        self.cache.rip
    }

    fn set_rip(&mut self, v: u64) {
        self.cache.rip = v;
    }

    fn segment(&mut self, reg: x86emu::Segment) -> SegmentRegister {
        self.cache.segs[reg as usize]
    }

    fn efer(&mut self) -> u64 {
        self.cache.efer
    }

    fn cr0(&mut self) -> u64 {
        self.cache.cr0
    }

    fn rflags(&mut self) -> RFlags {
        self.cache.rflags
    }

    fn set_rflags(&mut self, v: RFlags) {
        self.cache.rflags = v;
    }

    fn xmm(&mut self, reg: usize) -> u128 {
        assert!(reg < 16);
        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let mut regs = [unsafe {
            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                name, 0u128,
            )))
        }];
        // Read into the array binding so the value written by the kernel is
        // actually observed, rather than a discarded temporary copy.
        let _ = self.processor.vcpufd.get_reg(&mut regs);
        // SAFETY: Accessing the u128 field of this union is always safe.
        hvu128_to_u128(unsafe { &regs[0].value.reg128 })
    }

    fn set_xmm(&mut self, reg: usize, value: u128) -> Result<(), Self::Error> {
        assert!(reg < 16);
        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let reg = unsafe {
            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                name, value,
            )))
        };
        self.processor.vcpufd.set_reg(&[reg])?;
        Ok(())
    }

    fn flush(&mut self) -> Result<(), Self::Error> {
        let arr_reg_name_value = [
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                self.cache.rip,
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RFLAGS,
                self.cache.rflags.into(),
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
                self.cache.gps[0],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
                self.cache.gps[1],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
                self.cache.gps[2],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
                self.cache.gps[3],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSP,
                self.cache.gps[4],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBP,
                self.cache.gps[5],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
                self.cache.gps[6],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
                self.cache.gps[7],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
                self.cache.gps[8],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R9,
                self.cache.gps[9],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R10,
                self.cache.gps[10],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R11,
                self.cache.gps[11],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R12,
                self.cache.gps[12],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R13,
                self.cache.gps[13],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R14,
                self.cache.gps[14],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R15,
                self.cache.gps[15],
            ),
        ];

        set_registers_64!(self.processor.vcpufd, arr_reg_name_value)?;
        Ok(())
    }

    fn instruction_bytes(&self) -> &[u8] {
        match HvMessageType(self.message.header.message_type) {
            HvMessageType::HvMessageTypeGpaIntercept
            | HvMessageType::HvMessageTypeUnmappedGpa
            | HvMessageType::HvMessageTypeUnacceptedGpa => {
                // SAFETY: We have checked the message type.
                unsafe {
                    let info = (&raw const self.message.u.payload)
                        .cast::<hv_x64_memory_intercept_message>();
                    let instruction_bytes = &raw const (*info).instruction_bytes;
                    let instruction_byte_count =
                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
                    std::slice::from_raw_parts(
                        instruction_bytes.cast(),
                        instruction_byte_count as usize,
                    )
                }
            }
            HvMessageType::HvMessageTypeX64IoPortIntercept => {
                // SAFETY: We have checked the message type.
                unsafe {
                    let info = (&raw const self.message.u.payload)
                        .cast::<hv_x64_io_port_intercept_message>();
                    let instruction_bytes = &raw const (*info).instruction_bytes;
                    let instruction_byte_count =
                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
                    std::slice::from_raw_parts(
                        instruction_bytes.cast(),
                        instruction_byte_count as usize,
                    )
                }
            }
            _ => unreachable!(),
        }
    }

    fn physical_address(&self) -> Option<u64> {
        if self.message.header.message_type == HvMessageType::HvMessageTypeGpaIntercept.0
            || self.message.header.message_type == HvMessageType::HvMessageTypeUnmappedGpa.0
            || self.message.header.message_type == HvMessageType::HvMessageTypeUnacceptedGpa.0
        {
            let info = self.message.to_memory_info().unwrap();
            Some(info.guest_physical_address)
        } else {
            None
        }
    }

    fn initial_gva_translation(
        &mut self,
    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
        if (self.message.header.message_type != HvMessageType::HvMessageTypeGpaIntercept.0)
            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnmappedGpa.0)
            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnacceptedGpa.0)
        {
            return None;
        }

        let message = self.message.to_memory_info().unwrap();

        // SAFETY: accessing the union as uint8 is safe because only the raw
        // bits matter here, not the field's nominal type.
        let memory_access_info =
            unsafe { hvdef::HvX64MemoryAccessInfo::from(message.memory_access_info.as_uint8) };

        if !memory_access_info.gva_gpa_valid() {
            return None;
        }

        if let Ok(translate_mode) = TranslateMode::try_from(hvdef::HvInterceptAccessType(
            message.header.intercept_access_type,
        )) {
            Some(virt_support_x86emu::emulate::InitialTranslation {
                gva: message.guest_virtual_address,
                gpa: message.guest_physical_address,
                translate_mode,
            })
        } else {
            None
        }
    }

    fn interruption_pending(&self) -> bool {
        self.interruption_pending
    }

    fn check_vtl_access(
        &mut self,
        _gpa: u64,
        _mode: TranslateMode,
    ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError<Self::Error>> {
        // TODO: No VTL2 supported so always return Ok.
        Ok(())
    }

    fn translate_gva(
        &mut self,
        gva: u64,
        mode: TranslateMode,
    ) -> Result<Result<EmuTranslateResult, EmuTranslateError>, Self::Error> {
        emulate_translate_gva(self, gva, mode)
    }

    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let reg = unsafe {
            &[
                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                    HvX64RegisterName::PendingEvent0,
                    u128::from(event_info.reg_0),
                ))),
                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                    HvX64RegisterName::PendingEvent1,
                    u128::from(event_info.reg_1),
                ))),
            ]
        };
        self.processor.vcpufd.set_reg(reg).unwrap();
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        self.partition
            .memory
            .lock()
            .ranges
            .iter()
            .flatten()
            .any(|range| {
                (range.guest_pfn..range.guest_pfn + range.size).contains(&gpa)
                    && (!write
                        || range.flags & set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE)
                            == set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE))
            })
    }

    fn lapic_base_address(&self) -> Option<u64> {
        None
    }

    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
        unreachable!()
    }

    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
        unreachable!()
    }
}

impl TranslateGvaSupport for MshvEmulationState<'_> {
    type Error = MshvError;

    fn guest_memory(&self) -> &GuestMemory {
        &self.partition.gm
    }

    fn acquire_tlb_lock(&mut self) {
        // The hypervisor automatically acquires the TLB lock for exo partitions.
    }

    fn registers(&mut self) -> Result<TranslationRegisters, Self::Error> {
        let mut reg = [
            HvX64RegisterName::Cr0,
            HvX64RegisterName::Cr4,
            HvX64RegisterName::Efer,
            HvX64RegisterName::Cr3,
            HvX64RegisterName::Rflags,
            HvX64RegisterName::Ss,
        ]
        .map(|n| HvRegisterAssoc::from((n, 0u64)));

        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same size.
        unsafe {
            self.processor.vcpufd.get_reg(std::mem::transmute::<
                &mut [HvRegisterAssoc],
                &mut [hv_register_assoc],
            >(&mut reg[..]))?;
        }

        let [cr0, cr4, efer, cr3, rflags, ss] = reg.map(|v| v.value);

        Ok(TranslationRegisters {
            cr0: cr0.as_u64(),
            cr4: cr4.as_u64(),
            efer: efer.as_u64(),
            cr3: cr3.as_u64(),
            rflags: rflags.as_u64(),
            ss: from_seg(ss.as_segment()),
            encryption_mode: virt_support_x86emu::translate::EncryptionMode::None,
        })
    }
}

#[derive(Error, Debug)]
pub enum Error {
    #[error("operation not supported")]
    NotSupported,
    #[error("create_vm failed")]
    CreateVMFailed,
    #[error("failed to create VCPU")]
    CreateVcpu(#[source] MshvError),
    #[error("emulator GPA translation error")]
    EmulatorTranslateGPA,
    #[error("vtl2 not supported")]
    Vtl2NotSupported,
    #[error("isolation not supported")]
    IsolationNotSupported,
    #[error("failed to stat /dev/mshv")]
    AvailableCheck(#[source] io::Error),
    #[error("failed to open /dev/mshv")]
    OpenMshv(#[source] MshvError),
    #[error("register access error")]
    Register(#[source] MshvError),
    #[error("interrupt assertion failed")]
    AssertInterrupt(#[source] MshvError),
    #[error("install intercept failed")]
    InstallIntercept(#[source] MshvError),
}

impl MshvPartitionInner {
    fn request_msi(&self, request: MsiRequest) {
        let (address, data) = request.as_x86();
        let control = request.hv_x86_interrupt_control();
        let mshv_req = InterruptRequest {
            interrupt_type: control.interrupt_type().0,
            apic_id: address.virt_destination().into(),
            vector: data.vector().into(),
            level_triggered: control.x86_level_triggered(),
            logical_destination_mode: control.x86_logical_destination_mode(),
            long_mode: false,
        };

        if let Err(err) = self.vmfd.request_virtual_interrupt(&mshv_req) {
            tracelimit::warn_ratelimited!(
                address = request.address,
                data = request.data,
                error = &err as &dyn std::error::Error,
                "failed to request msi"
            );
        }
    }
}

impl virt::irqcon::IoApicRouting for MshvPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(request))
    }
}

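/// Memory regions mapped into the partition, indexed by slot; `None` marks a
/// free slot.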
#[derive(Debug, Default)]
struct MshvMemoryRangeState {
    ranges: Vec<Option<mshv_user_mem_region>>,
}

impl virt::PartitionMemoryMapper for MshvPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}

// TODO: figure out a better abstraction that also works for KVM and WHP.
impl virt::PartitionMemoryMap for MshvPartitionInner {
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        exec: bool,
    ) -> Result<(), virt::Error> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Find the existing slot if there is one.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.userspace_addr == data as u64 => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();

        let mut flags = 0;
        if writable {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE);
        }
        if exec {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_EXECUTABLE);
        }

        let mem_region = mshv_user_mem_region {
            size: size as u64,
            guest_pfn: addr,
            userspace_addr: data as u64,
            flags,
            rsvd: [0; 7],
        };

        self.vmfd.map_user_memory(mem_region)?;
        state.ranges[slot_to_use] = Some(mem_region);
        Ok(())
    }

    fn unmap_range(&self, addr: u64, size: u64) -> Result<(), virt::Error> {
        let mut state = self.memory.lock();
        let (slot, range) = state
            .ranges
            .iter_mut()
            .enumerate()
            .find(|(_, range)| range.as_ref().map(|r| (r.guest_pfn, r.size)) == Some((addr, size)))
            .expect("can only unmap existing ranges of exact size");

        self.vmfd.unmap_user_memory(range.unwrap())?;
        state.ranges[slot] = None;
        Ok(())
    }
}

// TODO: implementation
struct MshvDoorbellEntry;

impl MshvDoorbellEntry {
    pub fn new(
        _guest_address: u64,
        _value: Option<u64>,
        _length: Option<u32>,
        _fd: &Event,
    ) -> io::Result<MshvDoorbellEntry> {
        // TODO: implementation

        Ok(Self)
    }
}

impl DoorbellRegistration for MshvPartition {
    fn register_doorbell(
        &self,
        guest_address: u64,
        value: Option<u64>,
        length: Option<u32>,
        fd: &Event,
    ) -> io::Result<Box<dyn Send + Sync>> {
        Ok(Box::new(MshvDoorbellEntry::new(
            guest_address,
            value,
            length,
            fd,
        )?))
    }
}

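/// Guest register state captured at a hypercall intercept.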
pub struct MshvHypercallContext {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub r8: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub xmm: [hv_u128; 6],
}

impl<T> hv1_hypercall::X64RegisterState for MshvHypercallHandler<'_, T> {
    fn rip(&mut self) -> u64 {
        self.rip
    }

    fn set_rip(&mut self, rip: u64) {
        self.rip = rip;
        self.rip_dirty = true;
    }

    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
        match n {
            hv1_hypercall::X64HypercallRegister::Rax => self.context.rax,
            hv1_hypercall::X64HypercallRegister::Rcx => self.context.rcx,
            hv1_hypercall::X64HypercallRegister::Rdx => self.context.rdx,
            hv1_hypercall::X64HypercallRegister::Rbx => self.context.rbx,
            hv1_hypercall::X64HypercallRegister::Rsi => self.context.rsi,
            hv1_hypercall::X64HypercallRegister::Rdi => self.context.rdi,
            hv1_hypercall::X64HypercallRegister::R8 => self.context.r8,
        }
    }

    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
        *match n {
            hv1_hypercall::X64HypercallRegister::Rax => &mut self.context.rax,
            hv1_hypercall::X64HypercallRegister::Rcx => &mut self.context.rcx,
            hv1_hypercall::X64HypercallRegister::Rdx => &mut self.context.rdx,
            hv1_hypercall::X64HypercallRegister::Rbx => &mut self.context.rbx,
            hv1_hypercall::X64HypercallRegister::Rsi => &mut self.context.rsi,
            hv1_hypercall::X64HypercallRegister::Rdi => &mut self.context.rdi,
            hv1_hypercall::X64HypercallRegister::R8 => &mut self.context.r8,
        } = value;
        self.gp_dirty = true;
    }

    fn xmm(&mut self, n: usize) -> u128 {
        let r = &self.context.xmm[n];
        hvu128_to_u128(r)
    }

    fn set_xmm(&mut self, n: usize, value: u128) {
        self.context.xmm[n] = u128_to_hvu128(value);
        self.xmm_dirty = true;
    }
}

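// Conversions between the hypervisor ABI's split 64-bit halves and Rust's
// native u128 representation.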
fn hvu128_to_u128(r: &hv_u128) -> u128 {
    (r.high_part as u128) << 64 | r.low_part as u128
}

fn u128_to_hvu128(value: u128) -> hv_u128 {
    hv_u128 {
        // The low half of the u128 goes in low_part and the high half in
        // high_part, the inverse of hvu128_to_u128 above.
        low_part: value as u64,
        high_part: (value >> 64) as u64,
    }
}

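/// Hypercall handler state passed to the dispatcher; the `*_dirty` flags
/// record which registers must be written back after dispatch.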
struct MshvHypercallHandler<'a, T> {
    bus: &'a T,
    context: &'a mut MshvHypercallContext,
    rip: u64,
    rip_dirty: bool,
    xmm_dirty: bool,
    gp_dirty: bool,
}

impl<T: CpuIo> MshvHypercallHandler<'_, T> {
    const DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
        Self,
        [hv1_hypercall::HvPostMessage, hv1_hypercall::HvSignalEvent],
    );
}

impl<T: CpuIo> hv1_hypercall::PostMessage for MshvHypercallHandler<'_, T> {
    fn post_message(&mut self, connection_id: u32, message: &[u8]) -> hvdef::HvResult<()> {
        self.bus
            .post_synic_message(Vtl::Vtl0, connection_id, false, message)
    }
}

impl<T: CpuIo> hv1_hypercall::SignalEvent for MshvHypercallHandler<'_, T> {
    fn signal_event(&mut self, connection_id: u32, flag: u16) -> hvdef::HvResult<()> {
        self.bus.signal_synic_event(Vtl::Vtl0, connection_id, flag)
    }
}

impl Inspect for MshvPartition {
    fn inspect(&self, req: inspect::Request<'_>) {
        // TODO: implementation
        req.respond();
    }
}

impl InspectMut for MshvProcessor<'_> {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        req.respond();
    }
}

impl virt::Processor for MshvProcessor<'_> {
    type Error = Error;
    type RunVpError = MshvError;
    type StateAccess<'a>
        = &'a mut Self
    where
        Self: 'a;

    fn set_debug_state(
        &mut self,
        _vtl: Vtl,
        _state: Option<&virt::x86::DebugState>,
    ) -> Result<(), Self::Error> {
        Err(Error::NotSupported)
    }

    async fn run_vp(
        &mut self,
        stop: StopVp<'_>,
        dev: &impl CpuIo,
    ) -> Result<Infallible, VpHaltReason<MshvError>> {
        let vpinner = self.inner;
        let _cleaner = MshvVpInnerCleaner { vpinner };
        let vcpufd = &vpinner.vcpufd;

        // Ensure this thread is uniquely running the VP, and store the thread
        // ID to support cancellation.
        assert!(vpinner.thread.write().replace(Pthread::current()).is_none());

        loop {
            vpinner.needs_yield.maybe_yield().await;
            stop.check()?;

            match vcpufd.run() {
                Ok(exit) => match HvMessageType(exit.header.message_type) {
                    HvMessageType::HvMessageTypeUnrecoverableException => {
                        return Err(VpHaltReason::TripleFault { vtl: Vtl::Vtl0 });
                    }
                    HvMessageType::HvMessageTypeX64IoPortIntercept => {
                        self.handle_io_port_intercept(&exit, dev).await?;
                    }
                    HvMessageType::HvMessageTypeUnmappedGpa
                    | HvMessageType::HvMessageTypeGpaIntercept => {
                        self.handle_mmio_intercept(&exit, dev).await?;
                    }
                    HvMessageType::HvMessageTypeSynicSintDeliverable => {
                        tracing::trace!("SYNIC_SINT_DELIVERABLE");
                        self.handle_synic_deliverable_exit(&exit, dev)?;
                    }
                    HvMessageType::HvMessageTypeHypercallIntercept => {
                        tracing::trace!("HYPERCALL_INTERCEPT");
                        self.handle_hypercall_intercept(&exit, dev)?;
                    }
                    exit => {
                        panic!("Unhandled vcpu exit code {exit:?}");
                    }
                },

                Err(e) => match e.errno() {
                    libc::EAGAIN | libc::EINTR => {}
                    _ => tracing::error!(
                        error = &e as &dyn std::error::Error,
                        "vcpufd.run returned error"
                    ),
                },
            }
        }
    }

    fn flush_async_requests(&mut self) -> Result<(), Self::RunVpError> {
        Ok(())
    }

    fn access_state(&mut self, vtl: Vtl) -> Self::StateAccess<'_> {
        assert_eq!(vtl, Vtl::Vtl0);
        self
    }
}

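// Converts an mshv segment register into the emulator's representation.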
fn x86emu_sreg_from_mshv_sreg(reg: mshv_bindings::SegmentRegister) -> SegmentRegister {
    let reg: hv_x64_segment_register = hv_x64_segment_register::from(reg);
    // SAFETY: This union only contains one field.
    let attributes: u16 = unsafe { reg.__bindgen_anon_1.attributes };

    SegmentRegister {
        base: reg.base,
        limit: reg.limit,
        selector: reg.selector,
        attributes: attributes.into(),
    }
}

fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
    SegmentRegister {
        base: reg.base,
        limit: reg.limit,
        selector: reg.selector,
        attributes: reg.attributes.into(),
    }
}

impl virt::Synic for MshvPartition {
    fn post_message(&self, _vtl: Vtl, vp: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        self.inner
            .post_message(vp, sint, &HvMessage::new(HvMessageType(typ), 0, payload));
    }

    fn new_guest_event_port(
        &self,
        _vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn GuestEventPort> {
        Box::new(MshvGuestEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(MshvEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }
}

/// `GuestEventPort` implementation for MSHV partitions.
#[derive(Debug, Clone)]
struct MshvGuestEventPort {
    partition: Weak<MshvPartitionInner>,
    params: Arc<Mutex<MshvEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct MshvEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
}

impl GuestEventPort for MshvGuestEventPort {
    fn interrupt(&self) -> Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        Interrupt::from_fn(move || {
            let MshvEventPortParams { vp, sint, flag } = *params.lock();
            if let Some(partition) = partition.upgrade() {
                partition
                    .vmfd
                    .signal_event_direct(vp.index(), sint, flag)
                    .unwrap_or_else(|_| {
                        panic!(
                            "failed to signal synic sint {} on vp {:?} with flag {}",
                            sint, vp, flag
                        )
                    });
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}