virt_mshv/
lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Linux /dev/mshv implementation of the virt::generic interfaces.

#![cfg(all(target_os = "linux", guest_is_native, guest_arch = "x86_64"))]
#![expect(missing_docs)]
// UNSAFETY: Calling HV APIs and manually managing memory.
#![expect(unsafe_code)]

mod vm_state;
mod vp_state;

use arrayvec::ArrayVec;
use guestmem::DoorbellRegistration;
use guestmem::GuestMemory;
use hv1_emulator::message_queues::MessageQueues;
use hv1_hypercall::X64RegisterIo;
use hvdef::HV_PAGE_SHIFT;
use hvdef::HvDeliverabilityNotificationsRegister;
use hvdef::HvError;
use hvdef::HvMessage;
use hvdef::HvMessageType;
use hvdef::HvX64RegisterName;
use hvdef::HvX64VpExecutionState;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HvRegisterAssoc;
use inspect::Inspect;
use inspect::InspectMut;
use mshv_bindings::MSHV_SET_MEM_BIT_EXECUTABLE;
use mshv_bindings::MSHV_SET_MEM_BIT_WRITABLE;
use mshv_bindings::hv_message;
use mshv_bindings::hv_register_assoc;
use mshv_bindings::hv_register_value;
use mshv_bindings::hv_u128;
use mshv_bindings::hv_x64_io_port_intercept_message;
use mshv_bindings::hv_x64_memory_intercept_message;
use mshv_bindings::hv_x64_segment_register;
use mshv_bindings::mshv_install_intercept;
use mshv_bindings::mshv_user_mem_region;
use mshv_ioctls::InterruptRequest;
use mshv_ioctls::Mshv;
use mshv_ioctls::MshvError;
use mshv_ioctls::VcpuFd;
use mshv_ioctls::VmFd;
use mshv_ioctls::set_bits;
use mshv_ioctls::set_registers_64;
use pal::unix::pthread::*;
use pal_event::Event;
use parking_lot::Mutex;
use parking_lot::RwLock;
use std::convert::Infallible;
use std::io;
use std::sync::Arc;
use std::sync::Once;
use std::sync::Weak;
use thiserror::Error;
use virt::Hv1;
use virt::NeedsYield;
use virt::PartitionAccessState;
use virt::PartitionConfig;
use virt::ProtoPartition;
use virt::ProtoPartitionConfig;
use virt::StopVp;
use virt::VpHaltReason;
use virt::VpIndex;
use virt::io::CpuIo;
use virt::irqcon::MsiRequest;
use virt::x86::max_physical_address_size_from_cpuid;
use virt_support_x86emu::emulate::EmuTranslateError;
use virt_support_x86emu::emulate::EmuTranslateResult;
use virt_support_x86emu::emulate::EmulatorSupport;
use virt_support_x86emu::emulate::TranslateGvaSupport;
use virt_support_x86emu::emulate::TranslateMode;
use virt_support_x86emu::emulate::emulate_translate_gva;
use virt_support_x86emu::translate::TranslationRegisters;
use vmcore::interrupt::Interrupt;
use vmcore::reference_time::GetReferenceTime;
use vmcore::reference_time::ReferenceTimeResult;
use vmcore::reference_time::ReferenceTimeSource;
use vmcore::synic::GuestEventPort;
use x86defs::RFlags;
use x86defs::SegmentRegister;
use zerocopy::IntoBytes;

#[derive(Debug)]
pub struct LinuxMshv;

struct MshvEmuCache {
    /// GP registers, in the canonical order (as defined by `RAX`, etc.).
    gps: [u64; 16],
    /// Segment registers, in the canonical order (as defined by `ES`, etc.).
    segs: [SegmentRegister; 6],
    rip: u64,
    rflags: RFlags,

    cr0: u64,
    efer: u64,
}

impl virt::Hypervisor for LinuxMshv {
    type ProtoPartition<'a> = MshvProtoPartition<'a>;
    type Partition = MshvPartition;
    type Error = Error;

    fn new_partition<'a>(
        &mut self,
        config: ProtoPartitionConfig<'a>,
    ) -> Result<MshvProtoPartition<'a>, Self::Error> {
        if config.isolation.is_isolated() {
            return Err(Error::IsolationNotSupported);
        }

        // Open /dev/mshv.
        let mshv = Mshv::new().map_err(Error::OpenMshv)?;

        // Create VM.
        //
        // TODO: really need to pass some partition properties here (e.g., for
        // APIC configuration), but the underlying crate just hardcodes
        // everything.
        let vmfd: VmFd;
        loop {
            match mshv.create_vm() {
                Ok(fd) => vmfd = fd,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // The ioctl was interrupted (EINTR), so retry; this
                        // is not a real failure.
                        continue;
                    } else {
                        return Err(Error::CreateVMFailed);
                    }
                }
            }
            break;
        }

        // Create virtual CPUs.
        let mut vps: Vec<MshvVpInner> = Vec::new();
        for vp in config.processor_topology.vps_arch() {
            if vp.base.vp_index.index() != vp.apic_id {
                // TODO
                return Err(Error::NotSupported);
            }

            let vcpufd = vmfd
                .create_vcpu(vp.base.vp_index.index() as u8)
                .map_err(Error::CreateVcpu)?;

            vps.push(MshvVpInner {
                vcpufd,
                thread: RwLock::new(None),
                needs_yield: NeedsYield::new(),
                message_queues: MessageQueues::new(),
                deliverability_notifications: Mutex::new(
                    HvDeliverabilityNotificationsRegister::new(),
                ),
            });
        }

        // Install required intercepts
        let intercept_args = mshv_install_intercept {
            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
            intercept_type: hvdef::hypercall::HvInterceptType::HvInterceptTypeHypercall.0,
            intercept_parameter: Default::default(),
        };
        vmfd.install_intercept(intercept_args)
            .map_err(Error::InstallIntercept)?;

        // Set up a signal for forcing vcpufd.run() ioctl to exit.
        static SIGNAL_HANDLER_INIT: Once = Once::new();
        // SAFETY: the signal handler does not perform any actions that are
        // forbidden for signal handlers, as it does nothing at all.
        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
            signal_hook::low_level::register(libc::SIGRTMIN(), || {
                // The handler does nothing; it exists only so that the run()
                // ioctl returns with EINTR when the signal is sent to the
                // thread running the VP.
            })
            .unwrap();
        });

        if let Some(hv_config) = &config.hv_config {
            if hv_config.vtl2.is_some() {
                return Err(Error::Vtl2NotSupported);
            }
        }

        Ok(MshvProtoPartition { config, vmfd, vps })
    }

    fn is_available(&self) -> Result<bool, Self::Error> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(Error::AvailableCheck(err)),
        }
    }
}

/// Prototype partition.
pub struct MshvProtoPartition<'a> {
    config: ProtoPartitionConfig<'a>,
    vmfd: VmFd,
    vps: Vec<MshvVpInner>,
}

impl ProtoPartition for MshvProtoPartition<'_> {
    type Partition = MshvPartition;
    type ProcessorBinder = MshvProcessorBinder;
    type Error = Infallible;

    fn cpuid(&self, eax: u32, ecx: u32) -> [u32; 4] {
        // This call should never fail unless there is a kernel or hypervisor
        // bug.
        self.vps[0]
            .vcpufd
            .get_cpuid_values(eax, ecx, 0, 0)
            .expect("cpuid should not fail")
    }

    fn max_physical_address_size(&self) -> u8 {
        max_physical_address_size_from_cpuid(&|eax, ecx| self.cpuid(eax, ecx))
    }

    fn build(
        self,
        config: PartitionConfig<'_>,
    ) -> Result<(Self::Partition, Vec<Self::ProcessorBinder>), Self::Error> {
        // TODO: do something with cpuid.

        // Get caps via cpuid
        let caps = virt::PartitionCapabilities::from_cpuid(
            self.config.processor_topology,
            &mut |function, index| {
                self.vps[0]
                    .vcpufd
                    .get_cpuid_values(function, index, 0, 0)
                    .expect("cpuid should not fail")
            },
        );

        // Attach all the resources created above to a Partition object.
        let partition = MshvPartition {
            inner: Arc::new(MshvPartitionInner {
                vmfd: self.vmfd,
                memory: Default::default(),
                gm: config.guest_memory.clone(),
                vps: self.vps,
                irq_routes: Default::default(),
                caps,
            }),
        };

        let vps = self
            .config
            .processor_topology
            .vps()
            .map(|vp| MshvProcessorBinder {
                partition: partition.inner.clone(),
                vpindex: vp.vp_index,
            })
            .collect();

        Ok((partition, vps))
    }
}

// TODO: remove these workarounds when mshv-ioctl implements the Debug trait
#[derive(Debug)]
pub struct MshvPartition {
    inner: Arc<MshvPartitionInner>,
}

#[derive(Debug)]
struct MshvPartitionInner {
    vmfd: VmFd,
    memory: Mutex<MshvMemoryRangeState>,
    gm: GuestMemory,
    vps: Vec<MshvVpInner>,
    irq_routes: virt::irqcon::IrqRoutes,
    caps: virt::PartitionCapabilities,
}

#[derive(Debug)]
struct MshvVpInner {
    vcpufd: VcpuFd,
    thread: RwLock<Option<Pthread>>,
    needs_yield: NeedsYield,
    message_queues: MessageQueues,
    deliverability_notifications: Mutex<HvDeliverabilityNotificationsRegister>,
}

struct MshvVpInnerCleaner<'a> {
    vpinner: &'a MshvVpInner,
}

impl Drop for MshvVpInnerCleaner<'_> {
    fn drop(&mut self) {
        self.vpinner.thread.write().take();
    }
}

impl virt::Partition for MshvPartition {
    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Error>> {
        None
    }

    fn doorbell_registration(
        self: &Arc<Self>,
        _minimum_vtl: Vtl,
    ) -> Option<Arc<dyn DoorbellRegistration>> {
        // TODO: implementation

        Some(self.clone())
    }

    fn caps(&self) -> &virt::PartitionCapabilities {
        &self.inner.caps
    }

    fn request_msi(&self, _vtl: Vtl, request: MsiRequest) {
        self.inner.request_msi(request)
    }

    fn request_yield(&self, vp_index: VpIndex) {
        let vp = self.inner.vp(vp_index);
        if vp.needs_yield.request_yield() {
            // Send a signal to the thread that called vcpufd.run() to force an exit.
            let thread = vp.thread.read();
            if let Some(thread) = *thread {
                if thread != Pthread::current() {
                    thread
                        .signal(libc::SIGRTMIN())
                        .expect("thread cancel signal failed");
                }
            }
        }
    }
}

impl virt::X86Partition for MshvPartition {
    fn ioapic_routing(&self) -> Arc<dyn virt::irqcon::IoApicRouting> {
        self.inner.clone()
    }

    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
        // TODO
        tracing::warn!(?vp_index, ?vtl, lint, "ignored lint pulse");
    }
}

impl PartitionAccessState for MshvPartition {
    type StateAccess<'a> = &'a MshvPartition;

    fn access_state(&self, vtl: Vtl) -> Self::StateAccess<'_> {
        assert_eq!(vtl, Vtl::Vtl0);

        self
    }
}

impl Hv1 for MshvPartition {
    type Error = Error;
    type Device = virt::UnimplementedDevice;

    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
        Some(ReferenceTimeSource::from(self.inner.clone() as Arc<_>))
    }

    fn new_virtual_device(
        &self,
    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
        None
    }
}

impl GetReferenceTime for MshvPartitionInner {
    fn now(&self) -> ReferenceTimeResult {
        let mut regs = [hv_register_assoc {
            name: hvdef::HvAllArchRegisterName::TimeRefCount.0,
            value: hv_register_value { reg64: 0 },
            ..Default::default()
        }];
        self.vp(VpIndex::BSP).vcpufd.get_reg(&mut regs).unwrap();
        // SAFETY: the value has been written by the kernel.
        let ref_time = unsafe { regs[0].value.reg64 };
        ReferenceTimeResult {
            ref_time,
            system_time: None,
        }
    }
}

impl MshvPartitionInner {
    fn vp(&self, vp_index: VpIndex) -> &MshvVpInner {
        &self.vps[vp_index.index() as usize]
    }

    fn post_message(&self, vp_index: VpIndex, sint: u8, message: &HvMessage) {
        let request_notification = self
            .vp(vp_index)
            .message_queues
            .enqueue_message(sint, message);

        if request_notification {
            self.request_sint_notifications(vp_index, 1 << sint);
        }
    }

    fn request_sint_notifications(&self, vp_index: VpIndex, sints: u16) {
        let mut notifications = self.vp(vp_index).deliverability_notifications.lock();
        if notifications.sints() != sints {
            notifications.set_sints(sints);
            self.vmfd
                .register_deliverabilty_notifications(vp_index.index(), (*notifications).into())
                .expect("Requesting deliverability is not a fallible operation");
        }
    }
}

pub struct MshvProcessorBinder {
    partition: Arc<MshvPartitionInner>,
    vpindex: VpIndex,
}

impl virt::BindProcessor for MshvProcessorBinder {
    type Processor<'a>
        = MshvProcessor<'a>
    where
        Self: 'a;
    type Error = Error;

    fn bind(&mut self) -> Result<Self::Processor<'_>, Self::Error> {
        Ok(MshvProcessor {
            partition: &self.partition,
            inner: &self.partition.vps[self.vpindex.index() as usize],
            vpindex: self.vpindex,
        })
    }
}

pub struct MshvProcessor<'a> {
    partition: &'a MshvPartitionInner,
    inner: &'a MshvVpInner,
    vpindex: VpIndex,
}

impl MshvProcessor<'_> {
    async fn emulate(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
        interruption_pending: bool,
    ) -> Result<(), VpHaltReason> {
        let cache = self.emulation_cache();
        let emu_mem = virt_support_x86emu::emulate::EmulatorMemoryAccess {
            gm: &self.partition.gm,
            kx_gm: &self.partition.gm,
            ux_gm: &self.partition.gm,
        };

        let mut support = MshvEmulationState {
            partition: self.partition,
            processor: self.inner,
            vp_index: self.vpindex,
            message,
            interruption_pending,
            cache,
        };
        virt_support_x86emu::emulate::emulate(&mut support, &emu_mem, devices).await
    }

    async fn handle_io_port_intercept(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason> {
        let info = message.to_ioport_info().unwrap();
        let access_info = info.access_info;
        // SAFETY: This union only contains one field.
        let port_access_info = unsafe { access_info.__bindgen_anon_1 };

        if port_access_info.string_op() != 0 || port_access_info.rep_prefix() != 0 {
            let execution_state = info.header.execution_state;
            // SAFETY: This union only contains one field.
            let io_execution_state = unsafe { execution_state.__bindgen_anon_1 };
            let interruption_pending = io_execution_state.interruption_pending() != 0;

            self.emulate(message, devices, interruption_pending).await?
        } else {
            let mut ret_rax = info.rax;
            virt_support_x86emu::emulate::emulate_io(
                self.vpindex,
                info.header.intercept_access_type == 1,
                info.port_number,
                &mut ret_rax,
                port_access_info.access_size(),
                devices,
            )
            .await;

            let insn_len = info.header.instruction_length() as u64;

            // Advance RIP and update RAX.
            let arr_reg_name_value = [
                (
                    mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                    info.header.rip + insn_len,
                ),
                (mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
            ];

            set_registers_64!(self.inner.vcpufd, arr_reg_name_value).unwrap();
        }

        Ok(())
    }

    async fn handle_mmio_intercept(
        &self,
        message: &hv_message,
        devices: &impl CpuIo,
    ) -> Result<(), VpHaltReason> {
        let execution_state = message.to_memory_info().unwrap().header.execution_state;
        // SAFETY: This union only contains one field.
        let mmio_execution_state = unsafe { execution_state.__bindgen_anon_1 };
        let interruption_pending = mmio_execution_state.interruption_pending() != 0;

        self.emulate(message, devices, interruption_pending).await
    }

    fn handle_synic_deliverable_exit(&self, message: &hv_message, _devices: &impl CpuIo) {
        let info = message.to_sint_deliverable_info().unwrap();
        self.flush_messages(info.deliverable_sints);
    }

    fn handle_hypercall_intercept(&self, message: &hv_message, devices: &impl CpuIo) {
        let info = message.to_hypercall_intercept_info().unwrap();
        let execution_state = info.header.execution_state;
        // SAFETY: Accessing the raw field of this union is always safe.
        let vp_state = unsafe { HvX64VpExecutionState::from(execution_state.as_uint16) };
        let is_64bit = vp_state.cr0_pe() && vp_state.efer_lma();
        let mut hpc_context = MshvHypercallContext {
            rax: info.rax,
            rbx: info.rbx,
            rcx: info.rcx,
            rdx: info.rdx,
            r8: info.r8,
            rsi: info.rsi,
            rdi: info.rdi,
            xmm: info.xmmregisters,
        };
        let mut handler = MshvHypercallHandler {
            bus: devices,
            context: &mut hpc_context,
            rip: info.header.rip,
            rip_dirty: false,
            xmm_dirty: false,
            gp_dirty: false,
        };

        MshvHypercallHandler::DISPATCHER.dispatch(
            &self.partition.gm,
            X64RegisterIo::new(&mut handler, is_64bit),
        );

        let mut dirty_regs = ArrayVec::<hv_register_assoc, 14>::new();

        if handler.gp_dirty {
            dirty_regs.extend([
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
                    value: hv_register_value {
                        reg64: handler.context.rax,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
                    value: hv_register_value {
                        reg64: handler.context.rbx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
                    value: hv_register_value {
                        reg64: handler.context.rcx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
                    value: hv_register_value {
                        reg64: handler.context.rdx,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
                    value: hv_register_value {
                        reg64: handler.context.r8,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
                    value: hv_register_value {
                        reg64: handler.context.rsi,
                    },
                    ..Default::default()
                },
                hv_register_assoc {
                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
                    value: hv_register_value {
                        reg64: handler.context.rdi,
                    },
                    ..Default::default()
                },
            ]);
        }

        if handler.xmm_dirty {
            // Fast hypercalls use XMM0..=XMM5, so flush all six registers.
            dirty_regs.extend((0..6).map(|i| hv_register_assoc {
                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_XMM0 + i,
                value: hv_register_value {
                    reg128: handler.context.xmm[i as usize],
                },
                ..Default::default()
            }));
        }

        if handler.rip_dirty {
            dirty_regs.push(hv_register_assoc {
                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                value: hv_register_value { reg64: handler.rip },
                ..Default::default()
            });
        }

        if !dirty_regs.is_empty() {
            self.inner
                .vcpufd
                .set_reg(&dirty_regs)
                .expect("Register setting is not a fallible operation");
    }

    fn flush_messages(&self, deliverable_sints: u16) {
        let nonempty_sints =
            self.inner
                .message_queues
                .post_pending_messages(deliverable_sints, |sint, message| {
                    match self.partition.vmfd.post_message_direct(
                        self.vpindex.index(),
                        sint,
                        message.as_bytes(),
                    ) {
                        Ok(()) => {
                            tracing::trace!(sint, "sint message posted successfully");
                            Ok(())
                        }
                        Err(e) => {
                            // TODO: handle errors appropriately
                            tracing::trace!(error = %e, "dropping sint message");
                            Err(HvError::ObjectInUse)
                        }
                    }
                });

        {
            // To avoid an additional get_reg hypercall, clear w/ deliverable sints mask
            let mut notifications = self.inner.deliverability_notifications.lock();
            let remaining_sints = notifications.sints() & !deliverable_sints;
            notifications.set_sints(remaining_sints);
        }

        if nonempty_sints != 0 {
            self.partition
                .request_sint_notifications(self.vpindex, nonempty_sints);
        }
    }

    fn emulation_cache(&self) -> MshvEmuCache {
        let regs = self.inner.vcpufd.get_regs().unwrap();
        let gps = [
            regs.rax, regs.rcx, regs.rdx, regs.rbx, regs.rsp, regs.rbp, regs.rsi, regs.rdi,
            regs.r8, regs.r9, regs.r10, regs.r11, regs.r12, regs.r13, regs.r14, regs.r15,
        ];
        let rip = regs.rip;
        let rflags = regs.rflags;

        let sregs = self.inner.vcpufd.get_sregs().unwrap();
        let segs = [
            x86emu_sreg_from_mshv_sreg(sregs.es),
            x86emu_sreg_from_mshv_sreg(sregs.cs),
            x86emu_sreg_from_mshv_sreg(sregs.ss),
            x86emu_sreg_from_mshv_sreg(sregs.ds),
            x86emu_sreg_from_mshv_sreg(sregs.fs),
            x86emu_sreg_from_mshv_sreg(sregs.gs),
        ];
        let cr0 = sregs.cr0;
        let efer = sregs.efer;

        MshvEmuCache {
            gps,
            segs,
            rip,
            rflags: rflags.into(),
            cr0,
            efer,
        }
    }
}

struct MshvEmulationState<'a> {
    partition: &'a MshvPartitionInner,
    processor: &'a MshvVpInner,
    vp_index: VpIndex,
    message: &'a hv_message,
    interruption_pending: bool,
    cache: MshvEmuCache,
}

impl EmulatorSupport for MshvEmulationState<'_> {
    fn vp_index(&self) -> VpIndex {
        self.vp_index
    }

    fn vendor(&self) -> x86defs::cpuid::Vendor {
        self.partition.caps.vendor
    }

    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
        self.cache.gps[reg as usize]
    }

    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
        self.cache.gps[reg as usize] = v;
    }

    fn rip(&mut self) -> u64 {
        self.cache.rip
    }

    fn set_rip(&mut self, v: u64) {
        self.cache.rip = v;
    }

    fn segment(&mut self, reg: x86emu::Segment) -> SegmentRegister {
        self.cache.segs[reg as usize]
    }

    fn efer(&mut self) -> u64 {
        self.cache.efer
    }

    fn cr0(&mut self) -> u64 {
        self.cache.cr0
    }

    fn rflags(&mut self) -> RFlags {
        self.cache.rflags
    }

    fn set_rflags(&mut self, v: RFlags) {
        self.cache.rflags = v;
    }

    fn xmm(&mut self, reg: usize) -> u128 {
        assert!(reg < 16);
        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let mut regs = [unsafe {
            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                name, 0u128,
            )))
        }];
        // Read into the array directly so the value written by the kernel is
        // the one returned below (a temporary copy would be discarded).
        let _ = self.processor.vcpufd.get_reg(&mut regs);
        // SAFETY: Accessing the u128 field of this union is always safe.
        hvu128_to_u128(unsafe { &regs[0].value.reg128 })
    }

    fn set_xmm(&mut self, reg: usize, value: u128) {
        assert!(reg < 16);
        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let reg = unsafe {
            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                name, value,
            )))
        };
        self.processor.vcpufd.set_reg(&[reg]).unwrap();
    }

    fn flush(&mut self) {
        let arr_reg_name_value = [
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
                self.cache.rip,
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RFLAGS,
                self.cache.rflags.into(),
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
                self.cache.gps[0],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
                self.cache.gps[1],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
                self.cache.gps[2],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
                self.cache.gps[3],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSP,
                self.cache.gps[4],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBP,
                self.cache.gps[5],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
                self.cache.gps[6],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
                self.cache.gps[7],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
                self.cache.gps[8],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R9,
                self.cache.gps[9],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R10,
                self.cache.gps[10],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R11,
                self.cache.gps[11],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R12,
                self.cache.gps[12],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R13,
                self.cache.gps[13],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R14,
                self.cache.gps[14],
            ),
            (
                mshv_bindings::hv_register_name_HV_X64_REGISTER_R15,
                self.cache.gps[15],
            ),
        ];

        set_registers_64!(self.processor.vcpufd, arr_reg_name_value).unwrap();
    }

    fn instruction_bytes(&self) -> &[u8] {
        match HvMessageType(self.message.header.message_type) {
            HvMessageType::HvMessageTypeGpaIntercept
            | HvMessageType::HvMessageTypeUnmappedGpa
            | HvMessageType::HvMessageTypeUnacceptedGpa => {
                // SAFETY: We have checked the message type.
                unsafe {
                    let info = (&raw const self.message.u.payload)
                        .cast::<hv_x64_memory_intercept_message>();
                    let instruction_bytes = &raw const (*info).instruction_bytes;
                    let instruction_byte_count =
                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
                    std::slice::from_raw_parts(
                        instruction_bytes.cast(),
                        instruction_byte_count as usize,
                    )
                }
            }
            HvMessageType::HvMessageTypeX64IoPortIntercept => {
                // SAFETY: We have checked the message type.
                unsafe {
                    let info = (&raw const self.message.u.payload)
                        .cast::<hv_x64_io_port_intercept_message>();
                    let instruction_bytes = &raw const (*info).instruction_bytes;
                    let instruction_byte_count =
                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
                    std::slice::from_raw_parts(
                        instruction_bytes.cast(),
                        instruction_byte_count as usize,
                    )
                }
            }
            _ => unreachable!(),
        }
    }

    fn physical_address(&self) -> Option<u64> {
        if self.message.header.message_type == HvMessageType::HvMessageTypeGpaIntercept.0
            || self.message.header.message_type == HvMessageType::HvMessageTypeUnmappedGpa.0
            || self.message.header.message_type == HvMessageType::HvMessageTypeUnacceptedGpa.0
        {
            let info = self.message.to_memory_info().unwrap();
            Some(info.guest_physical_address)
        } else {
            None
        }
    }

    fn initial_gva_translation(
        &mut self,
    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
        if (self.message.header.message_type != HvMessageType::HvMessageTypeGpaIntercept.0)
            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnmappedGpa.0)
            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnacceptedGpa.0)
        {
            return None;
        }

        let message = self.message.to_memory_info().unwrap();

        // SAFETY: accessing the union as a uint8 is safe because only the raw
        // bits are needed here, not a particular field's type.
        let memory_access_info =
            unsafe { hvdef::HvX64MemoryAccessInfo::from(message.memory_access_info.as_uint8) };

        if !memory_access_info.gva_gpa_valid() {
            return None;
        }

        if let Ok(translate_mode) = TranslateMode::try_from(hvdef::HvInterceptAccessType(
            message.header.intercept_access_type,
        )) {
            Some(virt_support_x86emu::emulate::InitialTranslation {
                gva: message.guest_virtual_address,
                gpa: message.guest_physical_address,
                translate_mode,
            })
        } else {
            None
        }
    }

    fn interruption_pending(&self) -> bool {
        self.interruption_pending
    }

    fn check_vtl_access(
        &mut self,
        _gpa: u64,
        _mode: TranslateMode,
    ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError> {
        // TODO: No VTL2 supported so always return Ok.
        Ok(())
    }

    fn translate_gva(
        &mut self,
        gva: u64,
        mode: TranslateMode,
    ) -> Result<EmuTranslateResult, EmuTranslateError> {
        emulate_translate_gva(self, gva, mode)
    }

    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let reg = unsafe {
            &[
                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                    HvX64RegisterName::PendingEvent0,
                    u128::from(event_info.reg_0),
                ))),
                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                    HvX64RegisterName::PendingEvent1,
                    u128::from(event_info.reg_1),
                ))),
            ]
        };
        self.processor.vcpufd.set_reg(reg).unwrap();
    }

    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        self.partition
            .memory
            .lock()
            .ranges
            .iter()
            .flatten()
            .any(|range| {
                // `guest_pfn` is a page frame number; convert it to a byte
                // address before comparing it against the faulting GPA.
                let base = range.guest_pfn << HV_PAGE_SHIFT;
                (base..base + range.size).contains(&gpa)
                    && (!write
                        || range.flags & set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE)
                            == set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE))
            })
    }

    fn lapic_base_address(&self) -> Option<u64> {
        None
    }

    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
        unreachable!()
    }

    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
        unreachable!()
    }
}

impl TranslateGvaSupport for MshvEmulationState<'_> {
    fn guest_memory(&self) -> &GuestMemory {
        &self.partition.gm
    }

    fn acquire_tlb_lock(&mut self) {
        // The hypervisor automatically acquires the TLB lock for exo partitions.
    }

    fn registers(&mut self) -> TranslationRegisters {
        let mut reg = [
            HvX64RegisterName::Cr0,
            HvX64RegisterName::Cr4,
            HvX64RegisterName::Efer,
            HvX64RegisterName::Cr3,
            HvX64RegisterName::Rflags,
            HvX64RegisterName::Ss,
        ]
        .map(|n| HvRegisterAssoc::from((n, 0u64)));

        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same size.
        unsafe {
            self.processor
                .vcpufd
                .get_reg(std::mem::transmute::<
                    &mut [HvRegisterAssoc],
                    &mut [hv_register_assoc],
                >(&mut reg[..]))
                .unwrap();
        }

        let [cr0, cr4, efer, cr3, rflags, ss] = reg.map(|v| v.value);

        TranslationRegisters {
            cr0: cr0.as_u64(),
            cr4: cr4.as_u64(),
            efer: efer.as_u64(),
            cr3: cr3.as_u64(),
            rflags: rflags.as_u64(),
            ss: from_seg(ss.as_segment()),
            encryption_mode: virt_support_x86emu::translate::EncryptionMode::None,
        }
    }
}

// TODO: Chunk this up into smaller types.
#[derive(Error, Debug)]
pub enum Error {
    #[error("operation not supported")]
    NotSupported,
    #[error("create_vm failed")]
    CreateVMFailed,
    #[error("failed to create VCPU")]
    CreateVcpu(#[source] MshvError),
    #[error("vtl2 not supported")]
    Vtl2NotSupported,
    #[error("isolation not supported")]
    IsolationNotSupported,
    #[error("failed to stat /dev/mshv")]
    AvailableCheck(#[source] io::Error),
    #[error("failed to open /dev/mshv")]
    OpenMshv(#[source] MshvError),
    #[error("register access error")]
    Register(#[source] MshvError),
    #[error("install intercept failed")]
    InstallIntercept(#[source] MshvError),
}

impl MshvPartitionInner {
    fn request_msi(&self, request: MsiRequest) {
        let (address, data) = request.as_x86();
        let control = request.hv_x86_interrupt_control();
        let mshv_req = InterruptRequest {
            interrupt_type: control.interrupt_type().0,
            apic_id: address.virt_destination().into(),
            vector: data.vector().into(),
            level_triggered: control.x86_level_triggered(),
            logical_destination_mode: control.x86_logical_destination_mode(),
            long_mode: false,
        };

        if let Err(err) = self.vmfd.request_virtual_interrupt(&mshv_req) {
            tracelimit::warn_ratelimited!(
                address = request.address,
                data = request.data,
                error = &err as &dyn std::error::Error,
                "failed to request msi"
            );
        }
    }
}

impl virt::irqcon::IoApicRouting for MshvPartitionInner {
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }

    fn assert_irq(&self, irq: u8) {
        self.irq_routes
            .assert_irq(irq, |request| self.request_msi(request))
    }
}

#[derive(Debug, Default)]
struct MshvMemoryRangeState {
    ranges: Vec<Option<mshv_user_mem_region>>,
}

impl virt::PartitionMemoryMapper for MshvPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}

// TODO: figure out a better abstraction that also works for KVM and WHP.
impl virt::PartitionMemoryMap for MshvPartitionInner {
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        exec: bool,
    ) -> anyhow::Result<()> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Find the existing slot if there is one.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.userspace_addr == data as u64 => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();

        let mut flags = 0;
        if writable {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE);
        }
        if exec {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_EXECUTABLE);
        }
        let mem_region = mshv_user_mem_region {
            size: size as u64,
            guest_pfn: addr >> HV_PAGE_SHIFT,
            userspace_addr: data as u64,
            flags,
            rsvd: [0; 7],
        };

        self.vmfd.map_user_memory(mem_region)?;
        state.ranges[slot_to_use] = Some(mem_region);
        Ok(())
    }

    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
        let mut state = self.memory.lock();
        let (slot, range) = state
            .ranges
            .iter_mut()
            .enumerate()
            .find(|(_, range)| {
                range.as_ref().map(|r| (r.guest_pfn, r.size)) == Some((addr >> HV_PAGE_SHIFT, size))
            })
            .expect("can only unmap existing ranges of exact size");

        self.vmfd.unmap_user_memory(range.unwrap())?;
        state.ranges[slot] = None;
        Ok(())
    }
}

// TODO: implementation
struct MshvDoorbellEntry;

impl MshvDoorbellEntry {
    pub fn new(
        _guest_address: u64,
        _value: Option<u64>,
        _length: Option<u32>,
        _fd: &Event,
    ) -> io::Result<MshvDoorbellEntry> {
        // TODO: implementation

        Ok(Self)
    }
}

impl DoorbellRegistration for MshvPartition {
    fn register_doorbell(
        &self,
        guest_address: u64,
        value: Option<u64>,
        length: Option<u32>,
        fd: &Event,
    ) -> io::Result<Box<dyn Send + Sync>> {
        Ok(Box::new(MshvDoorbellEntry::new(
            guest_address,
            value,
            length,
            fd,
        )?))
    }
}

pub struct MshvHypercallContext {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub r8: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub xmm: [hv_u128; 6],
}

impl<T> hv1_hypercall::X64RegisterState for MshvHypercallHandler<'_, T> {
    fn rip(&mut self) -> u64 {
        self.rip
    }

    fn set_rip(&mut self, rip: u64) {
        self.rip = rip;
        self.rip_dirty = true;
    }

    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
        match n {
            hv1_hypercall::X64HypercallRegister::Rax => self.context.rax,
            hv1_hypercall::X64HypercallRegister::Rcx => self.context.rcx,
            hv1_hypercall::X64HypercallRegister::Rdx => self.context.rdx,
            hv1_hypercall::X64HypercallRegister::Rbx => self.context.rbx,
            hv1_hypercall::X64HypercallRegister::Rsi => self.context.rsi,
            hv1_hypercall::X64HypercallRegister::Rdi => self.context.rdi,
            hv1_hypercall::X64HypercallRegister::R8 => self.context.r8,
        }
    }

    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
        *match n {
            hv1_hypercall::X64HypercallRegister::Rax => &mut self.context.rax,
            hv1_hypercall::X64HypercallRegister::Rcx => &mut self.context.rcx,
            hv1_hypercall::X64HypercallRegister::Rdx => &mut self.context.rdx,
            hv1_hypercall::X64HypercallRegister::Rbx => &mut self.context.rbx,
            hv1_hypercall::X64HypercallRegister::Rsi => &mut self.context.rsi,
            hv1_hypercall::X64HypercallRegister::Rdi => &mut self.context.rdi,
            hv1_hypercall::X64HypercallRegister::R8 => &mut self.context.r8,
        } = value;
        self.gp_dirty = true;
    }

    fn xmm(&mut self, n: usize) -> u128 {
        let r = &self.context.xmm[n];
        hvu128_to_u128(r)
    }

    fn set_xmm(&mut self, n: usize, value: u128) {
        self.context.xmm[n] = u128_to_hvu128(value);
        self.xmm_dirty = true;
    }
}

fn hvu128_to_u128(r: &hv_u128) -> u128 {
    (r.high_part as u128) << 64 | r.low_part as u128
}

fn u128_to_hvu128(value: u128) -> hv_u128 {
    hv_u128 {
        high_part: (value >> 64) as u64,
        low_part: (value & (u64::MAX as u128)) as u64,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn u128_roundtrip() {
        let original = 0x0123_4567_89ab_cdef_fedc_ba98_7654_3210;
        let hv = u128_to_hvu128(original);
        let roundtrip = hvu128_to_u128(&hv);
        assert_eq!(roundtrip, original);
    }
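
    // Additional sanity check: `u128_to_hvu128` should place the upper and
    // lower 64 bits into `high_part` and `low_part` respectively, matching
    // how `hvu128_to_u128` reassembles them above.
    #[test]
    fn u128_split_parts() {
        let value = 0x1111_2222_3333_4444_5555_6666_7777_8888u128;
        let hv = u128_to_hvu128(value);
        assert_eq!(hv.high_part, 0x1111_2222_3333_4444);
        assert_eq!(hv.low_part, 0x5555_6666_7777_8888);
        assert_eq!(hvu128_to_u128(&hv), value);
    }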
}

struct MshvHypercallHandler<'a, T> {
    bus: &'a T,
    context: &'a mut MshvHypercallContext,
    rip: u64,
    rip_dirty: bool,
    xmm_dirty: bool,
    gp_dirty: bool,
}

impl<T: CpuIo> MshvHypercallHandler<'_, T> {
    const DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
        Self,
        [hv1_hypercall::HvPostMessage, hv1_hypercall::HvSignalEvent],
    );
}

impl<T: CpuIo> hv1_hypercall::PostMessage for MshvHypercallHandler<'_, T> {
    fn post_message(&mut self, connection_id: u32, message: &[u8]) -> hvdef::HvResult<()> {
        self.bus
            .post_synic_message(Vtl::Vtl0, connection_id, false, message)
    }
}

impl<T: CpuIo> hv1_hypercall::SignalEvent for MshvHypercallHandler<'_, T> {
    fn signal_event(&mut self, connection_id: u32, flag: u16) -> hvdef::HvResult<()> {
        self.bus.signal_synic_event(Vtl::Vtl0, connection_id, flag)
    }
}

impl Inspect for MshvPartition {
    fn inspect(&self, req: inspect::Request<'_>) {
        // TODO: implementation
        req.respond();
    }
}

impl InspectMut for MshvProcessor<'_> {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        req.respond();
    }
}

impl virt::Processor for MshvProcessor<'_> {
    type StateAccess<'a>
        = &'a mut Self
    where
        Self: 'a;

    fn set_debug_state(
        &mut self,
        _vtl: Vtl,
        _state: Option<&virt::x86::DebugState>,
    ) -> Result<(), <&mut Self as virt::vp::AccessVpState>::Error> {
        Err(Error::NotSupported)
    }

    async fn run_vp(
        &mut self,
        stop: StopVp<'_>,
        dev: &impl CpuIo,
    ) -> Result<Infallible, VpHaltReason> {
        let vpinner = self.inner;
        let _cleaner = MshvVpInnerCleaner { vpinner };
        let vcpufd = &vpinner.vcpufd;

        // Ensure this thread is uniquely running the VP, and store the thread
        // ID to support cancellation.
        assert!(vpinner.thread.write().replace(Pthread::current()).is_none());

        loop {
            vpinner.needs_yield.maybe_yield().await;
            stop.check()?;

            match vcpufd.run() {
                Ok(exit) => match HvMessageType(exit.header.message_type) {
                    HvMessageType::HvMessageTypeUnrecoverableException => {
                        return Err(VpHaltReason::TripleFault { vtl: Vtl::Vtl0 });
                    }
                    HvMessageType::HvMessageTypeX64IoPortIntercept => {
                        self.handle_io_port_intercept(&exit, dev).await?;
                    }
                    HvMessageType::HvMessageTypeUnmappedGpa
                    | HvMessageType::HvMessageTypeGpaIntercept => {
                        self.handle_mmio_intercept(&exit, dev).await?;
                    }
                    HvMessageType::HvMessageTypeSynicSintDeliverable => {
                        tracing::trace!("SYNIC_SINT_DELIVERABLE");
                        self.handle_synic_deliverable_exit(&exit, dev);
                    }
                    HvMessageType::HvMessageTypeHypercallIntercept => {
                        tracing::trace!("HYPERCALL_INTERCEPT");
                        self.handle_hypercall_intercept(&exit, dev);
                    }
                    exit => {
                        panic!("Unhandled vcpu exit code {exit:?}");
                    }
                },

                Err(e) => match e.errno() {
                    libc::EAGAIN | libc::EINTR => {}
                    _ => tracing::error!(
                        error = &e as &dyn std::error::Error,
                        "vcpufd.run returned error"
                    ),
                },
            }
        }
    }

    fn flush_async_requests(&mut self) {}

    fn access_state(&mut self, vtl: Vtl) -> Self::StateAccess<'_> {
        assert_eq!(vtl, Vtl::Vtl0);
        self
    }
}

fn x86emu_sreg_from_mshv_sreg(reg: mshv_bindings::SegmentRegister) -> SegmentRegister {
    let reg: hv_x64_segment_register = hv_x64_segment_register::from(reg);
    // SAFETY: This union only contains one field.
    let attributes: u16 = unsafe { reg.__bindgen_anon_1.attributes };

    SegmentRegister {
        base: reg.base,
        limit: reg.limit,
        selector: reg.selector,
        attributes: attributes.into(),
    }
}

fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
    SegmentRegister {
        base: reg.base,
        limit: reg.limit,
        selector: reg.selector,
        attributes: reg.attributes.into(),
    }
}

impl virt::Synic for MshvPartition {
    fn post_message(&self, _vtl: Vtl, vp: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        self.inner
            .post_message(vp, sint, &HvMessage::new(HvMessageType(typ), 0, payload));
    }

    fn new_guest_event_port(
        &self,
        _vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn GuestEventPort> {
        Box::new(MshvGuestEventPort {
            partition: Arc::downgrade(&self.inner),
            params: Arc::new(Mutex::new(MshvEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
            })),
        })
    }

    fn prefer_os_events(&self) -> bool {
        false
    }
}

/// `GuestEventPort` implementation for MSHV partitions.
#[derive(Debug, Clone)]
struct MshvGuestEventPort {
    partition: Weak<MshvPartitionInner>,
    params: Arc<Mutex<MshvEventPortParams>>,
}

#[derive(Debug, Copy, Clone)]
struct MshvEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
}

impl GuestEventPort for MshvGuestEventPort {
    fn interrupt(&self) -> Interrupt {
        let partition = self.partition.clone();
        let params = self.params.clone();
        Interrupt::from_fn(move || {
            let MshvEventPortParams { vp, sint, flag } = *params.lock();
            if let Some(partition) = partition.upgrade() {
                partition
                    .vmfd
                    .signal_event_direct(vp.index(), sint, flag)
                    .unwrap_or_else(|_| {
                        panic!(
                            "Failed to signal synic sint {} on vp {:?} with flag {}",
                            sint, vp, flag
                        )
                    });
            }
        })
    }

    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
        self.params.lock().vp = VpIndex::new(vp);
        Ok(())
    }
}