virt_mshv/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Linux /dev/mshv implementation of the virt::generic interfaces.
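//!
//! The flow mirrors the generic `virt` traits: `new_partition` opens
//! `/dev/mshv` and creates the VM and vCPU file descriptors, the resulting
//! `MshvProtoPartition` is built into an `MshvPartition` plus one
//! `MshvProcessorBinder` per virtual processor, and each bound `MshvProcessor`
//! then drives its vCPU with `run_vp`, dispatching intercept messages (I/O
//! port, MMIO, hypercall, SINT deliverable) back to the VMM through `CpuIo`.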
5
6#![cfg(all(target_os = "linux", guest_is_native, guest_arch = "x86_64"))]
7#![expect(missing_docs)]
8// UNSAFETY: Calling HV APIs and manually managing memory.
9#![expect(unsafe_code)]
10
11mod vm_state;
12mod vp_state;
13
14use arrayvec::ArrayVec;
15use guestmem::DoorbellRegistration;
16use guestmem::GuestMemory;
17use hv1_emulator::message_queues::MessageQueues;
18use hv1_hypercall::X64RegisterIo;
19use hvdef::HV_PAGE_SHIFT;
20use hvdef::HvDeliverabilityNotificationsRegister;
21use hvdef::HvError;
22use hvdef::HvMessage;
23use hvdef::HvMessageType;
24use hvdef::HvX64RegisterName;
25use hvdef::HvX64VpExecutionState;
26use hvdef::Vtl;
27use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
28use hvdef::hypercall::HvRegisterAssoc;
29use inspect::Inspect;
30use inspect::InspectMut;
31use mshv_bindings::MSHV_SET_MEM_BIT_EXECUTABLE;
32use mshv_bindings::MSHV_SET_MEM_BIT_WRITABLE;
33use mshv_bindings::hv_message;
34use mshv_bindings::hv_register_assoc;
35use mshv_bindings::hv_register_value;
36use mshv_bindings::hv_u128;
37use mshv_bindings::hv_x64_io_port_intercept_message;
38use mshv_bindings::hv_x64_memory_intercept_message;
39use mshv_bindings::hv_x64_segment_register;
40use mshv_bindings::mshv_install_intercept;
41use mshv_bindings::mshv_user_mem_region;
42use mshv_ioctls::InterruptRequest;
43use mshv_ioctls::Mshv;
44use mshv_ioctls::MshvError;
45use mshv_ioctls::VcpuFd;
46use mshv_ioctls::VmFd;
47use mshv_ioctls::set_bits;
48use mshv_ioctls::set_registers_64;
49use pal::unix::pthread::*;
50use pal_event::Event;
51use parking_lot::Mutex;
52use parking_lot::RwLock;
53use std::convert::Infallible;
54use std::io;
55use std::sync::Arc;
56use std::sync::Once;
57use std::sync::Weak;
58use thiserror::Error;
59use virt::Hv1;
60use virt::NeedsYield;
61use virt::PartitionAccessState;
62use virt::PartitionConfig;
63use virt::ProtoPartition;
64use virt::ProtoPartitionConfig;
65use virt::StopVp;
66use virt::VpHaltReason;
67use virt::VpIndex;
68use virt::io::CpuIo;
69use virt::irqcon::MsiRequest;
70use virt::x86::max_physical_address_size_from_cpuid;
71use virt_support_x86emu::emulate::EmuTranslateError;
72use virt_support_x86emu::emulate::EmuTranslateResult;
73use virt_support_x86emu::emulate::EmulatorSupport;
74use virt_support_x86emu::emulate::TranslateGvaSupport;
75use virt_support_x86emu::emulate::TranslateMode;
76use virt_support_x86emu::emulate::emulate_translate_gva;
77use virt_support_x86emu::translate::TranslationRegisters;
78use vmcore::interrupt::Interrupt;
79use vmcore::reference_time::GetReferenceTime;
80use vmcore::reference_time::ReferenceTimeResult;
81use vmcore::reference_time::ReferenceTimeSource;
82use vmcore::synic::GuestEventPort;
83use x86defs::RFlags;
84use x86defs::SegmentRegister;
85use zerocopy::IntoBytes;
86
87#[derive(Debug)]
88pub struct LinuxMshv;
89
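/// Register state captured from the vCPU at the start of an emulation pass.
/// The instruction emulator reads and updates this cache; `EmulatorSupport::flush`
/// writes the general-purpose registers, RIP, and RFLAGS back to the kernel.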
90struct MshvEmuCache {
91    /// GP registers, in the canonical order (as defined by `RAX`, etc.).
92    gps: [u64; 16],
93    /// Segment registers, in the canonical order (as defined by `ES`, etc.).
94    segs: [SegmentRegister; 6],
95    rip: u64,
96    rflags: RFlags,
97
98    cr0: u64,
99    efer: u64,
100}
101
102impl virt::Hypervisor for LinuxMshv {
103    type ProtoPartition<'a> = MshvProtoPartition<'a>;
104    type Partition = MshvPartition;
105    type Error = Error;
106
107    fn new_partition<'a>(
108        &mut self,
109        config: ProtoPartitionConfig<'a>,
110    ) -> Result<MshvProtoPartition<'a>, Self::Error> {
111        if config.isolation.is_isolated() {
112            return Err(Error::IsolationNotSupported);
113        }
114
115        // Open /dev/mshv.
116        let mshv = Mshv::new().map_err(Error::OpenMshv)?;
117
118        // Create VM.
119        //
120        // TODO: really need to pass some partition properties here (e.g., for
121        // APIC configuration), but the underlying crate just hardcodes
122        // everything.
123        let vmfd: VmFd;
124        loop {
125            match mshv.create_vm() {
126                Ok(fd) => vmfd = fd,
127                Err(e) => {
128                    if e.errno() == libc::EINTR {
                        // EINTR means the ioctl was interrupted; retry rather
                        // than treating it as a real error.
132                        continue;
133                    } else {
134                        return Err(Error::CreateVMFailed);
135                    }
136                }
137            }
138            break;
139        }
140
141        vmfd.initialize()
142            .map_err(|e| Error::CreateVMInitFailed(e.into()))?;
143
144        // Create virtual CPUs.
145        let mut vps: Vec<MshvVpInner> = Vec::new();
146        for vp in config.processor_topology.vps_arch() {
147            if vp.base.vp_index.index() != vp.apic_id {
148                // TODO
149                return Err(Error::NotSupported);
150            }
151
152            let vcpufd = vmfd
153                .create_vcpu(vp.base.vp_index.index() as u8)
154                .map_err(Error::CreateVcpu)?;
155
156            vps.push(MshvVpInner {
157                vcpufd,
158                thread: RwLock::new(None),
159                needs_yield: NeedsYield::new(),
160                message_queues: MessageQueues::new(),
161                deliverability_notifications: Mutex::new(
162                    HvDeliverabilityNotificationsRegister::new(),
163                ),
164            });
165        }
166
167        // Install required intercepts
168        let intercept_args = mshv_install_intercept {
169            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
170            intercept_type: hvdef::hypercall::HvInterceptType::HvInterceptTypeHypercall.0,
171            intercept_parameter: Default::default(),
172        };
173        vmfd.install_intercept(intercept_args)
174            .map_err(Error::InstallIntercept)?;
175
176        // Set up a signal for forcing vcpufd.run() ioctl to exit.
177        static SIGNAL_HANDLER_INIT: Once = Once::new();
        // SAFETY: The signal handler performs no action at all, so it cannot
        // violate any of the restrictions placed on signal handlers.
180        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
181            signal_hook::low_level::register(libc::SIGRTMIN(), || {
                // The handler does nothing; it exists only so that the
                // vcpufd.run() ioctl returns with EINTR when the signal is
                // sent to the thread blocked in it.
184            })
185            .unwrap();
186        });
187
188        if let Some(hv_config) = &config.hv_config {
189            if hv_config.vtl2.is_some() {
190                return Err(Error::Vtl2NotSupported);
191            }
192        }
193
194        Ok(MshvProtoPartition { config, vmfd, vps })
195    }
196
197    fn is_available(&self) -> Result<bool, Self::Error> {
198        match std::fs::metadata("/dev/mshv") {
199            Ok(_) => Ok(true),
200            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
201            Err(err) => Err(Error::AvailableCheck(err)),
202        }
203    }
204}
205
206/// Prototype partition.
207pub struct MshvProtoPartition<'a> {
208    config: ProtoPartitionConfig<'a>,
209    vmfd: VmFd,
210    vps: Vec<MshvVpInner>,
211}
212
213impl ProtoPartition for MshvProtoPartition<'_> {
214    type Partition = MshvPartition;
215    type ProcessorBinder = MshvProcessorBinder;
216    type Error = Error;
217
218    fn cpuid(&self, eax: u32, ecx: u32) -> [u32; 4] {
219        // This call should never fail unless there is a kernel or hypervisor
220        // bug.
221        self.vps[0]
222            .vcpufd
223            .get_cpuid_values(eax, ecx, 0, 0)
224            .expect("cpuid should not fail")
225    }
226
227    fn max_physical_address_size(&self) -> u8 {
228        max_physical_address_size_from_cpuid(&|eax, ecx| self.cpuid(eax, ecx))
229    }
230
231    fn build(
232        self,
233        config: PartitionConfig<'_>,
234    ) -> Result<(Self::Partition, Vec<Self::ProcessorBinder>), Self::Error> {
235        // TODO: do something with cpuid.
236
237        // Get caps via cpuid
238        let caps = virt::PartitionCapabilities::from_cpuid(
239            self.config.processor_topology,
240            &mut |function, index| {
241                self.vps[0]
242                    .vcpufd
243                    .get_cpuid_values(function, index, 0, 0)
244                    .expect("cpuid should not fail")
245            },
246        )
247        .map_err(Error::Capabilities)?;
248
249        // Attach all the resources created above to a Partition object.
250        let partition = MshvPartition {
251            inner: Arc::new(MshvPartitionInner {
252                vmfd: self.vmfd,
253                memory: Default::default(),
254                gm: config.guest_memory.clone(),
255                vps: self.vps,
256                irq_routes: Default::default(),
257                caps,
258            }),
259        };
260
261        let vps = self
262            .config
263            .processor_topology
264            .vps()
265            .map(|vp| MshvProcessorBinder {
266                partition: partition.inner.clone(),
267                vpindex: vp.vp_index,
268            })
269            .collect();
270
271        Ok((partition, vps))
272    }
273}
274
// TODO: remove these workarounds when mshv-ioctls implements the Debug trait
276#[derive(Debug)]
277pub struct MshvPartition {
278    inner: Arc<MshvPartitionInner>,
279}
280
281#[derive(Debug)]
282struct MshvPartitionInner {
283    vmfd: VmFd,
284    memory: Mutex<MshvMemoryRangeState>,
285    gm: GuestMemory,
286    vps: Vec<MshvVpInner>,
287    irq_routes: virt::irqcon::IrqRoutes,
288    caps: virt::PartitionCapabilities,
289}
290
291#[derive(Debug)]
292struct MshvVpInner {
293    vcpufd: VcpuFd,
294    thread: RwLock<Option<Pthread>>,
295    needs_yield: NeedsYield,
296    message_queues: MessageQueues,
297    deliverability_notifications: Mutex<HvDeliverabilityNotificationsRegister>,
298}
299
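/// Guard that clears the VP's registered run thread when `run_vp` returns or
/// unwinds, so `request_yield` never signals a thread that is no longer
/// running the VP.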
300struct MshvVpInnerCleaner<'a> {
301    vpinner: &'a MshvVpInner,
302}
303
304impl Drop for MshvVpInnerCleaner<'_> {
305    fn drop(&mut self) {
306        self.vpinner.thread.write().take();
307    }
308}
309
310impl virt::Partition for MshvPartition {
311    fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Error>> {
312        None
313    }
314
315    fn doorbell_registration(
316        self: &Arc<Self>,
317        _minimum_vtl: Vtl,
318    ) -> Option<Arc<dyn DoorbellRegistration>> {
319        // TODO: implementation
320
321        Some(self.clone())
322    }
323
324    fn caps(&self) -> &virt::PartitionCapabilities {
325        &self.inner.caps
326    }
327
328    fn request_msi(&self, _vtl: Vtl, request: MsiRequest) {
329        self.inner.request_msi(request)
330    }
331
332    fn request_yield(&self, vp_index: VpIndex) {
333        let vp = self.inner.vp(vp_index);
334        if vp.needs_yield.request_yield() {
            // Send a signal to the thread that called vcpufd.run() to force an exit.
336            let thread = vp.thread.read();
337            if let Some(thread) = *thread {
338                if thread != Pthread::current() {
339                    thread
340                        .signal(libc::SIGRTMIN())
341                        .expect("thread cancel signal failed");
342                }
343            }
344        }
345    }
346}
347
348impl virt::X86Partition for MshvPartition {
349    fn ioapic_routing(&self) -> Arc<dyn virt::irqcon::IoApicRouting> {
350        self.inner.clone()
351    }
352
353    fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
354        // TODO
355        tracing::warn!(?vp_index, ?vtl, lint, "ignored lint pulse");
356    }
357}
358
359impl PartitionAccessState for MshvPartition {
360    type StateAccess<'a> = &'a MshvPartition;
361
362    fn access_state(&self, vtl: Vtl) -> Self::StateAccess<'_> {
363        assert_eq!(vtl, Vtl::Vtl0);
364
365        self
366    }
367}
368
369impl Hv1 for MshvPartition {
370    type Error = Error;
371    type Device = virt::UnimplementedDevice;
372
373    fn reference_time_source(&self) -> Option<ReferenceTimeSource> {
374        Some(ReferenceTimeSource::from(self.inner.clone() as Arc<_>))
375    }
376
377    fn new_virtual_device(
378        &self,
379    ) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
380        None
381    }
382}
383
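// The partition reference time (in 100ns units) is read through the BSP's
// TimeRefCount register; no corresponding system time is captured, so
// `system_time` is reported as `None`.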
384impl GetReferenceTime for MshvPartitionInner {
385    fn now(&self) -> ReferenceTimeResult {
386        let mut regs = [hv_register_assoc {
387            name: hvdef::HvAllArchRegisterName::TimeRefCount.0,
388            value: hv_register_value { reg64: 0 },
389            ..Default::default()
390        }];
391        self.vp(VpIndex::BSP).vcpufd.get_reg(&mut regs).unwrap();
392        // SAFETY: the value has been written by the kernel.
393        let ref_time = unsafe { regs[0].value.reg64 };
394        ReferenceTimeResult {
395            ref_time,
396            system_time: None,
397        }
398    }
399}
400
401impl MshvPartitionInner {
402    fn vp(&self, vp_index: VpIndex) -> &MshvVpInner {
403        &self.vps[vp_index.index() as usize]
404    }
405
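    // Queues a synic message for the target VP. If the queue reports that a
    // wakeup is needed, also request a deliverability notification so the
    // message is flushed once the hypervisor reports the SINT as deliverable
    // (see `MshvProcessor::flush_messages`).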
406    fn post_message(&self, vp_index: VpIndex, sint: u8, message: &HvMessage) {
407        let request_notification = self
408            .vp(vp_index)
409            .message_queues
410            .enqueue_message(sint, message);
411
412        if request_notification {
413            self.request_sint_notifications(vp_index, 1 << sint);
414        }
415    }
416
417    fn request_sint_notifications(&self, vp_index: VpIndex, sints: u16) {
418        let mut notifications = self.vp(vp_index).deliverability_notifications.lock();
419        if notifications.sints() != sints {
420            notifications.set_sints(sints);
421            self.vmfd
422                .register_deliverabilty_notifications(vp_index.index(), (*notifications).into())
                .expect("requesting deliverability notifications should not fail");
424        }
425    }
426}
427
428pub struct MshvProcessorBinder {
429    partition: Arc<MshvPartitionInner>,
430    vpindex: VpIndex,
431}
432
433impl virt::BindProcessor for MshvProcessorBinder {
434    type Processor<'a>
435        = MshvProcessor<'a>
436    where
437        Self: 'a;
438    type Error = Error;
439
440    fn bind(&mut self) -> Result<Self::Processor<'_>, Self::Error> {
441        Ok(MshvProcessor {
442            partition: &self.partition,
443            inner: &self.partition.vps[self.vpindex.index() as usize],
444            vpindex: self.vpindex,
445        })
446    }
447}
448
449pub struct MshvProcessor<'a> {
450    partition: &'a MshvPartitionInner,
451    inner: &'a MshvVpInner,
452    vpindex: VpIndex,
453}
454
455impl MshvProcessor<'_> {
456    async fn emulate(
457        &self,
458        message: &hv_message,
459        devices: &impl CpuIo,
460        interruption_pending: bool,
461    ) -> Result<(), VpHaltReason> {
462        let cache = self.emulation_cache();
463        let emu_mem = virt_support_x86emu::emulate::EmulatorMemoryAccess {
464            gm: &self.partition.gm,
465            kx_gm: &self.partition.gm,
466            ux_gm: &self.partition.gm,
467        };
468
469        let mut support = MshvEmulationState {
470            partition: self.partition,
471            processor: self.inner,
472            vp_index: self.vpindex,
473            message,
474            interruption_pending,
475            cache,
476        };
477        virt_support_x86emu::emulate::emulate(&mut support, &emu_mem, devices).await
478    }
479
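    // String and rep-prefixed port I/O requires full instruction emulation;
    // a plain IN/OUT is handled directly via `emulate_io`, after which RIP is
    // advanced past the instruction and RAX is updated by hand.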
480    async fn handle_io_port_intercept(
481        &self,
482        message: &hv_message,
483        devices: &impl CpuIo,
484    ) -> Result<(), VpHaltReason> {
485        let info = message.to_ioport_info().unwrap();
486        let access_info = info.access_info;
487        // SAFETY: This union only contains one field.
488        let port_access_info = unsafe { access_info.__bindgen_anon_1 };
489
490        if port_access_info.string_op() != 0 || port_access_info.rep_prefix() != 0 {
491            let execution_state = info.header.execution_state;
492            // SAFETY: This union only contains one field.
493            let io_execution_state = unsafe { execution_state.__bindgen_anon_1 };
494            let interruption_pending = io_execution_state.interruption_pending() != 0;
495
496            self.emulate(message, devices, interruption_pending).await?
497        } else {
498            let mut ret_rax = info.rax;
499            virt_support_x86emu::emulate::emulate_io(
500                self.vpindex,
501                info.header.intercept_access_type == 1,
502                info.port_number,
503                &mut ret_rax,
504                port_access_info.access_size(),
505                devices,
506            )
507            .await;
508
509            let insn_len = info.header.instruction_length() as u64;
510
511            /* Advance RIP and update RAX */
512            let arr_reg_name_value = [
513                (
514                    mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
515                    info.header.rip + insn_len,
516                ),
517                (mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
518            ];
519
520            set_registers_64!(self.inner.vcpufd, arr_reg_name_value).unwrap();
521        }
522
523        Ok(())
524    }
525
526    async fn handle_mmio_intercept(
527        &self,
528        message: &hv_message,
529        devices: &impl CpuIo,
530    ) -> Result<(), VpHaltReason> {
531        let execution_state = message.to_memory_info().unwrap().header.execution_state;
532        // SAFETY: This union only contains one field.
533        let mmio_execution_state = unsafe { execution_state.__bindgen_anon_1 };
534        let interruption_pending = mmio_execution_state.interruption_pending() != 0;
535
536        self.emulate(message, devices, interruption_pending).await
537    }
538
539    fn handle_synic_deliverable_exit(&self, message: &hv_message, _devices: &impl CpuIo) {
540        let info = message.to_sint_deliverable_info().unwrap();
541        self.flush_messages(info.deliverable_sints);
542    }
543
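    // Hypercalls reach user space because an execute intercept was installed
    // at partition creation. Decode the register context from the message, run
    // the dispatcher (only HvPostMessage and HvSignalEvent are handled), and
    // write back just the registers the handler dirtied.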
544    fn handle_hypercall_intercept(&self, message: &hv_message, devices: &impl CpuIo) {
545        let info = message.to_hypercall_intercept_info().unwrap();
546        let execution_state = info.header.execution_state;
547        // SAFETY: Accessing the raw field of this union is always safe.
548        let vp_state = unsafe { HvX64VpExecutionState::from(execution_state.as_uint16) };
549        let is_64bit = vp_state.cr0_pe() && vp_state.efer_lma();
550        let mut hpc_context = MshvHypercallContext {
551            rax: info.rax,
552            rbx: info.rbx,
553            rcx: info.rcx,
554            rdx: info.rdx,
555            r8: info.r8,
556            rsi: info.rsi,
557            rdi: info.rdi,
558            xmm: info.xmmregisters,
559        };
560        let mut handler = MshvHypercallHandler {
561            bus: devices,
562            context: &mut hpc_context,
563            rip: info.header.rip,
564            rip_dirty: false,
565            xmm_dirty: false,
566            gp_dirty: false,
567        };
568
569        MshvHypercallHandler::DISPATCHER.dispatch(
570            &self.partition.gm,
571            X64RegisterIo::new(&mut handler, is_64bit),
572        );
573
574        let mut dirty_regs = ArrayVec::<hv_register_assoc, 14>::new();
575
576        if handler.gp_dirty {
577            dirty_regs.extend([
578                hv_register_assoc {
579                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
580                    value: hv_register_value {
581                        reg64: handler.context.rax,
582                    },
583                    ..Default::default()
584                },
585                hv_register_assoc {
586                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
587                    value: hv_register_value {
588                        reg64: handler.context.rbx,
589                    },
590                    ..Default::default()
591                },
592                hv_register_assoc {
593                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
594                    value: hv_register_value {
595                        reg64: handler.context.rcx,
596                    },
597                    ..Default::default()
598                },
599                hv_register_assoc {
600                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
601                    value: hv_register_value {
602                        reg64: handler.context.rdx,
603                    },
604                    ..Default::default()
605                },
606                hv_register_assoc {
607                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
608                    value: hv_register_value {
609                        reg64: handler.context.r8,
610                    },
611                    ..Default::default()
612                },
613                hv_register_assoc {
614                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
615                    value: hv_register_value {
616                        reg64: handler.context.rsi,
617                    },
618                    ..Default::default()
619                },
620                hv_register_assoc {
621                    name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
622                    value: hv_register_value {
623                        reg64: handler.context.rdi,
624                    },
625                    ..Default::default()
626                },
627            ]);
628        }
629
630        if handler.xmm_dirty {
            // Write back all six XMM registers used by the hypercall input set.
            dirty_regs.extend((0..6).map(|i| hv_register_assoc {
632                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_XMM0 + i,
633                value: hv_register_value {
634                    reg128: handler.context.xmm[i as usize],
635                },
636                ..Default::default()
637            }));
638        }
639
640        if handler.rip_dirty {
641            dirty_regs.push(hv_register_assoc {
642                name: mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
643                value: hv_register_value { reg64: handler.rip },
644                ..Default::default()
645            });
646        }
647
648        if !dirty_regs.is_empty() {
649            self.inner
650                .vcpufd
651                .set_reg(&dirty_regs)
                .expect("setting registers should not fail");
653        }
654    }
655
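    // Posts queued messages for the SINTs the hypervisor reported as
    // deliverable, clears those SINTs from the pending notification state, and
    // re-requests notifications for any SINTs that still have queued messages.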
656    fn flush_messages(&self, deliverable_sints: u16) {
657        let nonempty_sints =
658            self.inner
659                .message_queues
660                .post_pending_messages(deliverable_sints, |sint, message| {
661                    match self.partition.vmfd.post_message_direct(
662                        self.vpindex.index(),
663                        sint,
664                        message.as_bytes(),
665                    ) {
666                        Ok(()) => {
667                            tracing::trace!(sint, "sint message posted successfully");
668                            Ok(())
669                        }
670                        Err(e) => {
671                            // TODO: handle errors appropriately
672                            tracing::trace!(error = %e, "dropping sint message");
673                            Err(HvError::ObjectInUse)
674                        }
675                    }
676                });
677
678        {
            // To avoid an additional get_reg hypercall, clear with the deliverable sints mask.
680            let mut notifications = self.inner.deliverability_notifications.lock();
681            let remaining_sints = notifications.sints() & !deliverable_sints;
682            notifications.set_sints(remaining_sints);
683        }
684
685        if nonempty_sints != 0 {
686            self.partition
687                .request_sint_notifications(self.vpindex, nonempty_sints);
688        }
689    }
690
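    // Snapshots the registers the emulator needs up front (one get_regs and
    // one get_sregs call) so emulation can run against cached state; see
    // `EmulatorSupport::flush` for the write-back path.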
691    fn emulation_cache(&self) -> MshvEmuCache {
692        let regs = self.inner.vcpufd.get_regs().unwrap();
693        let gps = [
694            regs.rax, regs.rcx, regs.rdx, regs.rbx, regs.rsp, regs.rbp, regs.rsi, regs.rdi,
695            regs.r8, regs.r9, regs.r10, regs.r11, regs.r12, regs.r13, regs.r14, regs.r15,
696        ];
697        let rip = regs.rip;
698        let rflags = regs.rflags;
699
700        let sregs = self.inner.vcpufd.get_sregs().unwrap();
701        let segs = [
702            x86emu_sreg_from_mshv_sreg(sregs.es),
703            x86emu_sreg_from_mshv_sreg(sregs.cs),
704            x86emu_sreg_from_mshv_sreg(sregs.ss),
705            x86emu_sreg_from_mshv_sreg(sregs.ds),
706            x86emu_sreg_from_mshv_sreg(sregs.fs),
707            x86emu_sreg_from_mshv_sreg(sregs.gs),
708        ];
709        let cr0 = sregs.cr0;
710        let efer = sregs.efer;
711
712        MshvEmuCache {
713            gps,
714            segs,
715            rip,
716            rflags: rflags.into(),
717            cr0,
718            efer,
719        }
720    }
721}
722
723struct MshvEmulationState<'a> {
724    partition: &'a MshvPartitionInner,
725    processor: &'a MshvVpInner,
726    vp_index: VpIndex,
727    message: &'a hv_message,
728    interruption_pending: bool,
729    cache: MshvEmuCache,
730}
731
732impl EmulatorSupport for MshvEmulationState<'_> {
733    fn vp_index(&self) -> VpIndex {
734        self.vp_index
735    }
736
737    fn vendor(&self) -> x86defs::cpuid::Vendor {
738        self.partition.caps.vendor
739    }
740
741    fn gp(&mut self, reg: x86emu::Gp) -> u64 {
742        self.cache.gps[reg as usize]
743    }
744
745    fn set_gp(&mut self, reg: x86emu::Gp, v: u64) {
746        self.cache.gps[reg as usize] = v;
747    }
748
749    fn rip(&mut self) -> u64 {
750        self.cache.rip
751    }
752
753    fn set_rip(&mut self, v: u64) {
754        self.cache.rip = v;
755    }
756
757    fn segment(&mut self, reg: x86emu::Segment) -> SegmentRegister {
758        self.cache.segs[reg as usize]
759    }
760
761    fn efer(&mut self) -> u64 {
762        self.cache.efer
763    }
764
765    fn cr0(&mut self) -> u64 {
766        self.cache.cr0
767    }
768
769    fn rflags(&mut self) -> RFlags {
770        self.cache.rflags
771    }
772
773    fn set_rflags(&mut self, v: RFlags) {
774        self.cache.rflags = v;
775    }
776
777    fn xmm(&mut self, reg: usize) -> u128 {
778        assert!(reg < 16);
779        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
780        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
        let mut regs = [unsafe {
            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
                name, 0u128,
            )))
        }];
        // Read into the array itself; passing a temporary copy would discard
        // the value written by the kernel.
        self.processor.vcpufd.get_reg(&mut regs).unwrap();
        // SAFETY: Accessing the u128 field of this union is always safe.
        hvu128_to_u128(unsafe { &regs[0].value.reg128 })
789    }
790
791    fn set_xmm(&mut self, reg: usize, value: u128) {
792        assert!(reg < 16);
793        let name = HvX64RegisterName(HvX64RegisterName::Xmm0.0 + reg as u32);
794        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
795        let reg = unsafe {
796            std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
797                name, value,
798            )))
799        };
800        self.processor.vcpufd.set_reg(&[reg]).unwrap();
801    }
802
803    fn flush(&mut self) {
804        let arr_reg_name_value = [
805            (
806                mshv_bindings::hv_register_name_HV_X64_REGISTER_RIP,
807                self.cache.rip,
808            ),
809            (
810                mshv_bindings::hv_register_name_HV_X64_REGISTER_RFLAGS,
811                self.cache.rflags.into(),
812            ),
813            (
814                mshv_bindings::hv_register_name_HV_X64_REGISTER_RAX,
815                self.cache.gps[0],
816            ),
817            (
818                mshv_bindings::hv_register_name_HV_X64_REGISTER_RCX,
819                self.cache.gps[1],
820            ),
821            (
822                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDX,
823                self.cache.gps[2],
824            ),
825            (
826                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBX,
827                self.cache.gps[3],
828            ),
829            (
830                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSP,
831                self.cache.gps[4],
832            ),
833            (
834                mshv_bindings::hv_register_name_HV_X64_REGISTER_RBP,
835                self.cache.gps[5],
836            ),
837            (
838                mshv_bindings::hv_register_name_HV_X64_REGISTER_RSI,
839                self.cache.gps[6],
840            ),
841            (
842                mshv_bindings::hv_register_name_HV_X64_REGISTER_RDI,
843                self.cache.gps[7],
844            ),
845            (
846                mshv_bindings::hv_register_name_HV_X64_REGISTER_R8,
847                self.cache.gps[8],
848            ),
849            (
850                mshv_bindings::hv_register_name_HV_X64_REGISTER_R9,
851                self.cache.gps[9],
852            ),
853            (
854                mshv_bindings::hv_register_name_HV_X64_REGISTER_R10,
855                self.cache.gps[10],
856            ),
857            (
858                mshv_bindings::hv_register_name_HV_X64_REGISTER_R11,
859                self.cache.gps[11],
860            ),
861            (
862                mshv_bindings::hv_register_name_HV_X64_REGISTER_R12,
863                self.cache.gps[12],
864            ),
865            (
866                mshv_bindings::hv_register_name_HV_X64_REGISTER_R13,
867                self.cache.gps[13],
868            ),
869            (
870                mshv_bindings::hv_register_name_HV_X64_REGISTER_R14,
871                self.cache.gps[14],
872            ),
873            (
874                mshv_bindings::hv_register_name_HV_X64_REGISTER_R15,
875                self.cache.gps[15],
876            ),
877        ];
878
879        set_registers_64!(self.processor.vcpufd, arr_reg_name_value).unwrap();
880    }
881
882    fn instruction_bytes(&self) -> &[u8] {
883        match HvMessageType(self.message.header.message_type) {
884            HvMessageType::HvMessageTypeGpaIntercept
885            | HvMessageType::HvMessageTypeUnmappedGpa
886            | HvMessageType::HvMessageTypeUnacceptedGpa => {
887                // SAFETY: We have checked the message type.
888                unsafe {
889                    let info = (&raw const self.message.u.payload)
890                        .cast::<hv_x64_memory_intercept_message>();
891                    let instruction_bytes = &raw const (*info).instruction_bytes;
892                    let instruction_byte_count =
893                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
894                    std::slice::from_raw_parts(
895                        instruction_bytes.cast(),
896                        instruction_byte_count as usize,
897                    )
898                }
899            }
900            HvMessageType::HvMessageTypeX64IoPortIntercept => {
901                // SAFETY: We have checked the message type.
902                unsafe {
903                    let info = (&raw const self.message.u.payload)
904                        .cast::<hv_x64_io_port_intercept_message>();
905                    let instruction_bytes = &raw const (*info).instruction_bytes;
906                    let instruction_byte_count =
907                        std::ptr::read_unaligned(&raw const (*info).instruction_byte_count);
908                    std::slice::from_raw_parts(
909                        instruction_bytes.cast(),
910                        instruction_byte_count as usize,
911                    )
912                }
913            }
914            _ => unreachable!(),
915        }
916    }
917
918    fn physical_address(&self) -> Option<u64> {
919        if self.message.header.message_type == HvMessageType::HvMessageTypeGpaIntercept.0
920            || self.message.header.message_type == HvMessageType::HvMessageTypeUnmappedGpa.0
921            || self.message.header.message_type == HvMessageType::HvMessageTypeUnacceptedGpa.0
922        {
923            let info = self.message.to_memory_info().unwrap();
924            Some(info.guest_physical_address)
925        } else {
926            None
927        }
928    }
929
930    fn initial_gva_translation(
931        &mut self,
932    ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
933        if (self.message.header.message_type != HvMessageType::HvMessageTypeGpaIntercept.0)
934            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnmappedGpa.0)
935            && (self.message.header.message_type != HvMessageType::HvMessageTypeUnacceptedGpa.0)
936        {
937            return None;
938        }
939
940        let message = self.message.to_memory_info().unwrap();
941
        // SAFETY: accessing the union as a raw uint8 is safe; only the bits
        // matter here, not which variant was written.
944        let memory_access_info =
945            unsafe { hvdef::HvX64MemoryAccessInfo::from(message.memory_access_info.as_uint8) };
946
947        if !memory_access_info.gva_gpa_valid() {
948            return None;
949        }
950
951        if let Ok(translate_mode) = TranslateMode::try_from(hvdef::HvInterceptAccessType(
952            message.header.intercept_access_type,
953        )) {
954            Some(virt_support_x86emu::emulate::InitialTranslation {
955                gva: message.guest_virtual_address,
956                gpa: message.guest_physical_address,
957                translate_mode,
958            })
959        } else {
960            None
961        }
962    }
963
964    fn interruption_pending(&self) -> bool {
965        self.interruption_pending
966    }
967
968    fn check_vtl_access(
969        &mut self,
970        _gpa: u64,
971        _mode: TranslateMode,
972    ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError> {
973        // TODO: No VTL2 supported so always return Ok.
974        Ok(())
975    }
976
977    fn translate_gva(
978        &mut self,
979        gva: u64,
980        mode: TranslateMode,
981    ) -> Result<EmuTranslateResult, EmuTranslateError> {
982        emulate_translate_gva(self, gva, mode)
983    }
984
985    fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
986        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same layout.
987        let reg = unsafe {
988            &[
989                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
990                    HvX64RegisterName::PendingEvent0,
991                    u128::from(event_info.reg_0),
992                ))),
993                std::mem::transmute::<HvRegisterAssoc, hv_register_assoc>(HvRegisterAssoc::from((
994                    HvX64RegisterName::PendingEvent1,
995                    u128::from(event_info.reg_1),
996                ))),
997            ]
998        };
999        self.processor.vcpufd.set_reg(reg).unwrap();
1000    }
1001
1002    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
1003        self.partition
1004            .memory
1005            .lock()
1006            .ranges
1007            .iter()
1008            .flatten()
1009            .any(|range| {
                // `guest_pfn` is a page number and `size` is in bytes, so
                // compare page frame numbers on both sides.
                (range.guest_pfn..range.guest_pfn + (range.size >> HV_PAGE_SHIFT))
                    .contains(&(gpa >> HV_PAGE_SHIFT))
1011                    && (!write
1012                        || range.flags & set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE)
1013                            == set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE))
1014            })
1015    }
1016
1017    fn lapic_base_address(&self) -> Option<u64> {
1018        None
1019    }
1020
1021    fn lapic_read(&mut self, _address: u64, _data: &mut [u8]) {
1022        unreachable!()
1023    }
1024
1025    fn lapic_write(&mut self, _address: u64, _data: &[u8]) {
1026        unreachable!()
1027    }
1028}
1029
1030impl TranslateGvaSupport for MshvEmulationState<'_> {
1031    fn guest_memory(&self) -> &GuestMemory {
1032        &self.partition.gm
1033    }
1034
1035    fn acquire_tlb_lock(&mut self) {
1036        // The hypervisor automatically acquires the TLB lock for exo partitions.
1037    }
1038
1039    fn registers(&mut self) -> TranslationRegisters {
1040        let mut reg = [
1041            HvX64RegisterName::Cr0,
1042            HvX64RegisterName::Cr4,
1043            HvX64RegisterName::Efer,
1044            HvX64RegisterName::Cr3,
1045            HvX64RegisterName::Rflags,
1046            HvX64RegisterName::Ss,
1047        ]
1048        .map(|n| HvRegisterAssoc::from((n, 0u64)));
1049
1050        // SAFETY: `HvRegisterAssoc` and `hv_register_assoc` have the same size.
1051        unsafe {
1052            self.processor
1053                .vcpufd
1054                .get_reg(std::mem::transmute::<
1055                    &mut [HvRegisterAssoc],
1056                    &mut [hv_register_assoc],
1057                >(&mut reg[..]))
1058                .unwrap();
1059        }
1060
1061        let [cr0, cr4, efer, cr3, rflags, ss] = reg.map(|v| v.value);
1062
1063        TranslationRegisters {
1064            cr0: cr0.as_u64(),
1065            cr4: cr4.as_u64(),
1066            efer: efer.as_u64(),
1067            cr3: cr3.as_u64(),
1068            rflags: rflags.as_u64(),
1069            ss: from_seg(ss.as_segment()),
1070            encryption_mode: virt_support_x86emu::translate::EncryptionMode::None,
1071        }
1072    }
1073}
1074
1075// TODO: Chunk this up into smaller types.
1076#[derive(Error, Debug)]
1077pub enum Error {
1078    #[error("operation not supported")]
1079    NotSupported,
1080    #[error("create_vm failed")]
1081    CreateVMFailed,
1082    #[error("failed to initialize VM")]
1083    CreateVMInitFailed(#[source] anyhow::Error),
1084    #[error("failed to create VCPU")]
1085    CreateVcpu(#[source] MshvError),
1086    #[error("vtl2 not supported")]
1087    Vtl2NotSupported,
1088    #[error("isolation not supported")]
1089    IsolationNotSupported,
1090    #[error("failed to stat /dev/mshv")]
1091    AvailableCheck(#[source] io::Error),
1092    #[error("failed to open /dev/mshv")]
1093    OpenMshv(#[source] MshvError),
1094    #[error("register access error")]
1095    Register(#[source] MshvError),
    #[error("install intercept failed")]
1097    InstallIntercept(#[source] MshvError),
1098    #[error("host does not support required cpu capabilities")]
1099    Capabilities(virt::PartitionCapabilitiesError),
1100}
1101
1102impl MshvPartitionInner {
1103    fn request_msi(&self, request: MsiRequest) {
1104        let (address, data) = request.as_x86();
1105        let control = request.hv_x86_interrupt_control();
1106        let mshv_req = InterruptRequest {
1107            interrupt_type: control.interrupt_type().0,
1108            apic_id: address.virt_destination().into(),
1109            vector: data.vector().into(),
1110            level_triggered: control.x86_level_triggered(),
1111            logical_destination_mode: control.x86_logical_destination_mode(),
1112            long_mode: false,
1113        };
1114
1115        if let Err(err) = self.vmfd.request_virtual_interrupt(&mshv_req) {
1116            tracelimit::warn_ratelimited!(
1117                address = request.address,
1118                data = request.data,
1119                error = &err as &dyn std::error::Error,
1120                "failed to request msi"
1121            );
1122        }
1123    }
1124}
1125
1126impl virt::irqcon::IoApicRouting for MshvPartitionInner {
1127    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
1128        self.irq_routes.set_irq_route(irq, request)
1129    }
1130
1131    fn assert_irq(&self, irq: u8) {
1132        self.irq_routes
1133            .assert_irq(irq, |request| self.request_msi(request))
1134    }
1135}
1136
1137#[derive(Debug, Default)]
1138struct MshvMemoryRangeState {
1139    ranges: Vec<Option<mshv_user_mem_region>>,
1140}
1141
1142impl virt::PartitionMemoryMapper for MshvPartition {
1143    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
1144        assert_eq!(vtl, Vtl::Vtl0);
1145        self.inner.clone()
1146    }
1147}
1148
1149// TODO: figure out a better abstraction that also works for KVM and WHP.
1150impl virt::PartitionMemoryMap for MshvPartitionInner {
1151    unsafe fn map_range(
1152        &self,
1153        data: *mut u8,
1154        size: usize,
1155        addr: u64,
1156        writable: bool,
1157        exec: bool,
1158    ) -> anyhow::Result<()> {
1159        let mut state = self.memory.lock();
1160
1161        // Memory slots cannot be resized but can be moved within the guest
1162        // address space. Find the existing slot if there is one.
1163        let mut slot_to_use = None;
1164        for (slot, range) in state.ranges.iter_mut().enumerate() {
1165            match range {
1166                Some(range) if range.userspace_addr == data as u64 => {
1167                    slot_to_use = Some(slot);
1168                    break;
1169                }
1170                Some(_) => (),
1171                None => slot_to_use = Some(slot),
1172            }
1173        }
1174        if slot_to_use.is_none() {
1175            slot_to_use = Some(state.ranges.len());
1176            state.ranges.push(None);
1177        }
1178        let slot_to_use = slot_to_use.unwrap();
1179
1180        let mut flags = 0;
1181        if writable {
1182            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE);
1183        }
1184        if exec {
1185            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_EXECUTABLE);
1186        }
1187        let mem_region = mshv_user_mem_region {
1188            size: size as u64,
1189            guest_pfn: addr >> HV_PAGE_SHIFT,
1190            userspace_addr: data as u64,
1191            flags,
1192            rsvd: [0; 7],
1193        };
1194
1195        self.vmfd.map_user_memory(mem_region)?;
1196        state.ranges[slot_to_use] = Some(mem_region);
1197        Ok(())
1198    }
1199
1200    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
1201        let mut state = self.memory.lock();
1202        let (slot, range) = state
1203            .ranges
1204            .iter_mut()
1205            .enumerate()
1206            .find(|(_, range)| {
1207                range.as_ref().map(|r| (r.guest_pfn, r.size)) == Some((addr >> HV_PAGE_SHIFT, size))
1208            })
1209            .expect("can only unmap existing ranges of exact size");
1210
1211        self.vmfd.unmap_user_memory(range.unwrap())?;
1212        state.ranges[slot] = None;
1213        Ok(())
1214    }
1215}
1216
1217// TODO: implementation
1218struct MshvDoorbellEntry;
1219
1220impl MshvDoorbellEntry {
1221    pub fn new(
1222        _guest_address: u64,
1223        _value: Option<u64>,
1224        _length: Option<u32>,
1225        _fd: &Event,
1226    ) -> io::Result<MshvDoorbellEntry> {
1227        // TODO: implementation
1228
1229        Ok(Self)
1230    }
1231}
1232
1233impl DoorbellRegistration for MshvPartition {
1234    fn register_doorbell(
1235        &self,
1236        guest_address: u64,
1237        value: Option<u64>,
1238        length: Option<u32>,
1239        fd: &Event,
1240    ) -> io::Result<Box<dyn Send + Sync>> {
1241        Ok(Box::new(MshvDoorbellEntry::new(
1242            guest_address,
1243            value,
1244            length,
1245            fd,
1246        )?))
1247    }
1248}
1249
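/// Register snapshot taken from a hypercall intercept message and handed to
/// the hypercall dispatcher. It covers the registers used by the x64 hypercall
/// calling convention, including XMM0-XMM5 for fast hypercalls.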
1250pub struct MshvHypercallContext {
1251    pub rax: u64,
1252    pub rbx: u64,
1253    pub rcx: u64,
1254    pub rdx: u64,
1255    pub r8: u64,
1256    pub rsi: u64,
1257    pub rdi: u64,
1258    pub xmm: [hv_u128; 6],
1259}
1260
1261impl<T> hv1_hypercall::X64RegisterState for MshvHypercallHandler<'_, T> {
1262    fn rip(&mut self) -> u64 {
1263        self.rip
1264    }
1265
1266    fn set_rip(&mut self, rip: u64) {
1267        self.rip = rip;
1268        self.rip_dirty = true;
1269    }
1270
1271    fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
1272        match n {
1273            hv1_hypercall::X64HypercallRegister::Rax => self.context.rax,
1274            hv1_hypercall::X64HypercallRegister::Rcx => self.context.rcx,
1275            hv1_hypercall::X64HypercallRegister::Rdx => self.context.rdx,
1276            hv1_hypercall::X64HypercallRegister::Rbx => self.context.rbx,
1277            hv1_hypercall::X64HypercallRegister::Rsi => self.context.rsi,
1278            hv1_hypercall::X64HypercallRegister::Rdi => self.context.rdi,
1279            hv1_hypercall::X64HypercallRegister::R8 => self.context.r8,
1280        }
1281    }
1282
1283    fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
1284        *match n {
1285            hv1_hypercall::X64HypercallRegister::Rax => &mut self.context.rax,
1286            hv1_hypercall::X64HypercallRegister::Rcx => &mut self.context.rcx,
1287            hv1_hypercall::X64HypercallRegister::Rdx => &mut self.context.rdx,
1288            hv1_hypercall::X64HypercallRegister::Rbx => &mut self.context.rbx,
1289            hv1_hypercall::X64HypercallRegister::Rsi => &mut self.context.rsi,
1290            hv1_hypercall::X64HypercallRegister::Rdi => &mut self.context.rdi,
1291            hv1_hypercall::X64HypercallRegister::R8 => &mut self.context.r8,
1292        } = value;
1293        self.gp_dirty = true;
1294    }
1295
1296    fn xmm(&mut self, n: usize) -> u128 {
1297        let r = &self.context.xmm[n];
1298        hvu128_to_u128(r)
1299    }
1300
1301    fn set_xmm(&mut self, n: usize, value: u128) {
1302        self.context.xmm[n] = u128_to_hvu128(value);
1303        self.xmm_dirty = true;
1304    }
1305}
1306
1307fn hvu128_to_u128(r: &hv_u128) -> u128 {
1308    (r.high_part as u128) << 64 | r.low_part as u128
1309}
1310
1311fn u128_to_hvu128(value: u128) -> hv_u128 {
1312    hv_u128 {
1313        high_part: (value >> 64) as u64,
1314        low_part: (value & (u64::MAX as u128)) as u64,
1315    }
1316}
1317
1318#[cfg(test)]
1319mod tests {
1320    use super::*;
1321
1322    #[test]
1323    fn u128_roundtrip() {
1324        let original = 0x0123_4567_89ab_cdef_fedc_ba98_7654_3210;
1325        let hv = u128_to_hvu128(original);
1326        let roundtrip = hvu128_to_u128(&hv);
1327        assert_eq!(roundtrip, original);
1328    }
1329}
1330
1331struct MshvHypercallHandler<'a, T> {
1332    bus: &'a T,
1333    context: &'a mut MshvHypercallContext,
1334    rip: u64,
1335    rip_dirty: bool,
1336    xmm_dirty: bool,
1337    gp_dirty: bool,
1338}
1339
1340impl<T: CpuIo> MshvHypercallHandler<'_, T> {
1341    const DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
1342        Self,
1343        [hv1_hypercall::HvPostMessage, hv1_hypercall::HvSignalEvent],
1344    );
1345}
1346
1347impl<T: CpuIo> hv1_hypercall::PostMessage for MshvHypercallHandler<'_, T> {
1348    fn post_message(&mut self, connection_id: u32, message: &[u8]) -> hvdef::HvResult<()> {
1349        self.bus
1350            .post_synic_message(Vtl::Vtl0, connection_id, false, message)
1351    }
1352}
1353
1354impl<T: CpuIo> hv1_hypercall::SignalEvent for MshvHypercallHandler<'_, T> {
1355    fn signal_event(&mut self, connection_id: u32, flag: u16) -> hvdef::HvResult<()> {
1356        self.bus.signal_synic_event(Vtl::Vtl0, connection_id, flag)
1357    }
1358}
1359
1360impl Inspect for MshvPartition {
1361    fn inspect(&self, req: inspect::Request<'_>) {
1362        // TODO: implementation
1363        req.respond();
1364    }
1365}
1366
1367impl InspectMut for MshvProcessor<'_> {
1368    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
1369        req.respond();
1370    }
1371}
1372
1373impl virt::Processor for MshvProcessor<'_> {
1374    type StateAccess<'a>
1375        = &'a mut Self
1376    where
1377        Self: 'a;
1378
1379    fn set_debug_state(
1380        &mut self,
1381        _vtl: Vtl,
1382        _state: Option<&virt::x86::DebugState>,
1383    ) -> Result<(), <&mut Self as virt::vp::AccessVpState>::Error> {
1384        Err(Error::NotSupported)
1385    }
1386
1387    async fn run_vp(
1388        &mut self,
1389        stop: StopVp<'_>,
1390        dev: &impl CpuIo,
1391    ) -> Result<Infallible, VpHaltReason> {
1392        let vpinner = self.inner;
1393        let _cleaner = MshvVpInnerCleaner { vpinner };
1394        let vcpufd = &vpinner.vcpufd;
1395
1396        // Ensure this thread is uniquely running the VP, and store the thread
1397        // ID to support cancellation.
1398        assert!(vpinner.thread.write().replace(Pthread::current()).is_none());
1399
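        // Run until cancelled. `request_yield` interrupts a blocked
        // vcpufd.run() by signaling this thread with SIGRTMIN, so EINTR (and
        // EAGAIN) from the ioctl are expected and simply retried.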
1400        loop {
1401            vpinner.needs_yield.maybe_yield().await;
1402            stop.check()?;
1403
1404            match vcpufd.run() {
1405                Ok(exit) => match HvMessageType(exit.header.message_type) {
1406                    HvMessageType::HvMessageTypeUnrecoverableException => {
1407                        return Err(VpHaltReason::TripleFault { vtl: Vtl::Vtl0 });
1408                    }
1409                    HvMessageType::HvMessageTypeX64IoPortIntercept => {
1410                        self.handle_io_port_intercept(&exit, dev).await?;
1411                    }
1412                    HvMessageType::HvMessageTypeUnmappedGpa
1413                    | HvMessageType::HvMessageTypeGpaIntercept => {
1414                        self.handle_mmio_intercept(&exit, dev).await?;
1415                    }
1416                    HvMessageType::HvMessageTypeSynicSintDeliverable => {
1417                        tracing::trace!("SYNIC_SINT_DELIVERABLE");
1418                        self.handle_synic_deliverable_exit(&exit, dev);
1419                    }
1420                    HvMessageType::HvMessageTypeHypercallIntercept => {
1421                        tracing::trace!("HYPERCALL_INTERCEPT");
1422                        self.handle_hypercall_intercept(&exit, dev);
1423                    }
1424                    exit => {
1425                        panic!("Unhandled vcpu exit code {exit:?}");
1426                    }
1427                },
1428
1429                Err(e) => match e.errno() {
1430                    libc::EAGAIN | libc::EINTR => {}
1431                    _ => tracing::error!(
1432                        error = &e as &dyn std::error::Error,
1433                        "vcpufd.run returned error"
1434                    ),
1435                },
1436            }
1437        }
1438    }
1439
1440    fn flush_async_requests(&mut self) {}
1441
1442    fn access_state(&mut self, vtl: Vtl) -> Self::StateAccess<'_> {
1443        assert_eq!(vtl, Vtl::Vtl0);
1444        self
1445    }
1446}
1447
1448fn x86emu_sreg_from_mshv_sreg(reg: mshv_bindings::SegmentRegister) -> SegmentRegister {
1449    let reg: hv_x64_segment_register = hv_x64_segment_register::from(reg);
1450    // SAFETY: This union only contains one field.
1451    let attributes: u16 = unsafe { reg.__bindgen_anon_1.attributes };
1452
1453    SegmentRegister {
1454        base: reg.base,
1455        limit: reg.limit,
1456        selector: reg.selector,
1457        attributes: attributes.into(),
1458    }
1459}
1460
1461fn from_seg(reg: hvdef::HvX64SegmentRegister) -> SegmentRegister {
1462    SegmentRegister {
1463        base: reg.base,
1464        limit: reg.limit,
1465        selector: reg.selector,
1466        attributes: reg.attributes.into(),
1467    }
1468}
1469
1470impl virt::Synic for MshvPartition {
1471    fn post_message(&self, _vtl: Vtl, vp: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
1472        self.inner
1473            .post_message(vp, sint, &HvMessage::new(HvMessageType(typ), 0, payload));
1474    }
1475
1476    fn new_guest_event_port(
1477        &self,
1478        _vtl: Vtl,
1479        vp: u32,
1480        sint: u8,
1481        flag: u16,
1482    ) -> Box<dyn GuestEventPort> {
1483        Box::new(MshvGuestEventPort {
1484            partition: Arc::downgrade(&self.inner),
1485            params: Arc::new(Mutex::new(MshvEventPortParams {
1486                vp: VpIndex::new(vp),
1487                sint,
1488                flag,
1489            })),
1490        })
1491    }
1492
1493    fn prefer_os_events(&self) -> bool {
1494        false
1495    }
1496}
1497
1498/// `GuestEventPort` implementation for MSHV partitions.
1499#[derive(Debug, Clone)]
1500struct MshvGuestEventPort {
1501    partition: Weak<MshvPartitionInner>,
1502    params: Arc<Mutex<MshvEventPortParams>>,
1503}
1504
1505#[derive(Debug, Copy, Clone)]
1506struct MshvEventPortParams {
1507    vp: VpIndex,
1508    sint: u8,
1509    flag: u16,
1510}
1511
1512impl GuestEventPort for MshvGuestEventPort {
1513    fn interrupt(&self) -> Interrupt {
1514        let partition = self.partition.clone();
1515        let params = self.params.clone();
1516        Interrupt::from_fn(move || {
1517            let MshvEventPortParams { vp, sint, flag } = *params.lock();
1518            if let Some(partition) = partition.upgrade() {
1519                partition
1520                    .vmfd
1521                    .signal_event_direct(vp.index(), sint, flag)
1522                    .unwrap_or_else(|_| {
1523                        panic!(
                            "failed to signal synic sint {} on vp {:?} with flag {}",
1525                            sint, vp, flag
1526                        )
1527                    });
1528            }
1529        })
1530    }
1531
1532    fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
1533        self.params.lock().vp = VpIndex::new(vp);
1534        Ok(())
1535    }
1536}