1#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(unsafe_code)]
9
10#[cfg(guest_arch = "aarch64")]
11mod aarch64;
12#[cfg(guest_arch = "x86_64")]
13mod x86_64;
14
15#[cfg(guest_arch = "aarch64")]
16use aarch64 as arch;
17#[cfg(guest_arch = "x86_64")]
18use x86_64 as arch;
19
20#[cfg(guest_arch = "x86_64")]
23pub mod irqfd;
24
25use guestmem::DoorbellRegistration;
26use guestmem::GuestMemory;
27use hv1_emulator::message_queues::MessageQueues;
28use hvdef::HV_PAGE_SHIFT;
29use hvdef::HvDeliverabilityNotificationsRegister;
30use hvdef::HvError;
31use hvdef::HvMessage;
32use hvdef::HvMessageType;
33use hvdef::HvPartitionPropertyCode;
34use hvdef::Vtl;
35use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
36use hvdef::hypercall::HvRegisterAssoc;
37use inspect::Inspect;
38use inspect::InspectMut;
39use mshv_bindings::MSHV_SET_MEM_BIT_EXECUTABLE;
40use mshv_bindings::MSHV_SET_MEM_BIT_WRITABLE;
41use mshv_bindings::mshv_install_intercept;
42use mshv_bindings::mshv_user_mem_region;
43use mshv_ioctls::Mshv;
44use mshv_ioctls::MshvError;
45use mshv_ioctls::VcpuFd;
46use mshv_ioctls::VmFd;
47use mshv_ioctls::set_bits;
48use pal::unix::pthread::*;
49use pal_event::Event;
50use parking_lot::Mutex;
51use parking_lot::RwLock;
52use std::convert::Infallible;
53use std::future::poll_fn;
54use std::io;
55use std::os::fd::AsFd;
56use std::os::fd::AsRawFd;
57use std::os::fd::IntoRawFd as _;
58use std::sync::Arc;
59use std::sync::Once;
60use std::sync::Weak;
61use std::sync::atomic::AtomicBool;
62use std::sync::atomic::Ordering;
63use std::task::Waker;
64use thiserror::Error;
65use virt::NeedsYield;
66use virt::PartitionAccessState;
67use virt::ProtoPartitionConfig;
68use virt::StopVp;
69use virt::VpHaltReason;
70use virt::VpIndex;
71use virt::io::CpuIo;
72use vmcore::interrupt::Interrupt;
73use vmcore::reference_time::GetReferenceTime;
74use vmcore::reference_time::ReferenceTimeResult;
75use vmcore::synic::GuestEventPort;
76
/// Extension trait for [`VcpuFd`] that exposes register access in terms of the
/// `hvdef` register types rather than the raw `mshv_bindings` types.
trait VcpuFdExt {
    /// Reads the registers named in `regs`, filling in their values in place.
    fn get_hvdef_regs(&self, regs: &mut [HvRegisterAssoc]) -> Result<(), KernelError>;
    /// Writes the register name/value pairs in `regs` to the VP.
    fn set_hvdef_regs(&self, regs: &[HvRegisterAssoc]) -> Result<(), KernelError>;
}
82
impl VcpuFdExt for VcpuFd {
    fn get_hvdef_regs(&self, regs: &mut [HvRegisterAssoc]) -> Result<(), KernelError> {
        use mshv_bindings::hv_register_assoc;
        // Compile-time layout check backing the transmute below: identical
        // size, and the source type's alignment is at least the target's.
        const {
            assert!(size_of::<HvRegisterAssoc>() == size_of::<hv_register_assoc>());
            assert!(align_of::<HvRegisterAssoc>() >= align_of::<hv_register_assoc>());
        }
        // SAFETY: the const assertions above guarantee the two slice element
        // types have compatible size and alignment; both describe the same
        // hypervisor register-assoc wire format — TODO(review) confirm field
        // layout equivalence is kept in sync with the mshv_bindings crate.
        self.get_reg(unsafe {
            std::mem::transmute::<&mut [HvRegisterAssoc], &mut [hv_register_assoc]>(regs)
        })?;
        Ok(())
    }

    fn set_hvdef_regs(&self, regs: &[HvRegisterAssoc]) -> Result<(), KernelError> {
        use mshv_bindings::hv_register_assoc;
        // Same layout check as in `get_hvdef_regs`.
        const {
            assert!(size_of::<HvRegisterAssoc>() == size_of::<hv_register_assoc>());
            assert!(align_of::<HvRegisterAssoc>() >= align_of::<hv_register_assoc>());
        }
        // SAFETY: size and alignment compatibility proven by the const
        // assertions above; the slice is only read by the ioctl wrapper.
        self.set_reg(unsafe {
            std::mem::transmute::<&[HvRegisterAssoc], &[hv_register_assoc]>(regs)
        })?;
        Ok(())
    }
}
110
/// Handle to the `/dev/mshv` hypervisor driver, used to create partitions.
#[derive(Debug)]
pub struct LinuxMshv {
    // Wrapped mshv_ioctls device handle; owns the underlying file descriptor.
    mshv: Mshv,
}
116
117impl LinuxMshv {
118 pub fn new() -> io::Result<Self> {
120 let file = fs_err::File::open("/dev/mshv")?;
121 Ok(Self::from(std::fs::File::from(file)))
122 }
123}
124
impl From<std::fs::File> for LinuxMshv {
    /// Wraps an already-open `/dev/mshv` file descriptor.
    fn from(file: std::fs::File) -> Self {
        LinuxMshv {
            // SAFETY: `into_raw_fd` relinquishes ownership of the descriptor,
            // so `Mshv` becomes its sole owner and it stays valid for the
            // lifetime of the `Mshv` handle.
            mshv: unsafe { Mshv::new_with_fd_number(file.into_raw_fd()) },
        }
    }
}
134
impl<'a> MshvProtoPartition<'a> {
    /// Builds the proto-partition: per-VP bookkeeping, the BSP vcpu fd, and
    /// the hypercall/synic/interrupt intercepts the userspace VMM must handle.
    fn new(config: ProtoPartitionConfig<'a>, vmfd: VmFd) -> Result<Self, Error> {
        // VP indices must fit in a u8 for this backend.
        if config.processor_topology.vp_count() > u8::MAX as u32 {
            return Err(ErrorInner::TooManyVps(config.processor_topology.vp_count()).into());
        }

        // Per-VP state; the kernel vcpu fds (other than the BSP's) are
        // created later, when each VP is bound.
        let vps = config
            .processor_topology
            .vps_arch()
            .map(|vp| MshvVpInner {
                vp_info: vp,
                thread: RwLock::new(None),
                needs_yield: NeedsYield::new(),
                message_queues: MessageQueues::new(),
                message_queues_pending: AtomicBool::new(false),
                waker: RwLock::new(None),
            })
            .collect();

        // Create the boot processor (VP 0) eagerly.
        let bsp = vmfd
            .create_vcpu(0)
            .map_err(|e| ErrorInner::CreateVcpu(e.into()))?;

        // Route guest hypercalls to userspace as intercepts.
        vmfd.install_intercept(mshv_install_intercept {
            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
            intercept_type: hvdef::hypercall::HvInterceptType::HvInterceptTypeHypercall.0,
            intercept_parameter: Default::default(),
        })
        .map_err(|e| ErrorInner::InstallIntercept(e.into()))?;

        // Forward synic connections the hypervisor doesn't recognize, so the
        // VMM-emulated synic ports can handle them.
        vmfd.install_intercept(mshv_install_intercept {
            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
            intercept_type:
                hvdef::hypercall::HvInterceptType::HvInterceptTypeUnknownSynicConnection.0,
            intercept_parameter: Default::default(),
        })
        .map_err(|e| ErrorInner::InstallIntercept(e.into()))?;

        // Forward interrupt retargeting for device IDs the hypervisor doesn't know.
        vmfd.install_intercept(mshv_install_intercept {
            access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE,
            intercept_type:
                hvdef::hypercall::HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId.0,
            intercept_parameter: Default::default(),
        })
        .map_err(|e| ErrorInner::InstallIntercept(e.into()))?;

        // Install a process-wide no-op SIGRTMIN handler exactly once.
        // Presumably this lets SIGRTMIN interrupt a blocking vcpu run ioctl
        // (surfacing as EINTR, which the run loop tolerates) without the
        // default signal disposition terminating the process — TODO confirm
        // against the code that signals VP threads.
        static SIGNAL_HANDLER_INIT: Once = Once::new();
        SIGNAL_HANDLER_INIT.call_once(|| unsafe {
            signal_hook::low_level::register(libc::SIGRTMIN(), || {
            })
            .unwrap();
        });

        // This backend only supports a single VTL.
        if let Some(hv_config) = &config.hv_config {
            if hv_config.vtl2.is_some() {
                return Err(ErrorInner::Vtl2NotSupported.into());
            }
        }

        Ok(MshvProtoPartition {
            config,
            vmfd,
            vps,
            bsp,
        })
    }
}
212
213pub fn is_available() -> Result<bool, Error> {
215 match std::fs::metadata("/dev/mshv") {
216 Ok(_) => Ok(true),
217 Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false),
218 Err(err) => Err(ErrorInner::AvailableCheck(err).into()),
219 }
220}
221
/// Intermediate partition state produced by [`MshvProtoPartition::new`],
/// before the full [`MshvPartition`] is assembled.
pub struct MshvProtoPartition<'a> {
    // Configuration borrowed from the caller for the duration of setup.
    config: ProtoPartitionConfig<'a>,
    // Kernel VM file descriptor.
    vmfd: VmFd,
    // Per-VP bookkeeping, indexed by VP index.
    vps: Vec<MshvVpInner>,
    // Vcpu fd for the boot processor (VP 0).
    bsp: VcpuFd,
}
229
/// A fully constructed mshv-backed partition.
#[derive(Inspect)]
pub struct MshvPartition {
    #[inspect(flatten)]
    inner: Arc<MshvPartitionInner>,
    // Synic port table shared with the VP runners.
    #[inspect(skip)]
    synic_ports: Arc<virt::synic::SynicPorts<MshvPartitionInner>>,
}
238
/// Shared partition state, reference-counted between the partition handle,
/// VP binders, doorbells, and event ports.
#[derive(Inspect)]
struct MshvPartitionInner {
    #[inspect(skip)]
    vmfd: VmFd,
    // Vcpu fd for VP 0, kept for partition-wide operations.
    #[inspect(skip)]
    bsp_vcpufd: VcpuFd,
    // Tracked user-memory regions currently mapped into the partition.
    #[inspect(skip)]
    memory: Mutex<MshvMemoryRangeState>,
    gm: GuestMemory,
    mem_layout: vm_topology::memory::MemoryLayout,
    #[inspect(skip)]
    vps: Vec<MshvVpInner>,
    #[cfg(guest_arch = "x86_64")]
    irq_routes: virt::irqcon::IrqRoutes,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_states: Mutex<Box<[irqfd::GsiState; irqfd::NUM_GSIS]>>,
    caps: virt::PartitionCapabilities,
    synic_ports: virt::synic::SynicPortMap,
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
    #[cfg(guest_arch = "x86_64")]
    software_devices: virt::x86::apic_software_device::ApicSoftwareDevices,
    // Whether partition reference time is currently frozen; guarded by the
    // mutex so freeze/thaw transitions are serialized.
    time_frozen: Mutex<bool>,
}
268
/// Per-virtual-processor state shared between the partition and the VP runner.
struct MshvVpInner {
    vp_info: vm_topology::processor::TargetVpInfo,
    // The pthread currently running this VP, if any; cleared on run exit.
    thread: RwLock<Option<Pthread>>,
    needs_yield: NeedsYield,
    // Queued synic messages awaiting delivery to the guest.
    message_queues: MessageQueues,
    // Set when new messages were enqueued and the run loop should flush them.
    message_queues_pending: AtomicBool,
    // Waker used to nudge the VP's async run loop.
    waker: RwLock<Option<Waker>>,
}
281
/// Drop guard that clears a VP's registered thread when its run loop exits.
struct MshvVpInnerCleaner<'a> {
    vpinner: &'a MshvVpInner,
}
285
286impl Drop for MshvVpInnerCleaner<'_> {
287 fn drop(&mut self) {
288 self.vpinner.thread.write().take();
289 }
290}
291
292impl GetReferenceTime for MshvPartitionInner {
293 fn now(&self) -> ReferenceTimeResult {
294 let ref_time = self
297 .vmfd
298 .get_partition_property(HvPartitionPropertyCode::ReferenceTime.0)
299 .unwrap();
300 ReferenceTimeResult {
301 ref_time,
302 system_time: None,
303 }
304 }
305}
306
impl MshvPartitionInner {
    /// Returns the per-VP state for `vp_index`.
    ///
    /// Panics if the index is out of range.
    fn vp(&self, vp_index: VpIndex) -> &MshvVpInner {
        &self.vps[vp_index.index() as usize]
    }

    /// Freezes partition reference time if it is not already frozen.
    /// Idempotent; the `time_frozen` mutex serializes transitions.
    fn freeze_time(&self) -> Result<(), Error> {
        let mut frozen = self.time_frozen.lock();
        if !*frozen {
            self.vmfd
                .set_partition_property(HvPartitionPropertyCode::TimeFreeze.0, 1)
                .map_err(|e| ErrorInner::SetPartitionProperty(e.into()))?;
            *frozen = true;
        }
        Ok(())
    }

    /// Thaws partition reference time if it is currently frozen. Idempotent.
    fn thaw_time(&self) -> Result<(), Error> {
        let mut frozen = self.time_frozen.lock();
        if *frozen {
            self.vmfd
                .set_partition_property(HvPartitionPropertyCode::TimeFreeze.0, 0)
                .map_err(|e| ErrorInner::SetPartitionProperty(e.into()))?;
            *frozen = false;
        }
        Ok(())
    }

    /// Enqueues a synic message for `vp_index` and wakes the VP's run loop if
    /// this transition made the queue newly pending.
    fn post_message(&self, vp_index: VpIndex, sint: u8, message: &HvMessage) {
        let vp = self.vp(vp_index);
        let wake = vp.message_queues.enqueue_message(sint, message);
        // Only wake once per pending transition: swap returns the previous
        // value, so a VP already flagged pending is not woken again.
        if wake && !vp.message_queues_pending.swap(true, Ordering::Release) {
            if let Some(waker) = &*vp.waker.read() {
                waker.wake_by_ref();
            }
        }
    }

    /// Posts a message directly to the guest's SINT via the
    /// HvCallPostMessageDirect hypercall (issued through the root-partition
    /// hvcall ioctl).
    fn post_message_direct(&self, vp: u32, sint: u8, message: &HvMessage) -> Result<(), MshvError> {
        use mshv_bindings::mshv_root_hvcall;

        let post_message = hvdef::hypercall::PostMessageDirect {
            partition_id: 0,
            vp_index: vp,
            vtl: Vtl::Vtl0 as u8,
            padding0: [0; 3],
            sint,
            padding1: [0; 3],
            message: zerocopy::Unalign::new(*message),
            padding2: 0,
        };

        // `in_ptr` points at the stack-local `post_message`, which outlives
        // the synchronous hvcall ioctl below.
        let mut args = mshv_root_hvcall {
            code: hvdef::HypercallCode::HvCallPostMessageDirect.0,
            in_sz: size_of::<hvdef::hypercall::PostMessageDirect>() as u16,
            in_ptr: std::ptr::addr_of!(post_message) as u64,
            ..Default::default()
        };
        self.vmfd.hvcall(&mut args)
    }

    /// Signals a synic event flag directly via HvCallSignalEventDirect.
    fn signal_event_direct(&self, vp: u32, sint: u8, flag: u16) -> Result<(), MshvError> {
        use mshv_bindings::mshv_root_hvcall;
        use zerocopy::FromZeros;

        let input = hvdef::hypercall::SignalEventDirect {
            target_partition: 0,
            target_vp: vp,
            target_vtl: 0,
            target_sint: sint,
            flag_number: flag,
        };
        // Output buffer for the hypercall; currently ignored on success.
        let mut output = hvdef::hypercall::SignalEventDirectOutput::new_zeroed();

        // Both pointers reference stack locals that live across the
        // synchronous ioctl call.
        let mut args = mshv_root_hvcall {
            code: hvdef::HypercallCode::HvCallSignalEventDirect.0,
            in_sz: size_of::<hvdef::hypercall::SignalEventDirect>() as u16,
            out_sz: size_of::<hvdef::hypercall::SignalEventDirectOutput>() as u16,
            in_ptr: std::ptr::addr_of!(input) as u64,
            out_ptr: std::ptr::addr_of_mut!(output) as u64,
            ..Default::default()
        };
        self.vmfd.hvcall(&mut args)
    }
}
408
/// Binds a VP to the thread that will run it; holds the vcpu fd until bound.
pub struct MshvProcessorBinder {
    partition: Arc<MshvPartitionInner>,
    // Taken when the processor is bound; `None` afterwards.
    vcpufd: Option<VcpuFd>,
    vpindex: VpIndex,
}
415
/// Wraps a vcpu fd plus (on x86-64) a raw pointer to its mapped register page.
struct MshvVpRunner<'a> {
    vcpufd: &'a VcpuFd,
    // Raw pointer to the kernel-shared register page; dereferenced in
    // `reg_page()`. Validity is established wherever this struct is built.
    #[cfg(guest_arch = "x86_64")]
    reg_page: *mut hvdef::HvX64RegisterPage,
}
423
impl MshvVpRunner<'_> {
    /// Runs the VP until the next intercept, returning the exit message.
    fn run(&mut self) -> Result<HvMessage, MshvError> {
        self.vcpufd.run().map(|msg| {
            // SAFETY: by-value transmute; the compiler enforces that the two
            // types have identical size. Both represent the hypervisor
            // message format — TODO(review) confirm layout equivalence is
            // kept in sync with mshv_bindings.
            unsafe { std::mem::transmute::<mshv_bindings::hv_message, HvMessage>(msg) }
        })
    }

    /// Returns a mutable reference to the VP's register page.
    #[cfg(guest_arch = "x86_64")]
    fn reg_page(&mut self) -> &mut hvdef::HvX64RegisterPage {
        // SAFETY: assumes `self.reg_page` points to a valid, mapped register
        // page for the lifetime of the runner, and that this is the only
        // live reference — established where the runner is constructed
        // (outside this chunk); verify against that code.
        unsafe { &mut *self.reg_page }
    }
}
442
/// A bound virtual processor, owned by the thread running it.
#[derive(InspectMut)]
pub struct MshvProcessor<'a> {
    #[inspect(skip)]
    partition: &'a MshvPartitionInner,
    #[inspect(skip)]
    inner: &'a MshvVpInner,
    #[inspect(skip)]
    vpindex: VpIndex,
    #[inspect(skip)]
    runner: MshvVpRunner<'a>,
    // Cached copy of the deliverability-notifications register last written
    // to the hypervisor, to avoid redundant updates.
    #[inspect(skip)]
    deliverability_notifications: HvDeliverabilityNotificationsRegister,
}
459
impl MshvProcessor<'_> {
    /// Attempts to deliver queued synic messages on the given deliverable
    /// SINTs, then requests deliverability notifications for any SINTs that
    /// still have messages pending.
    fn flush_messages(&mut self, deliverable_sints: u16) {
        // Returns the set of SINTs that still have undelivered messages.
        let nonempty_sints =
            self.inner
                .message_queues
                .post_pending_messages(deliverable_sints, |sint, message| {
                    match self
                        .partition
                        .post_message_direct(self.vpindex.index(), sint, message)
                    {
                        Ok(()) => {
                            tracing::trace!(sint, "sint message posted successfully");
                            Ok(())
                        }
                        Err(e) => {
                            // Delivery failed (e.g. slot busy); leave the
                            // message queued and report the SINT as full.
                            tracelimit::warn_ratelimited!(
                                error = &e as &dyn std::error::Error,
                                "dropping sint message"
                            );
                            Err(HvError::ObjectInUse)
                        }
                    }
                });

        // Only touch the hypervisor register when the requested set changes.
        // (The misspelled method name is the external crate's actual API.)
        if self.deliverability_notifications.sints() != nonempty_sints {
            let notifications = self.deliverability_notifications.with_sints(nonempty_sints);
            tracing::trace!(?notifications, "setting deliverability notifications");
            self.partition
                .vmfd
                .register_deliverabilty_notifications(
                    self.vpindex.index(),
                    u64::from(notifications),
                )
                .expect("requesting deliverability is not a fallible operation");
            self.deliverability_notifications = notifications;
        }
    }

    /// Handles a SINT-deliverable notification from the hypervisor: the
    /// listed SINTs are now writable, so retry delivery for them.
    fn handle_sint_deliverable(&mut self, deliverable_sints: u16) {
        // These SINTs are no longer awaiting notification.
        self.deliverability_notifications
            .set_sints(self.deliverability_notifications.sints() & !deliverable_sints);

        self.flush_messages(deliverable_sints);
    }

    /// Clears all queued synic messages and notification state, e.g. on VP reset.
    fn reset_synic_state(&mut self) {
        self.inner.message_queues.clear();
        self.inner
            .message_queues_pending
            .store(false, Ordering::Relaxed);
        self.deliverability_notifications = HvDeliverabilityNotificationsRegister::new();
    }
}
521
impl virt::Processor for MshvProcessor<'_> {
    type StateAccess<'a>
        = &'a mut Self
    where
        Self: 'a;

    /// Hardware debug state is not supported by this backend.
    fn set_debug_state(
        &mut self,
        _vtl: Vtl,
        _state: Option<&virt::x86::DebugState>,
    ) -> Result<(), <&mut Self as virt::vp::AccessVpState>::Error> {
        Err(ErrorInner::NotSupported.into())
    }

    /// Runs the VP in a loop until `stop` is signaled, dispatching exits to
    /// `dev`. Returns only via error/halt (`Infallible` success type).
    async fn run_vp(
        &mut self,
        stop: StopVp<'_>,
        dev: &impl CpuIo,
    ) -> Result<Infallible, VpHaltReason> {
        let vpinner = self.inner;
        // Ensure the thread registration is cleared however this returns.
        let _cleaner = MshvVpInnerCleaner { vpinner };

        // Register this thread as the VP's runner; only one runner may exist.
        assert!(vpinner.thread.write().replace(Pthread::current()).is_none());

        self.partition
            .thaw_time()
            .expect("failed to thaw partition time");

        // Messages may have been queued before the run loop started; make
        // sure the first iteration flushes them.
        if vpinner.message_queues.pending_sints() != 0 {
            vpinner
                .message_queues_pending
                .store(true, Ordering::Relaxed);
        }

        let mut last_waker: Option<Waker> = None;

        loop {
            vpinner.needs_yield.maybe_yield().await;
            stop.check()?;

            // Publish the current task's waker so other threads can nudge
            // this loop; only re-store it when it actually changed.
            poll_fn(|cx| {
                if !last_waker.as_ref().is_some_and(|w| cx.waker().will_wake(w)) {
                    last_waker = Some(cx.waker().clone());
                    *vpinner.waker.write() = last_waker.clone();
                }
                std::task::Poll::Ready(())
            })
            .await;

            // Flush any synic messages enqueued since the last iteration.
            if vpinner.message_queues_pending.load(Ordering::Relaxed) {
                vpinner
                    .message_queues_pending
                    .store(false, Ordering::SeqCst);
                let pending_sints = vpinner.message_queues.pending_sints();
                if pending_sints != 0 {
                    self.flush_messages(pending_sints);
                }
            }

            match self.runner.run() {
                Ok(exit) => {
                    self.handle_exit(&exit, dev).await?;
                }
                // EAGAIN/EINTR are benign (signal or immediate-return); loop.
                Err(e) => match e.errno() {
                    libc::EAGAIN | libc::EINTR => {}
                    _ => tracing::error!(
                        error = &e as &dyn std::error::Error,
                        "vcpufd.run returned error"
                    ),
                },
            }
        }
    }

    /// No asynchronous requests are buffered by this backend.
    fn flush_async_requests(&mut self) {}

    fn access_state(&mut self, vtl: Vtl) -> Self::StateAccess<'_> {
        // Only VTL0 exists in this backend.
        assert_eq!(vtl, Vtl::Vtl0);
        self
    }

    /// Resets the VP's architectural state and local synic bookkeeping.
    fn reset(&mut self) -> Result<(), impl std::error::Error + Send + Sync + 'static> {
        use virt::vp::AccessVpState;

        let vp_info = self.inner.vp_info;
        self.access_state(Vtl::Vtl0)
            .reset_all(&vp_info)
            .map_err(|e| ErrorInner::ResetState(Box::new(e)))?;

        self.reset_synic_state();

        Ok::<(), Error>(())
    }
}
620
impl hv1_hypercall::PostMessage for arch::MshvHypercallHandler<'_> {
    /// Routes a guest HvPostMessage hypercall to the VMM-emulated synic ports.
    fn post_message(&mut self, connection_id: u32, message: &[u8]) -> hvdef::HvResult<()> {
        self.partition
            .synic_ports
            .handle_post_message(Vtl::Vtl0, connection_id, false, message)
    }
}
628
impl hv1_hypercall::SignalEvent for arch::MshvHypercallHandler<'_> {
    /// Routes a guest HvSignalEvent hypercall to the VMM-emulated synic ports.
    fn signal_event(&mut self, connection_id: u32, flag: u16) -> hvdef::HvResult<()> {
        self.partition
            .synic_ports
            .handle_signal_event(Vtl::Vtl0, connection_id, flag)
    }
}
636
/// Opaque public error type wrapping the crate-private [`ErrorInner`].
#[derive(Error, Debug)]
#[error(transparent)]
pub struct Error(ErrorInner);
641
// Blanket conversion so `?` can lift any `ErrorInner`-convertible error into
// the public `Error` type.
impl<T: Into<ErrorInner>> From<T> for Error {
    fn from(err: T) -> Self {
        Error(err.into())
    }
}
647
/// Detailed failure causes for this backend; each variant's `#[error]`
/// message is the user-facing description.
#[derive(Error, Debug)]
enum ErrorInner {
    #[error("operation not supported")]
    NotSupported,
    #[error("create_vm failed")]
    CreateVMFailed,
    #[error("failed to initialize VM")]
    CreateVMInitFailed(#[source] anyhow::Error),
    #[error("failed to create VCPU")]
    CreateVcpu(#[source] KernelError),
    #[error("vtl2 not supported")]
    Vtl2NotSupported,
    #[error("isolation not supported")]
    IsolationNotSupported,
    #[error("failed to stat /dev/mshv")]
    AvailableCheck(#[source] io::Error),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to get partition property")]
    GetPartitionProperty(#[source] KernelError),
    #[error("failed to set partition property")]
    SetPartitionProperty(#[source] KernelError),
    #[error("register access error")]
    Register(#[source] KernelError),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to get VP state {ty}")]
    GetVpState {
        #[source]
        error: KernelError,
        ty: u8,
    },
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to set VP state {ty}")]
    SetVpState {
        #[source]
        error: KernelError,
        ty: u8,
    },
    #[error("failed to reset state")]
    ResetState(#[source] Box<virt::state::StateError<Error>>),
    #[error("install intercept failed")]
    InstallIntercept(#[source] KernelError),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to register cpuid override")]
    RegisterCpuid(#[source] KernelError),
    #[cfg(guest_arch = "x86_64")]
    #[error("host does not support required cpu capabilities")]
    Capabilities(#[source] virt::PartitionCapabilitiesError),
    #[error("too many virtual processors: {0}")]
    TooManyVps(u32),
    #[cfg(guest_arch = "x86_64")]
    #[error("unsupported processor vendor: {0:?}")]
    UnsupportedProcessorVendor(hvdef::HvProcessorVendor),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to create virtual device")]
    NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
}
705
/// A failure from the kernel driver: either a plain errno from an ioctl, or a
/// hypervisor status code surfaced through the hvcall interface.
#[derive(Error, Debug)]
enum KernelError {
    #[error("kernel error")]
    Kernel(#[source] io::Error),
    #[error("hypercall {code:#x?} error")]
    Hypercall {
        code: hvdef::HypercallCode,
        #[source]
        error: HvError,
    },
}
718
impl From<MshvError> for KernelError {
    fn from(err: MshvError) -> Self {
        match err {
            MshvError::Errno(e) => KernelError::Kernel(e.into()),
            MshvError::Hypercall {
                code,
                status_raw,
                status: _,
            } => KernelError::Hypercall {
                code: hvdef::HypercallCode(code),
                // A hypercall error variant with status 0 (success) would be
                // a bug in the caller, hence the expect.
                error: HvError::from(
                    std::num::NonZeroU16::new(status_raw)
                        .expect("not an error, hypercall returned success"),
                ),
            },
        }
    }
}
737
738fn create_vm_with_retry(
740 mshv: &Mshv,
741 args: &mshv_bindings::mshv_create_partition_v2,
742) -> Result<VmFd, Error> {
743 loop {
744 match mshv.create_vm_with_args(args) {
745 Ok(fd) => return Ok(fd),
746 Err(e) => {
747 if e.errno() == libc::EINTR {
748 continue;
749 } else {
750 return Err(ErrorInner::CreateVMFailed.into());
751 }
752 }
753 }
754 }
755}
756
757fn common_synthetic_features() -> hvdef::HvPartitionSyntheticProcessorFeatures {
761 hvdef::HvPartitionSyntheticProcessorFeatures::new()
762 .with_hypervisor_present(true)
763 .with_hv1(true)
764 .with_access_vp_run_time_reg(true)
765 .with_access_partition_reference_counter(true)
766 .with_access_synic_regs(true)
767 .with_access_synthetic_timer_regs(true)
768 .with_access_intr_ctrl_regs(true)
769 .with_access_hypercall_regs(true)
770 .with_access_vp_index(true)
771 .with_fast_hypercall_output(true)
772 .with_direct_synthetic_timers(true)
773 .with_extended_processor_masks(true)
774 .with_tb_flush_hypercalls(true)
775 .with_synthetic_cluster_ipi(true)
776 .with_notify_long_spin_wait(true)
777 .with_query_numa_distance(true)
778 .with_signal_events(true)
779 .with_retarget_device_interrupt(true)
780}
781
impl PartitionAccessState for MshvPartition {
    type StateAccess<'a> = &'a MshvPartition;

    fn access_state(&self, vtl: Vtl) -> Self::StateAccess<'_> {
        // Only VTL0 exists in this backend.
        assert_eq!(vtl, Vtl::Vtl0);
        self
    }
}
790
/// Slot table of user-memory regions mapped into the partition; `None`
/// entries are free slots available for reuse.
#[derive(Debug, Default)]
struct MshvMemoryRangeState {
    ranges: Vec<Option<mshv_user_mem_region>>,
}
795
impl virt::PartitionMemoryMapper for MshvPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
        // Only VTL0 exists in this backend.
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}
802
impl virt::PartitionMemoryMap for MshvPartitionInner {
    /// Maps host memory at `data..data+size` into the guest at `addr`.
    ///
    /// SAFETY contract (per the trait): the caller guarantees the host range
    /// is valid mapped memory that remains valid while it is mapped into the
    /// partition.
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        exec: bool,
    ) -> anyhow::Result<()> {
        let mut state = self.memory.lock();

        // Prefer the slot already holding this host address (remap case);
        // otherwise reuse any free slot, or append a new one.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.userspace_addr == data as u64 => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();

        let mut flags = 0;
        if writable {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_WRITABLE);
        }
        if exec {
            flags |= set_bits!(u8, MSHV_SET_MEM_BIT_EXECUTABLE);
        }
        let mem_region = mshv_user_mem_region {
            size: size as u64,
            guest_pfn: addr >> HV_PAGE_SHIFT,
            userspace_addr: data as u64,
            flags,
            rsvd: [0; 7],
        };

        let _span = tracing::info_span!(
            "mshv map user memory",
            guest_pfn = mem_region.guest_pfn,
            size = mem_region.size,
            writable,
            exec,
        )
        .entered();
        self.vmfd.map_user_memory(mem_region)?;
        // Record the mapping only after the kernel accepted it.
        state.ranges[slot_to_use] = Some(mem_region);
        Ok(())
    }

    /// Unmaps every tracked region fully contained in `[addr, addr+size)`.
    ///
    /// Panics if the range partially overlaps a mapped region, since partial
    /// unmap is not supported.
    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
        let unmap_start = addr >> HV_PAGE_SHIFT;
        let unmap_end = (addr + size) >> HV_PAGE_SHIFT;
        let mut state = self.memory.lock();
        for entry in &mut state.ranges {
            let Some(region) = entry.as_ref() else {
                continue;
            };
            let region_start = region.guest_pfn;
            let region_end = region.guest_pfn + (region.size >> HV_PAGE_SHIFT);
            if unmap_start <= region_start && region_end <= unmap_end {
                let _span = tracing::info_span!(
                    "mshv unmap user memory",
                    guest_pfn = region.guest_pfn,
                    size = region.size,
                )
                .entered();
                self.vmfd.unmap_user_memory(*region)?;
                // Free the slot for reuse by a future map_range.
                *entry = None;
            } else {
                // Regions outside the range are fine; straddling is a bug.
                assert!(
                    region_end <= unmap_start || unmap_end <= region_start,
                    "unmap range partially overlaps a mapped region"
                );
            }
        }
        Ok(())
    }
}
892
/// A registered ioeventfd doorbell; unregisters itself from the kernel on drop.
struct MshvDoorbellEntry {
    // Weak so a leaked doorbell does not keep the partition alive.
    partition: Weak<MshvPartitionInner>,
    event: Event,
    // Registration parameters, retained so drop can issue the matching
    // deassign ioctl.
    guest_address: u64,
    datamatch: u64,
    len: u32,
    flags: u32,
}
906
impl MshvDoorbellEntry {
    /// Registers an ioeventfd doorbell at `guest_address`, optionally
    /// matching only writes of `value` and/or of length `length`.
    ///
    /// Signals `fd` when the guest writes the doorbell. Returns the entry
    /// whose `Drop` impl unregisters the doorbell.
    fn new(
        partition: &Arc<MshvPartitionInner>,
        guest_address: u64,
        value: Option<u64>,
        length: Option<u32>,
        fd: &Event,
    ) -> io::Result<MshvDoorbellEntry> {
        // Only request data matching when a value was supplied.
        let flags = if value.is_some() {
            1 << mshv_bindings::MSHV_IOEVENTFD_BIT_DATAMATCH
        } else {
            0
        };
        let datamatch = value.unwrap_or(0);
        let len = length.unwrap_or(0);
        // Keep our own handle so the eventfd stays alive while registered.
        let event = fd.clone();

        let ioeventfd = mshv_bindings::mshv_user_ioeventfd {
            datamatch,
            addr: guest_address,
            len,
            fd: event.as_fd().as_raw_fd(),
            flags,
            ..Default::default()
        };
        // SAFETY: the ioctl only reads the fully initialized `ioeventfd`
        // struct, and `vmfd` is a valid open mshv VM descriptor.
        let ret = unsafe {
            libc::ioctl(
                partition.vmfd.as_raw_fd(),
                mshv_ioctls::MSHV_IOEVENTFD() as _,
                std::ptr::from_ref(&ioeventfd),
            )
        };
        if ret < 0 {
            return Err(io::Error::last_os_error());
        }

        Ok(Self {
            partition: Arc::downgrade(partition),
            event,
            guest_address,
            datamatch,
            len,
            flags,
        })
    }
}
956
impl Drop for MshvDoorbellEntry {
    /// Unregisters the doorbell by reissuing the same ioeventfd parameters
    /// with the DEASSIGN bit set. Skipped if the partition is already gone.
    fn drop(&mut self) {
        if let Some(partition) = self.partition.upgrade() {
            let ioeventfd = mshv_bindings::mshv_user_ioeventfd {
                datamatch: self.datamatch,
                addr: self.guest_address,
                len: self.len,
                fd: self.event.as_fd().as_raw_fd(),
                flags: self.flags | (1 << mshv_bindings::MSHV_IOEVENTFD_BIT_DEASSIGN),
                ..Default::default()
            };
            // SAFETY: the ioctl only reads the fully initialized `ioeventfd`
            // struct, and the upgraded partition holds a valid VM descriptor.
            let ret = unsafe {
                libc::ioctl(
                    partition.vmfd.as_raw_fd(),
                    mshv_ioctls::MSHV_IOEVENTFD() as _,
                    std::ptr::from_ref(&ioeventfd),
                )
            };
            // Failure to deassign would leave the kernel signaling a dropped
            // eventfd; treat it as a fatal invariant violation.
            assert!(
                ret >= 0,
                "failed to unregister doorbell at {:#x}: {}",
                self.guest_address,
                io::Error::last_os_error()
            );
        }
    }
}
987
impl DoorbellRegistration for MshvPartition {
    /// Registers a doorbell; the returned boxed entry unregisters it on drop.
    fn register_doorbell(
        &self,
        guest_address: u64,
        value: Option<u64>,
        length: Option<u32>,
        fd: &Event,
    ) -> io::Result<Box<dyn Send + Sync>> {
        Ok(Box::new(MshvDoorbellEntry::new(
            &self.inner,
            guest_address,
            value,
            length,
            fd,
        )?))
    }
}
1005
impl virt::synic::Synic for MshvPartitionInner {
    fn port_map(&self) -> &virt::synic::SynicPortMap {
        &self.synic_ports
    }

    /// Queues a message for delivery via the VP's run-loop flush path.
    fn post_message(&self, _vtl: Vtl, vp: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
        self.post_message(vp, sint, &HvMessage::new(HvMessageType(typ), 0, payload));
    }

    /// Creates an event port that signals the guest via direct hypercall.
    /// Parameters are behind a mutex so `set_target_vp` can retarget later.
    fn new_guest_event_port(
        self: Arc<Self>,
        _vtl: Vtl,
        vp: u32,
        sint: u8,
        flag: u16,
    ) -> Box<dyn GuestEventPort> {
        Box::new(MshvGuestEventPort {
            partition: Arc::downgrade(&self),
            params: Arc::new(Mutex::new(MshvEventPortParams {
                vp: VpIndex::new(vp),
                sint,
                flag,
            })),
        })
    }

    /// Direct signaling is preferred over OS event objects for this backend.
    fn prefer_os_events(&self) -> bool {
        false
    }
}
1036
/// An event port that signals a guest synic flag via direct hypercall.
#[derive(Debug, Clone)]
struct MshvGuestEventPort {
    // Weak so outstanding ports don't keep the partition alive.
    partition: Weak<MshvPartitionInner>,
    // Shared with interrupts produced by `interrupt()` so retargeting via
    // `set_target_vp` affects them too.
    params: Arc<Mutex<MshvEventPortParams>>,
}
1043
/// Target parameters for an [`MshvGuestEventPort`].
#[derive(Debug, Copy, Clone)]
struct MshvEventPortParams {
    vp: VpIndex,
    sint: u8,
    flag: u16,
}
1050
1051impl GuestEventPort for MshvGuestEventPort {
1052 fn interrupt(&self) -> Interrupt {
1053 let partition = self.partition.clone();
1054 let params = self.params.clone();
1055 Interrupt::from_fn(move || {
1056 let MshvEventPortParams { vp, sint, flag } = *params.lock();
1057 if let Some(partition) = partition.upgrade() {
1058 partition
1059 .signal_event_direct(vp.index(), sint, flag)
1060 .unwrap_or_else(|_| {
1061 panic!(
1062 "Failed signal synic sint {} on vp {:?} with flag {}",
1063 sint, vp, flag
1064 )
1065 });
1066 }
1067 })
1068 }
1069
1070 fn set_target_vp(&mut self, vp: u32) -> Result<(), vmcore::synic::HypervisorError> {
1071 self.params.lock().vp = VpIndex::new(vp);
1072 Ok(())
1073 }
1074}