1#![cfg(target_os = "linux")]
7#![expect(unsafe_code)]
9
10use fs_err::os::unix::fs::OpenOptionsExt;
11use hvdef::HvError;
12use hvdef::HvMessage;
13use hvdef::HvStatus;
14use hvdef::hypercall::HvInputVtl;
15use hvdef::hypercall::HvRegisterAssoc;
16use hvdef::hypercall::TranslateVirtualAddressExOutputX64;
17use pal_async::driver::PollImpl;
18use pal_async::driver::SpawnDriver;
19use pal_async::fd::PollFdReady;
20use pal_async::interest::InterestSlot;
21use pal_async::interest::PollEvents;
22use pal_async::task::Task;
23use parking_lot::Mutex;
24use sidecar_defs::CommandPage;
25use sidecar_defs::CpuContextX64;
26use sidecar_defs::GetSetVpRegisterRequest;
27use sidecar_defs::PAGE_SIZE;
28use sidecar_defs::RunVpResponse;
29use sidecar_defs::SidecarCommand;
30use sidecar_defs::TranslateGvaRequest;
31use sidecar_defs::TranslateGvaResponse;
32use std::fs::File;
33use std::future::poll_fn;
34use std::io::Read;
35use std::mem::MaybeUninit;
36use std::ops::Range;
37use std::os::fd::AsRawFd;
38use std::os::raw::c_void;
39use std::ptr::NonNull;
40use std::ptr::addr_of;
41use std::ptr::addr_of_mut;
42use std::sync::Arc;
43use std::sync::atomic::AtomicBool;
44use std::sync::atomic::Ordering::Acquire;
45use std::sync::atomic::Ordering::Release;
46use std::task::Poll;
47use std::task::Waker;
48use thiserror::Error;
49use zerocopy::FromBytes;
50use zerocopy::Immutable;
51use zerocopy::IntoBytes;
52use zerocopy::KnownLayout;
53
/// Raw ioctl bindings for the sidecar device node, generated with the `nix`
/// ioctl macros.
mod ioctl {
    /// ioctl type byte shared by every request defined in this module.
    const BASE: u8 = 0xb8;
    // Request 0xf0: takes a plain integer argument (callers in this file pass
    // a CPU number).
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_start, nix::request_code_none!(BASE, 0xf0));
    // Request 0xf1: takes a plain integer argument (callers in this file pass
    // a CPU number).
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_stop, nix::request_code_none!(BASE, 0xf1));
    // Request 0xf2: takes a plain integer argument (callers in this file pass
    // a CPU number).
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_run, nix::request_code_none!(BASE, 0xf2));
    // Request 0xf3: reads a `SidecarInfo` back from the driver.
    nix::ioctl_read!(mshv_vtl_sidecar_info, BASE, 0xf3, SidecarInfo);

    /// Output of the `mshv_vtl_sidecar_info` ioctl.
    #[repr(C)]
    pub(crate) struct SidecarInfo {
        /// First CPU number covered by the device node.
        pub base_cpu: u32,
        /// Number of CPUs covered by the device node, starting at `base_cpu`.
        pub cpu_count: u32,
        /// Size in bytes of each CPU's slice of the device's shared memory.
        pub per_cpu_shmem: u32,
    }
}
68
/// A client for the sidecar driver, covering every `/dev/mshv_vtl_sidecarN`
/// device node that could be opened.
#[derive(Debug)]
pub struct SidecarClient {
    /// One entry per opened device node, in node order. `new` asserts that
    /// each node's CPU range starts where the previous one ended.
    nodes: Vec<SidecarNode>,
}
77
/// Per-device-node state: the shared memory mapping, the CPU range the node
/// covers, and the bookkeeping for handing out per-CPU handles.
#[derive(Debug)]
struct SidecarNode {
    /// Mapping of the device's shared memory, `per_cpu_shmem_size` bytes for
    /// each CPU in `cpus`.
    mapping: Mapping,
    /// Size in bytes of each CPU's slice of `mapping`, as reported by the
    /// driver's info ioctl.
    per_cpu_shmem_size: usize,
    /// CPU numbers served by this node.
    cpus: Range<u32>,
    /// Handle to the spawned `sidecar-wait` task; stored here but never read.
    _task: Task<()>,
    /// State shared with the `sidecar-wait` task.
    state: Arc<SidecarClientState>,
    /// One flag per CPU (indexed relative to `cpus.start`), set while a
    /// `SidecarVp` for that CPU exists.
    in_use: Vec<AtomicBool>,
}
87
/// State shared between a node's `SidecarVp` handles and its `sidecar-wait`
/// task.
#[derive(Debug)]
struct SidecarClientState {
    /// The opened device node; used for ioctls and for reading stop
    /// notifications.
    file: File,
    /// Run state of each CPU of the node, indexed relative to the node's
    /// base CPU.
    vps: Vec<Mutex<VpState>>,
}
93
/// Tracked state of an asynchronous VP run.
#[derive(Debug)]
enum VpState {
    /// No asynchronous run is outstanding.
    Stopped,
    /// A run has been started and its stop notification has not been read
    /// yet. Holds the waker of the task waiting for the stop, if any.
    Running(Option<Waker>),
    /// The wait loop has read the stop notification, but the waiter has not
    /// consumed it yet.
    Finished,
}
100
/// An `mmap`ed region, stored as (base address, length in bytes). The region
/// is unmapped when the value is dropped.
#[derive(Debug)]
struct Mapping(NonNull<c_void>, usize);

// SAFETY: the wrapped pointer is the address of an `mmap`ed region, which is
// not tied to the thread that created it.
// NOTE(review): accesses to the mapped memory itself go through `SidecarVp`
// and must be justified there.
unsafe impl Send for Mapping {}
// SAFETY: see the `Send` impl above; `Mapping` exposes no interior mutation
// of its own fields.
unsafe impl Sync for Mapping {}
108
/// An error returned while constructing a [`SidecarClient`].
#[derive(Debug, Error)]
pub enum NewSidecarClientError {
    /// An operation on the sidecar device node failed.
    #[error("{operation} failed in sidecar driver")]
    Io {
        /// Short name of the operation that failed ("open", "query info",
        /// or "mmap").
        operation: &'static str,
        /// The underlying OS error.
        #[source]
        err: std::io::Error,
    },
    /// The async driver failed to register the device's file descriptor for
    /// readiness polling.
    #[error("driver error")]
    Driver(#[source] std::io::Error),
}
125
126impl SidecarClient {
127 pub fn new<T: SpawnDriver>(
132 mut driver: impl FnMut(u32) -> T,
133 ) -> Result<Option<Self>, NewSidecarClientError> {
134 let mut nodes = Vec::new();
135 let mut expected_base = 0;
136 loop {
137 let node = match SidecarNode::new(&mut driver, nodes.len()) {
138 Ok(Some(node)) => node,
139 Ok(None) => {
140 if nodes.is_empty() {
141 return Ok(None);
143 }
144 break;
146 }
147 Err(err) => return Err(err),
148 };
149 assert_eq!(node.cpus.start, expected_base);
150 expected_base = node.cpus.end;
151 nodes.push(node);
152 }
153 Ok(Some(Self { nodes }))
154 }
155
156 pub fn vp(&self, cpu: u32) -> SidecarVp<'_> {
158 self.nodes
159 .iter()
160 .find_map(|node| node.vp(cpu))
161 .expect("invalid cpu")
162 }
163
164 pub fn base_cpu(&self, cpu: u32) -> u32 {
166 self.nodes
167 .iter()
168 .find_map(|node| node.cpus.contains(&cpu).then_some(node.cpus.start))
169 .expect("invalid cpu")
170 }
171}
172
173impl SidecarNode {
174 fn new<T: SpawnDriver>(
175 driver: &mut impl FnMut(u32) -> T,
176 node: usize,
177 ) -> Result<Option<Self>, NewSidecarClientError> {
178 let file = match fs_err::OpenOptions::new()
179 .read(true)
180 .write(true)
181 .custom_flags(libc::O_NONBLOCK)
182 .open(format!("/dev/mshv_vtl_sidecar{node}"))
183 {
184 Ok(file) => file,
185 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
186 Err(err) => {
187 return Err(NewSidecarClientError::Io {
188 operation: "open",
189 err,
190 });
191 }
192 };
193
194 let info = unsafe {
197 let mut info = MaybeUninit::zeroed();
198 ioctl::mshv_vtl_sidecar_info(file.as_raw_fd(), info.as_mut_ptr()).map_err(|err| {
199 NewSidecarClientError::Io {
200 operation: "query info",
201 err: err.into(),
202 }
203 })?;
204 info.assume_init()
205 };
206
207 let cpus = info.base_cpu..info.base_cpu + info.cpu_count;
208 let per_cpu_shmem_size = info.per_cpu_shmem as usize;
209 assert!(
210 per_cpu_shmem_size >= size_of::<VpSharedPages>(),
211 "invalid state size"
212 );
213
214 let mapping = {
215 let mapping_len = cpus.len() * per_cpu_shmem_size;
216 let mapping = unsafe {
218 libc::mmap(
219 std::ptr::null_mut(),
220 mapping_len,
221 libc::PROT_READ | libc::PROT_WRITE,
222 libc::MAP_SHARED,
223 file.as_raw_fd(),
224 0,
225 )
226 };
227 if mapping == libc::MAP_FAILED {
228 return Err(NewSidecarClientError::Io {
229 operation: "mmap",
230 err: std::io::Error::last_os_error(),
231 });
232 }
233 Mapping(NonNull::new(mapping).unwrap(), mapping_len)
234 };
235
236 let driver = driver(cpus.start);
238
239 let fd_ready = driver
240 .new_dyn_fd_ready(file.as_raw_fd())
241 .map_err(NewSidecarClientError::Driver)?;
242
243 let state = Arc::new(SidecarClientState {
244 file: file.into(),
245 vps: cpus.clone().map(|_| Mutex::new(VpState::Stopped)).collect(),
246 });
247
248 let task = driver.spawn(
249 "sidecar-wait",
250 sidecar_wait_loop(fd_ready, state.clone(), cpus.start),
251 );
252
253 tracing::debug!(
254 "sidecar node {node} started, cpus {}..={}",
255 cpus.start,
256 cpus.end - 1
257 );
258
259 Ok(Some(Self {
260 state,
261 per_cpu_shmem_size,
262 mapping,
263 in_use: cpus.clone().map(|_| AtomicBool::new(false)).collect(),
264 cpus,
265 _task: task,
266 }))
267 }
268
269 fn vp(&self, cpu: u32) -> Option<SidecarVp<'_>> {
270 if !self.cpus.contains(&cpu) {
271 return None;
272 }
273 let index = cpu - self.cpus.start;
274 assert!(
275 !self.in_use[index as usize].swap(true, Acquire),
276 "vp in use"
277 );
278 let shmem = unsafe {
280 self.mapping
281 .0
282 .as_ptr()
283 .byte_add(index as usize * self.per_cpu_shmem_size)
284 }
285 .cast();
286 Some(SidecarVp {
287 cpu: cpu as i32,
288 index: index as usize,
289 shmem: NonNull::new(shmem).unwrap(),
290 node: self,
291 })
292 }
293}
294
295async fn sidecar_wait_loop(
296 mut fd_ready: PollImpl<dyn PollFdReady>,
297 state: Arc<SidecarClientState>,
298 base_cpu: u32,
299) {
300 let err = loop {
301 poll_fn(|cx| fd_ready.poll_fd_ready(cx, InterestSlot::Read, PollEvents::IN)).await;
302 let mut cpu = 0u32;
303 let n = match (&state.file).read(cpu.as_mut_bytes()) {
304 Ok(n) => n,
305 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
306 fd_ready.clear_fd_ready(InterestSlot::Read);
307 continue;
308 }
309 Err(err) => break err,
310 };
311 assert_eq!(n, 4, "unexpected read size");
312 tracing::trace!(cpu, "sidecar stop");
313 let index = cpu - base_cpu;
314 let VpState::Running(waker) =
315 std::mem::replace(&mut *state.vps[index as usize].lock(), VpState::Finished)
316 else {
317 panic!("cpu {cpu} stopped without start");
318 };
319 if let Some(waker) = waker {
320 waker.wake();
321 }
322 };
323 tracing::error!(
324 error = &err as &dyn std::error::Error,
325 "sidecar wait failed"
326 );
327}
328
329impl Drop for Mapping {
330 fn drop(&mut self) {
331 let r = unsafe { libc::munmap(self.0.as_ptr(), self.1) };
333 if r != 0 {
334 panic!("munmap failed: {}", std::io::Error::last_os_error());
335 }
336 }
337}
338
/// A handle to one CPU's sidecar state. At most one handle exists per CPU at
/// a time; the CPU is released again when the handle is dropped.
pub struct SidecarVp<'a> {
    /// CPU number, in the form passed as the ioctl argument.
    cpu: i32,
    /// Index of the CPU within its node (CPU number minus the node's base
    /// CPU).
    index: usize,
    /// This CPU's slice of the node's shared memory mapping.
    shmem: NonNull<VpSharedPages>,
    /// The node that owns the mapping and the shared state.
    node: &'a SidecarNode,
}
346
/// Layout of the start of each CPU's slice of the shared memory mapping: the
/// command page followed by the register page.
#[repr(C)]
struct VpSharedPages {
    /// Command, request/response data, and error reporting for this CPU.
    command_page: CommandPage,
    /// The hypervisor register page for this CPU.
    register_page: hvdef::HvX64RegisterPage,
}

// Compile-time check that the structure spans a whole number of pages.
const _: () = assert!(size_of::<VpSharedPages>() % PAGE_SIZE == 0);
354
impl Drop for SidecarVp<'_> {
    /// Releases the CPU so that a new handle can be created for it.
    ///
    /// # Panics
    /// Panics if the CPU's in-use flag was not set.
    fn drop(&mut self) {
        // Clear the flag that `SidecarNode::vp` set when this handle was
        // created.
        assert!(self.node.in_use[self.index].swap(false, Release));
    }
}
360
/// An error returned by an operation on a [`SidecarVp`].
#[derive(Debug, Error)]
pub enum SidecarError {
    /// An ioctl on the sidecar device failed.
    #[error("driver error")]
    Io(#[source] std::io::Error),
    /// The command page reported an error; the payload is the error text
    /// taken from the command page.
    #[error("sidecar error: {0}")]
    Sidecar(String),
    /// The response carried a failing hypervisor status.
    #[error("hypervisor error")]
    Hypervisor(#[source] HvError),
}
374
375impl<'a> SidecarVp<'a> {
376 pub fn run(&mut self) -> Result<SidecarRun<'_, 'a>, SidecarError> {
378 tracing::trace!("run vp");
379 self.set_command::<_, u8>(SidecarCommand::RUN_VP, (), 0);
380 self.start_async()?;
381 Ok(SidecarRun {
382 vp: self,
383 waited: false,
384 })
385 }
386
387 pub fn cpu_context(&self) -> *mut CpuContextX64 {
391 unsafe { addr_of_mut!((*self.shmem.as_ptr()).command_page.cpu_context) }
394 }
395
396 pub fn intercept_message(&self) -> *const HvMessage {
400 unsafe { addr_of!((*self.shmem.as_ptr()).command_page.intercept_message) }
403 }
404
405 pub fn register_page(&self) -> *mut hvdef::HvX64RegisterPage {
412 unsafe { addr_of_mut!((*self.shmem.as_ptr()).register_page) }
415 }
416
417 pub fn test(&mut self) -> Result<(), SidecarError> {
419 tracing::trace!("test");
420 let () = self.dispatch_sync(SidecarCommand::NONE, ())?;
421 Ok(())
422 }
423
424 pub fn get_vp_registers(
426 &mut self,
427 target_vtl: HvInputVtl,
428 regs: &mut [HvRegisterAssoc],
429 ) -> Result<(), SidecarError> {
430 tracing::trace!(count = regs.len(), "get vp register");
431 for regs in regs.chunks_mut(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
432 let buf = self.set_command(
433 SidecarCommand::GET_VP_REGISTERS,
434 GetSetVpRegisterRequest {
435 count: regs.len() as u16,
436 target_vtl,
437 rsvd: 0,
438 status: HvStatus::SUCCESS,
439 rsvd2: [0; 10],
440 regs: [],
441 },
442 regs.len(),
443 );
444 buf.copy_from_slice(regs);
445 self.run_sync()?;
446 let (&GetSetVpRegisterRequest { status, .. }, buf) =
447 self.command_result::<_, HvRegisterAssoc>(regs.len())?;
448 status.result().map_err(SidecarError::Hypervisor)?;
449 regs.copy_from_slice(buf);
450 }
451 Ok(())
452 }
453
454 pub fn set_vp_registers(
456 &mut self,
457 target_vtl: HvInputVtl,
458 regs: &[HvRegisterAssoc],
459 ) -> Result<(), SidecarError> {
460 tracing::trace!(count = regs.len(), "set vp register");
461 for regs in regs.chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
462 let buf = self.set_command(
463 SidecarCommand::SET_VP_REGISTERS,
464 GetSetVpRegisterRequest {
465 count: regs.len() as u16,
466 target_vtl,
467 rsvd: 0,
468 status: HvStatus::SUCCESS,
469 rsvd2: [0; 10],
470 regs: [],
471 },
472 regs.len(),
473 );
474 buf.copy_from_slice(regs);
475 self.run_sync()?;
476 let &GetSetVpRegisterRequest { status, .. } = self.command_result::<_, u8>(0)?.0;
477 status.result().map_err(SidecarError::Hypervisor)?;
478 }
479 Ok(())
480 }
481
482 pub fn translate_gva(
485 &mut self,
486 gvn: u64,
487 control_flags: hvdef::hypercall::TranslateGvaControlFlagsX64,
488 ) -> Result<TranslateVirtualAddressExOutputX64, SidecarError> {
489 tracing::trace!("translate gva");
490 let &TranslateGvaResponse {
491 status,
492 rsvd: _,
493 output,
494 } = self.dispatch_sync(
495 SidecarCommand::TRANSLATE_GVA,
496 TranslateGvaRequest { gvn, control_flags },
497 )?;
498 status.result().map_err(SidecarError::Hypervisor)?;
499 Ok(output)
500 }
501
502 fn set_command<
503 T: IntoBytes + Immutable + KnownLayout,
504 S: IntoBytes + FromBytes + Immutable + KnownLayout,
505 >(
506 &mut self,
507 command: SidecarCommand,
508 input: T,
509 n: usize,
510 ) -> &mut [S] {
511 let shmem = unsafe { self.shmem.as_mut() };
514 shmem.command_page.command = command;
515 input
516 .write_to_prefix(shmem.command_page.request_data.as_mut_bytes())
517 .unwrap();
518 <[S]>::mut_from_prefix_with_elems(
519 &mut shmem.command_page.request_data.as_mut_bytes()[input.as_bytes().len()..],
520 n,
521 )
522 .unwrap()
523 .0
524 }
525
526 fn dispatch_sync<O: FromBytes + Immutable + KnownLayout>(
527 &mut self,
528 command: SidecarCommand,
529 input: impl IntoBytes + Immutable + KnownLayout,
530 ) -> Result<&O, SidecarError> {
531 self.set_command::<_, u8>(command, input, 0);
532 self.run_sync()?;
533 Ok(self.command_result::<_, u8>(0)?.0)
534 }
535
536 fn run_sync(&mut self) -> Result<(), SidecarError> {
537 unsafe {
539 ioctl::mshv_vtl_sidecar_run(self.node.state.file.as_raw_fd(), self.cpu)
540 .map_err(|err| SidecarError::Io(err.into()))?;
541 }
542 Ok(())
543 }
544
545 fn start_async(&mut self) -> Result<(), SidecarError> {
546 let old = std::mem::replace(
547 &mut *self.node.state.vps[self.index].lock(),
548 VpState::Running(None),
549 );
550 assert!(matches!(old, VpState::Stopped));
551 unsafe {
553 ioctl::mshv_vtl_sidecar_start(self.node.state.file.as_raw_fd(), self.cpu)
554 .map_err(|err| SidecarError::Io(err.into()))?;
555 }
556 Ok(())
557 }
558
559 fn stop_async(&mut self) {
560 unsafe {
562 ioctl::mshv_vtl_sidecar_stop(self.node.state.file.as_raw_fd(), self.cpu)
563 .expect("failed to stop vp");
564 }
565 }
566
567 async fn wait_async(&mut self) {
568 poll_fn(|cx| {
569 let mut vp = self.node.state.vps[self.index].lock();
570 match &mut *vp {
571 VpState::Stopped => unreachable!(),
572 VpState::Running(waker) => {
573 if waker.as_ref().is_none_or(|w| !cx.waker().will_wake(w)) {
574 *waker = Some(cx.waker().clone());
575 }
576 Poll::Pending
577 }
578 VpState::Finished => {
579 *vp = VpState::Stopped;
580 Poll::Ready(())
581 }
582 }
583 })
584 .await
585 }
586
587 fn command_result<
588 O: FromBytes + Immutable + KnownLayout,
589 S: FromBytes + Immutable + KnownLayout,
590 >(
591 &mut self,
592 n: usize,
593 ) -> Result<(&O, &[S]), SidecarError> {
594 let shmem = unsafe { self.shmem.as_ref() };
597 if shmem.command_page.has_error != 0 {
598 let s = String::from_utf8_lossy(
599 &shmem.command_page.error.buf[..shmem.command_page.error.len as usize],
600 );
601 return Err(SidecarError::Sidecar(s.into_owned()));
602 }
603 let (output, slice) = shmem
604 .command_page
605 .request_data
606 .as_bytes()
607 .split_at(size_of::<O>());
608 let output = O::ref_from_bytes(output).unwrap();
609 let (slice, _) = <[S]>::ref_from_prefix_with_elems(slice, n).unwrap();
610 Ok((output, slice))
611 }
612}
613
/// An in-flight asynchronous VP run, returned by [`SidecarVp::run`].
///
/// The run must be waited on to completion before this value is dropped;
/// dropping it earlier panics.
pub struct SidecarRun<'a, 'b> {
    /// The VP being run.
    vp: &'a mut SidecarVp<'b>,
    /// Set once `wait` has observed that the VP stopped.
    waited: bool,
}
621
622impl SidecarRun<'_, '_> {
623 pub fn cancel(&mut self) {
627 if !self.waited {
628 self.vp.stop_async();
629 }
630 }
631
632 pub async fn wait(&mut self) -> Result<bool, SidecarError> {
636 if !self.waited {
637 self.vp.wait_async().await;
638 self.waited = true;
639 }
640 let &RunVpResponse { intercept } = self.vp.command_result::<_, u8>(0)?.0;
641 Ok(intercept != 0)
642 }
643}
644
645impl Drop for SidecarRun<'_, '_> {
646 fn drop(&mut self) {
647 assert!(self.waited, "failed to stop vp");
648 }
649}