1#![cfg(target_os = "linux")]
7#![expect(unsafe_code)]
9
10use fs_err::os::unix::fs::OpenOptionsExt;
11use hvdef::HvError;
12use hvdef::HvMessage;
13use hvdef::HvRegisterName;
14use hvdef::HvRegisterValue;
15use hvdef::HvStatus;
16use hvdef::hypercall::HvInputVtl;
17use hvdef::hypercall::HvRegisterAssoc;
18use hvdef::hypercall::TranslateVirtualAddressExOutputX64;
19use pal_async::driver::PollImpl;
20use pal_async::driver::SpawnDriver;
21use pal_async::fd::PollFdReady;
22use pal_async::interest::InterestSlot;
23use pal_async::interest::PollEvents;
24use pal_async::task::Task;
25use parking_lot::Mutex;
26use sidecar_defs::CommandPage;
27use sidecar_defs::CpuContextX64;
28use sidecar_defs::GetSetVpRegisterRequest;
29use sidecar_defs::PAGE_SIZE;
30use sidecar_defs::RunVpResponse;
31use sidecar_defs::SidecarCommand;
32use sidecar_defs::TranslateGvaRequest;
33use sidecar_defs::TranslateGvaResponse;
34use std::fs::File;
35use std::future::poll_fn;
36use std::io::Read;
37use std::mem::MaybeUninit;
38use std::ops::Range;
39use std::os::fd::AsRawFd;
40use std::os::raw::c_void;
41use std::ptr::NonNull;
42use std::ptr::addr_of;
43use std::ptr::addr_of_mut;
44use std::sync::Arc;
45use std::sync::atomic::AtomicBool;
46use std::sync::atomic::Ordering::Acquire;
47use std::sync::atomic::Ordering::Release;
48use std::task::Poll;
49use std::task::Waker;
50use thiserror::Error;
51use zerocopy::FromBytes;
52use zerocopy::FromZeros;
53use zerocopy::Immutable;
54use zerocopy::IntoBytes;
55use zerocopy::KnownLayout;
56
57mod ioctl {
58 const BASE: u8 = 0xb8;
59 nix::ioctl_write_int_bad!(mshv_vtl_sidecar_start, nix::request_code_none!(BASE, 0xf0));
60 nix::ioctl_write_int_bad!(mshv_vtl_sidecar_stop, nix::request_code_none!(BASE, 0xf1));
61 nix::ioctl_write_int_bad!(mshv_vtl_sidecar_run, nix::request_code_none!(BASE, 0xf2));
62 nix::ioctl_read!(mshv_vtl_sidecar_info, BASE, 0xf3, SidecarInfo);
63
64 #[repr(C)]
65 pub(crate) struct SidecarInfo {
66 pub base_cpu: u32,
67 pub cpu_count: u32,
68 pub per_cpu_shmem: u32,
69 }
70}
71
72#[derive(Debug)]
77pub struct SidecarClient {
78 nodes: Vec<SidecarNode>,
79}
80
81#[derive(Debug)]
82struct SidecarNode {
83 mapping: Mapping,
84 per_cpu_shmem_size: usize,
85 cpus: Range<u32>,
86 _task: Task<()>,
87 state: Arc<SidecarClientState>,
88 in_use: Vec<AtomicBool>,
89}
90
91#[derive(Debug)]
92struct SidecarClientState {
93 file: File,
94 vps: Vec<Mutex<VpState>>,
95}
96
/// Run state of a single VP as tracked by the client.
#[derive(Debug)]
enum VpState {
    /// No asynchronous run is outstanding.
    Stopped,
    /// An asynchronous run was started; holds the waker of the task waiting
    /// for it to finish, if one has registered.
    Running(Option<Waker>),
    /// The wait loop observed the stop notification, but the waiter has not
    /// consumed it yet.
    Finished,
}
103
/// An `mmap`ed region: base address and length in bytes.
///
/// The region is unmapped when the value is dropped.
#[derive(Debug)]
struct Mapping(NonNull<c_void>, usize);

// SAFETY: the wrapper only stores an address and a length; moving it to
// another thread does not invalidate the mapping.
unsafe impl Send for Mapping {}

// SAFETY: shared references expose nothing but the raw address and length;
// every dereference happens in separately justified `unsafe` code.
unsafe impl Sync for Mapping {}
111
112#[derive(Debug, Error)]
114pub enum NewSidecarClientError {
115 #[error("{operation} failed in sidecar driver")]
117 Io {
118 operation: &'static str,
120 #[source]
122 err: std::io::Error,
123 },
124 #[error("driver error")]
126 Driver(#[source] std::io::Error),
127}
128
129impl SidecarClient {
130 pub fn new<T: SpawnDriver>(
135 mut driver: impl FnMut(u32) -> T,
136 ) -> Result<Option<Self>, NewSidecarClientError> {
137 let mut nodes = Vec::new();
138 let mut expected_base = 0;
139 loop {
140 let node = match SidecarNode::new(&mut driver, nodes.len()) {
141 Ok(Some(node)) => node,
142 Ok(None) => {
143 if nodes.is_empty() {
144 return Ok(None);
146 }
147 break;
149 }
150 Err(err) => return Err(err),
151 };
152 assert_eq!(node.cpus.start, expected_base);
153 expected_base = node.cpus.end;
154 nodes.push(node);
155 }
156 Ok(Some(Self { nodes }))
157 }
158
159 pub fn vp(&self, cpu: u32) -> SidecarVp<'_> {
161 self.nodes
162 .iter()
163 .find_map(|node| node.vp(cpu))
164 .expect("invalid cpu")
165 }
166
167 pub fn base_cpu(&self, cpu: u32) -> u32 {
169 self.nodes
170 .iter()
171 .find_map(|node| node.cpus.contains(&cpu).then_some(node.cpus.start))
172 .expect("invalid cpu")
173 }
174}
175
176impl SidecarNode {
177 fn new<T: SpawnDriver>(
178 driver: &mut impl FnMut(u32) -> T,
179 node: usize,
180 ) -> Result<Option<Self>, NewSidecarClientError> {
181 let file = match fs_err::OpenOptions::new()
182 .read(true)
183 .write(true)
184 .custom_flags(libc::O_NONBLOCK)
185 .open(format!("/dev/mshv_vtl_sidecar{node}"))
186 {
187 Ok(file) => file,
188 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
189 Err(err) => {
190 return Err(NewSidecarClientError::Io {
191 operation: "open",
192 err,
193 });
194 }
195 };
196
197 let info = unsafe {
200 let mut info = MaybeUninit::zeroed();
201 ioctl::mshv_vtl_sidecar_info(file.as_raw_fd(), info.as_mut_ptr()).map_err(|err| {
202 NewSidecarClientError::Io {
203 operation: "query info",
204 err: err.into(),
205 }
206 })?;
207 info.assume_init()
208 };
209
210 let cpus = info.base_cpu..info.base_cpu + info.cpu_count;
211 let per_cpu_shmem_size = info.per_cpu_shmem as usize;
212 assert!(
213 per_cpu_shmem_size >= size_of::<VpSharedPages>(),
214 "invalid state size"
215 );
216
217 let mapping = {
218 let mapping_len = cpus.len() * per_cpu_shmem_size;
219 let mapping = unsafe {
221 libc::mmap(
222 std::ptr::null_mut(),
223 mapping_len,
224 libc::PROT_READ | libc::PROT_WRITE,
225 libc::MAP_SHARED,
226 file.as_raw_fd(),
227 0,
228 )
229 };
230 if mapping == libc::MAP_FAILED {
231 return Err(NewSidecarClientError::Io {
232 operation: "mmap",
233 err: std::io::Error::last_os_error(),
234 });
235 }
236 Mapping(NonNull::new(mapping).unwrap(), mapping_len)
237 };
238
239 let driver = driver(cpus.start);
241
242 let fd_ready = driver
243 .new_dyn_fd_ready(file.as_raw_fd())
244 .map_err(NewSidecarClientError::Driver)?;
245
246 let state = Arc::new(SidecarClientState {
247 file: file.into(),
248 vps: cpus.clone().map(|_| Mutex::new(VpState::Stopped)).collect(),
249 });
250
251 let task = driver.spawn(
252 "sidecar-wait",
253 sidecar_wait_loop(fd_ready, state.clone(), cpus.start),
254 );
255
256 tracing::debug!(
257 "sidecar node {node} started, cpus {}..={}",
258 cpus.start,
259 cpus.end - 1
260 );
261
262 Ok(Some(Self {
263 state,
264 per_cpu_shmem_size,
265 mapping,
266 in_use: cpus.clone().map(|_| AtomicBool::new(false)).collect(),
267 cpus,
268 _task: task,
269 }))
270 }
271
272 fn vp(&self, cpu: u32) -> Option<SidecarVp<'_>> {
273 if !self.cpus.contains(&cpu) {
274 return None;
275 }
276 let index = cpu - self.cpus.start;
277 assert!(
278 !self.in_use[index as usize].swap(true, Acquire),
279 "vp in use"
280 );
281 let shmem = unsafe {
283 self.mapping
284 .0
285 .as_ptr()
286 .byte_add(index as usize * self.per_cpu_shmem_size)
287 }
288 .cast();
289 Some(SidecarVp {
290 cpu: cpu as i32,
291 index: index as usize,
292 shmem: NonNull::new(shmem).unwrap(),
293 node: self,
294 })
295 }
296}
297
298async fn sidecar_wait_loop(
299 mut fd_ready: PollImpl<dyn PollFdReady>,
300 state: Arc<SidecarClientState>,
301 base_cpu: u32,
302) {
303 let err = loop {
304 poll_fn(|cx| fd_ready.poll_fd_ready(cx, InterestSlot::Read, PollEvents::IN)).await;
305 let mut cpu = 0u32;
306 let n = match (&state.file).read(cpu.as_mut_bytes()) {
307 Ok(n) => n,
308 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
309 fd_ready.clear_fd_ready(InterestSlot::Read);
310 continue;
311 }
312 Err(err) => break err,
313 };
314 assert_eq!(n, 4, "unexpected read size");
315 tracing::trace!(cpu, "sidecar stop");
316 let index = cpu - base_cpu;
317 let VpState::Running(waker) =
318 std::mem::replace(&mut *state.vps[index as usize].lock(), VpState::Finished)
319 else {
320 panic!("cpu {cpu} stopped without start");
321 };
322 if let Some(waker) = waker {
323 waker.wake();
324 }
325 };
326 tracing::error!(
327 error = &err as &dyn std::error::Error,
328 "sidecar wait failed"
329 );
330}
331
332impl Drop for Mapping {
333 fn drop(&mut self) {
334 let r = unsafe { libc::munmap(self.0.as_ptr(), self.1) };
336 if r != 0 {
337 panic!("munmap failed: {}", std::io::Error::last_os_error());
338 }
339 }
340}
341
342pub struct SidecarVp<'a> {
344 cpu: i32,
345 index: usize,
346 shmem: NonNull<VpSharedPages>,
347 node: &'a SidecarNode,
348}
349
350#[repr(C)]
351struct VpSharedPages {
352 command_page: CommandPage,
353 register_page: hvdef::HvX64RegisterPage,
354}
355
356const _: () = assert!(size_of::<VpSharedPages>().is_multiple_of(PAGE_SIZE));
357
358impl Drop for SidecarVp<'_> {
359 fn drop(&mut self) {
360 assert!(self.node.in_use[self.index].swap(false, Release));
361 }
362}
363
364#[derive(Debug, Error)]
366pub enum SidecarError {
367 #[error("driver error")]
369 Io(#[source] std::io::Error),
370 #[error("sidecar error: {0}")]
372 Sidecar(String),
373 #[error("hypervisor error")]
375 Hypervisor(#[source] HvError),
376}
377
378impl<'a> SidecarVp<'a> {
379 pub fn run(&mut self) -> Result<SidecarRun<'_, 'a>, SidecarError> {
381 tracing::trace!("run vp");
382 self.set_command::<_, u8>(SidecarCommand::RUN_VP, (), 0);
383 self.start_async()?;
384 Ok(SidecarRun {
385 vp: self,
386 waited: false,
387 })
388 }
389
390 pub fn cpu_context(&self) -> *mut CpuContextX64 {
394 unsafe { addr_of_mut!((*self.shmem.as_ptr()).command_page.cpu_context) }
397 }
398
399 pub fn intercept_message(&self) -> *const HvMessage {
403 unsafe { addr_of!((*self.shmem.as_ptr()).command_page.intercept_message) }
406 }
407
408 pub fn register_page(&self) -> *mut hvdef::HvX64RegisterPage {
415 unsafe { addr_of_mut!((*self.shmem.as_ptr()).register_page) }
418 }
419
420 pub fn test(&mut self) -> Result<(), SidecarError> {
422 tracing::trace!("test");
423 let () = self.dispatch_sync(SidecarCommand::NONE, ())?;
424 Ok(())
425 }
426
427 pub fn get_vp_registers(
429 &mut self,
430 target_vtl: HvInputVtl,
431 names: &[HvRegisterName],
432 values: &mut [HvRegisterValue],
433 ) -> Result<(), SidecarError> {
434 tracing::trace!(count = names.len(), "get vp register");
435 for (names, values) in names
436 .chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS)
437 .zip(values.chunks_mut(sidecar_defs::MAX_GET_SET_VP_REGISTERS))
438 {
439 let buf = self.set_command(
440 SidecarCommand::GET_VP_REGISTERS,
441 GetSetVpRegisterRequest {
442 count: names.len() as u16,
443 target_vtl,
444 rsvd: 0,
445 status: HvStatus::SUCCESS,
446 rsvd2: [0; 10],
447 regs: [],
448 },
449 names.len(),
450 );
451 for (i, name) in names.iter().enumerate() {
452 buf[i] = HvRegisterAssoc {
453 name: *name,
454 pad: Default::default(),
455 value: FromZeros::new_zeroed(),
456 };
457 }
458 self.run_sync()?;
459 let (&GetSetVpRegisterRequest { status, .. }, buf) =
460 self.command_result::<_, HvRegisterAssoc>(names.len())?;
461 status.result().map_err(SidecarError::Hypervisor)?;
462 for (i, value) in values.iter_mut().enumerate() {
463 *value = buf[i].value;
464 }
465 }
466 Ok(())
467 }
468
469 pub fn set_vp_registers(
471 &mut self,
472 target_vtl: HvInputVtl,
473 regs: &[HvRegisterAssoc],
474 ) -> Result<(), SidecarError> {
475 tracing::trace!(count = regs.len(), "set vp register");
476 for regs in regs.chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
477 let buf = self.set_command(
478 SidecarCommand::SET_VP_REGISTERS,
479 GetSetVpRegisterRequest {
480 count: regs.len() as u16,
481 target_vtl,
482 rsvd: 0,
483 status: HvStatus::SUCCESS,
484 rsvd2: [0; 10],
485 regs: [],
486 },
487 regs.len(),
488 );
489 buf.copy_from_slice(regs);
490 self.run_sync()?;
491 let &GetSetVpRegisterRequest { status, .. } = self.command_result::<_, u8>(0)?.0;
492 status.result().map_err(SidecarError::Hypervisor)?;
493 }
494 Ok(())
495 }
496
497 pub fn translate_gva(
500 &mut self,
501 gvn: u64,
502 control_flags: hvdef::hypercall::TranslateGvaControlFlagsX64,
503 ) -> Result<TranslateVirtualAddressExOutputX64, SidecarError> {
504 tracing::trace!("translate gva");
505 let &TranslateGvaResponse {
506 status,
507 rsvd: _,
508 output,
509 } = self.dispatch_sync(
510 SidecarCommand::TRANSLATE_GVA,
511 TranslateGvaRequest { gvn, control_flags },
512 )?;
513 status.result().map_err(SidecarError::Hypervisor)?;
514 Ok(output)
515 }
516
517 fn set_command<
518 T: IntoBytes + Immutable + KnownLayout,
519 S: IntoBytes + FromBytes + Immutable + KnownLayout,
520 >(
521 &mut self,
522 command: SidecarCommand,
523 input: T,
524 n: usize,
525 ) -> &mut [S] {
526 let shmem = unsafe { self.shmem.as_mut() };
529 shmem.command_page.command = command;
530 input
531 .write_to_prefix(shmem.command_page.request_data.as_mut_bytes())
532 .unwrap();
533 <[S]>::mut_from_prefix_with_elems(
534 &mut shmem.command_page.request_data.as_mut_bytes()[input.as_bytes().len()..],
535 n,
536 )
537 .unwrap()
538 .0
539 }
540
541 fn dispatch_sync<O: FromBytes + Immutable + KnownLayout>(
542 &mut self,
543 command: SidecarCommand,
544 input: impl IntoBytes + Immutable + KnownLayout,
545 ) -> Result<&O, SidecarError> {
546 self.set_command::<_, u8>(command, input, 0);
547 self.run_sync()?;
548 Ok(self.command_result::<_, u8>(0)?.0)
549 }
550
551 fn run_sync(&mut self) -> Result<(), SidecarError> {
552 unsafe {
554 ioctl::mshv_vtl_sidecar_run(self.node.state.file.as_raw_fd(), self.cpu)
555 .map_err(|err| SidecarError::Io(err.into()))?;
556 }
557 Ok(())
558 }
559
560 fn start_async(&mut self) -> Result<(), SidecarError> {
561 let old = std::mem::replace(
562 &mut *self.node.state.vps[self.index].lock(),
563 VpState::Running(None),
564 );
565 assert!(matches!(old, VpState::Stopped));
566 unsafe {
568 ioctl::mshv_vtl_sidecar_start(self.node.state.file.as_raw_fd(), self.cpu)
569 .map_err(|err| SidecarError::Io(err.into()))?;
570 }
571 Ok(())
572 }
573
574 fn stop_async(&mut self) {
575 unsafe {
577 ioctl::mshv_vtl_sidecar_stop(self.node.state.file.as_raw_fd(), self.cpu)
578 .expect("failed to stop vp");
579 }
580 }
581
582 async fn wait_async(&mut self) {
583 poll_fn(|cx| {
584 let mut vp = self.node.state.vps[self.index].lock();
585 match &mut *vp {
586 VpState::Stopped => unreachable!(),
587 VpState::Running(waker) => {
588 if waker.as_ref().is_none_or(|w| !cx.waker().will_wake(w)) {
589 *waker = Some(cx.waker().clone());
590 }
591 Poll::Pending
592 }
593 VpState::Finished => {
594 *vp = VpState::Stopped;
595 Poll::Ready(())
596 }
597 }
598 })
599 .await
600 }
601
602 fn command_result<
603 O: FromBytes + Immutable + KnownLayout,
604 S: FromBytes + Immutable + KnownLayout,
605 >(
606 &mut self,
607 n: usize,
608 ) -> Result<(&O, &[S]), SidecarError> {
609 let shmem = unsafe { self.shmem.as_ref() };
612 if shmem.command_page.has_error != 0 {
613 let s = String::from_utf8_lossy(
614 &shmem.command_page.error.buf[..shmem.command_page.error.len as usize],
615 );
616 return Err(SidecarError::Sidecar(s.into_owned()));
617 }
618 let (output, slice) = shmem
619 .command_page
620 .request_data
621 .as_bytes()
622 .split_at(size_of::<O>());
623 let output = O::ref_from_bytes(output).unwrap();
624 let (slice, _) = <[S]>::ref_from_prefix_with_elems(slice, n).unwrap();
625 Ok((output, slice))
626 }
627}
628
629pub struct SidecarRun<'a, 'b> {
633 vp: &'a mut SidecarVp<'b>,
634 waited: bool,
635}
636
637impl SidecarRun<'_, '_> {
638 pub fn cancel(&mut self) {
642 if !self.waited {
643 self.vp.stop_async();
644 }
645 }
646
647 pub async fn wait(&mut self) -> Result<bool, SidecarError> {
651 if !self.waited {
652 self.vp.wait_async().await;
653 self.waited = true;
654 }
655 let &RunVpResponse { intercept } = self.vp.command_result::<_, u8>(0)?.0;
656 Ok(intercept != 0)
657 }
658}
659
660impl Drop for SidecarRun<'_, '_> {
661 fn drop(&mut self) {
662 assert!(self.waited, "failed to stop vp");
663 }
664}