sidecar_client/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The client interface to the sidecar kernel driver.
5
6#![cfg(target_os = "linux")]
7// UNSAFETY: Manually mapping memory for the sidecar kernel and calling ioctls.
8#![expect(unsafe_code)]
9
10use fs_err::os::unix::fs::OpenOptionsExt;
11use hvdef::HvError;
12use hvdef::HvMessage;
13use hvdef::HvStatus;
14use hvdef::hypercall::HvInputVtl;
15use hvdef::hypercall::HvRegisterAssoc;
16use hvdef::hypercall::TranslateVirtualAddressExOutputX64;
17use pal_async::driver::PollImpl;
18use pal_async::driver::SpawnDriver;
19use pal_async::fd::PollFdReady;
20use pal_async::interest::InterestSlot;
21use pal_async::interest::PollEvents;
22use pal_async::task::Task;
23use parking_lot::Mutex;
24use sidecar_defs::CommandPage;
25use sidecar_defs::CpuContextX64;
26use sidecar_defs::GetSetVpRegisterRequest;
27use sidecar_defs::PAGE_SIZE;
28use sidecar_defs::RunVpResponse;
29use sidecar_defs::SidecarCommand;
30use sidecar_defs::TranslateGvaRequest;
31use sidecar_defs::TranslateGvaResponse;
32use std::fs::File;
33use std::future::poll_fn;
34use std::io::Read;
35use std::mem::MaybeUninit;
36use std::ops::Range;
37use std::os::fd::AsRawFd;
38use std::os::raw::c_void;
39use std::ptr::NonNull;
40use std::ptr::addr_of;
41use std::ptr::addr_of_mut;
42use std::sync::Arc;
43use std::sync::atomic::AtomicBool;
44use std::sync::atomic::Ordering::Acquire;
45use std::sync::atomic::Ordering::Release;
46use std::task::Poll;
47use std::task::Waker;
48use thiserror::Error;
49use zerocopy::FromBytes;
50use zerocopy::Immutable;
51use zerocopy::IntoBytes;
52use zerocopy::KnownLayout;
53
54mod ioctl {
55    const BASE: u8 = 0xb8;
56    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_start, nix::request_code_none!(BASE, 0xf0));
57    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_stop, nix::request_code_none!(BASE, 0xf1));
58    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_run, nix::request_code_none!(BASE, 0xf2));
59    nix::ioctl_read!(mshv_vtl_sidecar_info, BASE, 0xf3, SidecarInfo);
60
61    #[repr(C)]
62    pub(crate) struct SidecarInfo {
63        pub base_cpu: u32,
64        pub cpu_count: u32,
65        pub per_cpu_shmem: u32,
66    }
67}
68
69/// A sidecar client.
70///
71/// This is actually a client to multiple sidecar devices, since there is one
72/// per node. This is abstracted away for the caller.
73#[derive(Debug)]
74pub struct SidecarClient {
75    nodes: Vec<SidecarNode>,
76}
77
78#[derive(Debug)]
79struct SidecarNode {
80    mapping: Mapping,
81    per_cpu_shmem_size: usize,
82    cpus: Range<u32>,
83    _task: Task<()>,
84    state: Arc<SidecarClientState>,
85    in_use: Vec<AtomicBool>,
86}
87
88#[derive(Debug)]
89struct SidecarClientState {
90    file: File,
91    vps: Vec<Mutex<VpState>>,
92}
93
/// The asynchronous run state of a single VP.
#[derive(Debug)]
enum VpState {
    /// The VP is not running; it can be started.
    Stopped,
    /// The VP has been started. Holds the waker, if one has been registered,
    /// to notify once the VP is reported as stopped.
    Running(Option<Waker>),
    /// The VP was reported as stopped, but the waiter has not observed it yet.
    Finished,
}
100
/// An owned memory mapping: the base address and the length in bytes.
#[derive(Debug)]
struct Mapping(NonNull<c_void>, usize);

// SAFETY: the mapped memory is not tied to the creating thread; it can be
// accessed from any CPU.
unsafe impl Send for Mapping {}
// SAFETY: the mapped memory is not tied to the creating thread; it can be
// accessed from any CPU.
unsafe impl Sync for Mapping {}
108
109/// An error returned by [`SidecarClient::new`].
110#[derive(Debug, Error)]
111pub enum NewSidecarClientError {
112    /// IO failure interacting with the sidecar driver.
113    #[error("{operation} failed in sidecar driver")]
114    Io {
115        /// The IO operation.
116        operation: &'static str,
117        /// The error.
118        #[source]
119        err: std::io::Error,
120    },
121    /// An error from an IO driver.
122    #[error("driver error")]
123    Driver(#[source] std::io::Error),
124}
125
126impl SidecarClient {
127    /// Create a new sidecar client. Returns `None` if no sidecar devices are found.
128    ///
129    /// `driver(cpu)` returns the driver to use for polling the sidecar device
130    /// whose base CPU is `cpu`.
131    pub fn new<T: SpawnDriver>(
132        mut driver: impl FnMut(u32) -> T,
133    ) -> Result<Option<Self>, NewSidecarClientError> {
134        let mut nodes = Vec::new();
135        let mut expected_base = 0;
136        loop {
137            let node = match SidecarNode::new(&mut driver, nodes.len()) {
138                Ok(Some(node)) => node,
139                Ok(None) => {
140                    if nodes.is_empty() {
141                        // No sidecar devices could be found at all.
142                        return Ok(None);
143                    }
144                    // No more nodes.
145                    break;
146                }
147                Err(err) => return Err(err),
148            };
149            assert_eq!(node.cpus.start, expected_base);
150            expected_base = node.cpus.end;
151            nodes.push(node);
152        }
153        Ok(Some(Self { nodes }))
154    }
155
156    /// Returns a sidecar VP accessor for the given CPU.
157    pub fn vp(&self, cpu: u32) -> SidecarVp<'_> {
158        self.nodes
159            .iter()
160            .find_map(|node| node.vp(cpu))
161            .expect("invalid cpu")
162    }
163
164    /// Returns the CPU index that manages the given VP.
165    pub fn base_cpu(&self, cpu: u32) -> u32 {
166        self.nodes
167            .iter()
168            .find_map(|node| node.cpus.contains(&cpu).then_some(node.cpus.start))
169            .expect("invalid cpu")
170    }
171}
172
173impl SidecarNode {
174    fn new<T: SpawnDriver>(
175        driver: &mut impl FnMut(u32) -> T,
176        node: usize,
177    ) -> Result<Option<Self>, NewSidecarClientError> {
178        let file = match fs_err::OpenOptions::new()
179            .read(true)
180            .write(true)
181            .custom_flags(libc::O_NONBLOCK)
182            .open(format!("/dev/mshv_vtl_sidecar{node}"))
183        {
184            Ok(file) => file,
185            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
186            Err(err) => {
187                return Err(NewSidecarClientError::Io {
188                    operation: "open",
189                    err,
190                });
191            }
192        };
193
194        // SAFETY: calling the ioctl with a valid output pointer. The ioctl is
195        // guaranteed to initialize the output on success (but pre-zero it just to be safe).
196        let info = unsafe {
197            let mut info = MaybeUninit::zeroed();
198            ioctl::mshv_vtl_sidecar_info(file.as_raw_fd(), info.as_mut_ptr()).map_err(|err| {
199                NewSidecarClientError::Io {
200                    operation: "query info",
201                    err: err.into(),
202                }
203            })?;
204            info.assume_init()
205        };
206
207        let cpus = info.base_cpu..info.base_cpu + info.cpu_count;
208        let per_cpu_shmem_size = info.per_cpu_shmem as usize;
209        assert!(
210            per_cpu_shmem_size >= size_of::<VpSharedPages>(),
211            "invalid state size"
212        );
213
214        let mapping = {
215            let mapping_len = cpus.len() * per_cpu_shmem_size;
216            // SAFETY: creating a new mapping, which has no safety requirements.
217            let mapping = unsafe {
218                libc::mmap(
219                    std::ptr::null_mut(),
220                    mapping_len,
221                    libc::PROT_READ | libc::PROT_WRITE,
222                    libc::MAP_SHARED,
223                    file.as_raw_fd(),
224                    0,
225                )
226            };
227            if mapping == libc::MAP_FAILED {
228                return Err(NewSidecarClientError::Io {
229                    operation: "mmap",
230                    err: std::io::Error::last_os_error(),
231                });
232            }
233            Mapping(NonNull::new(mapping).unwrap(), mapping_len)
234        };
235
236        // Start the driver on the first CPU in the node.
237        let driver = driver(cpus.start);
238
239        let fd_ready = driver
240            .new_dyn_fd_ready(file.as_raw_fd())
241            .map_err(NewSidecarClientError::Driver)?;
242
243        let state = Arc::new(SidecarClientState {
244            file: file.into(),
245            vps: cpus.clone().map(|_| Mutex::new(VpState::Stopped)).collect(),
246        });
247
248        let task = driver.spawn(
249            "sidecar-wait",
250            sidecar_wait_loop(fd_ready, state.clone(), cpus.start),
251        );
252
253        tracing::debug!(
254            "sidecar node {node} started, cpus {}..={}",
255            cpus.start,
256            cpus.end - 1
257        );
258
259        Ok(Some(Self {
260            state,
261            per_cpu_shmem_size,
262            mapping,
263            in_use: cpus.clone().map(|_| AtomicBool::new(false)).collect(),
264            cpus,
265            _task: task,
266        }))
267    }
268
269    fn vp(&self, cpu: u32) -> Option<SidecarVp<'_>> {
270        if !self.cpus.contains(&cpu) {
271            return None;
272        }
273        let index = cpu - self.cpus.start;
274        assert!(
275            !self.in_use[index as usize].swap(true, Acquire),
276            "vp in use"
277        );
278        // SAFETY: the mapping is valid and the index is within the range of CPUs.
279        let shmem = unsafe {
280            self.mapping
281                .0
282                .as_ptr()
283                .byte_add(index as usize * self.per_cpu_shmem_size)
284        }
285        .cast();
286        Some(SidecarVp {
287            cpu: cpu as i32,
288            index: index as usize,
289            shmem: NonNull::new(shmem).unwrap(),
290            node: self,
291        })
292    }
293}
294
295async fn sidecar_wait_loop(
296    mut fd_ready: PollImpl<dyn PollFdReady>,
297    state: Arc<SidecarClientState>,
298    base_cpu: u32,
299) {
300    let err = loop {
301        poll_fn(|cx| fd_ready.poll_fd_ready(cx, InterestSlot::Read, PollEvents::IN)).await;
302        let mut cpu = 0u32;
303        let n = match (&state.file).read(cpu.as_mut_bytes()) {
304            Ok(n) => n,
305            Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
306                fd_ready.clear_fd_ready(InterestSlot::Read);
307                continue;
308            }
309            Err(err) => break err,
310        };
311        assert_eq!(n, 4, "unexpected read size");
312        tracing::trace!(cpu, "sidecar stop");
313        let index = cpu - base_cpu;
314        let VpState::Running(waker) =
315            std::mem::replace(&mut *state.vps[index as usize].lock(), VpState::Finished)
316        else {
317            panic!("cpu {cpu} stopped without start");
318        };
319        if let Some(waker) = waker {
320            waker.wake();
321        }
322    };
323    tracing::error!(
324        error = &err as &dyn std::error::Error,
325        "sidecar wait failed"
326    );
327}
328
329impl Drop for Mapping {
330    fn drop(&mut self) {
331        // SAFETY: the mapping is valid and the length is correct.
332        let r = unsafe { libc::munmap(self.0.as_ptr(), self.1) };
333        if r != 0 {
334            panic!("munmap failed: {}", std::io::Error::last_os_error());
335        }
336    }
337}
338
339/// An accessor for a sidecar VP.
340pub struct SidecarVp<'a> {
341    cpu: i32,
342    index: usize,
343    shmem: NonNull<VpSharedPages>,
344    node: &'a SidecarNode,
345}
346
347#[repr(C)]
348struct VpSharedPages {
349    command_page: CommandPage,
350    register_page: hvdef::HvX64RegisterPage,
351}
352
353const _: () = assert!(size_of::<VpSharedPages>() % PAGE_SIZE == 0);
354
355impl Drop for SidecarVp<'_> {
356    fn drop(&mut self) {
357        assert!(self.node.in_use[self.index].swap(false, Release));
358    }
359}
360
361/// An error from a sidecar operation.
362#[derive(Debug, Error)]
363pub enum SidecarError {
364    /// An IO error interacting with the sidecar driver.
365    #[error("driver error")]
366    Io(#[source] std::io::Error),
367    /// An error from the sidecar kernel.
368    #[error("sidecar error: {0}")]
369    Sidecar(String),
370    /// An error from the hypervisor.
371    #[error("hypervisor error")]
372    Hypervisor(#[source] HvError),
373}
374
375impl<'a> SidecarVp<'a> {
376    /// Runs the VP.
377    pub fn run(&mut self) -> Result<SidecarRun<'_, 'a>, SidecarError> {
378        tracing::trace!("run vp");
379        self.set_command::<_, u8>(SidecarCommand::RUN_VP, (), 0);
380        self.start_async()?;
381        Ok(SidecarRun {
382            vp: self,
383            waited: false,
384        })
385    }
386
387    /// Returns a pointer to the CPU context.
388    ///
389    /// This pointer is only valid for access while the VP is stopped.
390    pub fn cpu_context(&self) -> *mut CpuContextX64 {
391        // SAFETY: the command page pointer is valid so these pointer computations
392        // are also valid.
393        unsafe { addr_of_mut!((*self.shmem.as_ptr()).command_page.cpu_context) }
394    }
395
396    /// Returns a pointer to the intercept message from the hypervisor.
397    ///
398    /// This pointer is only valid for access while the VP is stopped.
399    pub fn intercept_message(&self) -> *const HvMessage {
400        // SAFETY: the command page pointer is valid so these pointer computations
401        // are also valid.
402        unsafe { addr_of!((*self.shmem.as_ptr()).command_page.intercept_message) }
403    }
404
405    /// Returns a pointer to the register page, mapped with the hypervisor.
406    ///
407    /// If the hypervisor does not support register pages, then the `is_valid`
408    /// field will be 0.
409    ///
410    /// This pointer is only valid for access while the VP is stopped.
411    pub fn register_page(&self) -> *mut hvdef::HvX64RegisterPage {
412        // SAFETY: the command page pointer is valid so these pointer computations
413        // are also valid.
414        unsafe { addr_of_mut!((*self.shmem.as_ptr()).register_page) }
415    }
416
417    /// Tests that the VP is running in the sidecar kernel.
418    pub fn test(&mut self) -> Result<(), SidecarError> {
419        tracing::trace!("test");
420        let () = self.dispatch_sync(SidecarCommand::NONE, ())?;
421        Ok(())
422    }
423
424    /// Gets a VP register by name.
425    pub fn get_vp_registers(
426        &mut self,
427        target_vtl: HvInputVtl,
428        regs: &mut [HvRegisterAssoc],
429    ) -> Result<(), SidecarError> {
430        tracing::trace!(count = regs.len(), "get vp register");
431        for regs in regs.chunks_mut(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
432            let buf = self.set_command(
433                SidecarCommand::GET_VP_REGISTERS,
434                GetSetVpRegisterRequest {
435                    count: regs.len() as u16,
436                    target_vtl,
437                    rsvd: 0,
438                    status: HvStatus::SUCCESS,
439                    rsvd2: [0; 10],
440                    regs: [],
441                },
442                regs.len(),
443            );
444            buf.copy_from_slice(regs);
445            self.run_sync()?;
446            let (&GetSetVpRegisterRequest { status, .. }, buf) =
447                self.command_result::<_, HvRegisterAssoc>(regs.len())?;
448            status.result().map_err(SidecarError::Hypervisor)?;
449            regs.copy_from_slice(buf);
450        }
451        Ok(())
452    }
453
454    /// Sets a VP register by name.
455    pub fn set_vp_registers(
456        &mut self,
457        target_vtl: HvInputVtl,
458        regs: &[HvRegisterAssoc],
459    ) -> Result<(), SidecarError> {
460        tracing::trace!(count = regs.len(), "set vp register");
461        for regs in regs.chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
462            let buf = self.set_command(
463                SidecarCommand::SET_VP_REGISTERS,
464                GetSetVpRegisterRequest {
465                    count: regs.len() as u16,
466                    target_vtl,
467                    rsvd: 0,
468                    status: HvStatus::SUCCESS,
469                    rsvd2: [0; 10],
470                    regs: [],
471                },
472                regs.len(),
473            );
474            buf.copy_from_slice(regs);
475            self.run_sync()?;
476            let &GetSetVpRegisterRequest { status, .. } = self.command_result::<_, u8>(0)?.0;
477            status.result().map_err(SidecarError::Hypervisor)?;
478        }
479        Ok(())
480    }
481
482    /// Issues a hypercall to translate a guest virtual address to a guest
483    /// physical address.
484    pub fn translate_gva(
485        &mut self,
486        gvn: u64,
487        control_flags: hvdef::hypercall::TranslateGvaControlFlagsX64,
488    ) -> Result<TranslateVirtualAddressExOutputX64, SidecarError> {
489        tracing::trace!("translate gva");
490        let &TranslateGvaResponse {
491            status,
492            rsvd: _,
493            output,
494        } = self.dispatch_sync(
495            SidecarCommand::TRANSLATE_GVA,
496            TranslateGvaRequest { gvn, control_flags },
497        )?;
498        status.result().map_err(SidecarError::Hypervisor)?;
499        Ok(output)
500    }
501
502    fn set_command<
503        T: IntoBytes + Immutable + KnownLayout,
504        S: IntoBytes + FromBytes + Immutable + KnownLayout,
505    >(
506        &mut self,
507        command: SidecarCommand,
508        input: T,
509        n: usize,
510    ) -> &mut [S] {
511        // SAFETY: no command is running, so the sidecar kernel will not
512        // concurrently modify the state page.
513        let shmem = unsafe { self.shmem.as_mut() };
514        shmem.command_page.command = command;
515        input
516            .write_to_prefix(shmem.command_page.request_data.as_mut_bytes())
517            .unwrap();
518        <[S]>::mut_from_prefix_with_elems(
519            &mut shmem.command_page.request_data.as_mut_bytes()[input.as_bytes().len()..],
520            n,
521        )
522        .unwrap()
523        .0
524    }
525
526    fn dispatch_sync<O: FromBytes + Immutable + KnownLayout>(
527        &mut self,
528        command: SidecarCommand,
529        input: impl IntoBytes + Immutable + KnownLayout,
530    ) -> Result<&O, SidecarError> {
531        self.set_command::<_, u8>(command, input, 0);
532        self.run_sync()?;
533        Ok(self.command_result::<_, u8>(0)?.0)
534    }
535
536    fn run_sync(&mut self) -> Result<(), SidecarError> {
537        // SAFETY: no safety requirements on this ioctl.
538        unsafe {
539            ioctl::mshv_vtl_sidecar_run(self.node.state.file.as_raw_fd(), self.cpu)
540                .map_err(|err| SidecarError::Io(err.into()))?;
541        }
542        Ok(())
543    }
544
545    fn start_async(&mut self) -> Result<(), SidecarError> {
546        let old = std::mem::replace(
547            &mut *self.node.state.vps[self.index].lock(),
548            VpState::Running(None),
549        );
550        assert!(matches!(old, VpState::Stopped));
551        // SAFETY: no safety requirements on this ioctl.
552        unsafe {
553            ioctl::mshv_vtl_sidecar_start(self.node.state.file.as_raw_fd(), self.cpu)
554                .map_err(|err| SidecarError::Io(err.into()))?;
555        }
556        Ok(())
557    }
558
559    fn stop_async(&mut self) {
560        // SAFETY: no safety requirements on this ioctl.
561        unsafe {
562            ioctl::mshv_vtl_sidecar_stop(self.node.state.file.as_raw_fd(), self.cpu)
563                .expect("failed to stop vp");
564        }
565    }
566
567    async fn wait_async(&mut self) {
568        poll_fn(|cx| {
569            let mut vp = self.node.state.vps[self.index].lock();
570            match &mut *vp {
571                VpState::Stopped => unreachable!(),
572                VpState::Running(waker) => {
573                    if waker.as_ref().is_none_or(|w| !cx.waker().will_wake(w)) {
574                        *waker = Some(cx.waker().clone());
575                    }
576                    Poll::Pending
577                }
578                VpState::Finished => {
579                    *vp = VpState::Stopped;
580                    Poll::Ready(())
581                }
582            }
583        })
584        .await
585    }
586
587    fn command_result<
588        O: FromBytes + Immutable + KnownLayout,
589        S: FromBytes + Immutable + KnownLayout,
590    >(
591        &mut self,
592        n: usize,
593    ) -> Result<(&O, &[S]), SidecarError> {
594        // SAFETY: the sidecar kernel will not concurrently modify the state
595        // page after the command has completed.
596        let shmem = unsafe { self.shmem.as_ref() };
597        if shmem.command_page.has_error != 0 {
598            let s = String::from_utf8_lossy(
599                &shmem.command_page.error.buf[..shmem.command_page.error.len as usize],
600            );
601            return Err(SidecarError::Sidecar(s.into_owned()));
602        }
603        let (output, slice) = shmem
604            .command_page
605            .request_data
606            .as_bytes()
607            .split_at(size_of::<O>());
608        let output = O::ref_from_bytes(output).unwrap();
609        let (slice, _) = <[S]>::ref_from_prefix_with_elems(slice, n).unwrap();
610        Ok((output, slice))
611    }
612}
613
614/// An object representing a running VP.
615///
616/// Panics if dropped without waiting for the VP to stop.
617pub struct SidecarRun<'a, 'b> {
618    vp: &'a mut SidecarVp<'b>,
619    waited: bool,
620}
621
622impl SidecarRun<'_, '_> {
623    /// Requests that the sidecar kernel stop the VP.
624    ///
625    /// You must still call `wait` after this to ensure the VP has stopped.
626    pub fn cancel(&mut self) {
627        if !self.waited {
628            self.vp.stop_async();
629        }
630    }
631
632    /// Waits for the VP to stop.
633    ///
634    /// Returns `true` if the VP hit an intercept.
635    pub async fn wait(&mut self) -> Result<bool, SidecarError> {
636        if !self.waited {
637            self.vp.wait_async().await;
638            self.waited = true;
639        }
640        let &RunVpResponse { intercept } = self.vp.command_result::<_, u8>(0)?.0;
641        Ok(intercept != 0)
642    }
643}
644
645impl Drop for SidecarRun<'_, '_> {
646    fn drop(&mut self) {
647        assert!(self.waited, "failed to stop vp");
648    }
649}