sidecar_client/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The client interface to the sidecar kernel driver.
5
6#![cfg(target_os = "linux")]
7// UNSAFETY: Manually mapping memory for the sidecar kernel and calling ioctls.
8#![expect(unsafe_code)]
9
10use fs_err::os::unix::fs::OpenOptionsExt;
11use hvdef::HvError;
12use hvdef::HvMessage;
13use hvdef::HvRegisterName;
14use hvdef::HvRegisterValue;
15use hvdef::HvStatus;
16use hvdef::hypercall::HvInputVtl;
17use hvdef::hypercall::HvRegisterAssoc;
18use hvdef::hypercall::TranslateVirtualAddressExOutputX64;
19use pal_async::driver::PollImpl;
20use pal_async::driver::SpawnDriver;
21use pal_async::fd::PollFdReady;
22use pal_async::interest::InterestSlot;
23use pal_async::interest::PollEvents;
24use pal_async::task::Task;
25use parking_lot::Mutex;
26use sidecar_defs::CommandPage;
27use sidecar_defs::CpuContextX64;
28use sidecar_defs::GetSetVpRegisterRequest;
29use sidecar_defs::PAGE_SIZE;
30use sidecar_defs::RunVpResponse;
31use sidecar_defs::SidecarCommand;
32use sidecar_defs::TranslateGvaRequest;
33use sidecar_defs::TranslateGvaResponse;
34use std::fs::File;
35use std::future::poll_fn;
36use std::io::Read;
37use std::mem::MaybeUninit;
38use std::ops::Range;
39use std::os::fd::AsRawFd;
40use std::os::raw::c_void;
41use std::ptr::NonNull;
42use std::ptr::addr_of;
43use std::ptr::addr_of_mut;
44use std::sync::Arc;
45use std::sync::atomic::AtomicBool;
46use std::sync::atomic::Ordering::Acquire;
47use std::sync::atomic::Ordering::Release;
48use std::task::Poll;
49use std::task::Waker;
50use thiserror::Error;
51use zerocopy::FromBytes;
52use zerocopy::FromZeros;
53use zerocopy::Immutable;
54use zerocopy::IntoBytes;
55use zerocopy::KnownLayout;
56
/// Raw ioctl bindings for the sidecar kernel driver device.
mod ioctl {
    // The ioctl "type" byte shared by every sidecar request code below.
    const BASE: u8 = 0xb8;
    // Integer-argument ioctls. Callers in this file pass the target CPU number
    // as the integer argument to start, stop, or synchronously run a VP.
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_start, nix::request_code_none!(BASE, 0xf0));
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_stop, nix::request_code_none!(BASE, 0xf1));
    nix::ioctl_write_int_bad!(mshv_vtl_sidecar_run, nix::request_code_none!(BASE, 0xf2));
    // Reads a `SidecarInfo` describing the device from the driver.
    nix::ioctl_read!(mshv_vtl_sidecar_info, BASE, 0xf3, SidecarInfo);

    /// The device description returned by the `mshv_vtl_sidecar_info` ioctl.
    #[repr(C)]
    pub(crate) struct SidecarInfo {
        /// The first CPU number handled by this device.
        pub base_cpu: u32,
        /// The number of CPUs handled by this device, starting at `base_cpu`.
        pub cpu_count: u32,
        /// The size, in bytes, of each CPU's region of the device's shared
        /// memory mapping.
        pub per_cpu_shmem: u32,
    }
}
71
/// A sidecar client.
///
/// This is actually a client to multiple sidecar devices, since there is one
/// per node. This is abstracted away for the caller.
#[derive(Debug)]
pub struct SidecarClient {
    /// One entry per sidecar device, in device-index order. `new` asserts that
    /// each node's CPU range starts where the previous node's range ended.
    nodes: Vec<SidecarNode>,
}
80
/// Client-side state for a single sidecar device.
#[derive(Debug)]
struct SidecarNode {
    /// The shared memory mapped from the device. It holds one region of
    /// `per_cpu_shmem_size` bytes for each CPU in `cpus`.
    mapping: Mapping,
    /// The byte stride between consecutive CPUs' regions within `mapping`.
    per_cpu_shmem_size: usize,
    /// The CPU numbers served by this device.
    cpus: Range<u32>,
    /// Handle to the spawned task running `sidecar_wait_loop` for this device.
    /// It is never read; it is only held for as long as the node exists.
    _task: Task<()>,
    /// State shared with the wait-loop task.
    state: Arc<SidecarClientState>,
    /// Per-CPU flags, indexed by `cpu - cpus.start`. A flag is set while a
    /// `SidecarVp` accessor for that CPU exists.
    in_use: Vec<AtomicBool>,
}
90
/// State shared between a `SidecarNode` and its wait-loop task.
#[derive(Debug)]
struct SidecarClientState {
    /// The open sidecar device. Used for the VP ioctls and read by the wait
    /// loop to learn which CPU has stopped.
    file: File,
    /// The run state of each VP, indexed by `cpu - base_cpu`.
    vps: Vec<Mutex<VpState>>,
}
96
/// The asynchronous run state of a single VP, as tracked by the client.
#[derive(Debug)]
enum VpState {
    /// The VP is not running and there is no completion left to consume.
    Stopped,
    /// The VP has been started. Holds the waker, if one has been registered by
    /// a waiter, that the wait loop wakes when the VP stops.
    Running(Option<Waker>),
    /// The wait loop has seen the VP stop, but the waiter has not yet observed
    /// it. The waiter resets the state to `Stopped`.
    Finished,
}
103
/// An owned memory mapping: the base address and the length in bytes.
///
/// The region is unmapped with `munmap` when the value is dropped.
#[derive(Debug)]
struct Mapping(NonNull<c_void>, usize);

// SAFETY: the underlying mapping can be accessed from any CPU.
unsafe impl Send for Mapping {}
// SAFETY: the underlying mapping can be accessed from any CPU.
unsafe impl Sync for Mapping {}
111
/// An error returned by [`SidecarClient::new`].
#[derive(Debug, Error)]
pub enum NewSidecarClientError {
    /// IO failure interacting with the sidecar driver.
    #[error("{operation} failed in sidecar driver")]
    Io {
        /// The IO operation that failed. This file uses "open", "query info",
        /// and "mmap".
        operation: &'static str,
        /// The underlying OS error.
        #[source]
        err: std::io::Error,
    },
    /// An error from an IO driver, returned when registering the sidecar
    /// device's file descriptor for readiness polling fails.
    #[error("driver error")]
    Driver(#[source] std::io::Error),
}
128
129impl SidecarClient {
130    /// Create a new sidecar client. Returns `None` if no sidecar devices are found.
131    ///
132    /// `driver(cpu)` returns the driver to use for polling the sidecar device
133    /// whose base CPU is `cpu`.
134    pub fn new<T: SpawnDriver>(
135        mut driver: impl FnMut(u32) -> T,
136    ) -> Result<Option<Self>, NewSidecarClientError> {
137        let mut nodes = Vec::new();
138        let mut expected_base = 0;
139        loop {
140            let node = match SidecarNode::new(&mut driver, nodes.len()) {
141                Ok(Some(node)) => node,
142                Ok(None) => {
143                    if nodes.is_empty() {
144                        // No sidecar devices could be found at all.
145                        return Ok(None);
146                    }
147                    // No more nodes.
148                    break;
149                }
150                Err(err) => return Err(err),
151            };
152            assert_eq!(node.cpus.start, expected_base);
153            expected_base = node.cpus.end;
154            nodes.push(node);
155        }
156        Ok(Some(Self { nodes }))
157    }
158
159    /// Returns a sidecar VP accessor for the given CPU.
160    pub fn vp(&self, cpu: u32) -> SidecarVp<'_> {
161        self.nodes
162            .iter()
163            .find_map(|node| node.vp(cpu))
164            .expect("invalid cpu")
165    }
166
167    /// Returns the CPU index that manages the given VP.
168    pub fn base_cpu(&self, cpu: u32) -> u32 {
169        self.nodes
170            .iter()
171            .find_map(|node| node.cpus.contains(&cpu).then_some(node.cpus.start))
172            .expect("invalid cpu")
173    }
174}
175
176impl SidecarNode {
177    fn new<T: SpawnDriver>(
178        driver: &mut impl FnMut(u32) -> T,
179        node: usize,
180    ) -> Result<Option<Self>, NewSidecarClientError> {
181        let file = match fs_err::OpenOptions::new()
182            .read(true)
183            .write(true)
184            .custom_flags(libc::O_NONBLOCK)
185            .open(format!("/dev/mshv_vtl_sidecar{node}"))
186        {
187            Ok(file) => file,
188            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
189            Err(err) => {
190                return Err(NewSidecarClientError::Io {
191                    operation: "open",
192                    err,
193                });
194            }
195        };
196
197        // SAFETY: calling the ioctl with a valid output pointer. The ioctl is
198        // guaranteed to initialize the output on success (but pre-zero it just to be safe).
199        let info = unsafe {
200            let mut info = MaybeUninit::zeroed();
201            ioctl::mshv_vtl_sidecar_info(file.as_raw_fd(), info.as_mut_ptr()).map_err(|err| {
202                NewSidecarClientError::Io {
203                    operation: "query info",
204                    err: err.into(),
205                }
206            })?;
207            info.assume_init()
208        };
209
210        let cpus = info.base_cpu..info.base_cpu + info.cpu_count;
211        let per_cpu_shmem_size = info.per_cpu_shmem as usize;
212        assert!(
213            per_cpu_shmem_size >= size_of::<VpSharedPages>(),
214            "invalid state size"
215        );
216
217        let mapping = {
218            let mapping_len = cpus.len() * per_cpu_shmem_size;
219            // SAFETY: creating a new mapping, which has no safety requirements.
220            let mapping = unsafe {
221                libc::mmap(
222                    std::ptr::null_mut(),
223                    mapping_len,
224                    libc::PROT_READ | libc::PROT_WRITE,
225                    libc::MAP_SHARED,
226                    file.as_raw_fd(),
227                    0,
228                )
229            };
230            if mapping == libc::MAP_FAILED {
231                return Err(NewSidecarClientError::Io {
232                    operation: "mmap",
233                    err: std::io::Error::last_os_error(),
234                });
235            }
236            Mapping(NonNull::new(mapping).unwrap(), mapping_len)
237        };
238
239        // Start the driver on the first CPU in the node.
240        let driver = driver(cpus.start);
241
242        let fd_ready = driver
243            .new_dyn_fd_ready(file.as_raw_fd())
244            .map_err(NewSidecarClientError::Driver)?;
245
246        let state = Arc::new(SidecarClientState {
247            file: file.into(),
248            vps: cpus.clone().map(|_| Mutex::new(VpState::Stopped)).collect(),
249        });
250
251        let task = driver.spawn(
252            "sidecar-wait",
253            sidecar_wait_loop(fd_ready, state.clone(), cpus.start),
254        );
255
256        tracing::debug!(
257            "sidecar node {node} started, cpus {}..={}",
258            cpus.start,
259            cpus.end - 1
260        );
261
262        Ok(Some(Self {
263            state,
264            per_cpu_shmem_size,
265            mapping,
266            in_use: cpus.clone().map(|_| AtomicBool::new(false)).collect(),
267            cpus,
268            _task: task,
269        }))
270    }
271
272    fn vp(&self, cpu: u32) -> Option<SidecarVp<'_>> {
273        if !self.cpus.contains(&cpu) {
274            return None;
275        }
276        let index = cpu - self.cpus.start;
277        assert!(
278            !self.in_use[index as usize].swap(true, Acquire),
279            "vp in use"
280        );
281        // SAFETY: the mapping is valid and the index is within the range of CPUs.
282        let shmem = unsafe {
283            self.mapping
284                .0
285                .as_ptr()
286                .byte_add(index as usize * self.per_cpu_shmem_size)
287        }
288        .cast();
289        Some(SidecarVp {
290            cpu: cpu as i32,
291            index: index as usize,
292            shmem: NonNull::new(shmem).unwrap(),
293            node: self,
294        })
295    }
296}
297
298async fn sidecar_wait_loop(
299    mut fd_ready: PollImpl<dyn PollFdReady>,
300    state: Arc<SidecarClientState>,
301    base_cpu: u32,
302) {
303    let err = loop {
304        poll_fn(|cx| fd_ready.poll_fd_ready(cx, InterestSlot::Read, PollEvents::IN)).await;
305        let mut cpu = 0u32;
306        let n = match (&state.file).read(cpu.as_mut_bytes()) {
307            Ok(n) => n,
308            Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
309                fd_ready.clear_fd_ready(InterestSlot::Read);
310                continue;
311            }
312            Err(err) => break err,
313        };
314        assert_eq!(n, 4, "unexpected read size");
315        tracing::trace!(cpu, "sidecar stop");
316        let index = cpu - base_cpu;
317        let VpState::Running(waker) =
318            std::mem::replace(&mut *state.vps[index as usize].lock(), VpState::Finished)
319        else {
320            panic!("cpu {cpu} stopped without start");
321        };
322        if let Some(waker) = waker {
323            waker.wake();
324        }
325    };
326    tracing::error!(
327        error = &err as &dyn std::error::Error,
328        "sidecar wait failed"
329    );
330}
331
332impl Drop for Mapping {
333    fn drop(&mut self) {
334        // SAFETY: the mapping is valid and the length is correct.
335        let r = unsafe { libc::munmap(self.0.as_ptr(), self.1) };
336        if r != 0 {
337            panic!("munmap failed: {}", std::io::Error::last_os_error());
338        }
339    }
340}
341
/// An accessor for a sidecar VP.
///
/// At most one accessor exists per CPU at a time; the claim on the CPU is
/// released when the accessor is dropped.
pub struct SidecarVp<'a> {
    /// The CPU number, in the form passed to the driver ioctls.
    cpu: i32,
    /// The VP's index within its node, i.e. `cpu - node.cpus.start`.
    index: usize,
    /// Pointer to this VP's region of the node's shared memory mapping.
    shmem: NonNull<VpSharedPages>,
    /// The node that handles this VP.
    node: &'a SidecarNode,
}
349
/// The layout at the start of each VP's region of shared memory.
#[repr(C)]
struct VpSharedPages {
    /// The page used to pass commands, their inputs, and their results.
    command_page: CommandPage,
    /// The VP's register page.
    register_page: hvdef::HvX64RegisterPage,
}

// Compile-time check that the shared layout is a whole number of pages.
const _: () = assert!(size_of::<VpSharedPages>().is_multiple_of(PAGE_SIZE));
357
impl Drop for SidecarVp<'_> {
    /// Releases this accessor's claim on its CPU so that a new accessor can be
    /// created for it.
    fn drop(&mut self) {
        // The flag was set when the accessor was created, so it must still be
        // set here. This `Release` swap pairs with the `Acquire` swap that
        // claims the VP in `SidecarNode::vp`.
        assert!(self.node.in_use[self.index].swap(false, Release));
    }
}
363
/// An error from a sidecar operation.
#[derive(Debug, Error)]
pub enum SidecarError {
    /// An IO error interacting with the sidecar driver, i.e. a failed ioctl.
    #[error("driver error")]
    Io(#[source] std::io::Error),
    /// An error from the sidecar kernel. The string is the message the sidecar
    /// kernel left in the command page's error buffer, decoded lossily as
    /// UTF-8.
    #[error("sidecar error: {0}")]
    Sidecar(String),
    /// An error from the hypervisor, taken from the status field of a command
    /// response.
    #[error("hypervisor error")]
    Hypervisor(#[source] HvError),
}
377
378impl<'a> SidecarVp<'a> {
379    /// Runs the VP.
380    pub fn run(&mut self) -> Result<SidecarRun<'_, 'a>, SidecarError> {
381        tracing::trace!("run vp");
382        self.set_command::<_, u8>(SidecarCommand::RUN_VP, (), 0);
383        self.start_async()?;
384        Ok(SidecarRun {
385            vp: self,
386            waited: false,
387        })
388    }
389
390    /// Returns a pointer to the CPU context.
391    ///
392    /// This pointer is only valid for access while the VP is stopped.
393    pub fn cpu_context(&self) -> *mut CpuContextX64 {
394        // SAFETY: the command page pointer is valid so these pointer computations
395        // are also valid.
396        unsafe { addr_of_mut!((*self.shmem.as_ptr()).command_page.cpu_context) }
397    }
398
399    /// Returns a pointer to the intercept message from the hypervisor.
400    ///
401    /// This pointer is only valid for access while the VP is stopped.
402    pub fn intercept_message(&self) -> *const HvMessage {
403        // SAFETY: the command page pointer is valid so these pointer computations
404        // are also valid.
405        unsafe { addr_of!((*self.shmem.as_ptr()).command_page.intercept_message) }
406    }
407
408    /// Returns a pointer to the register page, mapped with the hypervisor.
409    ///
410    /// If the hypervisor does not support register pages, then the `is_valid`
411    /// field will be 0.
412    ///
413    /// This pointer is only valid for access while the VP is stopped.
414    pub fn register_page(&self) -> *mut hvdef::HvX64RegisterPage {
415        // SAFETY: the command page pointer is valid so these pointer computations
416        // are also valid.
417        unsafe { addr_of_mut!((*self.shmem.as_ptr()).register_page) }
418    }
419
420    /// Tests that the VP is running in the sidecar kernel.
421    pub fn test(&mut self) -> Result<(), SidecarError> {
422        tracing::trace!("test");
423        let () = self.dispatch_sync(SidecarCommand::NONE, ())?;
424        Ok(())
425    }
426
427    /// Gets a VP register by name.
428    pub fn get_vp_registers(
429        &mut self,
430        target_vtl: HvInputVtl,
431        names: &[HvRegisterName],
432        values: &mut [HvRegisterValue],
433    ) -> Result<(), SidecarError> {
434        tracing::trace!(count = names.len(), "get vp register");
435        for (names, values) in names
436            .chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS)
437            .zip(values.chunks_mut(sidecar_defs::MAX_GET_SET_VP_REGISTERS))
438        {
439            let buf = self.set_command(
440                SidecarCommand::GET_VP_REGISTERS,
441                GetSetVpRegisterRequest {
442                    count: names.len() as u16,
443                    target_vtl,
444                    rsvd: 0,
445                    status: HvStatus::SUCCESS,
446                    rsvd2: [0; 10],
447                    regs: [],
448                },
449                names.len(),
450            );
451            for (i, name) in names.iter().enumerate() {
452                buf[i] = HvRegisterAssoc {
453                    name: *name,
454                    pad: Default::default(),
455                    value: FromZeros::new_zeroed(),
456                };
457            }
458            self.run_sync()?;
459            let (&GetSetVpRegisterRequest { status, .. }, buf) =
460                self.command_result::<_, HvRegisterAssoc>(names.len())?;
461            status.result().map_err(SidecarError::Hypervisor)?;
462            for (i, value) in values.iter_mut().enumerate() {
463                *value = buf[i].value;
464            }
465        }
466        Ok(())
467    }
468
469    /// Sets a VP register by name.
470    pub fn set_vp_registers(
471        &mut self,
472        target_vtl: HvInputVtl,
473        regs: &[HvRegisterAssoc],
474    ) -> Result<(), SidecarError> {
475        tracing::trace!(count = regs.len(), "set vp register");
476        for regs in regs.chunks(sidecar_defs::MAX_GET_SET_VP_REGISTERS) {
477            let buf = self.set_command(
478                SidecarCommand::SET_VP_REGISTERS,
479                GetSetVpRegisterRequest {
480                    count: regs.len() as u16,
481                    target_vtl,
482                    rsvd: 0,
483                    status: HvStatus::SUCCESS,
484                    rsvd2: [0; 10],
485                    regs: [],
486                },
487                regs.len(),
488            );
489            buf.copy_from_slice(regs);
490            self.run_sync()?;
491            let &GetSetVpRegisterRequest { status, .. } = self.command_result::<_, u8>(0)?.0;
492            status.result().map_err(SidecarError::Hypervisor)?;
493        }
494        Ok(())
495    }
496
497    /// Issues a hypercall to translate a guest virtual address to a guest
498    /// physical address.
499    pub fn translate_gva(
500        &mut self,
501        gvn: u64,
502        control_flags: hvdef::hypercall::TranslateGvaControlFlagsX64,
503    ) -> Result<TranslateVirtualAddressExOutputX64, SidecarError> {
504        tracing::trace!("translate gva");
505        let &TranslateGvaResponse {
506            status,
507            rsvd: _,
508            output,
509        } = self.dispatch_sync(
510            SidecarCommand::TRANSLATE_GVA,
511            TranslateGvaRequest { gvn, control_flags },
512        )?;
513        status.result().map_err(SidecarError::Hypervisor)?;
514        Ok(output)
515    }
516
517    fn set_command<
518        T: IntoBytes + Immutable + KnownLayout,
519        S: IntoBytes + FromBytes + Immutable + KnownLayout,
520    >(
521        &mut self,
522        command: SidecarCommand,
523        input: T,
524        n: usize,
525    ) -> &mut [S] {
526        // SAFETY: no command is running, so the sidecar kernel will not
527        // concurrently modify the state page.
528        let shmem = unsafe { self.shmem.as_mut() };
529        shmem.command_page.command = command;
530        input
531            .write_to_prefix(shmem.command_page.request_data.as_mut_bytes())
532            .unwrap();
533        <[S]>::mut_from_prefix_with_elems(
534            &mut shmem.command_page.request_data.as_mut_bytes()[input.as_bytes().len()..],
535            n,
536        )
537        .unwrap()
538        .0
539    }
540
541    fn dispatch_sync<O: FromBytes + Immutable + KnownLayout>(
542        &mut self,
543        command: SidecarCommand,
544        input: impl IntoBytes + Immutable + KnownLayout,
545    ) -> Result<&O, SidecarError> {
546        self.set_command::<_, u8>(command, input, 0);
547        self.run_sync()?;
548        Ok(self.command_result::<_, u8>(0)?.0)
549    }
550
551    fn run_sync(&mut self) -> Result<(), SidecarError> {
552        // SAFETY: no safety requirements on this ioctl.
553        unsafe {
554            ioctl::mshv_vtl_sidecar_run(self.node.state.file.as_raw_fd(), self.cpu)
555                .map_err(|err| SidecarError::Io(err.into()))?;
556        }
557        Ok(())
558    }
559
560    fn start_async(&mut self) -> Result<(), SidecarError> {
561        let old = std::mem::replace(
562            &mut *self.node.state.vps[self.index].lock(),
563            VpState::Running(None),
564        );
565        assert!(matches!(old, VpState::Stopped));
566        // SAFETY: no safety requirements on this ioctl.
567        unsafe {
568            ioctl::mshv_vtl_sidecar_start(self.node.state.file.as_raw_fd(), self.cpu)
569                .map_err(|err| SidecarError::Io(err.into()))?;
570        }
571        Ok(())
572    }
573
574    fn stop_async(&mut self) {
575        // SAFETY: no safety requirements on this ioctl.
576        unsafe {
577            ioctl::mshv_vtl_sidecar_stop(self.node.state.file.as_raw_fd(), self.cpu)
578                .expect("failed to stop vp");
579        }
580    }
581
582    async fn wait_async(&mut self) {
583        poll_fn(|cx| {
584            let mut vp = self.node.state.vps[self.index].lock();
585            match &mut *vp {
586                VpState::Stopped => unreachable!(),
587                VpState::Running(waker) => {
588                    if waker.as_ref().is_none_or(|w| !cx.waker().will_wake(w)) {
589                        *waker = Some(cx.waker().clone());
590                    }
591                    Poll::Pending
592                }
593                VpState::Finished => {
594                    *vp = VpState::Stopped;
595                    Poll::Ready(())
596                }
597            }
598        })
599        .await
600    }
601
602    fn command_result<
603        O: FromBytes + Immutable + KnownLayout,
604        S: FromBytes + Immutable + KnownLayout,
605    >(
606        &mut self,
607        n: usize,
608    ) -> Result<(&O, &[S]), SidecarError> {
609        // SAFETY: the sidecar kernel will not concurrently modify the state
610        // page after the command has completed.
611        let shmem = unsafe { self.shmem.as_ref() };
612        if shmem.command_page.has_error != 0 {
613            let s = String::from_utf8_lossy(
614                &shmem.command_page.error.buf[..shmem.command_page.error.len as usize],
615            );
616            return Err(SidecarError::Sidecar(s.into_owned()));
617        }
618        let (output, slice) = shmem
619            .command_page
620            .request_data
621            .as_bytes()
622            .split_at(size_of::<O>());
623        let output = O::ref_from_bytes(output).unwrap();
624        let (slice, _) = <[S]>::ref_from_prefix_with_elems(slice, n).unwrap();
625        Ok((output, slice))
626    }
627}
628
/// An object representing a running VP.
///
/// Panics if dropped without waiting for the VP to stop.
pub struct SidecarRun<'a, 'b> {
    /// The VP that was started.
    vp: &'a mut SidecarVp<'b>,
    /// Whether the VP's stop has already been awaited via `wait`.
    waited: bool,
}
636
637impl SidecarRun<'_, '_> {
638    /// Requests that the sidecar kernel stop the VP.
639    ///
640    /// You must still call `wait` after this to ensure the VP has stopped.
641    pub fn cancel(&mut self) {
642        if !self.waited {
643            self.vp.stop_async();
644        }
645    }
646
647    /// Waits for the VP to stop.
648    ///
649    /// Returns `true` if the VP hit an intercept.
650    pub async fn wait(&mut self) -> Result<bool, SidecarError> {
651        if !self.waited {
652            self.vp.wait_async().await;
653            self.waited = true;
654        }
655        let &RunVpResponse { intercept } = self.vp.command_result::<_, u8>(0)?.0;
656        Ok(intercept != 0)
657    }
658}
659
660impl Drop for SidecarRun<'_, '_> {
661    fn drop(&mut self) {
662        assert!(self.waited, "failed to stop vp");
663    }
664}