Skip to main content

virt_kvm/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! KVM implementation of the virt::generic interfaces.
5
6#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8// UNSAFETY: Calling KVM APIs and manually managing memory.
9#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12mod arch;
13#[cfg(guest_arch = "x86_64")]
14mod gsi;
15
16pub use arch::Kvm;
17
18use guestmem::GuestMemory;
19use inspect::Inspect;
20use memory_range::MemoryRange;
21use parking_lot::Mutex;
22use std::sync::Arc;
23use thiserror::Error;
24use virt::state::StateError;
25
26/// Returns whether KVM is available on this machine.
27pub fn is_available() -> Result<bool, KvmError> {
28    match std::fs::metadata("/dev/kvm") {
29        Ok(_) => Ok(true),
30        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
31        Err(err) => Err(KvmError::AvailableCheck(err)),
32    }
33}
34
35use arch::KvmVpInner;
36use hvdef::Vtl;
37use std::sync::atomic::Ordering;
38use virt::VpIndex;
39use vmcore::vmtime::VmTimeAccess;
40
/// Errors reported by the KVM backend.
#[derive(Error, Debug)]
pub enum KvmError {
    /// The requested operation is not supported.
    #[error("operation not supported")]
    NotSupported,
    /// VTL2 was requested, which this hypervisor does not support.
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    /// Isolation was requested, which this hypervisor does not support.
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    /// An error from the `kvm` crate; produced automatically by `?` through
    /// the generated `From<kvm::Error>` impl.
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    /// Querying `/dev/kvm` failed with an error other than "not found"
    /// (see [`is_available`]).
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    /// A state error, displayed transparently. Boxed because the wrapped
    /// error type is itself parameterized on `KvmError`.
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    /// State supplied during restore was invalid; the payload is a static
    /// description of what was wrong.
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    /// The GIC base address is misaligned.
    #[error("misaligned gic base address")]
    Misaligned,
    /// The host supports neither GICv2 nor GICv3.
    #[error("host does not support GICv2 or GICv3")]
    NoGic,
    /// The host lacks required CPU capabilities.
    ///
    /// NOTE(review): the inner error is not marked `#[source]`, so it is not
    /// part of the error source chain — confirm this is intentional.
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
    /// Computing the topology CPUID failed because of an unknown vendor
    /// (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to compute topology cpuid")]
    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
}
67
/// Bookkeeping for one registered KVM memory slot: the host virtual address
/// backing the slot and the guest range it is mapped at.
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    /// Host virtual address that was passed to KVM for this slot. Also used
    /// as the key for recognizing a slot that is being re-mapped.
    host_addr: *mut u8,
    /// Guest address range covered by the slot.
    range: MemoryRange,
}

// The raw pointer field makes this type `!Send`/`!Sync` by default, hence the
// manual impls below.
// NOTE(review): presumably sound because `host_addr` is only stored, compared,
// and handed to KVM, never dereferenced through this struct — confirm.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}
76
/// The partition's table of memory slots.
#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    /// One entry per slot; the vector index is the slot number passed to
    /// KVM. `None` marks a slot that is currently free and may be reused.
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}
82
/// A KVM-backed partition.
///
/// This is a thin handle around the shared [`KvmPartitionInner`] state.
#[derive(Inspect)]
pub struct KvmPartition {
    /// Shared partition state; its inspect output is flattened into this
    /// type's output.
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
    /// Synic port state layered over the shared partition state.
    #[inspect(skip)]
    synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
    /// irqfd support object (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    irqfd_state: Arc<dyn virt::irqfd::IrqFd>,
}
93
/// Partition state shared (via `Arc`) between [`KvmPartition`] and the other
/// objects that need access to it.
#[derive(Inspect)]
struct KvmPartitionInner {
    /// The underlying KVM partition object.
    #[inspect(skip)]
    kvm: kvm::Partition,
    /// Memory slot table; locked while slots are mapped or unmapped.
    memory: Mutex<KvmMemoryRangeState>,
    // NOTE(review): presumably records whether the Hyper-V (HV#1) interface
    // is exposed to the guest — confirm against where it is set.
    hv1_enabled: bool,
    /// Guest memory accessor for this partition.
    gm: GuestMemory,
    /// Per-VP state, indexed by VP index.
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    /// GSI routing state (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    /// Capabilities of this partition.
    caps: virt::PartitionCapabilities,

    // This is used for debugging via Inspect
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,

    /// The GIC device fd, kept alive for the VM lifetime.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    _gic_device: kvm::Device,
    /// GICv2m information, if any (aarch64 guests only).
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    gic_v2m: Option<vm_topology::processor::aarch64::GicV2mInfo>,
    /// Total configured GIC interrupt count (SGIs + PPIs + SPIs).
    #[cfg(guest_arch = "aarch64")]
    gic_nr_irqs: u32,
    /// Map of synic ports for this partition.
    synic_ports: virt::synic::SynicPortMap,
}
124
125// TODO: Chunk this up into smaller types.
/// Errors that can occur while running a VP.
#[derive(Debug, Error)]
enum KvmRunVpError {
    /// KVM reported an internal error; the payload is the reported code.
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    /// The VP state was invalid.
    #[error("invalid vp state")]
    InvalidVpState,
    /// Running the VP failed with the wrapped KVM error.
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    /// A system event of an unhandled type was reported; the payload is the
    /// event type. Expected to be unused (dead code) on x86_64.
    #[cfg_attr(guest_arch = "x86_64", expect(dead_code))]
    #[error("unhandled system event type: {0:#x}")]
    UnhandledSystemEvent(u32),
    /// Injecting an extint interrupt failed (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}
141
/// Holds what is needed to refer to one VP of a partition: the shared
/// partition state, the VP's index, and a VM time accessor.
///
/// The fields are expected to be unused (dead code) on aarch64.
// NOTE(review): presumably consumed when binding a processor to the current
// thread — confirm against the arch-specific implementation.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    /// Shared partition state.
    partition: Arc<KvmPartitionInner>,
    /// Index of the VP this binder refers to.
    vpindex: VpIndex,
    /// VM time accessor associated with the VP.
    vmtime: VmTimeAccess,
}
148
149impl KvmPartitionInner {
150    #[cfg(guest_arch = "x86_64")]
151    fn bsp(&self) -> &KvmVpInner {
152        &self.vps[0]
153    }
154
155    fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
156        self.vps.get(vp_index.index() as usize)
157    }
158
159    fn evaluate_vp(&self, vp_index: VpIndex) {
160        let Some(vp) = self.vp(vp_index) else { return };
161        vp.set_eval(true, Ordering::Relaxed);
162
163        #[cfg(guest_arch = "x86_64")]
164        self.kvm.vp(vp.vp_info().apic_id).force_exit();
165
166        #[cfg(guest_arch = "aarch64")]
167        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
168    }
169
170    /// # Safety
171    ///
172    /// `data..data+size` must be and remain an allocated VA range until the
173    /// partition is destroyed or the region is unmapped.
174    unsafe fn map_region(
175        &self,
176        data: *mut u8,
177        size: usize,
178        addr: u64,
179        readonly: bool,
180    ) -> anyhow::Result<()> {
181        let mut state = self.memory.lock();
182
183        // Memory slots cannot be resized but can be moved within the guest
184        // address space. Find the existing slot if there is one.
185        let mut slot_to_use = None;
186        for (slot, range) in state.ranges.iter_mut().enumerate() {
187            match range {
188                Some(range) if range.host_addr == data => {
189                    slot_to_use = Some(slot);
190                    break;
191                }
192                Some(_) => (),
193                None => slot_to_use = Some(slot),
194            }
195        }
196        if slot_to_use.is_none() {
197            slot_to_use = Some(state.ranges.len());
198            state.ranges.push(None);
199        }
200        let slot_to_use = slot_to_use.unwrap();
201        unsafe {
202            self.kvm
203                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
204        };
205        state.ranges[slot_to_use] = Some(KvmMemoryRange {
206            host_addr: data,
207            range: MemoryRange::new(addr..addr + size as u64),
208        });
209        Ok(())
210    }
211}
212
213impl virt::PartitionMemoryMapper for KvmPartition {
214    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
215        assert_eq!(vtl, Vtl::Vtl0);
216        self.inner.clone()
217    }
218}
219
220// TODO: figure out a better abstraction that works for both KVM and WHP.
221impl virt::PartitionMemoryMap for KvmPartitionInner {
222    unsafe fn map_range(
223        &self,
224        data: *mut u8,
225        size: usize,
226        addr: u64,
227        writable: bool,
228        _exec: bool,
229    ) -> anyhow::Result<()> {
230        // SAFETY: guaranteed by caller.
231        unsafe { self.map_region(data, size, addr, !writable) }
232    }
233
234    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
235        let range = MemoryRange::new(addr..addr + size);
236        let mut state = self.memory.lock();
237        for (slot, entry) in state.ranges.iter_mut().enumerate() {
238            let Some(kvm_range) = entry else { continue };
239            if range.contains(&kvm_range.range) {
240                // SAFETY: clearing a slot should always be safe since it removes
241                // and does not add memory references.
242                unsafe {
243                    self.kvm.set_user_memory_region(
244                        slot as u32,
245                        std::ptr::null_mut(),
246                        0,
247                        0,
248                        false,
249                    )?;
250                }
251                *entry = None;
252            } else {
253                assert!(
254                    !range.overlaps(&kvm_range.range),
255                    "can only unmap existing ranges of exact size"
256                );
257            }
258        }
259        Ok(())
260    }
261}