Skip to main content

virt_kvm/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! KVM implementation of the virt::generic interfaces.
5
6#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8// UNSAFETY: Calling KVM APIs and manually managing memory.
9#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12mod arch;
13mod gsi;
14
15pub use arch::Kvm;
16
17use guestmem::GuestMemory;
18use inspect::Inspect;
19use memory_range::MemoryRange;
20use parking_lot::Mutex;
21use std::sync::Arc;
22use thiserror::Error;
23use virt::state::StateError;
24
25/// Returns whether KVM is available on this machine.
26pub fn is_available() -> Result<bool, KvmError> {
27    match std::fs::metadata("/dev/kvm") {
28        Ok(_) => Ok(true),
29        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
30        Err(err) => Err(KvmError::AvailableCheck(err)),
31    }
32}
33
34use arch::KvmVpInner;
35use hvdef::Vtl;
36use std::sync::atomic::Ordering;
37use virt::VpIndex;
38use vmcore::vmtime::VmTimeAccess;
39
40#[derive(Error, Debug)]
41pub enum KvmError {
42    #[error("operation not supported")]
43    NotSupported,
44    #[error("vtl2 is not supported on this hypervisor")]
45    Vtl2NotSupported,
46    #[error("isolation is not supported on this hypervisor")]
47    IsolationNotSupported,
48    #[error("kvm error")]
49    Kvm(#[from] kvm::Error),
50    #[error("failed to stat /dev/kvm")]
51    AvailableCheck(#[source] std::io::Error),
52    #[error(transparent)]
53    State(#[from] Box<StateError<KvmError>>),
54    #[error("invalid state while restoring: {0}")]
55    InvalidState(&'static str),
56    #[error("misaligned gic base address")]
57    Misaligned,
58    #[error("host does not support GICv2 or GICv3")]
59    NoGic,
60    #[error("host does not support required cpu capabilities")]
61    Capabilities(virt::PartitionCapabilitiesError),
62    #[cfg(guest_arch = "x86_64")]
63    #[error("nested virtualization was requested but the host does not support it")]
64    NestedVirtUnsupported,
65    #[cfg(guest_arch = "x86_64")]
66    #[error("unsupported CPU vendor")]
67    UnsupportedCpuVendor,
68    #[cfg(guest_arch = "x86_64")]
69    #[error("failed to compute topology cpuid")]
70    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
71}
72
73#[derive(Debug, Inspect)]
74struct KvmMemoryRange {
75    host_addr: *mut u8,
76    range: MemoryRange,
77}
78
79unsafe impl Sync for KvmMemoryRange {}
80unsafe impl Send for KvmMemoryRange {}
81
82#[derive(Debug, Default, Inspect)]
83struct KvmMemoryRangeState {
84    #[inspect(flatten, iter_by_index)]
85    ranges: Vec<Option<KvmMemoryRange>>,
86}
87
88#[derive(Inspect)]
89pub struct KvmPartition {
90    #[inspect(flatten)]
91    inner: Arc<KvmPartitionInner>,
92    #[cfg(guest_arch = "x86_64")]
93    #[inspect(skip)]
94    synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
95    #[inspect(skip)]
96    irqfd_state: Arc<gsi::KvmIrqFdState>,
97}
98
99#[derive(Inspect)]
100struct KvmPartitionInner {
101    #[inspect(skip)]
102    kvm: kvm::Partition,
103    memory: Mutex<KvmMemoryRangeState>,
104    hv1_enabled: bool,
105    gm: GuestMemory,
106    #[inspect(skip)]
107    vps: Vec<KvmVpInner>,
108    #[inspect(skip)]
109    gsi_routing: Mutex<gsi::GsiRouting>,
110    caps: virt::PartitionCapabilities,
111
112    // This is used for debugging via Inspect
113    #[cfg(guest_arch = "x86_64")]
114    cpuid: virt::CpuidLeafSet,
115
116    #[cfg(guest_arch = "x86_64")]
117    reserved_vps_per_socket: u32,
118
119    /// The GIC device fd, kept alive for the VM lifetime.
120    #[cfg(guest_arch = "aarch64")]
121    #[inspect(skip)]
122    _gic_device: kvm::Device,
123    /// The ITS device fd, kept alive for the VM lifetime.
124    #[cfg(guest_arch = "aarch64")]
125    #[inspect(skip)]
126    _its_device: Option<kvm::Device>,
127    /// MSI controller configuration (v2m, ITS, or none).
128    #[cfg(guest_arch = "aarch64")]
129    #[inspect(skip)]
130    gic_msi: vm_topology::processor::aarch64::GicMsiController,
131    /// Total configured GIC interrupt count (SGIs + PPIs + SPIs).
132    #[cfg(guest_arch = "aarch64")]
133    gic_nr_irqs: u32,
134    #[cfg(guest_arch = "x86_64")]
135    synic_ports: virt::synic::SynicPortMap,
136}
137
138// TODO: Chunk this up into smaller types.
139#[derive(Debug, Error)]
140enum KvmRunVpError {
141    #[error("KVM internal error: {0:#x}")]
142    InternalError(u32),
143    #[error("invalid vp state")]
144    InvalidVpState,
145    #[error("failed to run VP")]
146    Run(#[source] kvm::Error),
147    #[cfg_attr(guest_arch = "x86_64", expect(dead_code))]
148    #[error("unhandled system event type: {0:#x}")]
149    UnhandledSystemEvent(u32),
150    #[cfg(guest_arch = "x86_64")]
151    #[error("failed to inject an extint interrupt")]
152    ExtintInterrupt(#[source] kvm::Error),
153}
154
155pub struct KvmProcessorBinder {
156    partition: Arc<KvmPartitionInner>,
157    vpindex: VpIndex,
158    vmtime: VmTimeAccess,
159}
160
161impl KvmPartitionInner {
162    #[cfg(guest_arch = "x86_64")]
163    fn bsp(&self) -> &KvmVpInner {
164        &self.vps[0]
165    }
166
167    fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
168        self.vps.get(vp_index.index() as usize)
169    }
170
171    fn evaluate_vp(&self, vp_index: VpIndex) {
172        let Some(vp) = self.vp(vp_index) else { return };
173        vp.set_eval(true, Ordering::Relaxed);
174
175        #[cfg(guest_arch = "x86_64")]
176        self.kvm.vp(vp.vp_info().apic_id).force_exit();
177
178        #[cfg(guest_arch = "aarch64")]
179        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
180    }
181
182    /// # Safety
183    ///
184    /// `data..data+size` must be and remain an allocated VA range until the
185    /// partition is destroyed or the region is unmapped.
186    unsafe fn map_region(
187        &self,
188        data: *mut u8,
189        size: usize,
190        addr: u64,
191        readonly: bool,
192    ) -> anyhow::Result<()> {
193        let mut state = self.memory.lock();
194
195        // Memory slots cannot be resized but can be moved within the guest
196        // address space. Find the existing slot if there is one.
197        let mut slot_to_use = None;
198        for (slot, range) in state.ranges.iter_mut().enumerate() {
199            match range {
200                Some(range) if range.host_addr == data => {
201                    slot_to_use = Some(slot);
202                    break;
203                }
204                Some(_) => (),
205                None => slot_to_use = Some(slot),
206            }
207        }
208        if slot_to_use.is_none() {
209            slot_to_use = Some(state.ranges.len());
210            state.ranges.push(None);
211        }
212        let slot_to_use = slot_to_use.unwrap();
213        unsafe {
214            self.kvm
215                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
216        };
217        state.ranges[slot_to_use] = Some(KvmMemoryRange {
218            host_addr: data,
219            range: MemoryRange::new(addr..addr + size as u64),
220        });
221        Ok(())
222    }
223}
224
225impl virt::PartitionMemoryMapper for KvmPartition {
226    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
227        assert_eq!(vtl, Vtl::Vtl0);
228        self.inner.clone()
229    }
230}
231
232// TODO: figure out a better abstraction that works for both KVM and WHP.
233impl virt::PartitionMemoryMap for KvmPartitionInner {
234    unsafe fn map_range(
235        &self,
236        data: *mut u8,
237        size: usize,
238        addr: u64,
239        writable: bool,
240        _exec: bool,
241    ) -> anyhow::Result<()> {
242        // SAFETY: guaranteed by caller.
243        unsafe { self.map_region(data, size, addr, !writable) }
244    }
245
246    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
247        let range = MemoryRange::new(addr..addr + size);
248        let mut state = self.memory.lock();
249        for (slot, entry) in state.ranges.iter_mut().enumerate() {
250            let Some(kvm_range) = entry else { continue };
251            if range.contains(&kvm_range.range) {
252                // SAFETY: clearing a slot should always be safe since it removes
253                // and does not add memory references.
254                unsafe {
255                    self.kvm.set_user_memory_region(
256                        slot as u32,
257                        std::ptr::null_mut(),
258                        0,
259                        0,
260                        false,
261                    )?;
262                }
263                *entry = None;
264            } else {
265                assert!(
266                    !range.overlaps(&kvm_range.range),
267                    "can only unmap existing ranges of exact size"
268                );
269            }
270        }
271        Ok(())
272    }
273}