virt_kvm/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! KVM implementation of the virt::generic interfaces.
5
6#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8// UNSAFETY: Calling KVM APIs and manually managing memory.
9#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12use guestmem::GuestMemory;
13use inspect::Inspect;
14use memory_range::MemoryRange;
15use parking_lot::Mutex;
16use std::sync::Arc;
17
18mod arch;
19#[cfg(guest_arch = "x86_64")]
20mod gsi;
21
22use thiserror::Error;
23use virt::state::StateError;
24
25pub use arch::Kvm;
26
27/// Returns whether KVM is available on this machine.
28pub fn is_available() -> Result<bool, KvmError> {
29    match std::fs::metadata("/dev/kvm") {
30        Ok(_) => Ok(true),
31        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
32        Err(err) => Err(KvmError::AvailableCheck(err)),
33    }
34}
35
36use arch::KvmVpInner;
37use hvdef::Vtl;
38use std::sync::atomic::Ordering;
39use virt::VpIndex;
40use vmcore::vmtime::VmTimeAccess;
41
/// Errors produced by the KVM virtualization backend.
#[derive(Error, Debug)]
pub enum KvmError {
    #[error("operation not supported")]
    NotSupported,
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    // Wraps any error surfaced by the underlying `kvm` crate.
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    // Raised by `is_available` when stat'ing /dev/kvm fails for a reason
    // other than the node being absent.
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    // Boxed because `StateError<KvmError>` is recursive in this type.
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    #[error("misaligned gic base address")]
    Misaligned,
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to compute topology cpuid")]
    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
}
66
/// A user memory slot registered with KVM: the host virtual address backing
/// the mapping and the guest physical range it occupies.
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    // Host VA of the start of the mapping. Raw pointer, so Send/Sync must be
    // asserted manually below.
    host_addr: *mut u8,
    // Guest physical address range covered by this slot.
    range: MemoryRange,
}

// SAFETY: within this module `host_addr` is only compared as an opaque key
// (see `map_region`) and never dereferenced, so sharing/sending the struct
// across threads is sound.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}
75
/// The table of KVM memory slots, indexed by slot number. A `None` entry is a
/// free slot that may be reused for a future mapping.
#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}
81
/// A KVM-backed partition. Thin public handle around the shared
/// `KvmPartitionInner` state.
#[derive(Inspect)]
pub struct KvmPartition {
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
}
87
/// Partition state shared between the public handle and its virtual
/// processors.
#[derive(Inspect)]
struct KvmPartitionInner {
    // Handle to the underlying KVM VM.
    #[inspect(skip)]
    kvm: kvm::Partition,
    // Registered memory slots; locked since mappers may race.
    memory: Mutex<KvmMemoryRangeState>,
    // Whether the Hyper-V (HV#1) interface is enabled for this guest —
    // presumably set at partition creation; confirm against `arch`.
    hv1_enabled: bool,
    gm: GuestMemory,
    // Per-VP state, indexed by VP index (see `vp`/`bsp` below).
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    caps: virt::PartitionCapabilities,

    // This is used for debugging via Inspect
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,

    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    gic_v2m: Option<vm_topology::processor::aarch64::GicV2mInfo>,
}
110
// TODO: Chunk this up into smaller types.
/// Errors that can occur while running a virtual processor.
#[derive(Debug, Error)]
enum KvmRunVpError {
    // KVM reported an internal-error exit; carries the raw suberror code.
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    #[error("invalid vp state")]
    InvalidVpState,
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}
124
/// State needed to bind a virtual processor to the thread that will run it.
// NOTE(review): fields are unused on aarch64 (hence the dead_code expect);
// presumably the aarch64 runner path lives elsewhere — confirm in `arch`.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    // Shared partition state this VP belongs to.
    partition: Arc<KvmPartitionInner>,
    // Index of the VP to run.
    vpindex: VpIndex,
    // Virtual-time accessor for this VP.
    vmtime: VmTimeAccess,
}
131
impl KvmPartitionInner {
    /// Returns the boot processor's state (the entry at VP index 0).
    #[cfg(guest_arch = "x86_64")]
    fn bsp(&self) -> &KvmVpInner {
        &self.vps[0]
    }

    /// Looks up per-VP state by VP index, returning `None` if the index is
    /// out of range.
    fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
        self.vps.get(vp_index.index() as usize)
    }

    /// Flags the VP for evaluation and forces it to exit the guest so that it
    /// observes the flag. An unknown `vp_index` is silently ignored.
    fn evaluate_vp(&self, vp_index: VpIndex) {
        let Some(vp) = self.vp(vp_index) else { return };
        vp.set_eval(true, Ordering::Relaxed);

        // The kvm VP accessor is keyed by APIC ID on x86-64 but by VP index
        // on aarch64.
        #[cfg(guest_arch = "x86_64")]
        self.kvm.vp(vp.vp_info().apic_id).force_exit();

        #[cfg(guest_arch = "aarch64")]
        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
    }

    /// Maps the host VA range `data..data+size` into the guest at `addr`,
    /// reusing the slot already associated with `data` if one exists,
    /// otherwise a free (or freshly appended) slot.
    ///
    /// # Safety
    ///
    /// `data..data+size` must be and remain an allocated VA range until the
    /// partition is destroyed or the region is unmapped.
    unsafe fn map_region(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        readonly: bool,
    ) -> anyhow::Result<()> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Find the existing slot if there is one.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.host_addr == data => {
                    // Same backing memory: update/move this slot in place.
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                // Remember a free slot in case no existing mapping matches.
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            // No match and no free slot: append a new one.
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();
        // SAFETY: the caller guarantees `data..data+size` stays allocated
        // until the partition is destroyed or the region is unmapped (this
        // function's safety contract).
        unsafe {
            self.kvm
                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
        };
        state.ranges[slot_to_use] = Some(KvmMemoryRange {
            host_addr: data,
            range: MemoryRange::new(addr..addr + size as u64),
        });
        Ok(())
    }
}
195
196impl virt::PartitionMemoryMapper for KvmPartition {
197    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
198        assert_eq!(vtl, Vtl::Vtl0);
199        self.inner.clone()
200    }
201}
202
// TODO: figure out a better abstraction that works for both KVM and WHP.
impl virt::PartitionMemoryMap for KvmPartitionInner {
    // Maps a host VA range into the guest. KVM slots carry only a read-only
    // flag here, so `_exec` is ignored.
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        _exec: bool,
    ) -> anyhow::Result<()> {
        // SAFETY: guaranteed by caller.
        unsafe { self.map_region(data, size, addr, !writable) }
    }

    // Unmaps every slot fully contained in `addr..addr+size`. Panics if the
    // range only partially overlaps a slot, since slots cannot be split.
    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
        let range = MemoryRange::new(addr..addr + size);
        let mut state = self.memory.lock();
        for (slot, entry) in state.ranges.iter_mut().enumerate() {
            let Some(kvm_range) = entry else { continue };
            if range.contains(&kvm_range.range) {
                // SAFETY: clearing a slot should always be safe since it removes
                // and does not add memory references.
                unsafe {
                    self.kvm.set_user_memory_region(
                        slot as u32,
                        std::ptr::null_mut(),
                        0,
                        0,
                        false,
                    )?;
                }
                // Leave the slot free for reuse by a later `map_region`.
                *entry = None;
            } else {
                assert!(
                    !range.overlaps(&kvm_range.range),
                    "can only unmap existing ranges of exact size"
                );
            }
        }
        Ok(())
    }
}