Skip to main content

virt_kvm/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! KVM implementation of the virt::generic interfaces.
5
6#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8// UNSAFETY: Calling KVM APIs and manually managing memory.
9#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12mod arch;
13mod gsi;
14
15pub use arch::Kvm;
16
17use guestmem::GuestMemory;
18use inspect::Inspect;
19use memory_range::MemoryRange;
20use parking_lot::Mutex;
21use std::sync::Arc;
22use thiserror::Error;
23use virt::state::StateError;
24
25/// Returns whether KVM is available on this machine.
26pub fn is_available() -> Result<bool, KvmError> {
27    match std::fs::metadata("/dev/kvm") {
28        Ok(_) => Ok(true),
29        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
30        Err(err) => Err(KvmError::AvailableCheck(err)),
31    }
32}
33
34use arch::KvmVpInner;
35use hvdef::Vtl;
36use std::sync::atomic::Ordering;
37use virt::VpIndex;
38use vmcore::vmtime::VmTimeAccess;
39
// Error type returned by this crate's public entry points (for example
// `is_available`). The `#[error(...)]` strings are the `Display` messages.
//
// Variants marked `#[from]` can be produced implicitly by `?`; variants
// marked `#[source]` or `#[from]` expose the wrapped error through
// `std::error::Error::source`.
#[derive(Error, Debug)]
pub enum KvmError {
    #[error("operation not supported")]
    NotSupported,
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    // Wraps an error from the `kvm` crate; the message is deliberately
    // generic, the detail lives in the source error.
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    // Produced by `is_available` when stat-ing `/dev/kvm` fails with anything
    // other than "not found".
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    // `transparent`: Display and source are forwarded to the inner error.
    // The payload mentions `KvmError` itself, hence the `Box`.
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    // NOTE(review): this and `NoGic` are GIC-related but are not gated on
    // `guest_arch = "aarch64"`, unlike the GIC fields of `KvmPartitionInner`.
    #[error("misaligned gic base address")]
    Misaligned,
    #[error("host does not support GICv2 or GICv3")]
    NoGic,
    // NOTE(review): the payload has neither `#[source]` nor `#[from]`, so it
    // is not reported through `source()` and is not part of the message; it
    // is only visible via `Debug`.
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to compute topology cpuid")]
    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
}
66
/// One occupied entry of the partition's memory-slot table: the host address
/// that was registered with KVM and the guest range it was mapped at.
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    /// Host pointer passed to `map_region`. It is also the key used to find
    /// an existing slot when the same host memory is mapped again.
    host_addr: *mut u8,
    /// Guest range `addr..addr + size` recorded when the slot was set.
    range: MemoryRange,
}

// The raw pointer field makes this type `!Send`/`!Sync` by default; these
// impls opt back in. In the code visible in this file the pointer is only
// stored, compared, and handed to KVM -- it is never dereferenced.
// NOTE(review): confirm no other module dereferences `host_addr`.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}
75
/// The partition's memory-slot table, guarded by `KvmPartitionInner::memory`.
#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    /// Indexed by KVM slot number: `map_region` and `unmap_range` pass the
    /// vector index to `set_user_memory_region` as the slot id. `None` marks
    /// a slot that is currently free and may be reused.
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}
81
// Public handle to a KVM-backed partition. The actual state lives in the
// shared `KvmPartitionInner`; this type holds reference-counted handles to
// it and to related per-partition state.
#[derive(Inspect)]
pub struct KvmPartition {
    // Shared partition state; `memory_mapper` hands out clones of this `Arc`.
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
    // Synic port table layered over the partition (excluded from Inspect).
    #[inspect(skip)]
    synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
    // irqfd bookkeeping from the `gsi` module (excluded from Inspect).
    #[inspect(skip)]
    irqfd_state: Arc<gsi::KvmIrqFdState>,
}
91
/// Partition state shared (via `Arc`) between `KvmPartition`,
/// `KvmProcessorBinder`, and the memory-mapping trait object.
#[derive(Inspect)]
struct KvmPartitionInner {
    /// The underlying KVM partition; used here to force VP exits and to
    /// program memory slots.
    #[inspect(skip)]
    kvm: kvm::Partition,
    /// Memory-slot table; locked for the duration of every map/unmap.
    memory: Mutex<KvmMemoryRangeState>,
    /// NOTE(review): presumably whether the Hyper-V (hv1) interface is
    /// exposed to the guest -- not used in the code visible here; confirm.
    hv1_enabled: bool,
    /// Guest memory accessor (not used in the code visible here).
    gm: GuestMemory,
    /// Per-VP state, indexed by `VpIndex`; index 0 is what `bsp()` returns
    /// on x86_64.
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    /// GSI routing table state from the `gsi` module.
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    /// Capabilities reported for this partition.
    caps: virt::PartitionCapabilities,

    // This is used for debugging via Inspect
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,

    /// The GIC device fd, kept alive for the VM lifetime.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    _gic_device: kvm::Device,
    /// The ITS device fd, kept alive for the VM lifetime.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    _its_device: Option<kvm::Device>,
    /// MSI controller configuration (v2m, ITS, or none).
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    gic_msi: vm_topology::processor::aarch64::GicMsiController,
    /// Total configured GIC interrupt count (SGIs + PPIs + SPIs).
    #[cfg(guest_arch = "aarch64")]
    gic_nr_irqs: u32,
    /// Synic port map for this partition.
    synic_ports: virt::synic::SynicPortMap,
}
126
// TODO: Chunk this up into smaller types.
/// Errors that can occur while running a virtual processor. Private to the
/// crate; the code that constructs these is outside this file.
#[derive(Debug, Error)]
enum KvmRunVpError {
    /// The payload is printed in hex (`{0:#x}`).
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    #[error("invalid vp state")]
    InvalidVpState,
    /// Wraps the `kvm` crate error that made the run fail.
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    /// The `expect(dead_code)` asserts that this variant is never constructed
    /// in x86_64 builds. The payload is the event type, printed in hex.
    #[cfg_attr(guest_arch = "x86_64", expect(dead_code))]
    #[error("unhandled system event type: {0:#x}")]
    UnhandledSystemEvent(u32),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}
143
// Bundles what is needed to bind a processor to a partition: the shared
// partition state, the index of the VP, and its VM-time accessor.
//
// The `expect(dead_code)` asserts that the fields are unused in aarch64
// builds; the compiler will flag the attribute once that stops being true.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    // Shared partition state the VP belongs to.
    partition: Arc<KvmPartitionInner>,
    // Index of the VP within the partition.
    vpindex: VpIndex,
    // VM-time access handle for this VP.
    vmtime: VmTimeAccess,
}
150
151impl KvmPartitionInner {
152    #[cfg(guest_arch = "x86_64")]
153    fn bsp(&self) -> &KvmVpInner {
154        &self.vps[0]
155    }
156
157    fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
158        self.vps.get(vp_index.index() as usize)
159    }
160
161    fn evaluate_vp(&self, vp_index: VpIndex) {
162        let Some(vp) = self.vp(vp_index) else { return };
163        vp.set_eval(true, Ordering::Relaxed);
164
165        #[cfg(guest_arch = "x86_64")]
166        self.kvm.vp(vp.vp_info().apic_id).force_exit();
167
168        #[cfg(guest_arch = "aarch64")]
169        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
170    }
171
172    /// # Safety
173    ///
174    /// `data..data+size` must be and remain an allocated VA range until the
175    /// partition is destroyed or the region is unmapped.
176    unsafe fn map_region(
177        &self,
178        data: *mut u8,
179        size: usize,
180        addr: u64,
181        readonly: bool,
182    ) -> anyhow::Result<()> {
183        let mut state = self.memory.lock();
184
185        // Memory slots cannot be resized but can be moved within the guest
186        // address space. Find the existing slot if there is one.
187        let mut slot_to_use = None;
188        for (slot, range) in state.ranges.iter_mut().enumerate() {
189            match range {
190                Some(range) if range.host_addr == data => {
191                    slot_to_use = Some(slot);
192                    break;
193                }
194                Some(_) => (),
195                None => slot_to_use = Some(slot),
196            }
197        }
198        if slot_to_use.is_none() {
199            slot_to_use = Some(state.ranges.len());
200            state.ranges.push(None);
201        }
202        let slot_to_use = slot_to_use.unwrap();
203        unsafe {
204            self.kvm
205                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
206        };
207        state.ranges[slot_to_use] = Some(KvmMemoryRange {
208            host_addr: data,
209            range: MemoryRange::new(addr..addr + size as u64),
210        });
211        Ok(())
212    }
213}
214
215impl virt::PartitionMemoryMapper for KvmPartition {
216    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
217        assert_eq!(vtl, Vtl::Vtl0);
218        self.inner.clone()
219    }
220}
221
222// TODO: figure out a better abstraction that works for both KVM and WHP.
223impl virt::PartitionMemoryMap for KvmPartitionInner {
224    unsafe fn map_range(
225        &self,
226        data: *mut u8,
227        size: usize,
228        addr: u64,
229        writable: bool,
230        _exec: bool,
231    ) -> anyhow::Result<()> {
232        // SAFETY: guaranteed by caller.
233        unsafe { self.map_region(data, size, addr, !writable) }
234    }
235
236    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
237        let range = MemoryRange::new(addr..addr + size);
238        let mut state = self.memory.lock();
239        for (slot, entry) in state.ranges.iter_mut().enumerate() {
240            let Some(kvm_range) = entry else { continue };
241            if range.contains(&kvm_range.range) {
242                // SAFETY: clearing a slot should always be safe since it removes
243                // and does not add memory references.
244                unsafe {
245                    self.kvm.set_user_memory_region(
246                        slot as u32,
247                        std::ptr::null_mut(),
248                        0,
249                        0,
250                        false,
251                    )?;
252                }
253                *entry = None;
254            } else {
255                assert!(
256                    !range.overlaps(&kvm_range.range),
257                    "can only unmap existing ranges of exact size"
258                );
259            }
260        }
261        Ok(())
262    }
263}