virt_kvm/
lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! KVM implementation of the `virt::generic` interfaces.

#![cfg(all(target_os = "linux", guest_is_native))]
#![expect(missing_docs)]
// UNSAFETY: Calling KVM APIs and manually managing memory.
#![expect(unsafe_code)]
#![expect(clippy::undocumented_unsafe_blocks)]

use guestmem::GuestMemory;
use inspect::Inspect;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use std::sync::Arc;

mod arch;
#[cfg(guest_arch = "x86_64")]
mod gsi;

use thiserror::Error;
use virt::state::StateError;

pub use arch::Kvm;
use arch::KvmVpInner;
use hvdef::Vtl;
use std::sync::atomic::Ordering;
use virt::VpIndex;
use vmcore::vmtime::VmTimeAccess;

#[derive(Error, Debug)]
pub enum KvmError {
    #[error("operation not supported")]
    NotSupported,
    #[error("VTL2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    #[error("misaligned GIC base address")]
    Misaligned,
}

#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    host_addr: *mut u8,
    range: MemoryRange,
}

// SAFETY: `host_addr` is used only as an opaque address for slot bookkeeping
// and inspection; it is never dereferenced through this type, so it is safe
// to share and send across threads.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}

#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}

#[derive(Inspect)]
pub struct KvmPartition {
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
}

#[derive(Inspect)]
struct KvmPartitionInner {
    #[inspect(skip)]
    kvm: kvm::Partition,
    memory: Mutex<KvmMemoryRangeState>,
    hv1_enabled: bool,
    gm: GuestMemory,
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    caps: virt::PartitionCapabilities,

    // This is used for debugging via `Inspect`.
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

#[derive(Debug, Error)]
pub enum KvmRunVpError {
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    #[error("invalid VP state")]
    InvalidVpState,
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    partition: Arc<KvmPartitionInner>,
    vpindex: VpIndex,
    vmtime: VmTimeAccess,
}

impl KvmPartitionInner {
    fn vp(&self, vp_index: VpIndex) -> &KvmVpInner {
        &self.vps[vp_index.index() as usize]
    }

    #[cfg(guest_arch = "x86_64")]
    fn vps(&self) -> impl Iterator<Item = &'_ KvmVpInner> {
        (0..self.vps.len() as u32).map(|index| self.vp(VpIndex::new(index)))
    }

    fn evaluate_vp(&self, vp_index: VpIndex) {
        let vp = self.vp(vp_index);
        vp.set_eval(true, Ordering::Relaxed);

        #[cfg(guest_arch = "x86_64")]
        self.kvm.vp(vp.vp_info().apic_id).force_exit();

        #[cfg(guest_arch = "aarch64")]
        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
    }

    /// # Safety
    ///
    /// `data..data+size` must be, and must remain, an allocated VA range
    /// until the partition is destroyed or the region is unmapped.
    unsafe fn map_region(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        readonly: bool,
    ) -> Result<(), virt::Error> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Find the existing slot, keyed by host address, if
        // there is one; otherwise, prefer reusing a vacant slot over growing
        // the slot table.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.host_addr == data => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();
        // SAFETY: the caller guarantees that `data..data+size` is and will
        // remain an allocated VA range.
        unsafe {
            self.kvm
                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
        };
        state.ranges[slot_to_use] = Some(KvmMemoryRange {
            host_addr: data,
            range: MemoryRange::new(addr..addr + size as u64),
        });
        Ok(())
    }
}
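
// An illustrative caller of `map_region` (comment-only sketch; `inner`,
// `base`, `len`, and `gpa` are hypothetical names, not part of this crate):
// the safety contract above is typically upheld by mapping from a VA range
// whose lifetime is tied to the partition, such as an mmap'd region owned by
// the VM:
//
//     // SAFETY: `base..base + len` stays mapped until the region is
//     // unmapped or the partition is destroyed.
//     unsafe { inner.map_region(base, len, gpa, /* readonly */ false)? };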

impl virt::PartitionMemoryMapper for KvmPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}

// TODO: figure out a better abstraction that works for both KVM and WHP.
impl virt::PartitionMemoryMap for KvmPartitionInner {
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        _exec: bool,
    ) -> Result<(), virt::Error> {
        // SAFETY: guaranteed by caller.
        unsafe { self.map_region(data, size, addr, !writable) }
    }

    fn unmap_range(&self, addr: u64, size: u64) -> Result<(), virt::Error> {
        let range = MemoryRange::new(addr..addr + size);
        let mut state = self.memory.lock();
        for (slot, entry) in state.ranges.iter_mut().enumerate() {
            let Some(kvm_range) = entry else { continue };
            if range.contains(&kvm_range.range) {
                // SAFETY: clearing a slot should always be safe since it
                // removes and does not add memory references.
                unsafe {
                    self.kvm.set_user_memory_region(
                        slot as u32,
                        std::ptr::null_mut(),
                        0,
                        0,
                        false,
                    )?;
                }
                *entry = None;
            } else {
                assert!(
                    !range.overlaps(&kvm_range.range),
                    "can only unmap existing ranges of exact size"
                );
            }
        }
        Ok(())
    }
}
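
// The slot-selection policy in `map_region` is easy to get wrong, so the
// sketch below pins it down. This test module is illustrative only (not part
// of the original crate): `pick_slot` restates the policy over bare pointers
// so it can be exercised without a KVM file descriptor. An existing slot
// backed by the same host address wins (KVM slots can be moved, not resized),
// then a vacant slot is reused, and only then does the slot table grow.
#[cfg(test)]
mod slot_policy_tests {
    /// Hypothetical helper mirroring the slot search in
    /// `KvmPartitionInner::map_region`.
    fn pick_slot(ranges: &mut Vec<Option<*mut u8>>, data: *mut u8) -> usize {
        let mut slot_to_use = None;
        for (slot, entry) in ranges.iter().enumerate() {
            match entry {
                // An existing slot for this host address is moved in place.
                Some(addr) if *addr == data => return slot,
                Some(_) => (),
                // Remember a vacant slot in case there is no match.
                None => slot_to_use = Some(slot),
            }
        }
        slot_to_use.unwrap_or_else(|| {
            // No match and no vacancy: append a new slot.
            ranges.push(None);
            ranges.len() - 1
        })
    }

    #[test]
    fn prefers_existing_then_vacant_then_append() {
        let a = 0x1000 as *mut u8;
        let b = 0x2000 as *mut u8;

        // The slot already backed by `a` wins even though slot 0 is vacant.
        let mut ranges = vec![None, Some(a)];
        assert_eq!(pick_slot(&mut ranges, a), 1);

        // With no matching slot, the vacancy is reused.
        assert_eq!(pick_slot(&mut ranges, b), 0);

        // With neither, the table grows by one slot.
        let mut full = vec![Some(a)];
        assert_eq!(pick_slot(&mut full, b), 1);
        assert_eq!(full.len(), 2);
    }
}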