virt_kvm/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! KVM implementation of the virt::generic interfaces.

#![cfg(all(target_os = "linux", guest_is_native))]
#![expect(missing_docs)]
// UNSAFETY: Calling KVM APIs and manually managing memory.
#![expect(unsafe_code)]
#![expect(clippy::undocumented_unsafe_blocks)]

use guestmem::GuestMemory;
use inspect::Inspect;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use std::sync::Arc;

mod arch;
#[cfg(guest_arch = "x86_64")]
mod gsi;

use thiserror::Error;
use virt::state::StateError;

pub use arch::Kvm;
use arch::KvmVpInner;
use hvdef::Vtl;
use std::sync::atomic::Ordering;
use virt::VpIndex;
use vmcore::vmtime::VmTimeAccess;
#[derive(Error, Debug)]
pub enum KvmError {
    #[error("operation not supported")]
    NotSupported,
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    #[error("misaligned gic base address")]
    Misaligned,
    #[error("host does not support required cpu capabilities")]
    Capabilities(#[source] virt::PartitionCapabilitiesError),
}

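/// A KVM user memory slot: a host VA range mapped at a guest physical
/// address range.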
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    host_addr: *mut u8,
    range: MemoryRange,
}

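// SAFETY: `KvmMemoryRange` never dereferences `host_addr`; the pointer is
// kept only for slot bookkeeping, so the type is safe to share and send
// across threads.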
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}

#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}

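/// A KVM-backed partition (virtual machine).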
#[derive(Inspect)]
pub struct KvmPartition {
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
}

#[derive(Inspect)]
struct KvmPartitionInner {
    #[inspect(skip)]
    kvm: kvm::Partition,
    memory: Mutex<KvmMemoryRangeState>,
    hv1_enabled: bool,
    gm: GuestMemory,
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    caps: virt::PartitionCapabilities,

    // This is used for debugging via Inspect.
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

// TODO: Chunk this up into smaller types.
#[derive(Debug, Error)]
enum KvmRunVpError {
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    #[error("invalid vp state")]
    InvalidVpState,
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}

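/// State needed to bind a KVM virtual processor to the thread that will run it.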
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    partition: Arc<KvmPartitionInner>,
    vpindex: VpIndex,
    vmtime: VmTimeAccess,
}

impl KvmPartitionInner {
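    /// Returns the bootstrap processor (BSP), which is always VP index 0.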
    #[cfg(guest_arch = "x86_64")]
    fn bsp(&self) -> &KvmVpInner {
        &self.vps[0]
    }

    fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
        self.vps.get(vp_index.index() as usize)
    }

    #[cfg(guest_arch = "x86_64")]
    fn vps(&self) -> impl Iterator<Item = &'_ KvmVpInner> {
        (0..self.vps.len() as u32).filter_map(|index| self.vp(VpIndex::new(index)))
    }

    fn evaluate_vp(&self, vp_index: VpIndex) {
        let Some(vp) = self.vp(vp_index) else { return };
        vp.set_eval(true, Ordering::Relaxed);

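        // KVM identifies x86-64 VPs by APIC ID and aarch64 VPs by VP index,
        // so route the forced exit accordingly.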
        #[cfg(guest_arch = "x86_64")]
        self.kvm.vp(vp.vp_info().apic_id).force_exit();

        #[cfg(guest_arch = "aarch64")]
        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
    }

    /// # Safety
    ///
    /// `data..data+size` must be and remain an allocated VA range until the
    /// partition is destroyed or the region is unmapped.
    unsafe fn map_region(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        readonly: bool,
    ) -> anyhow::Result<()> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Reuse the existing slot for this host address if
        // there is one; otherwise, take a free slot or append a new one.
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.host_addr == data => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();
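        // SAFETY: the caller guarantees that `data..data+size` is and remains
        // an allocated VA range until the partition is destroyed or the
        // region is unmapped.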
        unsafe {
            self.kvm
                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
        };
        state.ranges[slot_to_use] = Some(KvmMemoryRange {
            host_addr: data,
            range: MemoryRange::new(addr..addr + size as u64),
        });
        Ok(())
    }
}

impl virt::PartitionMemoryMapper for KvmPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
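        // Only VTL0 is supported on KVM (see `KvmError::Vtl2NotSupported`).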
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}

// TODO: figure out a better abstraction that works for both KVM and WHP.
impl virt::PartitionMemoryMap for KvmPartitionInner {
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        _exec: bool,
    ) -> anyhow::Result<()> {
        // SAFETY: guaranteed by caller.
        unsafe { self.map_region(data, size, addr, !writable) }
    }

    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
        let range = MemoryRange::new(addr..addr + size);
        let mut state = self.memory.lock();
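        // Free every slot fully contained in the requested range. Partial
        // overlap is a caller bug: slots must be unmapped exactly as mapped.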
        for (slot, entry) in state.ranges.iter_mut().enumerate() {
            let Some(kvm_range) = entry else { continue };
            if range.contains(&kvm_range.range) {
                // SAFETY: clearing a slot should always be safe since it removes
                // and does not add memory references.
                unsafe {
                    self.kvm.set_user_memory_region(
                        slot as u32,
                        std::ptr::null_mut(),
                        0,
                        0,
                        false,
                    )?;
                }
                *entry = None;
            } else {
                assert!(
                    !range.overlaps(&kvm_range.range),
                    "can only unmap existing ranges of exact size"
                );
            }
        }
        Ok(())
    }
}