virt_kvm/lib.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! KVM implementation of the virt::generic interfaces.

#![cfg(all(target_os = "linux", guest_is_native))]
#![expect(missing_docs)]
// UNSAFETY: Calling KVM APIs and manually managing memory.
#![expect(unsafe_code)]
#![expect(clippy::undocumented_unsafe_blocks)]

use guestmem::GuestMemory;
use inspect::Inspect;
use memory_range::MemoryRange;
use parking_lot::Mutex;
use std::sync::Arc;

mod arch;
#[cfg(guest_arch = "x86_64")]
mod gsi;

use thiserror::Error;
use virt::state::StateError;

pub use arch::Kvm;
use arch::KvmVpInner;
use hvdef::Vtl;
use std::sync::atomic::Ordering;
use virt::VpIndex;
use vmcore::vmtime::VmTimeAccess;

#[derive(Error, Debug)]
pub enum KvmError {
    #[error("operation not supported")]
    NotSupported,
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    #[error("misaligned gic base address")]
    Misaligned,
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
}

#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    host_addr: *mut u8,
    range: MemoryRange,
}

// SAFETY: `KvmMemoryRange` never dereferences `host_addr`; the pointer is kept
// only for slot bookkeeping, so the type is safe to send and share across
// threads.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}

#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}

#[derive(Inspect)]
pub struct KvmPartition {
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
}

#[derive(Inspect)]
struct KvmPartitionInner {
    #[inspect(skip)]
    kvm: kvm::Partition,
    memory: Mutex<KvmMemoryRangeState>,
    hv1_enabled: bool,
    gm: GuestMemory,
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    caps: virt::PartitionCapabilities,

    // This is used for debugging via Inspect
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,
}

// TODO: Chunk this up into smaller types.
#[derive(Debug, Error)]
enum KvmRunVpError {
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    #[error("invalid vp state")]
    InvalidVpState,
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}

#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    partition: Arc<KvmPartitionInner>,
    vpindex: VpIndex,
    vmtime: VmTimeAccess,
}

impl KvmPartitionInner {
    fn vp(&self, vp_index: VpIndex) -> &KvmVpInner {
        &self.vps[vp_index.index() as usize]
    }

    #[cfg(guest_arch = "x86_64")]
    fn vps(&self) -> impl Iterator<Item = &'_ KvmVpInner> {
        (0..self.vps.len() as u32).map(|index| self.vp(VpIndex::new(index)))
    }

    /// Flags the VP for reevaluation and forces it to exit the hypervisor so
    /// that its run loop observes the flag.
    fn evaluate_vp(&self, vp_index: VpIndex) {
        let vp = self.vp(vp_index);
        vp.set_eval(true, Ordering::Relaxed);

        // KVM vcpus are keyed by APIC ID on x86-64 and by VP index on aarch64.
        #[cfg(guest_arch = "x86_64")]
        self.kvm.vp(vp.vp_info().apic_id).force_exit();

        #[cfg(guest_arch = "aarch64")]
        self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
    }

    /// # Safety
    ///
    /// `data..data+size` must be and remain an allocated VA range until the
    /// partition is destroyed or the region is unmapped.
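    ///
    /// # Example
    ///
    /// A minimal sketch of the contract (`inner` and the `host_va` backing
    /// allocation are hypothetical, for illustration only):
    ///
    /// ```ignore
    /// // SAFETY: `host_va..host_va + 0x10_0000` stays allocated until the
    /// // partition is destroyed.
    /// unsafe { inner.map_region(host_va, 0x10_0000, 0x1_0000_0000, false)? };
    /// ```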
    unsafe fn map_region(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        readonly: bool,
    ) -> anyhow::Result<()> {
        let mut state = self.memory.lock();

        // Memory slots cannot be resized but can be moved within the guest
        // address space. Find the existing slot if there is one. (See the
        // sketch after this impl block.)
        let mut slot_to_use = None;
        for (slot, range) in state.ranges.iter_mut().enumerate() {
            match range {
                Some(range) if range.host_addr == data => {
                    slot_to_use = Some(slot);
                    break;
                }
                Some(_) => (),
                None => slot_to_use = Some(slot),
            }
        }
        if slot_to_use.is_none() {
            slot_to_use = Some(state.ranges.len());
            state.ranges.push(None);
        }
        let slot_to_use = slot_to_use.unwrap();
        // SAFETY: the caller guarantees the VA range remains allocated.
        unsafe {
            self.kvm
                .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
        };
        state.ranges[slot_to_use] = Some(KvmMemoryRange {
            host_addr: data,
            range: MemoryRange::new(addr..addr + size as u64),
        });
        Ok(())
    }
}
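
// A hedged sketch of `map_region`'s slot-reuse behavior (`inner` and `host_va`
// are hypothetical, not code from this crate): remapping the same host buffer
// at a new guest address finds the existing slot by `host_addr` and moves it.
//
//     // SAFETY: `host_va` remains allocated for the partition's lifetime.
//     unsafe { inner.map_region(host_va, 0x1000, 0x1_0000_0000, false)? };
//     // Reuses (moves) the same slot rather than allocating a new one.
//     unsafe { inner.map_region(host_va, 0x1000, 0x2_0000_0000, false)? };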

impl virt::PartitionMemoryMapper for KvmPartition {
    fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
        assert_eq!(vtl, Vtl::Vtl0);
        self.inner.clone()
    }
}

// TODO: figure out a better abstraction that works for both KVM and WHP.
impl virt::PartitionMemoryMap for KvmPartitionInner {
    unsafe fn map_range(
        &self,
        data: *mut u8,
        size: usize,
        addr: u64,
        writable: bool,
        _exec: bool,
    ) -> anyhow::Result<()> {
        // SAFETY: guaranteed by caller.
        unsafe { self.map_region(data, size, addr, !writable) }
    }

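    /// Unmaps every mapped range that lies entirely within
    /// `addr..addr + size`; partially overlapping ranges are rejected by the
    /// assert below. A hedged usage sketch (hypothetical values and `inner`):
    ///
    /// ```ignore
    /// // SAFETY: `host_va` stays allocated while mapped.
    /// unsafe { inner.map_range(host_va, 0x1000, 0x8000_0000, true, false)? };
    /// inner.unmap_range(0x8000_0000, 0x1000)?; // exact range that was mapped
    /// ```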
    fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
        let range = MemoryRange::new(addr..addr + size);
        let mut state = self.memory.lock();
        for (slot, entry) in state.ranges.iter_mut().enumerate() {
            let Some(kvm_range) = entry else { continue };
            if range.contains(&kvm_range.range) {
                // SAFETY: clearing a slot should always be safe since it removes
                // and does not add memory references.
                unsafe {
                    self.kvm.set_user_memory_region(
                        slot as u32,
                        std::ptr::null_mut(),
                        0,
                        0,
                        false,
                    )?;
                }
                *entry = None;
            } else {
                assert!(
                    !range.overlaps(&kvm_range.range),
                    "can only unmap existing ranges of exact size"
                );
            }
        }
        Ok(())
    }
}