1#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12use guestmem::GuestMemory;
13use inspect::Inspect;
14use memory_range::MemoryRange;
15use parking_lot::Mutex;
16use std::sync::Arc;
17
18mod arch;
19#[cfg(guest_arch = "x86_64")]
20mod gsi;
21
22use thiserror::Error;
23use virt::state::StateError;
24
25pub use arch::Kvm;
26
27pub fn is_available() -> Result<bool, KvmError> {
29 match std::fs::metadata("/dev/kvm") {
30 Ok(_) => Ok(true),
31 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
32 Err(err) => Err(KvmError::AvailableCheck(err)),
33 }
34}
35
36use arch::KvmVpInner;
37use hvdef::Vtl;
38use std::sync::atomic::Ordering;
39use virt::VpIndex;
40use vmcore::vmtime::VmTimeAccess;
41
/// Errors produced by the KVM hypervisor backend.
#[derive(Error, Debug)]
pub enum KvmError {
    /// The requested operation is not implemented by this backend.
    #[error("operation not supported")]
    NotSupported,
    /// VTL2 was requested, but KVM has no VTL support.
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    /// An isolated partition was requested, which KVM cannot provide.
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    /// An error returned by the underlying KVM ioctl layer.
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    /// Failed to stat `/dev/kvm` while probing availability (see
    /// [`is_available`]).
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    /// A save/restore state error (boxed to keep this enum small).
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    /// The saved state being restored was internally inconsistent.
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    /// A GIC base address did not meet alignment requirements.
    #[error("misaligned gic base address")]
    Misaligned,
    /// The host CPU lacks capabilities the partition requires.
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
    /// Could not derive topology-related CPUID leaves for this vendor.
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to compute topology cpuid")]
    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
}
66
/// Bookkeeping entry for one guest memory region registered with KVM:
/// the host virtual address it is mapped from and the guest physical
/// range it covers.
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    /// Host virtual address backing the start of `range`.
    host_addr: *mut u8,
    /// Guest physical address range covered by this memslot.
    range: MemoryRange,
}

// SAFETY: `host_addr` is held purely as an opaque address for slot
// lookup/inspection; this type never dereferences it (see `map_region`,
// which only compares it against incoming pointers), so cross-thread
// sharing introduces no aliasing hazards here.
unsafe impl Sync for KvmMemoryRange {}
unsafe impl Send for KvmMemoryRange {}
75
/// The table of KVM memslots, indexed by slot number.
///
/// `None` entries are free slots that can be reused by later mappings.
#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}
81
/// A KVM-backed partition (virtual machine).
///
/// Thin shared-ownership wrapper around [`KvmPartitionInner`]; clones of
/// the inner `Arc` are handed out e.g. as the partition's memory mapper.
#[derive(Inspect)]
pub struct KvmPartition {
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
}
87
/// Shared state for a KVM partition, referenced by the partition handle
/// and by each VP runner.
#[derive(Inspect)]
struct KvmPartitionInner {
    /// Handle to the underlying KVM VM file descriptor wrapper.
    #[inspect(skip)]
    kvm: kvm::Partition,
    /// Memslot table; guarded by a mutex since mapping and unmapping can
    /// race with each other.
    memory: Mutex<KvmMemoryRangeState>,
    /// Whether Hyper-V (HV1) enlightenments are enabled for the guest.
    hv1_enabled: bool,
    /// Accessor for guest memory.
    gm: GuestMemory,
    /// Per-VP state, indexed by VP index (see `vp`/`bsp`).
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    /// GSI routing table state for interrupt delivery.
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    /// Capabilities computed for this partition at creation time.
    caps: virt::PartitionCapabilities,

    /// CPUID leaves to expose to the guest.
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,

    /// GICv2m MSI information, when present on this platform.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    gic_v2m: Option<vm_topology::processor::aarch64::GicV2mInfo>,
}
110
/// Fatal errors encountered while running a virtual processor.
#[derive(Debug, Error)]
enum KvmRunVpError {
    /// KVM reported an internal error exit; the payload is the
    /// KVM-provided suberror code.
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    /// The VP entered a state KVM cannot run from.
    #[error("invalid vp state")]
    InvalidVpState,
    /// The KVM_RUN ioctl itself failed.
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    /// Failed to inject an external (ExtINT-style) interrupt.
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}
124
/// Everything needed to bind a VP runner to a specific virtual processor:
/// the shared partition state, the VP's index, and a VM time accessor.
// On aarch64 the fields are currently unread, hence the dead_code expect.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
pub struct KvmProcessorBinder {
    /// Shared partition state this VP belongs to.
    partition: Arc<KvmPartitionInner>,
    /// Index of the VP being bound.
    vpindex: VpIndex,
    /// Access to VM time for this VP.
    vmtime: VmTimeAccess,
}
131
132impl KvmPartitionInner {
133 #[cfg(guest_arch = "x86_64")]
134 fn bsp(&self) -> &KvmVpInner {
135 &self.vps[0]
136 }
137
138 fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
139 self.vps.get(vp_index.index() as usize)
140 }
141
142 fn evaluate_vp(&self, vp_index: VpIndex) {
143 let Some(vp) = self.vp(vp_index) else { return };
144 vp.set_eval(true, Ordering::Relaxed);
145
146 #[cfg(guest_arch = "x86_64")]
147 self.kvm.vp(vp.vp_info().apic_id).force_exit();
148
149 #[cfg(guest_arch = "aarch64")]
150 self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
151 }
152
153 unsafe fn map_region(
158 &self,
159 data: *mut u8,
160 size: usize,
161 addr: u64,
162 readonly: bool,
163 ) -> anyhow::Result<()> {
164 let mut state = self.memory.lock();
165
166 let mut slot_to_use = None;
169 for (slot, range) in state.ranges.iter_mut().enumerate() {
170 match range {
171 Some(range) if range.host_addr == data => {
172 slot_to_use = Some(slot);
173 break;
174 }
175 Some(_) => (),
176 None => slot_to_use = Some(slot),
177 }
178 }
179 if slot_to_use.is_none() {
180 slot_to_use = Some(state.ranges.len());
181 state.ranges.push(None);
182 }
183 let slot_to_use = slot_to_use.unwrap();
184 unsafe {
185 self.kvm
186 .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
187 };
188 state.ranges[slot_to_use] = Some(KvmMemoryRange {
189 host_addr: data,
190 range: MemoryRange::new(addr..addr + size as u64),
191 });
192 Ok(())
193 }
194}
195
196impl virt::PartitionMemoryMapper for KvmPartition {
197 fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
198 assert_eq!(vtl, Vtl::Vtl0);
199 self.inner.clone()
200 }
201}
202
203impl virt::PartitionMemoryMap for KvmPartitionInner {
205 unsafe fn map_range(
206 &self,
207 data: *mut u8,
208 size: usize,
209 addr: u64,
210 writable: bool,
211 _exec: bool,
212 ) -> anyhow::Result<()> {
213 unsafe { self.map_region(data, size, addr, !writable) }
215 }
216
217 fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
218 let range = MemoryRange::new(addr..addr + size);
219 let mut state = self.memory.lock();
220 for (slot, entry) in state.ranges.iter_mut().enumerate() {
221 let Some(kvm_range) = entry else { continue };
222 if range.contains(&kvm_range.range) {
223 unsafe {
226 self.kvm.set_user_memory_region(
227 slot as u32,
228 std::ptr::null_mut(),
229 0,
230 0,
231 false,
232 )?;
233 }
234 *entry = None;
235 } else {
236 assert!(
237 !range.overlaps(&kvm_range.range),
238 "can only unmap existing ranges of exact size"
239 );
240 }
241 }
242 Ok(())
243 }
244}