1#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12mod arch;
13mod gsi;
14
15pub use arch::Kvm;
16
17use guestmem::GuestMemory;
18use inspect::Inspect;
19use memory_range::MemoryRange;
20use parking_lot::Mutex;
21use std::sync::Arc;
22use thiserror::Error;
23use virt::state::StateError;
24
25pub fn is_available() -> Result<bool, KvmError> {
27 match std::fs::metadata("/dev/kvm") {
28 Ok(_) => Ok(true),
29 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
30 Err(err) => Err(KvmError::AvailableCheck(err)),
31 }
32}
33
34use arch::KvmVpInner;
35use hvdef::Vtl;
36use std::sync::atomic::Ordering;
37use virt::VpIndex;
38use vmcore::vmtime::VmTimeAccess;
39
40#[derive(Error, Debug)]
41pub enum KvmError {
42 #[error("operation not supported")]
43 NotSupported,
44 #[error("vtl2 is not supported on this hypervisor")]
45 Vtl2NotSupported,
46 #[error("isolation is not supported on this hypervisor")]
47 IsolationNotSupported,
48 #[error("kvm error")]
49 Kvm(#[from] kvm::Error),
50 #[error("failed to stat /dev/kvm")]
51 AvailableCheck(#[source] std::io::Error),
52 #[error(transparent)]
53 State(#[from] Box<StateError<KvmError>>),
54 #[error("invalid state while restoring: {0}")]
55 InvalidState(&'static str),
56 #[error("misaligned gic base address")]
57 Misaligned,
58 #[error("host does not support GICv2 or GICv3")]
59 NoGic,
60 #[error("host does not support required cpu capabilities")]
61 Capabilities(virt::PartitionCapabilitiesError),
62 #[cfg(guest_arch = "x86_64")]
63 #[error("nested virtualization was requested but the host does not support it")]
64 NestedVirtUnsupported,
65 #[cfg(guest_arch = "x86_64")]
66 #[error("unsupported CPU vendor")]
67 UnsupportedCpuVendor,
68 #[cfg(guest_arch = "x86_64")]
69 #[error("failed to compute topology cpuid")]
70 TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
71}
72
73#[derive(Debug, Inspect)]
74struct KvmMemoryRange {
75 host_addr: *mut u8,
76 range: MemoryRange,
77}
78
79unsafe impl Sync for KvmMemoryRange {}
80unsafe impl Send for KvmMemoryRange {}
81
82#[derive(Debug, Default, Inspect)]
83struct KvmMemoryRangeState {
84 #[inspect(flatten, iter_by_index)]
85 ranges: Vec<Option<KvmMemoryRange>>,
86}
87
88#[derive(Inspect)]
89pub struct KvmPartition {
90 #[inspect(flatten)]
91 inner: Arc<KvmPartitionInner>,
92 #[cfg(guest_arch = "x86_64")]
93 #[inspect(skip)]
94 synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
95 #[inspect(skip)]
96 irqfd_state: Arc<gsi::KvmIrqFdState>,
97}
98
99#[derive(Inspect)]
100struct KvmPartitionInner {
101 #[inspect(skip)]
102 kvm: kvm::Partition,
103 memory: Mutex<KvmMemoryRangeState>,
104 hv1_enabled: bool,
105 gm: GuestMemory,
106 #[inspect(skip)]
107 vps: Vec<KvmVpInner>,
108 #[inspect(skip)]
109 gsi_routing: Mutex<gsi::GsiRouting>,
110 caps: virt::PartitionCapabilities,
111
112 #[cfg(guest_arch = "x86_64")]
114 cpuid: virt::CpuidLeafSet,
115
116 #[cfg(guest_arch = "x86_64")]
117 reserved_vps_per_socket: u32,
118
119 #[cfg(guest_arch = "aarch64")]
121 #[inspect(skip)]
122 _gic_device: kvm::Device,
123 #[cfg(guest_arch = "aarch64")]
125 #[inspect(skip)]
126 _its_device: Option<kvm::Device>,
127 #[cfg(guest_arch = "aarch64")]
129 #[inspect(skip)]
130 gic_msi: vm_topology::processor::aarch64::GicMsiController,
131 #[cfg(guest_arch = "aarch64")]
133 gic_nr_irqs: u32,
134 #[cfg(guest_arch = "x86_64")]
135 synic_ports: virt::synic::SynicPortMap,
136}
137
138#[derive(Debug, Error)]
140enum KvmRunVpError {
141 #[error("KVM internal error: {0:#x}")]
142 InternalError(u32),
143 #[error("invalid vp state")]
144 InvalidVpState,
145 #[error("failed to run VP")]
146 Run(#[source] kvm::Error),
147 #[cfg_attr(guest_arch = "x86_64", expect(dead_code))]
148 #[error("unhandled system event type: {0:#x}")]
149 UnhandledSystemEvent(u32),
150 #[cfg(guest_arch = "x86_64")]
151 #[error("failed to inject an extint interrupt")]
152 ExtintInterrupt(#[source] kvm::Error),
153}
154
155pub struct KvmProcessorBinder {
156 partition: Arc<KvmPartitionInner>,
157 vpindex: VpIndex,
158 vmtime: VmTimeAccess,
159}
160
161impl KvmPartitionInner {
162 #[cfg(guest_arch = "x86_64")]
163 fn bsp(&self) -> &KvmVpInner {
164 &self.vps[0]
165 }
166
167 fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
168 self.vps.get(vp_index.index() as usize)
169 }
170
171 fn evaluate_vp(&self, vp_index: VpIndex) {
172 let Some(vp) = self.vp(vp_index) else { return };
173 vp.set_eval(true, Ordering::Relaxed);
174
175 #[cfg(guest_arch = "x86_64")]
176 self.kvm.vp(vp.vp_info().apic_id).force_exit();
177
178 #[cfg(guest_arch = "aarch64")]
179 self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
180 }
181
182 unsafe fn map_region(
187 &self,
188 data: *mut u8,
189 size: usize,
190 addr: u64,
191 readonly: bool,
192 ) -> anyhow::Result<()> {
193 let mut state = self.memory.lock();
194
195 let mut slot_to_use = None;
198 for (slot, range) in state.ranges.iter_mut().enumerate() {
199 match range {
200 Some(range) if range.host_addr == data => {
201 slot_to_use = Some(slot);
202 break;
203 }
204 Some(_) => (),
205 None => slot_to_use = Some(slot),
206 }
207 }
208 if slot_to_use.is_none() {
209 slot_to_use = Some(state.ranges.len());
210 state.ranges.push(None);
211 }
212 let slot_to_use = slot_to_use.unwrap();
213 unsafe {
214 self.kvm
215 .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
216 };
217 state.ranges[slot_to_use] = Some(KvmMemoryRange {
218 host_addr: data,
219 range: MemoryRange::new(addr..addr + size as u64),
220 });
221 Ok(())
222 }
223}
224
225impl virt::PartitionMemoryMapper for KvmPartition {
226 fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
227 assert_eq!(vtl, Vtl::Vtl0);
228 self.inner.clone()
229 }
230}
231
232impl virt::PartitionMemoryMap for KvmPartitionInner {
234 unsafe fn map_range(
235 &self,
236 data: *mut u8,
237 size: usize,
238 addr: u64,
239 writable: bool,
240 _exec: bool,
241 ) -> anyhow::Result<()> {
242 unsafe { self.map_region(data, size, addr, !writable) }
244 }
245
246 fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
247 let range = MemoryRange::new(addr..addr + size);
248 let mut state = self.memory.lock();
249 for (slot, entry) in state.ranges.iter_mut().enumerate() {
250 let Some(kvm_range) = entry else { continue };
251 if range.contains(&kvm_range.range) {
252 unsafe {
255 self.kvm.set_user_memory_region(
256 slot as u32,
257 std::ptr::null_mut(),
258 0,
259 0,
260 false,
261 )?;
262 }
263 *entry = None;
264 } else {
265 assert!(
266 !range.overlaps(&kvm_range.range),
267 "can only unmap existing ranges of exact size"
268 );
269 }
270 }
271 Ok(())
272 }
273}