1#![cfg(all(target_os = "linux", guest_is_native))]
7#![expect(missing_docs)]
8#![expect(unsafe_code)]
10#![expect(clippy::undocumented_unsafe_blocks)]
11
12mod arch;
13mod gsi;
14
15pub use arch::Kvm;
16
17use guestmem::GuestMemory;
18use inspect::Inspect;
19use memory_range::MemoryRange;
20use parking_lot::Mutex;
21use std::sync::Arc;
22use thiserror::Error;
23use virt::state::StateError;
24
25pub fn is_available() -> Result<bool, KvmError> {
27 match std::fs::metadata("/dev/kvm") {
28 Ok(_) => Ok(true),
29 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
30 Err(err) => Err(KvmError::AvailableCheck(err)),
31 }
32}
33
34use arch::KvmVpInner;
35use hvdef::Vtl;
36use std::sync::atomic::Ordering;
37use virt::VpIndex;
38use vmcore::vmtime::VmTimeAccess;
39
/// Errors reported by the KVM backend in this crate.
///
/// Display strings come from the `#[error(...)]` attributes below.
#[derive(Error, Debug)]
pub enum KvmError {
    /// The requested operation is not supported.
    #[error("operation not supported")]
    NotSupported,
    /// VTL2 is not supported on this hypervisor.
    #[error("vtl2 is not supported on this hypervisor")]
    Vtl2NotSupported,
    /// Isolation is not supported on this hypervisor.
    #[error("isolation is not supported on this hypervisor")]
    IsolationNotSupported,
    /// An error from the `kvm` crate. `#[from]` provides the `From`
    /// conversion, so `?` can be used on `kvm::Error` results.
    #[error("kvm error")]
    Kvm(#[from] kvm::Error),
    /// The metadata query on `/dev/kvm` failed; produced by [`is_available`].
    #[error("failed to stat /dev/kvm")]
    AvailableCheck(#[source] std::io::Error),
    /// A boxed state error, displayed transparently (the message and source
    /// are those of the inner error).
    #[error(transparent)]
    State(#[from] Box<StateError<KvmError>>),
    /// State being restored was invalid; the string says what was wrong.
    #[error("invalid state while restoring: {0}")]
    InvalidState(&'static str),
    /// The GIC base address is misaligned.
    #[error("misaligned gic base address")]
    Misaligned,
    /// The host supports neither GICv2 nor GICv3.
    #[error("host does not support GICv2 or GICv3")]
    NoGic,
    /// The host lacks required CPU capabilities.
    ///
    /// NOTE(review): the wrapped error is neither marked `#[source]` nor
    /// formatted into the message, so it is only visible through `Debug`.
    /// Confirm that this is intended.
    #[error("host does not support required cpu capabilities")]
    Capabilities(virt::PartitionCapabilitiesError),
    /// Computing the topology CPUID failed (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to compute topology cpuid")]
    TopologyCpuid(#[source] virt::x86::topology::UnknownVendor),
}
66
/// One memory region registered with KVM, as recorded by `map_region`.
#[derive(Debug, Inspect)]
struct KvmMemoryRange {
    /// The host pointer that was passed to KVM for this region. `map_region`
    /// also compares against it to find a slot to reuse.
    host_addr: *mut u8,
    /// The range `addr..addr + size` that the region was registered at
    /// (the guest-side address handed to KVM).
    range: MemoryRange,
}

// The raw pointer field suppresses the automatic `Send`/`Sync` impls, so they
// are asserted manually here.
//
// SAFETY: NOTE(review): in the code visible in this section `host_addr` is only
// stored and compared, never dereferenced. Confirm that this holds for the
// rest of the crate.
unsafe impl Sync for KvmMemoryRange {}
// SAFETY: same reasoning as for `Sync` directly above.
unsafe impl Send for KvmMemoryRange {}
75
/// Bookkeeping for the memory regions currently registered with KVM.
#[derive(Debug, Default, Inspect)]
struct KvmMemoryRangeState {
    /// Registered regions, indexed by the slot number passed to
    /// `set_user_memory_region`. `None` marks a slot that is free for reuse.
    #[inspect(flatten, iter_by_index)]
    ranges: Vec<Option<KvmMemoryRange>>,
}
81
/// A KVM-backed partition.
///
/// Most state lives in the shared [`KvmPartitionInner`]; this wrapper adds the
/// handles that are not part of the shared state.
#[derive(Inspect)]
pub struct KvmPartition {
    /// Shared partition state. A clone of this `Arc` is what
    /// `memory_mapper` hands out.
    #[inspect(flatten)]
    inner: Arc<KvmPartitionInner>,
    /// Synic port state (x86_64 guests only); not shown in inspect output.
    #[cfg(guest_arch = "x86_64")]
    #[inspect(skip)]
    synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
    /// irqfd state from the `gsi` module; not shown in inspect output.
    #[inspect(skip)]
    irqfd_state: Arc<gsi::KvmIrqFdState>,
}
92
/// Partition state shared (via `Arc`) between [`KvmPartition`] and
/// [`KvmProcessorBinder`].
#[derive(Inspect)]
struct KvmPartitionInner {
    /// Handle to the underlying KVM partition.
    #[inspect(skip)]
    kvm: kvm::Partition,
    /// Memory slot bookkeeping; locked for the duration of each map/unmap.
    memory: Mutex<KvmMemoryRangeState>,
    /// Presumably whether the Hyper-V (HV#1) interface is exposed to the
    /// guest. TODO confirm against where this is set.
    hv1_enabled: bool,
    /// Guest memory accessor.
    gm: GuestMemory,
    /// Per-VP state, indexed by VP index (see `vp`). On x86_64, entry 0 is
    /// what `bsp` returns.
    #[inspect(skip)]
    vps: Vec<KvmVpInner>,
    /// GSI routing state from the `gsi` module.
    #[inspect(skip)]
    gsi_routing: Mutex<gsi::GsiRouting>,
    /// Capabilities of this partition.
    caps: virt::PartitionCapabilities,

    /// CPUID leaf set (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    cpuid: virt::CpuidLeafSet,

    /// Number of VPs reserved per socket (x86_64 guests only).
    /// NOTE(review): how this is used is not visible in this section.
    #[cfg(guest_arch = "x86_64")]
    reserved_vps_per_socket: u32,

    /// GIC device handle (aarch64 guests only). The leading underscore marks
    /// it as intentionally unread; presumably it is held only to keep the
    /// device alive. TODO confirm.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    _gic_device: kvm::Device,
    /// Optional ITS device handle (aarch64 guests only); intentionally
    /// unread, like `_gic_device`.
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    _its_device: Option<kvm::Device>,
    /// GIC MSI controller description (aarch64 guests only).
    #[cfg(guest_arch = "aarch64")]
    #[inspect(skip)]
    gic_msi: vm_topology::processor::aarch64::GicMsiController,
    /// Number of GIC IRQs (aarch64 guests only).
    #[cfg(guest_arch = "aarch64")]
    gic_nr_irqs: u32,
    /// Synic port map (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    synic_ports: virt::synic::SynicPortMap,
}
131
/// Errors that can occur while running a VP.
#[derive(Debug, Error)]
enum KvmRunVpError {
    /// KVM reported an internal error; the payload is shown in hex.
    #[error("KVM internal error: {0:#x}")]
    InternalError(u32),
    /// The VP state was invalid.
    #[error("invalid vp state")]
    InvalidVpState,
    /// Running the VP failed; the underlying `kvm::Error` is the source.
    #[error("failed to run VP")]
    Run(#[source] kvm::Error),
    /// A system event of a type that is not handled; the payload is the event
    /// type, shown in hex. The `expect(dead_code)` records that this variant
    /// is never constructed in x86_64 builds.
    #[cfg_attr(guest_arch = "x86_64", expect(dead_code))]
    #[error("unhandled system event type: {0:#x}")]
    UnhandledSystemEvent(u32),
    /// Injecting an extint interrupt failed (x86_64 guests only).
    #[cfg(guest_arch = "x86_64")]
    #[error("failed to inject an extint interrupt")]
    ExtintInterrupt(#[source] kvm::Error),
}
148
/// Bundles the shared partition state with one VP's index and VM time access.
///
/// NOTE(review): presumably this is consumed when binding the VP to a
/// processor implementation; that code is not part of this section.
pub struct KvmProcessorBinder {
    /// Shared partition state.
    partition: Arc<KvmPartitionInner>,
    /// Index of the VP this binder is for.
    vpindex: VpIndex,
    /// VM time access for this VP.
    vmtime: VmTimeAccess,
}
154
155impl KvmPartitionInner {
156 #[cfg(guest_arch = "x86_64")]
157 fn bsp(&self) -> &KvmVpInner {
158 &self.vps[0]
159 }
160
161 fn vp(&self, vp_index: VpIndex) -> Option<&KvmVpInner> {
162 self.vps.get(vp_index.index() as usize)
163 }
164
165 fn evaluate_vp(&self, vp_index: VpIndex) {
166 let Some(vp) = self.vp(vp_index) else { return };
167 vp.set_eval(true, Ordering::Relaxed);
168
169 #[cfg(guest_arch = "x86_64")]
170 self.kvm.vp(vp.vp_info().apic_id).force_exit();
171
172 #[cfg(guest_arch = "aarch64")]
173 self.kvm.vp(vp.vp_info().base.vp_index.index()).force_exit();
174 }
175
176 unsafe fn map_region(
181 &self,
182 data: *mut u8,
183 size: usize,
184 addr: u64,
185 readonly: bool,
186 ) -> anyhow::Result<()> {
187 let mut state = self.memory.lock();
188
189 let mut slot_to_use = None;
192 for (slot, range) in state.ranges.iter_mut().enumerate() {
193 match range {
194 Some(range) if range.host_addr == data => {
195 slot_to_use = Some(slot);
196 break;
197 }
198 Some(_) => (),
199 None => slot_to_use = Some(slot),
200 }
201 }
202 if slot_to_use.is_none() {
203 slot_to_use = Some(state.ranges.len());
204 state.ranges.push(None);
205 }
206 let slot_to_use = slot_to_use.unwrap();
207 unsafe {
208 self.kvm
209 .set_user_memory_region(slot_to_use as u32, data, size, addr, readonly)?
210 };
211 state.ranges[slot_to_use] = Some(KvmMemoryRange {
212 host_addr: data,
213 range: MemoryRange::new(addr..addr + size as u64),
214 });
215 Ok(())
216 }
217}
218
219impl virt::PartitionMemoryMapper for KvmPartition {
220 fn memory_mapper(&self, vtl: Vtl) -> Arc<dyn virt::PartitionMemoryMap> {
221 assert_eq!(vtl, Vtl::Vtl0);
222 self.inner.clone()
223 }
224}
225
226impl virt::PartitionMemoryMap for KvmPartitionInner {
228 unsafe fn map_range(
229 &self,
230 data: *mut u8,
231 size: usize,
232 addr: u64,
233 writable: bool,
234 _exec: bool,
235 ) -> anyhow::Result<()> {
236 unsafe { self.map_region(data, size, addr, !writable) }
238 }
239
240 fn unmap_range(&self, addr: u64, size: u64) -> anyhow::Result<()> {
241 let range = MemoryRange::new(addr..addr + size);
242 let mut state = self.memory.lock();
243 for (slot, entry) in state.ranges.iter_mut().enumerate() {
244 let Some(kvm_range) = entry else { continue };
245 if range.contains(&kvm_range.range) {
246 unsafe {
249 self.kvm.set_user_memory_region(
250 slot as u32,
251 std::ptr::null_mut(),
252 0,
253 0,
254 false,
255 )?;
256 }
257 *entry = None;
258 } else {
259 assert!(
260 !range.overlaps(&kvm_range.range),
261 "can only unmap existing ranges of exact size"
262 );
263 }
264 }
265 Ok(())
266 }
267}