1#![expect(missing_docs)]
5#![cfg(unix)]
6#![expect(unsafe_code)]
8
9use anyhow::Context;
10use bitfield_struct::bitfield;
11use libc::c_void;
12use pal_async::driver::Driver;
13use pal_async::timer::PolledTimer;
14use std::ffi::CString;
15use std::fs;
16use std::fs::File;
17use std::io::BufRead;
18use std::io::BufReader;
19use std::os::unix::prelude::*;
20use std::path::Path;
21use vfio_bindings::bindings::vfio::VFIO_IRQ_SET_ACTION_TRIGGER;
22use vfio_bindings::bindings::vfio::VFIO_IRQ_SET_DATA_EVENTFD;
23use vfio_bindings::bindings::vfio::VFIO_IRQ_SET_DATA_NONE;
24use vfio_bindings::bindings::vfio::VFIO_PCI_MSIX_IRQ_INDEX;
25use vfio_bindings::bindings::vfio::vfio_device_info;
26use vfio_bindings::bindings::vfio::vfio_group_status;
27use vfio_bindings::bindings::vfio::vfio_irq_info;
28use vfio_bindings::bindings::vfio::vfio_irq_set;
29use vfio_bindings::bindings::vfio::vfio_region_info;
30
31mod ioctl {
32 use nix::request_code_none;
33 use std::os::raw::c_char;
34 use std::os::raw::c_int;
35 use vfio_bindings::bindings::vfio::VFIO_BASE;
36 use vfio_bindings::bindings::vfio::VFIO_TYPE;
37 use vfio_bindings::bindings::vfio::vfio_device_info;
38 use vfio_bindings::bindings::vfio::vfio_group_status;
39 use vfio_bindings::bindings::vfio::vfio_irq_info;
40 use vfio_bindings::bindings::vfio::vfio_irq_set;
41 use vfio_bindings::bindings::vfio::vfio_region_info;
42
43 const VFIO_PRIVATE_BASE: u32 = 200;
44
45 nix::ioctl_write_int_bad!(vfio_set_iommu, request_code_none!(VFIO_TYPE, VFIO_BASE + 2));
46 nix::ioctl_read_bad!(
47 vfio_group_get_status,
48 request_code_none!(VFIO_TYPE, VFIO_BASE + 3),
49 vfio_group_status
50 );
51 nix::ioctl_write_ptr_bad!(
52 vfio_group_set_container,
53 request_code_none!(VFIO_TYPE, VFIO_BASE + 4),
54 c_int
55 );
56 nix::ioctl_write_ptr_bad!(
57 vfio_group_get_device_fd,
58 request_code_none!(VFIO_TYPE, VFIO_BASE + 6),
59 c_char
60 );
61 nix::ioctl_read_bad!(
62 vfio_device_get_info,
63 request_code_none!(VFIO_TYPE, VFIO_BASE + 7),
64 vfio_device_info
65 );
66 nix::ioctl_readwrite_bad!(
67 vfio_device_get_region_info,
68 request_code_none!(VFIO_TYPE, VFIO_BASE + 8),
69 vfio_region_info
70 );
71 nix::ioctl_readwrite_bad!(
72 vfio_device_get_irq_info,
73 request_code_none!(VFIO_TYPE, VFIO_BASE + 9),
74 vfio_irq_info
75 );
76 nix::ioctl_write_ptr_bad!(
77 vfio_device_set_irqs,
78 request_code_none!(VFIO_TYPE, VFIO_BASE + 10),
79 vfio_irq_set
80 );
81 nix::ioctl_write_ptr_bad!(
82 vfio_group_set_keep_alive,
83 request_code_none!(VFIO_TYPE, VFIO_PRIVATE_BASE),
84 c_char
85 );
86}
87
/// An open handle to the VFIO container device (`/dev/vfio/vfio`).
pub struct Container {
    /// The open device file; container ioctls are issued on its descriptor.
    file: File,
}
91
92impl Container {
93 pub fn new() -> anyhow::Result<Self> {
94 let file = fs::OpenOptions::new()
95 .read(true)
96 .write(true)
97 .open("/dev/vfio/vfio")
98 .context("failed to open /dev/vfio/vfio")?;
99
100 Ok(Self { file })
101 }
102
103 pub fn set_iommu(&self, iommu: IommuType) -> anyhow::Result<()> {
104 unsafe {
106 ioctl::vfio_set_iommu(self.file.as_raw_fd(), iommu as i32)
107 .context("failed to set iommu")?;
108 }
109 Ok(())
110 }
111}
112
113#[repr(u32)]
114pub enum IommuType {
115 NoIommu = vfio_bindings::bindings::vfio::VFIO_NOIOMMU_IOMMU,
116}
117
/// An open handle to a VFIO group device node under `/dev/vfio/`.
pub struct Group {
    /// The open group file; group ioctls are issued on its descriptor.
    file: File,
}
121
122impl Group {
123 pub fn open(group: u64) -> anyhow::Result<Self> {
124 Self::open_path(format!("/dev/vfio/{group}").as_ref())
125 }
126
127 pub fn open_noiommu(group: u64) -> anyhow::Result<Self> {
128 Self::open_path(format!("/dev/vfio/noiommu-{group}").as_ref())
129 }
130
131 fn open_path(group: &Path) -> anyhow::Result<Self> {
132 let file = fs::OpenOptions::new()
133 .read(true)
134 .write(true)
135 .open(group)
136 .with_context(|| format!("failed to open group {}", group.display()))?;
137
138 Ok(Self { file })
139 }
140
141 pub fn find_group_for_device(device_sysfs_path: &Path) -> anyhow::Result<u64> {
142 let group = device_sysfs_path.join("iommu_group");
143 let group = fs::read_link(group).context("failed to read iommu group")?;
144 let group: u64 = group
145 .file_name()
146 .and_then(|s| s.to_str())
147 .context("invalid group link")?
148 .parse()
149 .context("failed to parse iommu group")?;
150
151 Ok(group)
152 }
153
154 pub async fn open_device(
155 &self,
156 device_id: &str,
157 driver: &(impl ?Sized + Driver),
158 ) -> anyhow::Result<Device> {
159 let id = CString::new(device_id)?;
160 let file = unsafe {
162 let fd = ioctl::vfio_group_get_device_fd(self.file.as_raw_fd(), id.as_ptr());
163 let fd = match fd {
168 Err(nix::errno::Errno::ENODEV) => {
169 tracing::warn!(pci_id = device_id, "Retrying vfio open_device after delay");
170 PolledTimer::new(driver)
171 .sleep(std::time::Duration::from_millis(250))
172 .await;
173 ioctl::vfio_group_get_device_fd(self.file.as_raw_fd(), id.as_ptr())
174 }
175 _ => fd,
176 };
177 let fd = fd.with_context(|| format!("failed to get device fd for {device_id}"))?;
178 File::from_raw_fd(fd)
179 };
180
181 Ok(Device { file })
182 }
183
184 pub fn set_container(&self, container: &Container) -> anyhow::Result<()> {
185 unsafe {
187 ioctl::vfio_group_set_container(self.file.as_raw_fd(), &container.file.as_raw_fd())
188 .context("failed to set container")?;
189 }
190 Ok(())
191 }
192
193 pub fn status(&self) -> anyhow::Result<GroupStatus> {
194 let mut status = vfio_group_status {
195 argsz: size_of::<vfio_group_status>() as u32,
196 flags: 0,
197 };
198 unsafe {
200 ioctl::vfio_group_get_status(self.file.as_raw_fd(), &mut status)
201 .context("failed to get group status")?;
202 };
203 Ok(GroupStatus::from(status.flags))
204 }
205
206 pub async fn set_keep_alive(
210 &self,
211 device_id: &str,
212 driver: &(impl ?Sized + Driver),
213 ) -> anyhow::Result<()> {
214 unsafe {
216 let id = CString::new(device_id)?;
217 let r = ioctl::vfio_group_set_keep_alive(self.file.as_raw_fd(), id.as_ptr());
218 match r {
219 Ok(_) => Ok(()),
220 Err(nix::errno::Errno::ENODEV) => {
221 tracing::warn!(
226 pci_id = device_id,
227 "vfio keepalive got ENODEV, retrying after delay"
228 );
229 PolledTimer::new(driver)
230 .sleep(std::time::Duration::from_millis(250))
231 .await;
232 ioctl::vfio_group_set_keep_alive(self.file.as_raw_fd(), id.as_ptr())
233 .with_context(|| {
234 format!("failed to set keep-alive after delay for {device_id}")
235 })
236 .map(|_| ())
237 }
238 Err(_) => r
239 .with_context(|| format!("failed to set keep-alive for {device_id}"))
240 .map(|_| ()),
241 }
242 }
243 }
244}
245
246#[bitfield(u32)]
247pub struct GroupStatus {
248 pub viable: bool,
249 pub container_set: bool,
250
251 #[bits(30)]
252 _reserved: u32,
253}
254
/// An open VFIO device, as returned by `Group::open_device`.
pub struct Device {
    /// The device file; device ioctls and mappings use its descriptor.
    file: File,
}
258
259#[derive(Debug)]
260pub struct DeviceInfo {
261 pub flags: DeviceFlags,
262 pub num_regions: u32,
263 pub num_irqs: u32,
264}
265
266#[bitfield(u32)]
267pub struct DeviceFlags {
268 reset: bool,
269 pci: bool,
270 platform: bool,
271 amba: bool,
272 ccw: bool,
273 ap: bool,
274
275 #[bits(26)]
276 _reserved: u32,
277}
278
279#[derive(Debug)]
280pub struct RegionInfo {
281 pub flags: RegionFlags,
282 pub size: u64,
283 pub offset: u64,
284}
285
286#[bitfield(u32)]
287pub struct RegionFlags {
288 read: bool,
289 write: bool,
290 mmap: bool,
291 caps: bool,
292
293 #[bits(28)]
294 _reserved: u32,
295}
296
297#[derive(Debug)]
298pub struct IrqInfo {
299 pub flags: IrqFlags,
300 pub count: u32,
301}
302
303#[bitfield(u32)]
304pub struct IrqFlags {
305 eventfd: bool,
306 maskable: bool,
307 automasked: bool,
308 pub noresize: bool,
309
310 #[bits(28)]
311 _reserved: u32,
312}
313
314impl Device {
315 pub fn info(&self) -> anyhow::Result<DeviceInfo> {
316 let mut info = vfio_device_info {
317 argsz: size_of::<vfio_device_info>() as u32,
318 flags: 0,
319 num_regions: 0,
320 num_irqs: 0,
321 };
322 unsafe {
324 ioctl::vfio_device_get_info(self.file.as_raw_fd(), &mut info)
325 .context("failed to get device info")?;
326 }
327 Ok(DeviceInfo {
328 flags: DeviceFlags::from(info.flags),
329 num_regions: info.num_regions,
330 num_irqs: info.num_irqs,
331 })
332 }
333
334 pub fn region_info(&self, index: u32) -> anyhow::Result<RegionInfo> {
335 let mut info = vfio_region_info {
336 argsz: size_of::<vfio_region_info>() as u32,
337 index,
338 flags: 0,
339 cap_offset: 0,
340 size: 0,
341 offset: 0,
342 };
343 unsafe {
345 ioctl::vfio_device_get_region_info(self.file.as_raw_fd(), &mut info)
346 .context("failed to get region info")?;
347 };
348 Ok(RegionInfo {
349 flags: RegionFlags::from(info.flags),
350 size: info.size,
351 offset: info.offset,
352 })
353 }
354
355 pub fn irq_info(&self, index: u32) -> anyhow::Result<IrqInfo> {
356 let mut info = vfio_irq_info {
357 argsz: size_of::<vfio_irq_info>() as u32,
358 index,
359 flags: 0,
360 count: 0,
361 };
362 unsafe {
364 ioctl::vfio_device_get_irq_info(self.file.as_raw_fd(), &mut info)
365 .context("failed to get irq info")?;
366 }
367 Ok(IrqInfo {
368 flags: IrqFlags::from(info.flags),
369 count: info.count,
370 })
371 }
372
373 pub fn map(&self, offset: u64, len: usize, write: bool) -> anyhow::Result<MappedRegion> {
374 let mut prot = libc::PROT_READ;
375 if write {
376 prot |= libc::PROT_WRITE;
377 }
378 let addr = unsafe {
381 libc::mmap(
382 std::ptr::null_mut(),
383 len,
384 prot,
385 libc::MAP_SHARED,
386 self.file.as_raw_fd(),
387 offset as i64,
388 )
389 };
390 if addr == libc::MAP_FAILED {
391 return Err(std::io::Error::last_os_error()).context("failed to map region");
392 }
393 Ok(MappedRegion { addr, len })
394 }
395
396 pub fn map_msix<I>(&self, start: u32, eventfd: I) -> anyhow::Result<()>
397 where
398 I: IntoIterator,
399 I::Item: AsFd,
400 {
401 #[repr(C)]
402 struct VfioIrqSetWithArray {
403 header: vfio_irq_set,
404 fd: [i32; 256],
405 }
406 let mut param = VfioIrqSetWithArray {
407 header: vfio_irq_set {
408 argsz: size_of::<VfioIrqSetWithArray>() as u32,
409 flags: VFIO_IRQ_SET_ACTION_TRIGGER,
410 index: VFIO_PCI_MSIX_IRQ_INDEX,
411 start,
412 count: 0,
413 data: Default::default(),
415 },
416 fd: [-1; 256],
417 };
418
419 for (x, y) in eventfd.into_iter().zip(&mut param.fd) {
420 *y = x.as_fd().as_raw_fd();
421 param.header.count += 1;
422 }
423
424 if param.header.count == 0 {
425 param.header.flags |= VFIO_IRQ_SET_DATA_NONE;
426 } else {
427 param.header.flags |= VFIO_IRQ_SET_DATA_EVENTFD;
428 }
429
430 unsafe {
432 ioctl::vfio_device_set_irqs(self.file.as_raw_fd(), ¶m.header)
433 .context("failed to set msi-x trigger")?;
434 }
435 Ok(())
436 }
437
438 pub fn unmap_msix(&self, start: u32, count: u32) -> anyhow::Result<()> {
444 if count == 0 {
445 return Ok(());
446 }
447
448 let header = vfio_irq_set {
449 argsz: size_of::<vfio_irq_set>() as u32,
450 flags: VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
451 index: VFIO_PCI_MSIX_IRQ_INDEX,
452 start,
453 count,
454 data: Default::default(),
455 };
456
457 unsafe {
459 ioctl::vfio_device_set_irqs(self.file.as_raw_fd(), &header)
460 .context("failed to unmap msix vectors")?;
461 }
462 Ok(())
463 }
464}
465
466impl AsRef<File> for Device {
467 fn as_ref(&self) -> &File {
468 &self.file
469 }
470}
471
472impl AsFd for Device {
473 fn as_fd(&self) -> BorrowedFd<'_> {
474 self.file.as_fd()
475 }
476}
477
478pub fn find_msix_irq(pci_id: &str, index: u32) -> anyhow::Result<u32> {
480 let buffered = BufReader::new(File::open("/proc/interrupts")?);
481
482 let id = format!("vfio-msix[{}]({})", index, pci_id);
483 let match_str = buffered
484 .lines()
485 .map_while(Result::ok)
486 .find(|line| line.contains(&id))
487 .with_context(|| format!("cannot find interrupt {id} in /proc/interrupts"))?;
488
489 let irq = match_str.trim_start().split(':').next().unwrap();
491 let irq: u32 = irq
492 .parse()
493 .with_context(|| format!("unexpected irq format {}. Expecting 'irq#:'", irq))?;
494
495 Ok(irq)
496}
497
498pub fn print_relevant_params() {
499 #[derive(Debug)]
500 struct Param {
501 _name: &'static str,
502 _value: Option<String>,
503 }
504
505 let vfio_params = [
506 "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode",
507 "/sys/module/driver/parameters/async_probe",
508 ]
509 .iter()
510 .map(|path| Param {
511 _name: path,
512 _value: fs::read_to_string(path).ok().map(|s| s.trim().to_string()),
513 })
514 .collect::<Vec<_>>();
515
516 tracing::debug!(
517 vfio_params = ?vfio_params,
518 "Relevant VFIO module parameters"
519 );
520}
521
522pub struct MappedRegion {
523 addr: *mut c_void,
524 len: usize,
525}
526
527unsafe impl Send for MappedRegion {}
529unsafe impl Sync for MappedRegion {}
531
532impl MappedRegion {
533 pub fn as_ptr(&self) -> *mut c_void {
534 self.addr
535 }
536
537 pub fn len(&self) -> usize {
538 self.len
539 }
540
541 pub fn read_u32(&self, offset: usize) -> u32 {
542 assert_eq!(offset % 4, 0);
543 assert!(offset.saturating_add(4) <= self.len);
544 unsafe { std::ptr::read_volatile(self.addr.byte_add(offset).cast()) }
546 }
547
548 pub fn read_u64(&self, offset: usize) -> u64 {
549 assert_eq!(offset % 8, 0);
550 assert!(offset.saturating_add(8) <= self.len);
551 unsafe { std::ptr::read_volatile(self.addr.byte_add(offset).cast()) }
553 }
554
555 pub fn write_u32(&self, offset: usize, data: u32) {
556 assert_eq!(offset % 4, 0);
557 assert!(offset.saturating_add(4) <= self.len);
558 unsafe {
560 std::ptr::write_volatile(self.addr.byte_add(offset).cast(), data);
561 }
562 }
563
564 pub fn write_u64(&self, offset: usize, data: u64) {
565 assert_eq!(offset % 8, 0);
566 assert!(offset.saturating_add(8) <= self.len);
567 unsafe {
569 std::ptr::write_volatile(self.addr.byte_add(offset).cast(), data);
570 }
571 }
572}
573
574impl Drop for MappedRegion {
575 fn drop(&mut self) {
576 unsafe {
578 libc::munmap(self.addr, self.len);
579 }
580 }
581}