1#![cfg(target_os = "linux")]
7#![expect(missing_docs)]
8#![expect(unsafe_code)]
10
11mod options;
12mod syslog;
13
14pub use options::Options;
17
18use anyhow::Context;
19use libc::STDERR_FILENO;
20use libc::STDIN_FILENO;
21use libc::STDOUT_FILENO;
22use libc::c_void;
23use std::collections::HashMap;
24use std::ffi::CStr;
25use std::ffi::OsStr;
26use std::io;
27use std::io::BufRead;
28use std::io::BufReader;
29use std::io::Write;
30use std::os::unix::prelude::*;
31use std::path::Path;
32use std::process::Child;
33use std::process::Command;
34use std::process::ExitStatus;
35use std::process::Stdio;
36use std::time::Duration;
37use syslog::SysLog;
38use walkdir::WalkDir;
39
/// Path of the executable that `run` spawns as the child process.
const UNDERHILL_PATH: &str = "/bin/openvmm_hcl";
41
42struct FilesystemMount<'a> {
43 source: &'a CStr,
44 target: &'a CStr,
45 fstype: &'a CStr,
46 options: &'a CStr,
47 flags: u64,
48}
49
50impl<'a> FilesystemMount<'a> {
51 pub fn new(
52 source: &'a CStr,
53 target: &'a CStr,
54 fstype: &'a CStr,
55 flags: u64,
56 options: &'a CStr,
57 ) -> Self {
58 Self {
59 source,
60 target,
61 fstype,
62 options,
63 flags,
64 }
65 }
66
67 pub fn mount(&self) -> io::Result<()> {
68 let err = unsafe {
70 libc::mount(
71 self.source.as_ptr(),
72 self.target.as_ptr(),
73 self.fstype.as_ptr(),
74 self.flags,
75 self.options.as_ptr().cast::<c_void>(),
76 )
77 };
78
79 if err != 0 {
80 Err(io::Error::last_os_error())
81 } else {
82 Ok(())
83 }
84 }
85}
86
87mod dev_random_ioctls {
88 pub const MAX_ENTROPY_SIZE: usize = 256;
89 const RANDOM_IOC_MAGIC: u8 = b'R';
90 #[repr(C)]
91 pub struct RndAddEntropy {
92 pub entropy_count: i32,
93 pub buf_size: i32,
94 pub buf: [u8; MAX_ENTROPY_SIZE],
95 }
96 nix::ioctl_write_ptr_bad!(
98 rnd_add_entropy_ioctl,
99 nix::request_code_write!(RANDOM_IOC_MAGIC, 0x3, size_of::<std::os::raw::c_int>() * 2),
100 RndAddEntropy
101 );
102}
103
104fn use_host_entropy() -> anyhow::Result<()> {
118 use dev_random_ioctls::MAX_ENTROPY_SIZE;
119
120 let host_entropy = match fs_err::read("/proc/device-tree/openhcl/entropy/reg") {
121 Ok(contents) => contents,
122 Err(e) => {
123 log::warn!("Did not get entropy from the host: {e:#}");
124 return Ok(());
125 }
126 };
127
128 if host_entropy.len() > MAX_ENTROPY_SIZE {
129 log::warn!(
130 "Truncating host-provided entropy (received {} bytes)",
131 host_entropy.len()
132 );
133 }
134 let use_entropy_bytes = std::cmp::min(host_entropy.len(), MAX_ENTROPY_SIZE);
135 log::info!("Using {} bytes of entropy from the host", use_entropy_bytes);
136
137 let mut entropy = dev_random_ioctls::RndAddEntropy {
138 entropy_count: (use_entropy_bytes * 8) as i32,
139 buf_size: use_entropy_bytes as i32,
140 buf: [0; MAX_ENTROPY_SIZE],
141 };
142 entropy.buf[..use_entropy_bytes].copy_from_slice(&host_entropy[..use_entropy_bytes]);
143
144 let mut dev_random = fs_err::OpenOptions::new()
145 .write(true)
146 .open("/dev/random")
147 .with_context(|| ("failed to open dev random for setting entropy").to_string())?;
148
149 if underhill_confidentiality::is_confidential_vm() {
150 dev_random
152 .write_all(&entropy.buf[..use_entropy_bytes])
153 .context("write to /dev/random")?;
154 } else {
155 unsafe {
159 dev_random_ioctls::rnd_add_entropy_ioctl(dev_random.as_raw_fd(), &entropy)
160 .context("rnd_add_entropy_ioctl")?;
161 }
162 }
163
164 Ok(())
165}
166
167fn setup(
168 stat_files: &[&str],
169 options: &Options,
170 writes: &[(&str, &str)],
171 filesystems: &[FilesystemMount<'_>],
172) -> anyhow::Result<()> {
173 log::info!("Mounting filesystems");
174
175 for filesystem in filesystems {
176 let path: &Path = OsStr::from_bytes(filesystem.target.to_bytes()).as_ref();
177 fs_err::create_dir_all(path)?;
179
180 filesystem
181 .mount()
182 .with_context(|| format!("failed to mount {}", path.display()))?;
183 }
184
185 log::info!("Command line args: {:?}", options);
186
187 if log::log_enabled!(log::Level::Trace) {
188 for stat_file in stat_files {
189 if let Ok(file) = fs_err::File::open(stat_file) {
190 log::trace!("{}", stat_file);
191 for line in BufReader::new(file).lines() {
192 if let Ok(line) = line {
193 log::trace!("{}", line);
194 }
195 }
196 }
197 }
198 }
199
200 log::info!("Setting system resource limits and parameters");
201
202 for (path, data) in writes {
203 fs_err::write(path, data).with_context(|| format!("failed to write {data}"))?;
204 }
205
206 use_host_entropy().context("use host entropy")?;
207
208 Ok(())
209}
210
211fn run_setup_scripts(scripts: &[String]) -> anyhow::Result<Vec<(String, String)>> {
212 let mut new_env = Vec::new();
213 for setup in scripts {
214 log::info!("Running provided setup script {}", setup);
215
216 let result = Command::new("/bin/sh")
217 .arg("-c")
218 .arg(setup)
219 .stderr(Stdio::inherit())
220 .output()
221 .context("script failed to start")?;
222
223 if !result.status.success() {
224 anyhow::bail!("setup script failed: {}", result.status);
225 }
226
227 for line in result.stdout.split(|&x| x == b'\n') {
230 if let Some((key, value)) = std::str::from_utf8(line)
231 .ok()
232 .and_then(|line| line.split_once('='))
233 {
234 log::info!("setting env var {}={}", key, value);
235 new_env.push((key.into(), value.into()));
236 }
237 }
238 }
239 Ok(new_env)
240}
241
242fn run(options: &Options, env: impl IntoIterator<Item = (String, String)>) -> anyhow::Result<()> {
243 let mut command = Command::new(UNDERHILL_PATH);
244 command.arg("--pid").arg("/run/underhill.pid");
245 command.args(&options.underhill_args);
246 command.envs(env);
247
248 let limit = 0x100000;
253 unsafe {
255 if libc::prlimit(
256 0,
257 libc::RLIMIT_NOFILE,
258 &libc::rlimit {
259 rlim_cur: limit,
260 rlim_max: limit,
261 },
262 std::ptr::null_mut(),
263 ) < 0
264 {
265 return Err(io::Error::last_os_error()).context("failed to update rlimit");
266 }
267 }
268
269 log::info!("running {:?}", &command);
270
271 let child = command.spawn().context("underhill failed to start")?;
272
273 let status = reap_until(child).context("wait failed")?;
274 if status.success() {
275 log::info!("underhill exited successfully");
276 } else {
277 log::error!("underhill terminated unsuccessfully: {}", status);
278 }
279
280 std::process::exit(status.code().unwrap_or(255));
281}
282
283fn reap_until(child: Child) -> io::Result<ExitStatus> {
285 loop {
286 let mut status = 0;
287 let pid = unsafe { libc::wait(&mut status) };
289 if pid < 0 {
290 return Err(io::Error::last_os_error());
291 }
292
293 if pid == child.id() as i32 {
294 return Ok(ExitStatus::from_raw(status));
296 }
297 }
298}
299
300fn move_stdio(src: impl Into<std::fs::File>, dst: RawFd) {
301 assert!((0..=2).contains(&dst));
302 let src = src.into();
303 if src.as_raw_fd() != dst {
304 let r = unsafe { libc::dup2(src.as_raw_fd(), dst) };
306 assert_eq!(r, dst);
307 } else {
308 let _ = src.into_raw_fd();
309 }
310}
311
312fn init_logging() {
313 move_stdio(fs_err::File::open("/dev/null").unwrap(), STDIN_FILENO);
315
316 move_stdio(
317 fs_err::OpenOptions::new()
318 .write(true)
319 .open("/dev/null")
320 .unwrap(),
321 STDOUT_FILENO,
322 );
323
324 let ttyprintk_err = match fs_err::OpenOptions::new()
326 .write(true)
327 .open("/dev/ttyprintk")
328 {
329 Ok(ttyprintk) => {
330 move_stdio(ttyprintk, STDERR_FILENO);
331 None
332 }
333 Err(err) => Some(err),
334 };
335
336 let syslog = SysLog::new().expect("failed to open /dev/kmsg");
338 log::set_boxed_logger(Box::new(syslog)).expect("no logger already set");
339
340 log::set_max_level(log::LevelFilter::Info);
345
346 if let Some(err) = ttyprintk_err {
349 log::error!("failed to open stderr output: {}", err);
350 panic!();
351 }
352}
353
354fn load_modules(modules_path: &str) -> anyhow::Result<()> {
355 let cmdline = fs_err::read_to_string("/proc/cmdline")?;
357 let mut params = HashMap::new();
358 for option in cmdline.split_ascii_whitespace() {
359 if let Some((module, option)) = option.split_once('.') {
360 if option.contains('=') {
361 let v: &mut String = params.entry(module.replace('-', "_")).or_default();
362 *v += option;
363 *v += " ";
364 }
365 }
366 }
367
368 for module in WalkDir::new(modules_path).sort_by_file_name() {
370 let module = module?;
371 if !module.file_type().is_file() {
372 continue;
373 }
374
375 let module = module.path();
376 let module_name = module
377 .file_stem()
378 .unwrap()
379 .to_str()
380 .unwrap()
381 .replace('-', "_");
382
383 let params = params.get_mut(&module_name);
384
385 log::info!(
386 "loading kernel module {}: {}",
387 module.display(),
388 params.as_ref().map_or("", |s| s.as_str())
389 );
390 let file = fs_err::File::open(module).context("failed to open module")?;
391
392 let params = if let Some(params) = params {
393 params.pop();
395 params.push('\0');
396 params.as_bytes()
397 } else {
398 b"\0"
399 };
400
401 let r =
405 unsafe { libc::syscall(libc::SYS_finit_module, file.as_raw_fd(), params.as_ptr(), 0) };
406 if r < 0 {
407 return Err(io::Error::last_os_error())
408 .with_context(|| format!("failed to load module {}", module.display()));
409 }
410
411 log::info!("load complete for {}", module.display());
412 }
413
414 fs_err::remove_dir_all(modules_path)?;
417
418 Ok(())
419}
420
421fn timestamp() -> u64 {
422 let mut tp;
423 unsafe {
425 tp = std::mem::zeroed();
426 libc::clock_gettime(libc::CLOCK_BOOTTIME, &mut tp);
427 }
428 Duration::new(tp.tv_sec as u64, tp.tv_nsec as u32).as_nanos() as u64
429}
430
431fn do_main() -> anyhow::Result<()> {
432 let boot_time = timestamp();
433
434 init_logging();
435
436 log::info!(
437 "Initial process: crate_name={}, crate_revision={}, crate_branch={}",
438 env!("CARGO_PKG_NAME"),
439 option_env!("VERGEN_GIT_SHA").unwrap_or("UNKNOWN_REVISION"),
440 option_env!("VERGEN_GIT_BRANCH").unwrap_or("UNKNOWN_BRANCH"),
441 );
442
443 let stat_files = [
444 "/proc/uptime",
445 "/proc/timer_list",
446 "/proc/interrupts",
447 "/proc/meminfo",
448 "/proc/iomem",
449 "/proc/ioports",
450 "/proc/sys/kernel/pid_max",
451 "/proc/sys/kernel/threads-max",
452 "/proc/sys/vm/max_map_count",
453 ];
454 let options = Options::parse();
455 let writes = &[
456 ("/proc/sys/kernel/threads-max", "32768"),
463 ("/proc/sys/kernel/kptr_restrict", "1"),
465 ("/sys/kernel/mm/transparent_hugepage/enabled", "madvise"),
470 (
473 "/sys/bus/vmbus/drivers/uio_hv_generic/new_id",
474 "8dedd1aa-9056-49e4-bfd6-1bf90dc38ef0",
476 ),
477 (
478 "/sys/bus/vmbus/drivers/uio_hv_generic/new_id",
479 "8b60ccf6-709f-4c11-90b5-229c959a9e6a",
481 ),
482 (
483 "/sys/bus/vmbus/drivers/uio_hv_generic/new_id",
484 "427b03e7-4ceb-4286-b5fc-486f4a1dd439",
486 ),
487 (
488 "/proc/sys/kernel/core_pattern",
489 if underhill_confidentiality::confidential_filtering_enabled() {
490 ""
492 } else {
493 "|/bin/underhill-crash %p %i %s %e"
498 },
499 ),
500 ("/proc/sys/kernel/core_pipe_limit", "1"),
502 ("/proc/sys/vm/panic_on_oom", "1"),
506 ("/proc/sys/vm/min_free_kbytes", "1024"),
520 ("/proc/sys/vm/watermark_scale_factor", "1"),
524 ("/proc/sys/vm/watermark_boost_factor", "0"),
526 ];
527 let filesystems = [
528 FilesystemMount::new(
529 c"proc",
530 c"/proc",
531 c"proc",
532 libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RELATIME,
533 c"",
534 ),
535 FilesystemMount::new(
536 c"sysfs",
537 c"/sys",
538 c"sysfs",
539 libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RELATIME,
540 c"",
541 ),
542 FilesystemMount::new(
543 c"dev",
544 c"/dev",
545 c"devtmpfs",
546 libc::MS_NOSUID | libc::MS_NOEXEC | libc::MS_RELATIME,
547 c"",
548 ),
549 FilesystemMount::new(
550 c"devpts",
551 c"/dev/pts",
552 c"devpts",
553 libc::MS_NOSUID | libc::MS_NOEXEC | libc::MS_RELATIME,
554 c"",
555 ),
556 ];
557
558 setup(&stat_files, &options, writes, &filesystems)?;
559 let mut new_env = run_setup_scripts(&options.setup_script)?;
560 new_env.push(("KERNEL_BOOT_TIME".into(), boot_time.to_string()));
561
562 if matches!(
563 std::env::var("OPENHCL_NVME_VFIO").as_deref(),
564 Ok("true" | "1")
565 ) {
566 fs_err::write(
571 "/sys/bus/pci/drivers/vfio-pci/new_id",
572 "ffffffff ffffffff ffffffff ffffffff 010802 ffffff",
573 )
574 .context("failed to register nvme for vfio")?;
575 log::info!("registered vfio-pci as driver for nvme");
576 }
577
578 let thread = std::thread::spawn(|| {
580 if let Err(err) = load_modules("/lib/modules") {
581 panic!("failed to load modules: {:#}", err);
582 }
583 });
584 if std::env::var("OPENHCL_WAIT_FOR_MODULES").as_deref() == Ok("1") {
585 thread.join().unwrap();
586 }
587
588 run(&options, new_env)
589}
590
591pub fn main() -> ! {
592 match do_main() {
593 Ok(_) => unreachable!(),
594 Err(err) => {
595 log::error!("fatal: {:#}", err);
596 std::process::exit(1);
597 }
598 }
599}