1#![cfg(target_os = "linux")]
8#![expect(missing_docs)]
9#![forbid(unsafe_code)]
10
11mod diag;
12mod dispatch;
13mod emuplat;
14mod get_tracing;
15mod inspect_internal;
16mod inspect_proc;
17mod livedump;
18mod loader;
19mod nvme_manager;
20mod options;
21mod reference_time;
22mod servicing;
23mod threadpool_vm_task_backend;
24mod vmbus_relay_unit;
25mod vmgs_logger;
26mod vp;
27mod vpci;
28mod worker;
29mod wrapped_partition;
30
31pub use options::Options;
34
35use crate::diag::DiagWorker;
36use crate::dispatch::UhVmRpc;
37use crate::worker::UnderhillEnvCfg;
38use crate::worker::UnderhillRemoteConsoleCfg;
39use crate::worker::UnderhillVmWorker;
40use crate::worker::UnderhillWorkerParameters;
41use anyhow::Context;
42use bootloader_fdt_parser::BootTimes;
43use cvm_tracing::CVM_ALLOWED;
44use framebuffer::FRAMEBUFFER_SIZE;
45use framebuffer::FramebufferAccess;
46use futures::StreamExt;
47use futures_concurrency::stream::Merge;
48use get_tracing::init_tracing;
49use get_tracing::init_tracing_backend;
50use inspect::Inspect;
51use inspect::SensitivityLevel;
52use mesh::CancelContext;
53use mesh::CancelReason;
54use mesh::MeshPayload;
55use mesh::error::RemoteError;
56use mesh::rpc::Rpc;
57use mesh::rpc::RpcSend;
58use mesh_process::Mesh;
59use mesh_process::ProcessConfig;
60use mesh_process::try_run_mesh_host;
61use mesh_tracing::RemoteTracer;
62use mesh_tracing::TracingBackend;
63use mesh_worker::RegisteredWorkers;
64use mesh_worker::WorkerEvent;
65use mesh_worker::WorkerHandle;
66use mesh_worker::WorkerHost;
67use mesh_worker::WorkerHostRunner;
68use mesh_worker::launch_local_worker;
69use mesh_worker::register_workers;
70use pal_async::DefaultDriver;
71use pal_async::DefaultPool;
72use pal_async::task::Spawn;
73#[cfg(feature = "profiler")]
74use profiler_worker::ProfilerWorker;
75#[cfg(feature = "profiler")]
76use profiler_worker::ProfilerWorkerParameters;
77use std::time::Duration;
78use vmsocket::VmAddress;
79use vmsocket::VmListener;
80use vnc_worker_defs::VncParameters;
81
82fn new_underhill_remote_console_cfg(
83 framebuffer_gpa_base: Option<u64>,
84) -> anyhow::Result<(UnderhillRemoteConsoleCfg, Option<FramebufferAccess>)> {
85 if let Some(framebuffer_gpa_base) = framebuffer_gpa_base {
86 let gpa_fd = fs_err::OpenOptions::new()
96 .read(true)
97 .write(true)
98 .open("/dev/mshv_vtl_low")
99 .context("failed to open gpa device")?;
100
101 let vram = sparse_mmap::new_mappable_from_file(gpa_fd.file(), true, false)?;
102 let (fb, fba) = framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, framebuffer_gpa_base)
103 .context("allocating framebuffer")?;
104 tracing::debug!("framebuffer_gpa_base: {:#x}", framebuffer_gpa_base);
105
106 Ok((
107 UnderhillRemoteConsoleCfg {
108 synth_keyboard: true,
109 synth_mouse: true,
110 synth_video: true,
111 input: mesh::Receiver::new(),
112 framebuffer: Some(fb),
113 },
114 Some(fba),
115 ))
116 } else {
117 Ok((
118 UnderhillRemoteConsoleCfg {
119 synth_keyboard: false,
120 synth_mouse: false,
121 synth_video: false,
122 input: mesh::Receiver::new(),
123 framebuffer: None,
124 },
125 None,
126 ))
127 }
128}
129
130pub fn main() -> anyhow::Result<()> {
131 install_task_name_panic_hook();
134
135 if let Some(path) = std::env::var_os("OPENVMM_WRITE_SAVED_STATE_PROTO") {
136 if cfg!(debug_assertions) {
137 mesh::payload::protofile::DescriptorWriter::new(
138 vmcore::save_restore::saved_state_roots(),
139 )
140 .write_to_path(path)
141 .context("failed to write protobuf descriptors")?;
142 return Ok(());
143 } else {
144 anyhow::bail!(".proto output only supported in debug builds");
146 }
147 }
148
149 let (_, tracing_driver) = DefaultPool::spawn_on_thread("tracing");
151
152 try_run_mesh_host("underhill", {
159 let tracing_driver = tracing_driver.clone();
160 async |params: MeshHostParams| {
161 if let Some(remote_tracer) = params.tracer {
162 init_tracing(tracing_driver, remote_tracer).context("failed to init tracing")?;
163 }
164 params.runner.run(RegisteredWorkers).await;
165 Ok(())
166 }
167 })?;
168
169 let mut tracing = init_tracing_backend(tracing_driver.clone())?;
171 init_tracing(tracing_driver, tracing.tracer()).context("failed to init tracing")?;
173 DefaultPool::run_with(|driver| do_main(driver, tracing))
174}
175
176fn install_task_name_panic_hook() {
177 use std::io::Write;
178
179 let panic_hook = std::panic::take_hook();
180 std::panic::set_hook(Box::new(move |info| {
181 pal_async::task::with_current_task_metadata(|metadata| {
182 if let Some(metadata) = metadata {
183 let _ = write!(std::io::stderr(), "task '{}', ", metadata.name());
184 }
185 });
186 panic_hook(info);
188 }));
189}
190
191async fn do_main(driver: DefaultDriver, mut tracing: TracingBackend) -> anyhow::Result<()> {
192 let opt = Options::parse(Vec::new(), Vec::new())?;
193
194 let crate_name = build_info::get().crate_name();
195 let crate_revision = build_info::get().scm_revision();
196 tracing::info!(CVM_ALLOWED, ?crate_name, ?crate_revision, "VMM process");
197 log_boot_times().context("failure logging boot times")?;
198
199 if let Some(pid_path) = &opt.pid {
201 std::fs::write(pid_path, std::process::id().to_string())
202 .with_context(|| format!("failed to write pid to {}", pid_path.display()))?;
203 }
204
205 let mesh = Mesh::new("underhill".to_string()).context("failed to create mesh")?;
206
207 let r = run_control(driver, &mesh, opt, &mut tracing).await;
208 if let Err(err) = &r {
209 tracing::error!(
210 CVM_ALLOWED,
211 error = err.as_ref() as &dyn std::error::Error,
212 "VM failure"
213 );
214 }
215
216 CancelContext::new()
218 .with_timeout(Duration::from_secs(10))
219 .until_cancelled(async {
220 mesh.shutdown().await;
221 tracing.shutdown().await;
222 })
223 .await
224 .ok();
225
226 r
227}
228
229fn log_boot_times() -> anyhow::Result<()> {
230 fn diff(start: Option<u64>, end: Option<u64>) -> Option<tracing::field::DebugValue<Duration>> {
231 use reference_time::ReferenceTime;
232 Some(tracing::field::debug(
233 ReferenceTime::new(end?).since(ReferenceTime::new(start?))?,
234 ))
235 }
236
237 let BootTimes {
239 start,
240 end,
241 sidecar_start,
242 sidecar_end,
243 } = BootTimes::new().context("failed to parse boot times")?;
244 tracing::info!(
245 CVM_ALLOWED,
246 start,
247 end,
248 sidecar_start,
249 sidecar_end,
250 elapsed = diff(start, end),
251 sidecar_elapsed = diff(sidecar_start, sidecar_end),
252 "boot loader times"
253 );
254 Ok(())
255}
256
257struct DiagState {
258 _worker: WorkerHandle,
259 request_recv: mesh::Receiver<diag_server::DiagRequest>,
260}
261
262impl DiagState {
263 async fn new() -> anyhow::Result<Self> {
264 let (request_send, request_recv) = mesh::channel();
266 let worker = launch_local_worker::<DiagWorker>(diag::DiagWorkerParameters { request_send })
267 .await
268 .context("failed to launch diagnostics worker")?;
269 Ok(Self {
270 _worker: worker,
271 request_recv,
272 })
273 }
274}
275
/// Handles to the workers started by `launch_workers`.
#[derive(Inspect)]
struct Workers {
    /// Handle to the VM worker.
    vm: WorkerHandle,
    /// Sender for `UhVmRpc` requests to the VM worker; excluded from inspect
    /// output.
    #[inspect(skip)]
    vm_rpc: mesh::Sender<UhVmRpc>,
    /// Handle to the VNC worker; `None` when no framebuffer was configured.
    vnc: Option<WorkerHandle>,
    /// Handle to the debugger worker; `None` unless the gdbstub option was
    /// set. Only present when built with the `gdb` feature.
    #[cfg(feature = "gdb")]
    gdb: Option<WorkerHandle>,
}
285
/// Parameters sent to a newly launched mesh host process (see
/// `launch_mesh_host`) and received by the host closure in `main`.
#[derive(MeshPayload)]
struct MeshHostParams {
    /// Tracer the host process uses to initialize tracing; tracing is left
    /// uninitialized in the host when this is `None`.
    tracer: Option<RemoteTracer>,
    /// Runner the host process drives with the registered workers.
    runner: WorkerHostRunner,
}
291
292async fn launch_mesh_host(
293 mesh: &Mesh,
294 name: &str,
295 tracer: Option<RemoteTracer>,
296) -> anyhow::Result<WorkerHost> {
297 let (host, runner) = mesh_worker::worker_host();
298 mesh.launch_host(ProcessConfig::new(name), MeshHostParams { tracer, runner })
299 .await?;
300 Ok(host)
301}
302
303async fn launch_workers(
304 mesh: &Mesh,
305 tracing: &mut TracingBackend,
306 control_send: mesh::Sender<ControlRequest>,
307 opt: Options,
308) -> anyhow::Result<Workers> {
309 let env_cfg = UnderhillEnvCfg {
310 vmbus_max_version: opt.vmbus_max_version,
311 vmbus_enable_mnf: opt.vmbus_enable_mnf,
312 vmbus_force_confidential_external_memory: opt.vmbus_force_confidential_external_memory,
313 cmdline_append: opt.cmdline_append.clone(),
314 reformat_vmgs: opt.reformat_vmgs,
315 vtl0_starts_paused: opt.vtl0_starts_paused,
316 emulated_serial_wait_for_rts: opt.serial_wait_for_rts,
317 force_load_vtl0_image: opt.force_load_vtl0_image,
318 nvme_vfio: opt.nvme_vfio,
319 mcr: opt.mcr,
320 enable_shared_visibility_pool: opt.enable_shared_visibility_pool,
321 halt_on_guest_halt: opt.halt_on_guest_halt,
322 no_sidecar_hotplug: opt.no_sidecar_hotplug,
323 gdbstub: opt.gdbstub,
324 hide_isolation: opt.hide_isolation,
325 nvme_keep_alive: opt.nvme_keep_alive,
326 test_configuration: opt.test_configuration,
327 disable_uefi_frontpage: opt.disable_uefi_frontpage,
328 };
329
330 let (mut remote_console_cfg, framebuffer_access) =
331 new_underhill_remote_console_cfg(opt.framebuffer_gpa_base)?;
332
333 let mut vnc_worker = None;
334 if let Some(framebuffer) = framebuffer_access {
335 let listener = VmListener::bind(VmAddress::vsock_any(opt.vnc_port))
336 .context("failed to bind socket")?;
337
338 let input_send = remote_console_cfg.input.sender();
339
340 let vnc_host = launch_mesh_host(mesh, "vnc", Some(tracing.tracer()))
341 .await
342 .context("spawning vnc process failed")?;
343
344 vnc_worker = Some(
345 vnc_host
346 .launch_worker(
347 vnc_worker_defs::VNC_WORKER_VMSOCKET,
348 VncParameters {
349 listener,
350 framebuffer,
351 input_send,
352 },
353 )
354 .await?,
355 )
356 }
357
358 #[cfg(feature = "gdb")]
359 let mut gdbstub_worker = None;
360 #[cfg_attr(not(feature = "gdb"), expect(unused_mut))]
361 let mut debugger_rpc = None;
362 #[cfg(feature = "gdb")]
363 if opt.gdbstub {
364 let listener = VmListener::bind(VmAddress::vsock_any(opt.gdbstub_port))
365 .context("failed to bind socket")?;
366
367 let gdb_host = launch_mesh_host(mesh, "gdb", Some(tracing.tracer()))
368 .await
369 .context("failed to spawn gdb host process")?;
370
371 let vp_count =
375 pal::unix::affinity::max_present_cpu().context("failed to get max present cpu")? + 1;
376
377 let (send, recv) = mesh::channel();
378 debugger_rpc = Some(recv);
379 gdbstub_worker = Some(
380 gdb_host
381 .launch_worker(
382 debug_worker_defs::DEBUGGER_VSOCK_WORKER,
383 debug_worker_defs::DebuggerParameters {
384 listener,
385 req_chan: send,
386 vp_count,
387 target_arch: if cfg!(guest_arch = "x86_64") {
388 debug_worker_defs::TargetArch::X86_64
389 } else {
390 debug_worker_defs::TargetArch::Aarch64
391 },
392 },
393 )
394 .await?,
395 );
396 }
397 let (vm_rpc, vm_rpc_rx) = mesh::channel();
398
399 let host = launch_mesh_host(mesh, "vm", Some(tracing.tracer()))
403 .await
404 .context("failed to launch worker process")?;
405
406 let vm_worker = host
407 .start_worker(
408 worker::UNDERHILL_WORKER,
409 UnderhillWorkerParameters {
410 env_cfg,
411 remote_console_cfg,
412 debugger_rpc,
413 vm_rpc: vm_rpc_rx,
414 control_send,
415 },
416 )
417 .context("failed to launch worker")?;
418
419 Ok(Workers {
420 vm: vm_worker,
421 vm_rpc,
422 vnc: vnc_worker,
423 #[cfg(feature = "gdb")]
424 gdb: gdbstub_worker,
425 })
426}
427
/// State of the control loop in `run_control`, exposed through inspect as
/// `control_state`.
#[derive(Inspect)]
enum ControlState {
    /// Workers have not been launched; set when the `wait_for_start` option
    /// is given, until a diagnostics `Start` request arrives.
    WaitingForStart,
    /// Workers have been launched but the VM worker has not yet reported
    /// that it started.
    Starting,
    /// The VM worker reported that it started, or a restart attempt failed.
    Started,
    /// A restart of the VM worker has been requested and is in progress.
    Restarting,
}
436
// Requests sent to the control loop over the `control_send` channel that is
// handed to the VM worker.
#[derive(MeshPayload)]
pub enum ControlRequest {
    // Flush pending trace output. The supplied `CancelContext` bounds the
    // flush; the reply is `Err(CancelReason)` if it is cancelled first.
    FlushLogs(Rpc<CancelContext, Result<(), CancelReason>>),
}
441
442async fn run_control(
443 driver: DefaultDriver,
444 mesh: &Mesh,
445 opt: Options,
446 mut tracing: &mut TracingBackend,
447) -> anyhow::Result<()> {
448 let (control_send, mut control_recv) = mesh::channel();
449 let mut control_send = Some(control_send);
450
451 if opt.signal_vtl0_started {
452 signal_vtl0_started(&driver)
453 .await
454 .context("failed to signal vtl0 started")?;
455 }
456
457 let mut diag = DiagState::new().await?;
458
459 let (diag_reinspect_send, mut diag_reinspect_recv) = mesh::channel();
460 #[cfg(feature = "profiler")]
461 let mut profiler_host = None;
462 let mut state;
463 let mut workers = if opt.wait_for_start {
464 state = ControlState::WaitingForStart;
465 None
466 } else {
467 state = ControlState::Starting;
468 let workers = launch_workers(mesh, tracing, control_send.take().unwrap(), opt)
469 .await
470 .context("failed to launch workers")?;
471 Some(workers)
472 };
473
474 enum Event {
475 Diag(diag_server::DiagRequest),
476 Worker(WorkerEvent),
477 Control(ControlRequest),
478 }
479
480 let mut restart_rpc = None;
481 loop {
482 let event = {
483 let mut stream = (
484 (&mut diag.request_recv).map(Event::Diag),
485 (&mut diag_reinspect_recv)
486 .map(|req| Event::Diag(diag_server::DiagRequest::Inspect(req))),
487 (&mut control_recv).map(Event::Control),
488 futures::stream::select_all(workers.as_mut().map(|w| &mut w.vm)).map(Event::Worker),
489 )
490 .merge();
491
492 let Some(event) = stream.next().await else {
493 break;
494 };
495 event
496 };
497
498 match event {
499 Event::Diag(request) => {
500 match request {
501 diag_server::DiagRequest::Start(rpc) => {
502 rpc.handle_failable(async |params| {
503 if workers.is_some() {
504 Err(anyhow::anyhow!("workers have already been started"))?;
505 }
506 let new_opt = Options::parse(params.args, params.env)
507 .context("failed to parse new options")?;
508
509 workers = Some(
510 launch_workers(
511 mesh,
512 tracing,
513 control_send.take().unwrap(),
514 new_opt,
515 )
516 .await?,
517 );
518 state = ControlState::Starting;
519 anyhow::Ok(())
520 })
521 .await
522 }
523 diag_server::DiagRequest::Inspect(deferred) => deferred.respond(|resp| {
524 resp.sensitivity_field("mesh", SensitivityLevel::Safe, mesh)
525 .sensitivity_field_mut("trace", SensitivityLevel::Safe, &mut tracing)
526 .sensitivity_field(
527 "build_info",
528 SensitivityLevel::Safe,
529 build_info::get(),
530 )
531 .sensitivity_child(
532 "proc",
533 SensitivityLevel::Safe,
534 inspect_proc::inspect_proc,
535 )
536 .sensitivity_field("control_state", SensitivityLevel::Safe, &state)
537 .sensitivity_child("uhdiag", SensitivityLevel::Safe, |req| {
540 inspect_internal::inspect_internal_diagnostics(
541 req,
542 diag_reinspect_send.clone(),
543 driver.clone(),
544 )
545 });
546
547 resp.merge(&workers);
548 }),
549 diag_server::DiagRequest::Crash(pid) => {
550 mesh.crash(pid);
551 }
552 diag_server::DiagRequest::Restart(rpc) => {
553 let Some(workers) = &mut workers else {
554 rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
555 "worker has not been started yet"
556 ))));
557 continue;
558 };
559
560 let r = async {
561 if restart_rpc.is_some() {
562 anyhow::bail!("previous restart still in progress");
563 }
564
565 let host = launch_mesh_host(mesh, "vm", Some(tracing.tracer()))
566 .await
567 .context("failed to launch worker process")?;
568
569 workers.vm.restart(&host);
570 Ok(())
571 }
572 .await;
573
574 if r.is_err() {
575 rpc.complete(r.map_err(RemoteError::new));
576 } else {
577 state = ControlState::Restarting;
578 restart_rpc = Some(rpc);
579 }
580 }
581 diag_server::DiagRequest::Pause(rpc) => {
582 let Some(workers) = &mut workers else {
583 rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
584 "worker has not been started yet"
585 ))));
586 continue;
587 };
588
589 let req = workers.vm_rpc.call(UhVmRpc::Pause, ());
592
593 driver
595 .spawn("diag-pause", async move {
596 let was_paused = req.await.expect("failed to pause VM");
597 rpc.handle_failable_sync(|_| {
598 if !was_paused {
599 Err(anyhow::anyhow!("VM is already paused"))
600 } else {
601 Ok(())
602 }
603 });
604 })
605 .detach();
606 }
607 diag_server::DiagRequest::PacketCapture(rpc) => {
608 let Some(workers) = &mut workers else {
609 rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
610 "worker has not been started yet"
611 ))));
612 continue;
613 };
614
615 workers.vm_rpc.send(UhVmRpc::PacketCapture(rpc));
616 }
617 diag_server::DiagRequest::Resume(rpc) => {
618 let Some(workers) = &mut workers else {
619 rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
620 "worker has not been started yet"
621 ))));
622 continue;
623 };
624
625 let was_resumed = workers
626 .vm_rpc
627 .call(UhVmRpc::Resume, ())
628 .await
629 .context("failed to resumed VM")?;
630
631 let was_halted = workers
632 .vm_rpc
633 .call(UhVmRpc::ClearHalt, ())
634 .await
635 .context("failed to clear halt from VPs")?;
636
637 rpc.handle_sync(|_| {
638 if was_resumed || was_halted {
639 Ok(())
640 } else {
641 Err(RemoteError::new(anyhow::anyhow!("VM is currently running")))
642 }
643 });
644 }
645 diag_server::DiagRequest::Save(rpc) => {
646 let Some(workers) = &mut workers else {
647 rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
648 "worker has not been started yet"
649 ))));
650 continue;
651 };
652
653 workers.vm_rpc.send(UhVmRpc::Save(rpc));
654 }
655 #[cfg(feature = "profiler")]
656 diag_server::DiagRequest::Profile(rpc) => {
657 let (rpc_params, rpc_sender) = rpc.split();
658 if profiler_host.is_none() {
660 match launch_mesh_host(mesh, "profiler", Some(tracing.tracer()))
661 .await
662 .context("failed to launch profiler host")
663 {
664 Ok(host) => {
665 profiler_host = Some(host);
666 }
667 Err(e) => {
668 rpc_sender.complete(Err(RemoteError::new(e)));
669 continue;
670 }
671 }
672 }
673
674 let profiling_duration = rpc_params.duration;
675 let host = profiler_host.as_ref().unwrap();
676 let mut profiler_worker;
677 match host
678 .launch_worker(
679 profiler_worker::PROFILER_WORKER,
680 ProfilerWorkerParameters {
681 profiler_request: rpc_params,
682 },
683 )
684 .await
685 {
686 Ok(worker) => {
687 profiler_worker = worker;
688 }
689 Err(e) => {
690 rpc_sender.complete(Err(RemoteError::new(e)));
691 continue;
692 }
693 }
694
695 driver
696 .spawn("profiler_worker", async move {
697 let result = CancelContext::new()
698 .with_timeout(Duration::from_secs(profiling_duration + 30))
699 .until_cancelled(profiler_worker.join())
700 .await
701 .context("profiler worker cancelled")
702 .and_then(|result| result.context("profiler worker failed"))
703 .map_err(RemoteError::new);
704
705 rpc_sender.complete(result);
706 })
707 .detach();
708 }
709 }
710 }
711 Event::Worker(event) => match event {
712 WorkerEvent::Started => {
713 if let Some(response) = restart_rpc.take() {
714 tracing::info!(CVM_ALLOWED, "restart complete");
715 response.complete(Ok(()));
716 } else {
717 tracing::info!(CVM_ALLOWED, "vm worker started");
718 }
719 state = ControlState::Started;
720 }
721 WorkerEvent::Stopped => {
722 anyhow::bail!("worker unexpectedly stopped");
723 }
724 WorkerEvent::Failed(err) => {
725 return Err(anyhow::Error::from(err)).context("vm worker failed");
726 }
727 WorkerEvent::RestartFailed(err) => {
728 tracing::error!(
729 CVM_ALLOWED,
730 error = &err as &dyn std::error::Error,
731 "restart failed"
732 );
733 restart_rpc.take().unwrap().complete(Err(err));
734 state = ControlState::Started;
735 }
736 },
737 Event::Control(req) => match req {
738 ControlRequest::FlushLogs(rpc) => {
739 rpc.handle(async |mut ctx| {
740 tracing::info!(CVM_ALLOWED, "flushing logs");
741 ctx.until_cancelled(tracing.flush()).await?;
742 Ok(())
743 })
744 .await
745 }
746 },
747 }
748 }
749
750 Ok(())
751}
752
753async fn signal_vtl0_started(driver: &DefaultDriver) -> anyhow::Result<()> {
754 tracing::info!(CVM_ALLOWED, "signaling vtl0 started early");
755 let (client, task) = guest_emulation_transport::spawn_get_worker(driver.clone())
756 .await
757 .context("failed to spawn GET")?;
758 client.complete_start_vtl0(None).await;
759 drop(client);
761 task.await.unwrap();
762 tracing::info!(CVM_ALLOWED, "signaled vtl0 start");
763 Ok(())
764}
765
// Worker types registered for this binary via `mesh_worker::register_workers!`.
// `ProfilerWorker` is included only when the `profiler` feature is enabled.
// NOTE(review): presumably these are the workers that `RegisteredWorkers`
// resolves when a mesh host runs — confirm against `mesh_worker`.
register_workers! {
    UnderhillVmWorker,
    DiagWorker,
    #[cfg(feature = "profiler")]
    ProfilerWorker,
}