#![cfg(target_os = "linux")]
#![expect(missing_docs)]
#![forbid(unsafe_code)]

mod diag;
mod dispatch;
mod emuplat;
mod get_tracing;
mod inspect_internal;
mod inspect_proc;
mod livedump;
mod loader;
mod nvme_manager;
mod options;
mod reference_time;
mod servicing;
mod threadpool_vm_task_backend;
mod vmbus_relay_unit;
mod vmgs_logger;
mod vp;
mod vpci;
mod worker;
mod wrapped_partition;

pub use options::Options;

use crate::diag::DiagWorker;
use crate::dispatch::UhVmRpc;
use crate::worker::UnderhillEnvCfg;
use crate::worker::UnderhillRemoteConsoleCfg;
use crate::worker::UnderhillVmWorker;
use crate::worker::UnderhillWorkerParameters;
use anyhow::Context;
use bootloader_fdt_parser::BootTimes;
use cvm_tracing::CVM_ALLOWED;
use framebuffer::FRAMEBUFFER_SIZE;
use framebuffer::FramebufferAccess;
use futures::StreamExt;
use futures_concurrency::stream::Merge;
use get_tracing::init_tracing;
use get_tracing::init_tracing_backend;
use inspect::Inspect;
use inspect::SensitivityLevel;
use mesh::CancelContext;
use mesh::CancelReason;
use mesh::MeshPayload;
use mesh::error::RemoteError;
use mesh::rpc::Rpc;
use mesh::rpc::RpcSend;
use mesh_process::Mesh;
use mesh_process::ProcessConfig;
use mesh_process::try_run_mesh_host;
use mesh_tracing::RemoteTracer;
use mesh_tracing::TracingBackend;
use mesh_worker::RegisteredWorkers;
use mesh_worker::WorkerEvent;
use mesh_worker::WorkerHandle;
use mesh_worker::WorkerHost;
use mesh_worker::WorkerHostRunner;
use mesh_worker::launch_local_worker;
use mesh_worker::register_workers;
use pal_async::DefaultDriver;
use pal_async::DefaultPool;
use pal_async::task::Spawn;
#[cfg(feature = "profiler")]
use profiler_worker::ProfilerWorker;
#[cfg(feature = "profiler")]
use profiler_worker::ProfilerWorkerParameters;
use std::time::Duration;
use vmsocket::VmAddress;
use vmsocket::VmListener;
use vnc_worker_defs::VncParameters;

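// Builds the remote console configuration for the VM worker. When a
// framebuffer GPA base is provided, guest VRAM is mapped via
// /dev/mshv_vtl_low and a framebuffer plus its access handle are returned so
// a VNC worker can serve it; otherwise the synthetic keyboard, mouse, and
// video devices are disabled and no framebuffer access is returned.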
fn new_underhill_remote_console_cfg(
    framebuffer_gpa_base: Option<u64>,
) -> anyhow::Result<(UnderhillRemoteConsoleCfg, Option<FramebufferAccess>)> {
    if let Some(framebuffer_gpa_base) = framebuffer_gpa_base {
        let gpa_fd = fs_err::OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/mshv_vtl_low")
            .context("failed to open gpa device")?;

        let vram = sparse_mmap::new_mappable_from_file(gpa_fd.file(), true, false)?;
        let (fb, fba) = framebuffer::framebuffer(vram, FRAMEBUFFER_SIZE, framebuffer_gpa_base)
            .context("allocating framebuffer")?;
        tracing::debug!("framebuffer_gpa_base: {:#x}", framebuffer_gpa_base);

        Ok((
            UnderhillRemoteConsoleCfg {
                synth_keyboard: true,
                synth_mouse: true,
                synth_video: true,
                input: mesh::Receiver::new(),
                framebuffer: Some(fb),
            },
            Some(fba),
        ))
    } else {
        Ok((
            UnderhillRemoteConsoleCfg {
                synth_keyboard: false,
                synth_mouse: false,
                synth_video: false,
                input: mesh::Receiver::new(),
                framebuffer: None,
            },
            None,
        ))
    }
}

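// Process entry point. Installs the panic hook, optionally writes the
// saved-state protobuf descriptors and exits, handles the case where this
// process was launched as a child mesh host (`try_run_mesh_host`), and
// otherwise initializes the tracing backend and runs `do_main` on the
// default async pool.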
pub fn main() -> anyhow::Result<()> {
    install_task_name_panic_hook();

    if let Some(path) = std::env::var_os("OPENVMM_WRITE_SAVED_STATE_PROTO") {
        if cfg!(debug_assertions) {
            mesh::payload::protofile::DescriptorWriter::new(
                vmcore::save_restore::saved_state_roots(),
            )
            .write_to_path(path)
            .context("failed to write protobuf descriptors")?;
            return Ok(());
        } else {
            anyhow::bail!(".proto output only supported in debug builds");
        }
    }

    let (_, tracing_driver) = DefaultPool::spawn_on_thread("tracing");

    try_run_mesh_host("underhill", {
        let tracing_driver = tracing_driver.clone();
        async |params: MeshHostParams| {
            if let Some(remote_tracer) = params.tracer {
                init_tracing(tracing_driver, remote_tracer).context("failed to init tracing")?;
            }
            params.runner.run(RegisteredWorkers).await;
            Ok(())
        }
    })?;

    let mut tracing = init_tracing_backend(tracing_driver.clone())?;
    init_tracing(tracing_driver, tracing.tracer()).context("failed to init tracing")?;
    DefaultPool::run_with(|driver| do_main(driver, tracing))
}

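// Wraps the default panic hook so panics also report the name of the current
// async task (when there is one), making panic output easier to attribute.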
fn install_task_name_panic_hook() {
    use std::io::Write;

    let panic_hook = std::panic::take_hook();
    std::panic::set_hook(Box::new(move |info| {
        pal_async::task::with_current_task_metadata(|metadata| {
            if let Some(metadata) = metadata {
                let _ = write!(std::io::stderr(), "task '{}', ", metadata.name());
            }
        });
        panic_hook(info);
    }));
}

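// Top-level async entry: parses options, logs build and boot-time
// information, writes the PID file if requested, runs the control loop, and
// finally shuts down the mesh and tracing with a 10-second cancellation
// timeout.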
async fn do_main(driver: DefaultDriver, mut tracing: TracingBackend) -> anyhow::Result<()> {
    let opt = Options::parse(Vec::new(), Vec::new())?;

    let crate_name = build_info::get().crate_name();
    let crate_revision = build_info::get().scm_revision();
    tracing::info!(CVM_ALLOWED, ?crate_name, ?crate_revision, "VMM process");
    log_boot_times().context("failure logging boot times")?;

    if let Some(pid_path) = &opt.pid {
        std::fs::write(pid_path, std::process::id().to_string())
            .with_context(|| format!("failed to write pid to {}", pid_path.display()))?;
    }

    let mesh = Mesh::new("underhill".to_string()).context("failed to create mesh")?;

    let r = run_control(driver, &mesh, opt, &mut tracing).await;
    if let Err(err) = &r {
        tracing::error!(
            CVM_ALLOWED,
            error = err.as_ref() as &dyn std::error::Error,
            "VM failure"
        );
    }

    CancelContext::new()
        .with_timeout(Duration::from_secs(10))
        .until_cancelled(async {
            mesh.shutdown().await;
            tracing.shutdown().await;
        })
        .await
        .ok();

    r
}

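// Logs the boot loader and sidecar start/end reference times parsed from the
// device tree, along with the elapsed durations when both endpoints are
// available.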
fn log_boot_times() -> anyhow::Result<()> {
    fn diff(start: Option<u64>, end: Option<u64>) -> Option<tracing::field::DebugValue<Duration>> {
        use reference_time::ReferenceTime;
        Some(tracing::field::debug(
            ReferenceTime::new(end?).since(ReferenceTime::new(start?))?,
        ))
    }

    let BootTimes {
        start,
        end,
        sidecar_start,
        sidecar_end,
    } = BootTimes::new().context("failed to parse boot times")?;
    tracing::info!(
        CVM_ALLOWED,
        start,
        end,
        sidecar_start,
        sidecar_end,
        elapsed = diff(start, end),
        sidecar_elapsed = diff(sidecar_start, sidecar_end),
        "boot loader times"
    );
    Ok(())
}

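// The diagnostics worker and the channel on which it delivers requests to
// the control loop.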
struct DiagState {
    _worker: WorkerHandle,
    request_recv: mesh::Receiver<diag_server::DiagRequest>,
}

impl DiagState {
    async fn new() -> anyhow::Result<Self> {
        let (request_send, request_recv) = mesh::channel();
        let worker = launch_local_worker::<DiagWorker>(diag::DiagWorkerParameters { request_send })
            .await
            .context("failed to launch diagnostics worker")?;
        Ok(Self {
            _worker: worker,
            request_recv,
        })
    }
}

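// Handles to the workers that make up a running VM, exposed through inspect.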
#[derive(Inspect)]
struct Workers {
    vm: WorkerHandle,
    #[inspect(skip)]
    vm_rpc: mesh::Sender<UhVmRpc>,
    vnc: Option<WorkerHandle>,
    #[cfg(feature = "gdb")]
    gdb: Option<WorkerHandle>,
}

#[derive(MeshPayload)]
struct MeshHostParams {
    tracer: Option<RemoteTracer>,
    runner: WorkerHostRunner,
}

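// Launches a child mesh host process named `name` and returns a `WorkerHost`
// for launching workers in it. The child services the host runner via the
// closure passed to `try_run_mesh_host` in `main`.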
async fn launch_mesh_host(
    mesh: &Mesh,
    name: &str,
    tracer: Option<RemoteTracer>,
) -> anyhow::Result<WorkerHost> {
    let (host, runner) = mesh_worker::worker_host();
    mesh.launch_host(ProcessConfig::new(name), MeshHostParams { tracer, runner })
        .await?;
    Ok(host)
}

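// Launches the per-VM workers: an optional VNC worker (when a framebuffer is
// configured), an optional gdbstub worker (with the "gdb" feature), and the
// main VM worker, all wired to the control loop over mesh channels.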
async fn launch_workers(
    mesh: &Mesh,
    tracing: &mut TracingBackend,
    control_send: mesh::Sender<ControlRequest>,
    opt: Options,
) -> anyhow::Result<Workers> {
    let env_cfg = UnderhillEnvCfg {
        vmbus_max_version: opt.vmbus_max_version,
        vmbus_enable_mnf: opt.vmbus_enable_mnf,
        vmbus_force_confidential_external_memory: opt.vmbus_force_confidential_external_memory,
        vmbus_channel_unstick_delay: (opt.vmbus_channel_unstick_delay_ms != 0)
            .then(|| Duration::from_millis(opt.vmbus_channel_unstick_delay_ms)),
        cmdline_append: opt.cmdline_append.clone(),
        reformat_vmgs: opt.reformat_vmgs,
        vtl0_starts_paused: opt.vtl0_starts_paused,
        emulated_serial_wait_for_rts: opt.serial_wait_for_rts,
        force_load_vtl0_image: opt.force_load_vtl0_image,
        nvme_vfio: opt.nvme_vfio,
        mcr: opt.mcr,
        halt_on_guest_halt: opt.halt_on_guest_halt,
        no_sidecar_hotplug: opt.no_sidecar_hotplug,
        gdbstub: opt.gdbstub,
        hide_isolation: opt.hide_isolation,
        nvme_keep_alive: opt.nvme_keep_alive,
        mana_keep_alive: opt.mana_keep_alive,
        nvme_always_flr: opt.nvme_always_flr,
        test_configuration: opt.test_configuration,
        disable_uefi_frontpage: opt.disable_uefi_frontpage,
        default_boot_always_attempt: opt.default_boot_always_attempt,
        guest_state_lifetime: opt.guest_state_lifetime,
        guest_state_encryption_policy: opt.guest_state_encryption_policy,
        strict_encryption_policy: opt.strict_encryption_policy,
        attempt_ak_cert_callback: opt.attempt_ak_cert_callback,
        enable_vpci_relay: opt.enable_vpci_relay,
        disable_proxy_redirect: opt.disable_proxy_redirect,
        disable_lower_vtl_timer_virt: opt.disable_lower_vtl_timer_virt,
        config_timeout_in_seconds: opt.config_timeout_in_seconds,
    };

    let (mut remote_console_cfg, framebuffer_access) =
        new_underhill_remote_console_cfg(opt.framebuffer_gpa_base)?;

    let mut vnc_worker = None;
    if let Some(framebuffer) = framebuffer_access {
        let listener = VmListener::bind(VmAddress::vsock_any(opt.vnc_port))
            .context("failed to bind socket")?;

        let input_send = remote_console_cfg.input.sender();

        let vnc_host = launch_mesh_host(mesh, "vnc", Some(tracing.tracer()))
            .await
            .context("spawning vnc process failed")?;

        vnc_worker = Some(
            vnc_host
                .launch_worker(
                    vnc_worker_defs::VNC_WORKER_VMSOCKET,
                    VncParameters {
                        listener,
                        framebuffer,
                        input_send,
                    },
                )
                .await?,
        )
    }

    #[cfg(feature = "gdb")]
    let mut gdbstub_worker = None;
    #[cfg_attr(not(feature = "gdb"), expect(unused_mut))]
    let mut debugger_rpc = None;
    #[cfg(feature = "gdb")]
    if opt.gdbstub {
        let listener = VmListener::bind(VmAddress::vsock_any(opt.gdbstub_port))
            .context("failed to bind socket")?;

        let gdb_host = launch_mesh_host(mesh, "gdb", Some(tracing.tracer()))
            .await
            .context("failed to spawn gdb host process")?;

        let vp_count =
            pal::unix::affinity::max_present_cpu().context("failed to get max present cpu")? + 1;

        let (send, recv) = mesh::channel();
        debugger_rpc = Some(recv);
        gdbstub_worker = Some(
            gdb_host
                .launch_worker(
                    debug_worker_defs::DEBUGGER_VSOCK_WORKER,
                    debug_worker_defs::DebuggerParameters {
                        listener,
                        req_chan: send,
                        vp_count,
                        target_arch: if cfg!(guest_arch = "x86_64") {
                            debug_worker_defs::TargetArch::X86_64
                        } else {
                            debug_worker_defs::TargetArch::Aarch64
                        },
                    },
                )
                .await?,
        );
    }
    let (vm_rpc, vm_rpc_rx) = mesh::channel();

    let host = launch_mesh_host(mesh, "vm", Some(tracing.tracer()))
        .await
        .context("failed to launch worker process")?;

    let vm_worker = host
        .start_worker(
            worker::UNDERHILL_WORKER,
            UnderhillWorkerParameters {
                env_cfg,
                remote_console_cfg,
                debugger_rpc,
                vm_rpc: vm_rpc_rx,
                control_send,
            },
        )
        .context("failed to launch worker")?;

    Ok(Workers {
        vm: vm_worker,
        vm_rpc,
        vnc: vnc_worker,
        #[cfg(feature = "gdb")]
        gdb: gdbstub_worker,
    })
}

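// The control loop's view of the VM worker lifecycle, reported through
// inspect.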
#[derive(Inspect)]
enum ControlState {
    WaitingForStart,
    Starting,
    Started,
    Restarting,
}

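// Requests sent by the VM worker back to the control loop.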
#[derive(MeshPayload)]
pub enum ControlRequest {
    FlushLogs(Rpc<CancelContext, Result<(), CancelReason>>),
    MakeWorker(Rpc<String, Result<WorkerHost, RemoteError>>),
}

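// Runs the control loop: multiplexes diagnostics requests, VM worker
// lifecycle events, and control requests from the VM worker until the merged
// event stream ends or a fatal error occurs.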
async fn run_control(
    driver: DefaultDriver,
    mesh: &Mesh,
    opt: Options,
    mut tracing: &mut TracingBackend,
) -> anyhow::Result<()> {
    let (control_send, mut control_recv) = mesh::channel();
    let mut control_send = Some(control_send);

    if opt.signal_vtl0_started {
        signal_vtl0_started(&driver)
            .await
            .context("failed to signal vtl0 started")?;
    }

    let mut diag = DiagState::new().await?;

    let (diag_reinspect_send, mut diag_reinspect_recv) = mesh::channel();
    #[cfg(feature = "profiler")]
    let mut profiler_host = None;
    let mut state;
    let mut workers = if opt.wait_for_start {
        state = ControlState::WaitingForStart;
        None
    } else {
        state = ControlState::Starting;
        let workers = launch_workers(mesh, tracing, control_send.take().unwrap(), opt)
            .await
            .context("failed to launch workers")?;
        Some(workers)
    };

    enum Event {
        Diag(diag_server::DiagRequest),
        Worker(WorkerEvent),
        Control(ControlRequest),
    }

    let mut restart_rpc = None;
    loop {
        let event = {
            let mut stream = (
                (&mut diag.request_recv).map(Event::Diag),
                (&mut diag_reinspect_recv)
                    .map(|req| Event::Diag(diag_server::DiagRequest::Inspect(req))),
                (&mut control_recv).map(Event::Control),
                futures::stream::select_all(workers.as_mut().map(|w| &mut w.vm)).map(Event::Worker),
            )
                .merge();

            let Some(event) = stream.next().await else {
                break;
            };
            event
        };

        match event {
            Event::Diag(request) => {
                match request {
                    diag_server::DiagRequest::Start(rpc) => {
                        rpc.handle_failable(async |params| {
                            if workers.is_some() {
                                Err(anyhow::anyhow!("workers have already been started"))?;
                            }
                            let new_opt = Options::parse(params.args, params.env)
                                .context("failed to parse new options")?;

                            workers = Some(
                                launch_workers(
                                    mesh,
                                    tracing,
                                    control_send.take().unwrap(),
                                    new_opt,
                                )
                                .await?,
                            );
                            state = ControlState::Starting;
                            anyhow::Ok(())
                        })
                        .await
                    }
                    diag_server::DiagRequest::Inspect(deferred) => deferred.respond(|resp| {
                        resp.sensitivity_field("mesh", SensitivityLevel::Safe, mesh)
                            .sensitivity_field_mut("trace", SensitivityLevel::Safe, &mut tracing)
                            .sensitivity_field(
                                "build_info",
                                SensitivityLevel::Safe,
                                build_info::get(),
                            )
                            .sensitivity_child(
                                "proc",
                                SensitivityLevel::Safe,
                                inspect_proc::inspect_proc,
                            )
                            .sensitivity_field("control_state", SensitivityLevel::Safe, &state)
                            .sensitivity_child("uhdiag", SensitivityLevel::Safe, |req| {
                                inspect_internal::inspect_internal_diagnostics(
                                    req,
                                    &diag_reinspect_send,
                                    &driver,
                                )
                            });

                        resp.merge(&workers);
                    }),
                    diag_server::DiagRequest::Crash(pid) => {
                        mesh.crash(pid);
                    }
                    diag_server::DiagRequest::Restart(rpc) => {
                        let Some(workers) = &mut workers else {
                            rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "worker has not been started yet"
                            ))));
                            continue;
                        };

                        let r = async {
                            if restart_rpc.is_some() {
                                anyhow::bail!("previous restart still in progress");
                            }

                            let host = launch_mesh_host(mesh, "vm", Some(tracing.tracer()))
                                .await
                                .context("failed to launch worker process")?;

                            workers.vm.restart(&host);
                            Ok(())
                        }
                        .await;

                        if r.is_err() {
                            rpc.complete(r.map_err(RemoteError::new));
                        } else {
                            state = ControlState::Restarting;
                            restart_rpc = Some(rpc);
                        }
                    }
                    diag_server::DiagRequest::Pause(rpc) => {
                        let Some(workers) = &mut workers else {
                            rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "worker has not been started yet"
                            ))));
                            continue;
                        };

                        let req = workers.vm_rpc.call(UhVmRpc::Pause, ());

                        driver
                            .spawn("diag-pause", async move {
                                let was_paused = req.await.expect("failed to pause VM");
                                rpc.handle_failable_sync(|_| {
                                    if !was_paused {
                                        Err(anyhow::anyhow!("VM is already paused"))
                                    } else {
                                        Ok(())
                                    }
                                });
                            })
                            .detach();
                    }
                    diag_server::DiagRequest::PacketCapture(rpc) => {
                        let Some(workers) = &mut workers else {
                            rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "worker has not been started yet"
                            ))));
                            continue;
                        };

                        workers.vm_rpc.send(UhVmRpc::PacketCapture(rpc));
                    }
                    diag_server::DiagRequest::Resume(rpc) => {
                        let Some(workers) = &mut workers else {
                            rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "worker has not been started yet"
                            ))));
                            continue;
                        };

                        let was_resumed = workers
                            .vm_rpc
                            .call(UhVmRpc::Resume, ())
                            .await
                            .context("failed to resume VM")?;

                        let was_halted = workers
                            .vm_rpc
                            .call(UhVmRpc::ClearHalt, ())
                            .await
                            .context("failed to clear halt from VPs")?;

                        rpc.handle_sync(|_| {
                            if was_resumed || was_halted {
                                Ok(())
                            } else {
                                Err(RemoteError::new(anyhow::anyhow!("VM is currently running")))
                            }
                        });
                    }
                    diag_server::DiagRequest::Save(rpc) => {
                        let Some(workers) = &mut workers else {
                            rpc.complete(Err(RemoteError::new(anyhow::anyhow!(
                                "worker has not been started yet"
                            ))));
                            continue;
                        };

                        workers.vm_rpc.send(UhVmRpc::Save(rpc));
                    }
                    #[cfg(feature = "profiler")]
                    diag_server::DiagRequest::Profile(rpc) => {
                        let (rpc_params, rpc_sender) = rpc.split();
                        if profiler_host.is_none() {
                            match launch_mesh_host(mesh, "profiler", Some(tracing.tracer()))
                                .await
                                .context("failed to launch profiler host")
                            {
                                Ok(host) => {
                                    profiler_host = Some(host);
                                }
                                Err(e) => {
                                    rpc_sender.complete(Err(RemoteError::new(e)));
                                    continue;
                                }
                            }
                        }

                        let profiling_duration = rpc_params.duration;
                        let host = profiler_host.as_ref().unwrap();
                        let mut profiler_worker;
                        match host
                            .launch_worker(
                                profiler_worker::PROFILER_WORKER,
                                ProfilerWorkerParameters {
                                    profiler_request: rpc_params,
                                },
                            )
                            .await
                        {
                            Ok(worker) => {
                                profiler_worker = worker;
                            }
                            Err(e) => {
                                rpc_sender.complete(Err(RemoteError::new(e)));
                                continue;
                            }
                        }

                        driver
                            .spawn("profiler_worker", async move {
                                let result = CancelContext::new()
                                    .with_timeout(Duration::from_secs(profiling_duration + 30))
                                    .until_cancelled(profiler_worker.join())
                                    .await
                                    .context("profiler worker cancelled")
                                    .and_then(|result| result.context("profiler worker failed"))
                                    .map_err(RemoteError::new);

                                rpc_sender.complete(result);
                            })
                            .detach();
                    }
                }
            }
            Event::Worker(event) => match event {
                WorkerEvent::Started => {
                    if let Some(response) = restart_rpc.take() {
                        tracing::info!(CVM_ALLOWED, "restart complete");
                        response.complete(Ok(()));
                    } else {
                        tracing::info!(CVM_ALLOWED, "vm worker started");
                    }
                    state = ControlState::Started;
                }
                WorkerEvent::Stopped => {
                    anyhow::bail!("worker unexpectedly stopped");
                }
                WorkerEvent::Failed(err) => {
                    return Err(anyhow::Error::from(err)).context("vm worker failed");
                }
                WorkerEvent::RestartFailed(err) => {
                    tracing::error!(
                        CVM_ALLOWED,
                        error = &err as &dyn std::error::Error,
                        "restart failed"
                    );
                    restart_rpc.take().unwrap().complete(Err(err));
                    state = ControlState::Started;
                }
            },
            Event::Control(req) => match req {
                ControlRequest::FlushLogs(rpc) => {
                    rpc.handle(async |mut ctx| {
                        tracing::info!(CVM_ALLOWED, "flushing logs");
                        ctx.until_cancelled(tracing.flush()).await?;
                        Ok(())
                    })
                    .await
                }
                ControlRequest::MakeWorker(rpc) => {
                    rpc.handle_failable(async |name| {
                        launch_mesh_host(mesh, &name, Some(tracing.tracer())).await
                    })
                    .await
                }
            },
        }
    }

    Ok(())
}

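// Reports VTL0 start completion to the host over a temporary GET (guest
// emulation transport) connection, used when `opt.signal_vtl0_started` is
// set.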
async fn signal_vtl0_started(driver: &DefaultDriver) -> anyhow::Result<()> {
    tracing::info!(CVM_ALLOWED, "signaling vtl0 started early");
    let (client, task) = guest_emulation_transport::spawn_get_worker(driver.clone())
        .await
        .context("failed to spawn GET")?;
    client.complete_start_vtl0(None).await;
    drop(client);
    task.await.unwrap();
    tracing::info!(CVM_ALLOWED, "signaled vtl0 start");
    Ok(())
}

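// Workers that can be hosted by mesh hosts spawned from this binary.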
register_workers! {
    UnderhillVmWorker,
    DiagWorker,
    #[cfg(feature = "profiler")]
    ProfilerWorker,
}