petri/vm/openvmm/
runtime.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Methods to interact with a running [`PetriVmOpenVmm`].
5
6use super::PetriVmResourcesOpenVmm;
7use crate::OpenHclServicingFlags;
8use crate::PetriHaltReason;
9use crate::PetriVmFramebufferAccess;
10use crate::PetriVmInspector;
11use crate::PetriVmRuntime;
12use crate::ShutdownKind;
13use crate::VmScreenshotMeta;
14use crate::openhcl_diag::OpenHclDiagHandler;
15use crate::worker::Worker;
16use anyhow::Context;
17use async_trait::async_trait;
18use framebuffer::View;
19use futures::FutureExt;
20use futures_concurrency::future::Race;
21use get_resources::ged::FirmwareEvent;
22use hyperv_ic_resources::shutdown::ShutdownRpc;
23use mesh::CancelContext;
24use mesh::Receiver;
25use mesh::RecvError;
26use mesh::rpc::RpcError;
27use mesh::rpc::RpcSend;
28use mesh_process::Mesh;
29use openvmm_defs::rpc::PulseSaveRestoreError;
30use pal_async::socket::PolledSocket;
31use petri_artifacts_core::ResolvedArtifact;
32use pipette_client::PipetteClient;
33use std::future::Future;
34use std::path::Path;
35use std::sync::Arc;
36use std::time::Duration;
37use vmm_core_defs::HaltReason;
38use vtl2_settings_proto::Vtl2Settings;
39
/// A running VM that tests can interact with.
// DEVNOTE: Really the PetriVmInner is the actual VM and channels that we interact
// with. This struct exists as a wrapper to provide error handling, such as not
// hanging indefinitely when waiting on certain channels if the VM crashes.
pub struct PetriVmOpenVmm {
    /// The resources, mesh, and worker handle that the wrapped methods
    /// operate on.
    inner: PetriVmInner,
    /// The halt notification channel, plus any halt result that was already
    /// read from it while racing another future.
    halt: PetriVmHaltReceiver,
}
48
49#[async_trait]
50impl PetriVmRuntime for PetriVmOpenVmm {
51    type VmInspector = OpenVmmInspector;
52    type VmFramebufferAccess = OpenVmmFramebufferAccess;
53
54    async fn teardown(self) -> anyhow::Result<()> {
55        tracing::info!("waiting for worker");
56        let worker = Arc::into_inner(self.inner.worker)
57            .context("all references to the OpenVMM worker have not been closed")?;
58        worker.shutdown().await?;
59
60        tracing::info!("Worker quit, waiting for mesh");
61        self.inner.mesh.shutdown().await;
62
63        tracing::info!("Mesh shutdown, waiting for logging tasks");
64        for t in self.inner.resources.log_stream_tasks {
65            t.await?;
66        }
67
68        Ok(())
69    }
70
71    async fn wait_for_halt(&mut self, allow_reset: bool) -> anyhow::Result<PetriHaltReason> {
72        let halt_reason = if let Some(already) = self.halt.already_received.take() {
73            already.map_err(anyhow::Error::from)
74        } else {
75            self.halt
76                .halt_notif
77                .recv()
78                .await
79                .context("Failed to get halt reason")
80        }?;
81
82        tracing::info!(?halt_reason, "Got halt reason");
83
84        let halt_reason = match halt_reason {
85            HaltReason::PowerOff => PetriHaltReason::PowerOff,
86            HaltReason::Reset => PetriHaltReason::Reset,
87            HaltReason::Hibernate => PetriHaltReason::Hibernate,
88            HaltReason::TripleFault { .. } => PetriHaltReason::TripleFault,
89            _ => PetriHaltReason::Other,
90        };
91
92        if allow_reset && halt_reason == PetriHaltReason::Reset {
93            self.reset().await?
94        }
95
96        Ok(halt_reason)
97    }
98
99    async fn wait_for_agent(&mut self, set_high_vtl: bool) -> anyhow::Result<PipetteClient> {
100        Self::wait_for_agent(self, set_high_vtl).await
101    }
102
103    fn openhcl_diag(&self) -> Option<OpenHclDiagHandler> {
104        self.inner.resources.vtl2_vsock_path.as_ref().map(|path| {
105            OpenHclDiagHandler::new(diag_client::DiagClient::from_hybrid_vsock(
106                self.inner.resources.driver.clone(),
107                path,
108            ))
109        })
110    }
111
112    async fn wait_for_boot_event(&mut self) -> anyhow::Result<FirmwareEvent> {
113        Self::wait_for_boot_event(self).await
114    }
115
116    async fn wait_for_enlightened_shutdown_ready(&mut self) -> anyhow::Result<()> {
117        Self::wait_for_enlightened_shutdown_ready(self)
118            .await
119            .map(|_| ())
120    }
121
122    async fn send_enlightened_shutdown(&mut self, kind: ShutdownKind) -> anyhow::Result<()> {
123        Self::send_enlightened_shutdown(self, kind).await
124    }
125
126    async fn restart_openhcl(
127        &mut self,
128        new_openhcl: &ResolvedArtifact,
129        flags: OpenHclServicingFlags,
130    ) -> anyhow::Result<()> {
131        Self::save_openhcl(self, new_openhcl, flags).await?;
132        Self::restore_openhcl(self).await
133    }
134
135    async fn save_openhcl(
136        &mut self,
137        new_openhcl: &ResolvedArtifact,
138        flags: OpenHclServicingFlags,
139    ) -> anyhow::Result<()> {
140        Self::save_openhcl(self, new_openhcl, flags).await
141    }
142
143    async fn restore_openhcl(&mut self) -> anyhow::Result<()> {
144        Self::restore_openhcl(self).await
145    }
146
147    async fn update_command_line(&mut self, command_line: &str) -> anyhow::Result<()> {
148        Self::update_command_line(self, command_line).await
149    }
150
151    fn inspector(&self) -> Option<OpenVmmInspector> {
152        Some(OpenVmmInspector {
153            worker: self.inner.worker.clone(),
154        })
155    }
156
157    fn take_framebuffer_access(&mut self) -> Option<OpenVmmFramebufferAccess> {
158        self.inner
159            .framebuffer_view
160            .take()
161            .map(|view| OpenVmmFramebufferAccess { view })
162    }
163
164    async fn reset(&mut self) -> anyhow::Result<()> {
165        Self::reset(self).await
166    }
167
168    async fn set_vtl2_settings(&mut self, settings: &Vtl2Settings) -> anyhow::Result<()> {
169        Self::set_vtl2_settings(self, settings).await
170    }
171}
172
/// The resources and handles that back a running OpenVMM VM.
pub(super) struct PetriVmInner {
    /// Per-VM channels, listeners, and other resources used by the runtime
    /// methods.
    pub(super) resources: PetriVmResourcesOpenVmm,
    /// The mesh; shut down during teardown once the worker has quit.
    pub(super) mesh: Mesh,
    /// Shared handle to the worker that VM control requests go through.
    pub(super) worker: Arc<Worker>,
    /// The framebuffer view, if any; moved out of this slot by
    /// `take_framebuffer_access`.
    pub(super) framebuffer_view: Option<View>,
}
179
/// The halt notification channel together with a slot for a halt result that
/// was received while waiting on some other future.
struct PetriVmHaltReceiver {
    /// Channel on which halt reasons arrive.
    halt_notif: Receiver<HaltReason>,
    /// A result already read from `halt_notif` by
    /// `wait_for_halt_or_internal`, kept so that a later `wait_for_halt` can
    /// return it instead of waiting on the channel again.
    already_received: Option<Result<HaltReason, RecvError>>,
}
184
// Wrap a PetriVmInner function in [`PetriVmOpenVmm::wait_for_halt_or_internal`] to
// provide better error handling.
//
// Given a signature `async fn name(&mut self, args...) -> Ret`, this expands
// to a method with the same attributes, visibility, name, arguments, and
// return type whose body forwards to `self.inner.name(args...)` and races that
// call against the halt channel held in `self.halt`.
//
// The argument list must not end with a trailing comma: after `&mut self` the
// pattern only accepts repetitions of `, name: Type`.
macro_rules! petri_vm_fn {
    ($(#[$($attrss:tt)*])* $vis:vis async fn $fn_name:ident (&mut self $(,$arg:ident: $ty:ty)*) $(-> $ret:ty)?) => {
        $(#[$($attrss)*])*
        $vis async fn $fn_name(&mut self, $($arg:$ty,)*) $(-> $ret)? {
            Self::wait_for_halt_or_internal(&mut self.halt, self.inner.$fn_name($($arg,)*)).await
        }
    };
}
195
196// TODO: Add all runtime functions that are not backend specific
197// to the `PetriVmRuntime` trait
198impl PetriVmOpenVmm {
199    pub(super) fn new(inner: PetriVmInner, halt_notif: Receiver<HaltReason>) -> Self {
200        Self {
201            inner,
202            halt: PetriVmHaltReceiver {
203                halt_notif,
204                already_received: None,
205            },
206        }
207    }
208
209    /// Get the path to the VTL 2 vsock socket, if the VM is configured with OpenHCL.
210    pub fn vtl2_vsock_path(&self) -> anyhow::Result<&Path> {
211        self.inner
212            .resources
213            .vtl2_vsock_path
214            .as_deref()
215            .context("VM is not configured with OpenHCL")
216    }
217
218    petri_vm_fn!(
219        /// Waits for an event emitted by the firmware about its boot status, and
220        /// returns that status.
221        pub async fn wait_for_boot_event(&mut self) -> anyhow::Result<FirmwareEvent>
222    );
223    petri_vm_fn!(
224        /// Waits for the Hyper-V shutdown IC to be ready, returning a receiver
225        /// that will be closed when it is no longer ready.
226        pub async fn wait_for_enlightened_shutdown_ready(&mut self) -> anyhow::Result<mesh::OneshotReceiver<()>>
227    );
228    petri_vm_fn!(
229        /// Instruct the guest to shutdown via the Hyper-V shutdown IC.
230        pub async fn send_enlightened_shutdown(&mut self, kind: ShutdownKind) -> anyhow::Result<()>
231    );
232    petri_vm_fn!(
233        /// Waits for the KVP IC to be ready, returning a sender that can be used
234        /// to send requests to it.
235        pub async fn wait_for_kvp(&mut self) -> anyhow::Result<mesh::Sender<hyperv_ic_resources::kvp::KvpRpc>>
236    );
237    petri_vm_fn!(
238        /// Stages the new OpenHCL file and saves the existing state.
239        pub async fn save_openhcl(
240            &mut self,
241            new_openhcl: &ResolvedArtifact,
242            flags: OpenHclServicingFlags
243        ) -> anyhow::Result<()>
244    );
245    petri_vm_fn!(
246        /// Restores OpenHCL from a previously saved state.
247        pub async fn restore_openhcl(
248            &mut self
249        ) -> anyhow::Result<()>
250    );
251    petri_vm_fn!(
252        /// Updates the command line parameters of the running VM.
253        pub async fn update_command_line(
254            &mut self,
255            command_line: &str
256        ) -> anyhow::Result<()>
257    );
258    petri_vm_fn!(
259        /// Resets the hardware state of the VM, simulating a power cycle.
260        pub async fn reset(&mut self) -> anyhow::Result<()>
261    );
262    petri_vm_fn!(
263        /// Wait for a connection from a pipette agent
264        pub async fn wait_for_agent(&mut self, set_high_vtl: bool) -> anyhow::Result<PipetteClient>
265    );
266    petri_vm_fn!(
267        /// Set the OpenHCL VTL2 settings.
268        pub async fn set_vtl2_settings(&mut self, settings: &Vtl2Settings) -> anyhow::Result<()>
269    );
270
271    petri_vm_fn!(pub(crate) async fn resume(&mut self) -> anyhow::Result<()>);
272    petri_vm_fn!(pub(crate) async fn verify_save_restore(&mut self) -> anyhow::Result<()>);
273    petri_vm_fn!(pub(crate) async fn launch_linux_direct_pipette(&mut self) -> anyhow::Result<()>);
274
275    /// Wrap the provided future in a race with the worker process's halt
276    /// notification channel. This is useful for preventing a future from
277    /// waiting indefinitely if the VM dies for any reason. If the worker
278    /// process crashes the halt notification channel will return an error, and
279    /// if the VM halts for any other reason the future will complete with that
280    /// reason.
281    pub async fn wait_for_halt_or<T, F: Future<Output = anyhow::Result<T>>>(
282        &mut self,
283        future: F,
284    ) -> anyhow::Result<T> {
285        Self::wait_for_halt_or_internal(&mut self.halt, future).await
286    }
287
288    async fn wait_for_halt_or_internal<T, F: Future<Output = anyhow::Result<T>>>(
289        halt: &mut PetriVmHaltReceiver,
290        future: F,
291    ) -> anyhow::Result<T> {
292        let future = &mut std::pin::pin!(future);
293        enum Either<T> {
294            Future(anyhow::Result<T>),
295            Halt(Result<HaltReason, RecvError>),
296        }
297        let res = (
298            future.map(Either::Future),
299            halt.halt_notif.recv().map(Either::Halt),
300        )
301            .race()
302            .await;
303
304        match res {
305            Either::Future(Ok(success)) => Ok(success),
306            Either::Future(Err(e)) => {
307                tracing::warn!(
308                    ?e,
309                    "Future returned with an error, sleeping for 5 seconds to let outstanding work finish"
310                );
311                let mut c = CancelContext::new().with_timeout(Duration::from_secs(5));
312                c.cancelled().await;
313                Err(e)
314            }
315            Either::Halt(halt_result) => {
316                tracing::warn!(
317                    halt_result = format_args!("{:x?}", halt_result),
318                    "Halt channel returned while waiting for other future, sleeping for 5 seconds to let outstanding work finish"
319                );
320                let mut c = CancelContext::new().with_timeout(Duration::from_secs(5));
321                let try_again = c.until_cancelled(future).await;
322
323                match try_again {
324                    Ok(fut_result) => {
325                        halt.already_received = Some(halt_result);
326                        if let Err(e) = &fut_result {
327                            tracing::warn!(
328                                ?e,
329                                "Future returned with an error, sleeping for 5 seconds to let outstanding work finish"
330                            );
331                            let mut c = CancelContext::new().with_timeout(Duration::from_secs(5));
332                            c.cancelled().await;
333                        }
334                        fut_result
335                    }
336                    Err(_cancel) => match halt_result {
337                        Ok(halt_reason) => Err(anyhow::anyhow!("VM halted: {:x?}", halt_reason)),
338                        Err(e) => Err(e).context("VM disappeared"),
339                    },
340                }
341            }
342        }
343    }
344}
345
346impl PetriVmInner {
347    async fn wait_for_boot_event(&mut self) -> anyhow::Result<FirmwareEvent> {
348        self.resources
349            .firmware_event_recv
350            .recv()
351            .await
352            .context("Failed to get firmware boot event")
353    }
354
355    async fn wait_for_enlightened_shutdown_ready(
356        &mut self,
357    ) -> anyhow::Result<mesh::OneshotReceiver<()>> {
358        let recv = self
359            .resources
360            .shutdown_ic_send
361            .call(ShutdownRpc::WaitReady, ())
362            .await?;
363
364        Ok(recv)
365    }
366
367    async fn send_enlightened_shutdown(&mut self, kind: ShutdownKind) -> anyhow::Result<()> {
368        let shutdown_result = self
369            .resources
370            .shutdown_ic_send
371            .call(
372                ShutdownRpc::Shutdown,
373                hyperv_ic_resources::shutdown::ShutdownParams {
374                    shutdown_type: match kind {
375                        ShutdownKind::Shutdown => {
376                            hyperv_ic_resources::shutdown::ShutdownType::PowerOff
377                        }
378                        ShutdownKind::Reboot => hyperv_ic_resources::shutdown::ShutdownType::Reboot,
379                    },
380                    force: false,
381                },
382            )
383            .await?;
384
385        tracing::info!(?shutdown_result, "Shutdown sent");
386        anyhow::ensure!(
387            shutdown_result == hyperv_ic_resources::shutdown::ShutdownResult::Ok,
388            "Got non-Ok shutdown response"
389        );
390
391        Ok(())
392    }
393
394    async fn wait_for_kvp(
395        &mut self,
396    ) -> anyhow::Result<mesh::Sender<hyperv_ic_resources::kvp::KvpRpc>> {
397        tracing::info!("Waiting for KVP IC");
398        let (send, _) = self
399            .resources
400            .kvp_ic_send
401            .call_failable(hyperv_ic_resources::kvp::KvpConnectRpc::WaitForGuest, ())
402            .await
403            .context("failed to connect to KVP IC")?;
404
405        Ok(send)
406    }
407
408    async fn save_openhcl(
409        &self,
410        new_openhcl: &ResolvedArtifact,
411        flags: OpenHclServicingFlags,
412    ) -> anyhow::Result<()> {
413        let ged_send = self
414            .resources
415            .ged_send
416            .as_ref()
417            .context("openhcl not configured")?;
418
419        let igvm_file = fs_err::File::open(new_openhcl).context("failed to open igvm file")?;
420        self.worker
421            .save_openhcl(ged_send, flags, igvm_file.into())
422            .await
423    }
424
425    async fn update_command_line(&mut self, command_line: &str) -> anyhow::Result<()> {
426        self.worker.update_command_line(command_line).await
427    }
428
429    async fn restore_openhcl(&self) -> anyhow::Result<()> {
430        let ged_send = self
431            .resources
432            .ged_send
433            .as_ref()
434            .context("openhcl not configured")?;
435
436        self.worker.restore_openhcl(ged_send).await
437    }
438
439    async fn set_vtl2_settings(&self, settings: &Vtl2Settings) -> anyhow::Result<()> {
440        let ged_send = self
441            .resources
442            .ged_send
443            .as_ref()
444            .context("openhcl not configured")?;
445
446        ged_send
447            .call_failable(
448                get_resources::ged::GuestEmulationRequest::ModifyVtl2Settings,
449                prost::Message::encode_to_vec(settings),
450            )
451            .await?;
452
453        Ok(())
454    }
455
456    async fn reset(&mut self) -> anyhow::Result<()> {
457        tracing::info!("Resetting VM");
458        self.worker.reset().await?;
459        // On linux direct pipette won't auto start, start it over serial
460        if let Some(agent) = self.resources.linux_direct_serial_agent.as_mut() {
461            agent.reset();
462
463            if self
464                .resources
465                .agent_image
466                .as_ref()
467                .is_some_and(|x| x.contains_pipette())
468            {
469                self.launch_linux_direct_pipette().await?;
470            }
471        }
472        Ok(())
473    }
474
475    async fn wait_for_agent(&mut self, set_high_vtl: bool) -> anyhow::Result<PipetteClient> {
476        let listener = if set_high_vtl {
477            self.resources
478                .vtl2_pipette_listener
479                .as_mut()
480                .context("VM is not configured with VTL 2")?
481        } else {
482            &mut self.resources.pipette_listener
483        };
484
485        tracing::info!(set_high_vtl, "listening for pipette connection");
486        let (conn, _) = listener
487            .accept()
488            .await
489            .context("failed to accept pipette connection")?;
490        tracing::info!(set_high_vtl, "handshaking with pipette");
491        let client = PipetteClient::new(
492            &self.resources.driver,
493            PolledSocket::new(&self.resources.driver, conn)?,
494            &self.resources.output_dir,
495        )
496        .await
497        .context("failed to connect to pipette");
498        tracing::info!(set_high_vtl, "completed pipette handshake");
499        client
500    }
501
502    async fn resume(&self) -> anyhow::Result<()> {
503        self.worker.resume().await?;
504        Ok(())
505    }
506
507    async fn verify_save_restore(&self) -> anyhow::Result<()> {
508        for i in 0..2 {
509            let result = self.worker.pulse_save_restore().await;
510            match result {
511                Ok(()) => {}
512                Err(RpcError::Channel(err)) => return Err(err.into()),
513                Err(RpcError::Call(PulseSaveRestoreError::ResetNotSupported)) => {
514                    tracing::warn!("Reset not supported, could not test save + restore.");
515                    break;
516                }
517                Err(RpcError::Call(PulseSaveRestoreError::Other(err))) => {
518                    return Err(anyhow::Error::from(err))
519                        .context(format!("Save + restore {i} failed."));
520                }
521            }
522        }
523
524        Ok(())
525    }
526
527    async fn launch_linux_direct_pipette(&mut self) -> anyhow::Result<()> {
528        // Start pipette through serial on linux direct.
529        self.resources
530            .linux_direct_serial_agent
531            .as_mut()
532            .unwrap()
533            .run_command("mkdir /cidata && mount LABEL=cidata /cidata && sh -c '/cidata/pipette &'")
534            .await?;
535        Ok(())
536    }
537}
538
/// Interface for inspecting OpenVMM
pub struct OpenVmmInspector {
    /// Shared reference to the worker that inspection requests are sent to.
    worker: Arc<Worker>,
}
543
#[async_trait]
impl PetriVmInspector for OpenVmmInspector {
    /// Asks the worker for its full inspection tree. The result is always
    /// wrapped in `Ok`; this method adds no failure path of its own.
    async fn inspect_all(&self) -> anyhow::Result<inspect::Node> {
        Ok(self.worker.inspect_all().await)
    }
}
550
/// Interface to the OpenVMM framebuffer
pub struct OpenVmmFramebufferAccess {
    /// The framebuffer view that screenshots are read from.
    view: View,
}
555
556#[async_trait]
557impl PetriVmFramebufferAccess for OpenVmmFramebufferAccess {
558    async fn screenshot(
559        &mut self,
560        image: &mut Vec<u8>,
561    ) -> anyhow::Result<Option<VmScreenshotMeta>> {
562        // Our framebuffer uses 4 bytes per pixel, approximating an
563        // BGRA image, however it only actually contains BGR data.
564        // The fourth byte is effectively noise. We can set the 'alpha'
565        // value to 0xFF to make the image opaque.
566        const BYTES_PER_PIXEL: usize = 4;
567        let (width, height) = self.view.resolution();
568        let (widthsize, heightsize) = (width as usize, height as usize);
569        let len = widthsize * heightsize * BYTES_PER_PIXEL;
570
571        image.resize(len, 0);
572        for (i, line) in (0..height).zip(image.chunks_exact_mut(widthsize * BYTES_PER_PIXEL)) {
573            self.view.read_line(i, line);
574            for pixel in line.chunks_exact_mut(BYTES_PER_PIXEL) {
575                pixel.swap(0, 2);
576                pixel[3] = 0xFF;
577            }
578        }
579
580        Ok(Some(VmScreenshotMeta {
581            color: image::ExtendedColorType::Rgba8,
582            width,
583            height,
584        }))
585    }
586}