underhill_core/dispatch/mod.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Implements VM dispatch and VM state management for Underhill.

mod pci_shutdown;
pub mod vtl2_settings_worker;

use self::vtl2_settings_worker::DeviceInterfaces;
use crate::ControlRequest;
use crate::emuplat::EmuplatServicing;
use crate::emuplat::netvsp::RuntimeSavedState;
use crate::nvme_manager::NvmeManager;
use crate::options::TestScenarioConfig;
use crate::reference_time::ReferenceTime;
use crate::servicing;
use crate::servicing::NvmeSavedState;
use crate::servicing::ServicingState;
use crate::vmbus_relay_unit::VmbusRelayHandle;
use crate::worker::FirmwareType;
use crate::worker::NetworkSettingsError;
use anyhow::Context;
use async_trait::async_trait;
use cvm_tracing::CVM_ALLOWED;
use futures::FutureExt;
use futures::StreamExt;
use futures_concurrency::future::Join;
use get_protocol::SaveGuestVtl2StateFlags;
use guest_emulation_transport::api::GuestSaveRequest;
use guid::Guid;
use hyperv_ic_resources::shutdown::ShutdownParams;
use hyperv_ic_resources::shutdown::ShutdownResult;
use hyperv_ic_resources::shutdown::ShutdownRpc;
use hyperv_ic_resources::shutdown::ShutdownType;
use igvm_defs::MemoryMapEntryType;
use inspect::Inspect;
use mesh::CancelContext;
use mesh::MeshPayload;
use mesh::error::RemoteError;
use mesh::rpc::FailableRpc;
use mesh::rpc::Rpc;
use mesh::rpc::RpcSend;
use mesh_worker::WorkerRpc;
use net_packet_capture::PacketCaptureParams;
use openhcl_dma_manager::DmaClientSpawner;
use openhcl_dma_manager::OpenhclDmaManager;
use pal_async::task::Spawn;
use pal_async::task::Task;
use parking_lot::Mutex;
use socket2::Socket;
use state_unit::SavedStateUnit;
use state_unit::SpawnedUnit;
use state_unit::StateUnits;
use std::sync::Arc;
use std::time::Duration;
use tracing::Instrument;
use tracing::instrument;
use uevent::UeventListener;
use underhill_threadpool::AffinitizedThreadpool;
use virt::IsolationType;
use virt_mshv_vtl::UhPartition;
use virt_mshv_vtl::VtlCrash;
use vm_resource::ResourceResolver;
use vm_topology::memory::MemoryRangeWithNode;
use vmbus_channel::channel::VmbusDevice;
use vmcore::vmtime::VmTimeKeeper;
use vmm_core::input_distributor::InputDistributor;
use vmm_core::partition_unit::PartitionUnit;
use vmm_core::vmbus_unit::ChannelUnit;
use vmm_core::vmbus_unit::VmbusServerHandle;
use vmotherboard::ChipsetDevices;
use vtl2_settings_worker::Vtl2ConfigNicRpc;
use vtl2_settings_worker::Vtl2SettingsWorker;
use vtl2_settings_worker::handle_vtl2_config_rpc;

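/// RPC requests for controlling a running VM, handled by the main dispatch
/// loop in `LoadedVm::run`.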
#[derive(MeshPayload)]
pub enum UhVmRpc {
    Pause(Rpc<(), bool>),
    Resume(Rpc<(), bool>),
    Save(FailableRpc<(), Vec<u8>>),
    ClearHalt(Rpc<(), bool>), // TODO: remove this, and use DebugRequest::Resume
    PacketCapture(FailableRpc<PacketCaptureParams<Socket>, PacketCaptureParams<Socket>>),
}

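/// Callbacks for managing a loaded VM's network settings at runtime.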
#[async_trait]
pub trait LoadedVmNetworkSettings: Inspect {
    /// Callback to prepare for guest hibernation. This should remove any
    /// directly assigned devices before the guest saves state.
    ///
    /// When `rollback` is true, it means the hibernate request was vetoed, so
    /// any changes should be undone.
    async fn prepare_for_hibernate(&self, rollback: bool);

    /// Callback when network settings are modified externally.
    async fn modify_network_settings(
        &mut self,
        instance_id: Guid,
        subordinate_instance_id: Option<Guid>,
    ) -> anyhow::Result<()>;

    /// Callback when a network is added externally.
    async fn add_network(
        &mut self,
        instance_id: Guid,
        subordinate_instance_id: Option<Guid>,
        max_sub_channels: Option<u16>,
        threadpool: &AffinitizedThreadpool,
        uevent_listener: &UeventListener,
        servicing_netvsp_state: &Option<Vec<crate::emuplat::netvsp::SavedState>>,
        partition: Arc<UhPartition>,
        state_units: &StateUnits,
        vmbus_server: &Option<VmbusServerHandle>,
        dma_client_spawner: DmaClientSpawner,
        is_isolated: bool,
    ) -> anyhow::Result<RuntimeSavedState>;

    /// Callback when a network is removed externally.
    async fn remove_network(&mut self, instance_id: Guid) -> anyhow::Result<()>;

    /// Callback after stopping the VM and all workers, in preparation for a VTL2 reboot.
    async fn unload_for_servicing(&mut self);

    /// Handles packet capture related operations.
    async fn packet_capture(
        &self,
        mut params: PacketCaptureParams<Socket>,
    ) -> anyhow::Result<PacketCaptureParams<Socket>>;
}

/// A VM that has been loaded and can be run.
pub(crate) struct LoadedVm {
    pub partition_unit: PartitionUnit,
    /// The various guest memory objects.
    pub memory: underhill_mem::MemoryMappings,
    pub firmware_type: FirmwareType,
    pub isolation: IsolationType,
    // contains task handles which must be kept alive
    pub _chipset_devices: ChipsetDevices,
    // keep the unit task alive
    pub _vmtime: SpawnedUnit<VmTimeKeeper>,
    pub _halt_task: Task<()>,
    pub uevent_listener: Arc<UeventListener>,
    pub resolver: ResourceResolver,
    pub nvme_manager: Option<NvmeManager>,
    pub emuplat_servicing: EmuplatServicing,
    pub device_interfaces: Option<DeviceInterfaces>,
    pub vmbus_client: Option<vmbus_client::VmbusClient>,
    /// Memory map with IGVM types for each range.
    pub vtl0_memory_map: Vec<(MemoryRangeWithNode, MemoryMapEntryType)>,

    pub partition: Arc<UhPartition>,
    pub state_units: StateUnits,
    pub last_state_unit_stop: Option<ReferenceTime>,
    pub vmbus_server: Option<VmbusServerHandle>,
    // contains task handles which must be kept alive
    pub host_vmbus_relay: Option<VmbusRelayHandle>,
    // channels are revoked when dropped, so make sure to keep them alive
    pub _vmbus_devices: Vec<SpawnedUnit<ChannelUnit<dyn VmbusDevice>>>,
    pub _vmbus_intercept_devices: Vec<mesh::OneshotSender<()>>,
    pub _ide_accel_devices: Vec<SpawnedUnit<ChannelUnit<storvsp::StorageDevice>>>,
    pub network_settings: Option<Box<dyn LoadedVmNetworkSettings>>,
    pub shutdown_relay: Option<(
        mesh::Receiver<Rpc<ShutdownParams, ShutdownResult>>,
        mesh::Sender<ShutdownRpc>,
    )>,

    pub vmgs: Option<(
        vmgs_broker::VmgsThinClient,
        disk_get_vmgs::save_restore::SavedBlockStorageMetadata,
        Task<()>,
    )>,

    // dependencies of the vtl2 settings service
    pub get_client: guest_emulation_transport::GuestEmulationTransportClient,
    pub device_platform_settings:
        guest_emulation_transport::api::platform_settings::DevicePlatformSettings,
    pub runtime_params: crate::loader::vtl2_config::RuntimeParameters,

    pub _input_distributor: SpawnedUnit<InputDistributor>,

    pub crash_notification_recv: mesh::Receiver<VtlCrash>,
    pub control_send: Arc<Mutex<Option<mesh::Sender<ControlRequest>>>>,

    pub _periodic_telemetry_task: Task<()>,

    pub nvme_keep_alive: bool,
    pub test_configuration: Option<TestScenarioConfig>,
    pub dma_manager: OpenhclDmaManager,
}

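/// The state returned from `LoadedVm::run` when the VM worker is restarted
/// for servicing.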
pub struct LoadedVmState<T> {
    pub restart_rpc: FailableRpc<(), T>,
    pub servicing_state: ServicingState,
    pub vm_rpc: mesh::Receiver<UhVmRpc>,
    pub control_send: mesh::Sender<ControlRequest>,
}

impl LoadedVm {
    /// Starts running the VM, which in turn starts running VTL0.
    pub async fn run<T: 'static + MeshPayload + Send>(
        mut self,
        threadpool: &AffinitizedThreadpool,
        autostart_vps: bool,
        correlation_id: Option<Guid>,
        mut vm_rpc: mesh::Receiver<UhVmRpc>,
        mut worker_rpc: mesh::Receiver<WorkerRpc<T>>,
    ) -> Option<LoadedVmState<T>> {
        if autostart_vps {
            self.start(correlation_id).await;
        }

        // VTL2 settings services
        let (device_config_send, mut device_config_recv) = mesh::channel();
        let _vtl2_settings_service_handle = {
            let initial_settings = self
                .device_platform_settings
                .general
                .vtl2_settings
                .as_ref()
                .map_or_else(Default::default, |settings| settings.dynamic.clone());

            let mut vtl2_settings_worker = Vtl2SettingsWorker::new(
                initial_settings,
                device_config_send,
                self.get_client.clone(),
                self.device_interfaces.take().unwrap(),
            );

            threadpool.spawn("VTL2 settings services", {
                let uevent_listener = self.uevent_listener.clone();
                async move { vtl2_settings_worker.run(&uevent_listener).await }
            })
        };

        let mut save_request_recv = self
            .get_client
            .take_save_request_recv()
            .await
            .expect("no failure");

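        // Main event loop: dispatch incoming events until the worker is told
        // to stop or to restart (for servicing).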
        let state = loop {
            enum Event<T> {
                WorkerRpc(WorkerRpc<T>),
                WorkerRpcGone,
                Vtl2ConfigNicRpc(Vtl2ConfigNicRpc),
                UhVmRpc(UhVmRpc),
                VtlCrash(VtlCrash),
                ServicingRequest(GuestSaveRequest),
                ShutdownRequest(Rpc<ShutdownParams, ShutdownResult>),
            }

            let event: Event<T> = futures::select! { // merge semantics
                message = worker_rpc.next() => message.map_or(Event::WorkerRpcGone, Event::WorkerRpc),
                message = device_config_recv.select_next_some() => Event::Vtl2ConfigNicRpc(message),
                message = vm_rpc.select_next_some() => Event::UhVmRpc(message),
                message = self.crash_notification_recv.select_next_some() => Event::VtlCrash(message),
                message = save_request_recv.select_next_some() => Event::ServicingRequest(message),
                message = async {
                    if self.shutdown_relay.is_none() {
                        std::future::pending::<()>().await;
                    }
                    let (recv, _) = self.shutdown_relay.as_mut().unwrap();
                    recv.select_next_some().await
                }.fuse() => Event::ShutdownRequest(message),
            };

            match event {
                Event::WorkerRpcGone => break None,
                Event::WorkerRpc(message) => match message {
                    WorkerRpc::Stop => break None,
                    WorkerRpc::Restart(rpc) => {
                        let state = async {
                            let running = self.stop().await;
                            match self.save(None, false).await {
                                Ok(servicing_state) => Some((rpc, servicing_state)),
                                Err(err) => {
                                    if running {
                                        self.start(None).await;
                                    }
                                    rpc.complete(Err(RemoteError::new(err)));
                                    None
                                }
                            }
                        }
                        .instrument(tracing::info_span!("restart", CVM_ALLOWED))
                        .await;

                        if let Some((rpc, servicing_state)) = state {
                            break Some(LoadedVmState {
                                restart_rpc: rpc,
                                servicing_state,
                                vm_rpc,
                                control_send: self.control_send.lock().take().unwrap(),
                            });
                        }
                    }
                    WorkerRpc::Inspect(deferred) => deferred.respond(|resp| {
                        resp.field("threadpool", threadpool)
                            .merge(&self.state_units);
                        resp.child("init_data", |req| {
                            req.respond().field("dps", &self.device_platform_settings);
                        });
                        resp.field("runtime_params", &self.runtime_params);
                        resp.field("get", &self.get_client);
                        resp.field("vmgs", self.vmgs.as_ref().map(|x| &x.0));
                        resp.field("network", &self.network_settings);
                        resp.field("nvme", &self.nvme_manager);
                        resp.field("resolver", &self.resolver);
                        resp.field(
                            "vtl0_memory_map",
                            inspect_helpers::vtl0_memory_map(&self.vtl0_memory_map),
                        );
                        resp.field("memory", &self.memory);
                        resp.field("dma_manager", &self.dma_manager);
                    }),
                },
                Event::Vtl2ConfigNicRpc(message) => {
                    handle_vtl2_config_rpc(message, &mut self, threadpool).await
                }
                Event::UhVmRpc(msg) => match msg {
                    UhVmRpc::Resume(rpc) => {
                        rpc.handle(async |()| {
                            if !self.state_units.is_running() {
                                self.start(None).await;
                                true
                            } else {
                                false
                            }
                        })
                        .await
                    }
                    UhVmRpc::Pause(rpc) => rpc.handle(async |()| self.stop().await).await,
                    UhVmRpc::Save(rpc) => {
                        rpc.handle_failable(async |()| {
                            let running = self.stop().await;
                            let r = self.save(None, false).await;
                            if running {
                                self.start(None).await;
                            }
                            r.map(mesh::payload::encode)
                        })
                        .await
                    }
                    UhVmRpc::ClearHalt(rpc) => {
                        rpc.handle(async |()| self.partition_unit.clear_halt().await)
                            .await
                    }
                    UhVmRpc::PacketCapture(rpc) => {
                        rpc.handle_failable(async |params| {
                            let network_settings = self
                                .network_settings
                                .as_ref()
                                .context("No network settings have been set up")?;
                            network_settings.packet_capture(params).await
                        })
                        .await
                    }
                },
                Event::ServicingRequest(message) => {
                    // Explicitly destructure the message for easier tracking of its changes.
                    let GuestSaveRequest {
                        correlation_id,
                        deadline,
                        capabilities_flags,
                    } = message;
                    match self
                        .handle_servicing_request(correlation_id, deadline, capabilities_flags)
                        .await
                    {
                        Ok(true) => {
                            // Now do nothing. The host will restart VTL2 when it is ready.
                        }
                        Ok(false) => {
                            // Servicing failed. Continue running the VM.
                            continue;
                        }
                        Err(err) => {
                            tracing::error!(
                                CVM_ALLOWED,
                                error = err.as_ref() as &dyn std::error::Error,
                                "failed to notify host of servicing result"
                            );
                            // This is not recoverable, so tear down.
                            break None;
                        }
                    }
                }
                Event::ShutdownRequest(rpc) => {
                    rpc.handle(async |msg| {
                        if matches!(msg.shutdown_type, ShutdownType::Hibernate) {
                            self.handle_hibernate_request(false).await;
                        }
                        let (_, send_guest) =
                            self.shutdown_relay.as_mut().expect("active shutdown_relay");
                        tracing::info!(CVM_ALLOWED, params = ?msg, "Relaying shutdown message");
                        let result = match send_guest.call(ShutdownRpc::Shutdown, msg).await {
                            Ok(result) => result,
                            Err(err) => {
                                tracing::error!(
                                    CVM_ALLOWED,
                                    error = &err as &dyn std::error::Error,
                                    "Failed to relay shutdown notification to guest"
                                );
                                ShutdownResult::Failed(0x80000001)
                            }
                        };
                        if !matches!(result, ShutdownResult::Ok) {
                            tracing::warn!(CVM_ALLOWED, ?result, "Shutdown request failed");
                            self.handle_hibernate_request(true).await;
                        }
                        result
                    })
                    .await
                }
                Event::VtlCrash(vtl_crash) => self.notify_of_vtl_crash(vtl_crash),
            }
        };

        let _client_notify_send = self.partition_unit.teardown().await;

        // Terminate the vmbus relay before vmbus to avoid sending channel
        // revokes back to the host.
        if let Some(vmbus_relay) = self.host_vmbus_relay {
            vmbus_relay.teardown().await;
        }

        if let Some(vmbus) = self.vmbus_server {
            vmbus.remove().await.shutdown().await;
        }

        state
    }

    /// Handles a servicing request from the host.
    ///
    /// Returns `true` if servicing was successful (in which case the VM will
    /// be terminated at any moment), or `false` if it failed non-destructively
    /// and the VM should keep running.
    async fn handle_servicing_request(
        &mut self,
        correlation_id: Guid,
        deadline: std::time::Instant,
        capabilities_flags: SaveGuestVtl2StateFlags,
    ) -> anyhow::Result<bool> {
        if let Some(TestScenarioConfig::SaveStuck) = self.test_configuration {
            tracing::info!("Test configuration SERVICING_SAVE_STUCK is set. Waiting indefinitely.");
            std::future::pending::<()>().await;
        }

        let running = self.state_units.is_running();
        let success = match self
            .handle_servicing_inner(correlation_id, deadline, capabilities_flags)
            .await
            .and_then(|state| {
                if let Some(TestScenarioConfig::SaveFail) = self.test_configuration {
                    tracing::info!(
                        "Test configuration SERVICING_SAVE_FAIL is set. Failing the save."
                    );
                    return Err(anyhow::anyhow!("Simulated servicing save failure"));
                }
                Ok(state)
            }) {
            Ok(state) => {
                self.get_client
                    .send_servicing_state(mesh::payload::encode(state))
                    .await?;

                true
            }
            Err(err) => {
                tracing::error!(
                    CVM_ALLOWED,
                    error = err.as_ref() as &dyn std::error::Error,
                    "error while handling servicing"
                );
                self.get_client
                    .send_servicing_failure(format_args!("{:#}", err))
                    .await
                    .context("failed to notify host of servicing-while-paused failure")?;

                if running {
                    self.start(Some(correlation_id)).await;
                }
                false
            }
        };

        Ok(success)
    }

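    /// Stops the VM, saves its servicing state, and shuts down device drivers
    /// in preparation for a VTL2 restart.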
    async fn handle_servicing_inner(
        &mut self,
        correlation_id: Guid,
        deadline: std::time::Instant,
        capabilities_flags: SaveGuestVtl2StateFlags,
    ) -> anyhow::Result<ServicingState> {
        if self.isolation.is_isolated() {
            anyhow::bail!("Servicing is not yet supported for isolated VMs");
        }

        // NOTE: This is set via the corresponding env arg, as this feature is
        // experimental.
        let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();

        // Do everything before the log flush under a span.
        let r = async {
            if !self.stop().await {
                // This should only occur if you tried to initiate a
                // servicing operation after manually pausing underhill
                // via `ohcldiag-dev`.
                //
                // This is something that we _could_ enable, but it'd
                // require additional plumbing, so we'll just disallow
                // this for now.
                anyhow::bail!("cannot service underhill while paused");
            }

            let mut state = self.save(Some(deadline), nvme_keepalive).await?;
            state.init_state.correlation_id = Some(correlation_id);

            // Unload any network devices.
            let shutdown_mana = async {
                if let Some(network_settings) = self.network_settings.as_mut() {
                    network_settings
                        .unload_for_servicing()
                        .instrument(tracing::info_span!("shutdown_mana", CVM_ALLOWED))
                        .await;
                }
            };

            // Reset all user-mode NVMe devices.
            let shutdown_nvme = async {
                if let Some(nvme_manager) = self.nvme_manager.take() {
                    nvme_manager
                        .shutdown(nvme_keepalive)
                        .instrument(tracing::info_span!("shutdown_nvme_vfio", CVM_ALLOWED, %correlation_id, %nvme_keepalive))
                        .await;
                }
            };

            // Unbind drivers from the PCI devices to prepare for a kernel
            // restart.
            let shutdown_pci = async {
                pci_shutdown::shutdown_pci_devices()
                    .instrument(tracing::info_span!("shutdown_pci_devices", CVM_ALLOWED))
                    .await
            };

            let (r, (), ()) = (shutdown_pci, shutdown_mana, shutdown_nvme).join().await;
            r?;

            Ok(state)
        }
        .instrument(tracing::info_span!("servicing_save_vtl2", CVM_ALLOWED, %correlation_id))
        .await;

        let mut state = match r {
            Ok(state) => state,
            Err(err) => {
                self.resume_drivers();
                return Err(err);
            }
        };

        // Tell the initial process to flush all logs. Any logs
        // emitted after this point may be lost.
        state.init_state.flush_logs_result = Some({
            // Only wait up to a half second (which is still
            // a long time!) to prevent delays from
            // introducing longer blackouts.
            let ctx = CancelContext::new().with_timeout(Duration::from_millis(500));

            let now = std::time::Instant::now();
            let call = self
                .control_send
                .lock()
                .as_ref()
                .unwrap()
                .call(ControlRequest::FlushLogs, ctx);

            let error = call
                .await
                .map_err(anyhow::Error::from)
                .and_then(|x| x.map_err(anyhow::Error::from))
                .err()
                .map(|err| format!("{err:#}"));

            servicing::FlushLogsResult {
                duration_us: now.elapsed().as_micros() as u64,
                error,
            }
        });

        Ok(state)
    }

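    /// Asks the network stack to prepare for guest hibernation, or to roll
    /// back that preparation if the hibernate request was vetoed.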
    async fn handle_hibernate_request(&self, rollback: bool) {
        if let Some(network_settings) = &self.network_settings {
            if !rollback {
                network_settings
                    .prepare_for_hibernate(rollback)
                    .instrument(tracing::info_span!(
                        "prepare_for_guest_hibernate",
                        CVM_ALLOWED
                    ))
                    .await;
            } else {
                network_settings
                    .prepare_for_hibernate(rollback)
                    .instrument(tracing::info_span!(
                        "rollback_prepare_for_guest_hibernate",
                        CVM_ALLOWED
                    ))
                    .await;
            };
        }
    }

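    /// Starts (or resumes) the VM's state units, logging the boot or blackout
    /// time.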
    async fn start(&mut self, correlation_id: Option<Guid>) {
        self.state_units.start().await;

        // Log the boot/blackout time.
        let reference_time = ReferenceTime::new(self.partition.reference_time());
        if let Some(stopped) = self.last_state_unit_stop {
            let blackout_time = reference_time.since(stopped);
            tracing::info!(CVM_ALLOWED,
                correlation_id = %correlation_id.unwrap_or(Guid::ZERO),
                blackout_time_ms = blackout_time.map(|t| t.as_millis() as u64),
                blackout_time = blackout_time
                    .map_or_else(|| "unknown".to_string(), |t| format!("{:?}", t))
                    .as_str(),
                "resuming VM"
            );
        } else {
            // Assume we started at reference time 0.
            let boot_time = reference_time.since(ReferenceTime::new(0));
            tracing::info!(
                CVM_ALLOWED,
                boot_time_ms = boot_time.map(|t| t.as_millis() as u64),
                boot_time = boot_time
                    .map_or_else(|| "unknown".to_string(), |t| format!("{:?}", t))
                    .as_str(),
                "starting VM"
            )
        }
    }

    /// Returns true if the VM was previously running.
    async fn stop(&mut self) -> bool {
        if self.state_units.is_running() {
            self.last_state_unit_stop = Some(ReferenceTime::new(self.partition.reference_time()));
            tracing::info!(CVM_ALLOWED, "stopping VM");
            self.state_units.stop().await;
            true
        } else {
            false
        }
    }

    /// Called after a failed servicing operation.
    ///
    /// FUTURE: model the drivers as "driver" state units (as opposed to guest
    /// VM state units) so that we have a consistent way to model their state
    /// transitions.
    fn resume_drivers(&mut self) {
        if let Some(client) = &mut self.vmbus_client {
            client.start();
        }

        // BUGBUG: resume the other drivers. This only becomes a problem once
        // nvme keepalive is enabled, since otherwise no other drivers have been
        // stopped.
    }

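    /// Collects the VM's servicing state. The VM must already be stopped.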
    async fn save(
        &mut self,
        _deadline: Option<std::time::Instant>,
        vf_keepalive_flag: bool,
    ) -> anyhow::Result<ServicingState> {
        assert!(!self.state_units.is_running());

        let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?;

        // Only save NVMe state when there are NVMe controllers and keepalive
        // is enabled.
        let nvme_state = if let Some(n) = &self.nvme_manager {
            n.save(vf_keepalive_flag)
                .instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED))
                .await
                .map(|s| NvmeSavedState { nvme_state: s })
        } else {
            None
        };

        let units = self.save_units().await.context("state unit save failed")?;
        let vmgs = if let Some((vmgs_thin_client, vmgs_disk_metadata, _)) = self.vmgs.as_ref() {
            Some((
                vmgs_thin_client.save().await.context("vmgs save failed")?,
                vmgs_disk_metadata.clone(),
            ))
        } else {
            None
        };

        // Only save dma manager state if we are expected to keep VF devices
        // alive across save. Otherwise, don't persist the state at all, as
        // there should be no live DMA across save.
        let dma_manager_state = if vf_keepalive_flag {
            use vmcore::save_restore::SaveRestore;
            Some(self.dma_manager.save().context("dma_manager save failed")?)
        } else {
            None
        };

        let vmbus_client = if let Some(vmbus_client) = &mut self.vmbus_client {
            vmbus_client.stop().await;
            Some(vmbus_client.save().await)
        } else {
            None
        };

        let mut state = ServicingState {
            init_state: servicing::ServicingInitState {
                firmware_type: self.firmware_type.into(),
                vm_stop_reference_time: self.last_state_unit_stop.unwrap().as_100ns(),
                correlation_id: None,
                emuplat,
                flush_logs_result: None,
                vmgs,
                overlay_shutdown_device: self.shutdown_relay.is_some(),
                nvme_state,
                dma_manager_state,
                vmbus_client,
            },
            units,
        };

        state
            .fix_pre_save()
            .context("failed to fix up servicing state before save")?;
        Ok(state)
    }

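    /// Saves the state of all state units.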
    #[instrument(skip(self), fields(CVM_ALLOWED))]
    async fn save_units(&mut self) -> anyhow::Result<Vec<SavedStateUnit>> {
        Ok(self.state_units.save().await?)
    }

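    /// Restores the state of all state units from a previous save.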
    #[instrument(skip(self, saved_state), fields(CVM_ALLOWED))]
    pub async fn restore_units(&mut self, saved_state: Vec<SavedStateUnit>) -> anyhow::Result<()> {
        self.state_units.restore(saved_state).await?;
        Ok(())
    }

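    /// Forwards a guest crash notification to the host via the GET client.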
    fn notify_of_vtl_crash(&self, vtl_crash: VtlCrash) {
        tracelimit::info_ratelimited!(CVM_ALLOWED, "Notifying the host of the guest system crash");

        let VtlCrash {
            vp_index,
            last_vtl,
            control,
            parameters,
        } = vtl_crash;
        self.get_client.notify_of_vtl_crash(
            vp_index.index(),
            last_vtl.into(),
            control.into(),
            parameters,
        );
    }

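    /// Adds a network device at runtime and records its runtime saved state
    /// for later servicing.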
    async fn add_vf_manager(
        &mut self,
        threadpool: &AffinitizedThreadpool,
        instance_id: Guid,
        subordinate_instance_id: Option<Guid>,
        max_sub_channels: Option<u16>,
    ) -> anyhow::Result<()> {
        // Network settings may not exist if the VM was created without them.
        if self.network_settings.is_none() {
            return Err(NetworkSettingsError::NetworkSettingsMissing.into());
        }

        let save_state = self
            .network_settings
            .as_mut()
            .unwrap()
            .add_network(
                instance_id,
                subordinate_instance_id,
                max_sub_channels,
                threadpool,
                &self.uevent_listener,
                &None, // VF getting added; no existing state
                self.partition.clone(),
                &self.state_units,
                &self.vmbus_server,
                self.dma_manager.client_spawner(),
                self.isolation.is_isolated(),
            )
            .await?;

        self.state_units.start_stopped_units().await;
        self.emuplat_servicing.netvsp_state.push(save_state);

        Ok(())
    }

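    /// Removes a network device at runtime, along with its runtime saved
    /// state.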
    async fn remove_vf_manager(&mut self, instance_id: Guid) -> anyhow::Result<()> {
        if self.network_settings.is_none() {
            return Err(NetworkSettingsError::NetworkSettingsMissing.into());
        }

        self.network_settings
            .as_mut()
            .unwrap()
            .remove_network(instance_id)
            .await?;

        // Remove Netvsp RuntimeSavedState
        if let Some(index) = self
            .emuplat_servicing
            .netvsp_state
            .iter()
            .position(|value| value.instance_id == instance_id)
        {
            self.emuplat_servicing.netvsp_state.swap_remove(index);
        } else {
            return Err(NetworkSettingsError::RuntimeSavedStateMissing(instance_id).into());
        }

        Ok(())
    }
}

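/// Helpers for exposing VM state, such as the VTL0 memory map, via the
/// `Inspect` trait.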
mod inspect_helpers {
    use super::*;

    fn inspect_memory_map_entry_type(typ: &MemoryMapEntryType) -> impl Inspect + '_ {
        // TODO: inspect::AsDebug would work here once
        // https://github.com/kupiakos/open-enum/pull/15 is merged.
        inspect::adhoc(|req| match *typ {
            MemoryMapEntryType::MEMORY => req.value("MEMORY"),
            MemoryMapEntryType::PERSISTENT => req.value("PERSISTENT"),
            MemoryMapEntryType::PLATFORM_RESERVED => req.value("PLATFORM_RESERVED"),
            MemoryMapEntryType::VTL2_PROTECTABLE => req.value("VTL2_PROTECTABLE"),
            _ => req.value(typ.0),
        })
    }

    pub(super) fn vtl0_memory_map(
        memory: &[(MemoryRangeWithNode, MemoryMapEntryType)],
    ) -> impl Inspect + '_ {
        inspect::iter_by_key(memory.iter().map(|(entry, typ)| {
            (
                entry.range,
                inspect::adhoc(|req| {
                    req.respond()
                        .hex("length", entry.range.len())
                        .field("type", inspect_memory_map_entry_type(typ))
                        .field("vnode", entry.vnode);
                }),
            )
        }))
    }
}