vmm_core/
partition_unit.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! State unit for managing the VM partition and associated virtual processors.
5
6mod debug;
7mod vp_set;
8
9pub use vp_set::Halt;
10pub use vp_set::RequestYield;
11pub use vp_set::RunCancelled;
12pub use vp_set::RunnerCanceller;
13pub use vp_set::VpRunner;
14pub use vp_set::block_on_vp;
15
16use self::vp_set::RegisterSetError;
17use async_trait::async_trait;
18use futures::FutureExt;
19use futures::StreamExt;
20use guestmem::GuestMemory;
21use hvdef::Vtl;
22use inspect::InspectMut;
23use memory_range::MemoryRange;
24use mesh::Receiver;
25use mesh::rpc::Rpc;
26use mesh::rpc::RpcSend;
27use pal_async::task::Spawn;
28use state_unit::NameInUse;
29use state_unit::SpawnedUnit;
30use state_unit::StateRequest;
31use state_unit::StateUnit;
32use state_unit::UnitBuilder;
33use state_unit::UnitHandle;
34use std::sync::Arc;
35use thiserror::Error;
36use virt::InitialRegs;
37use virt::PageVisibility;
38use vm_topology::processor::ProcessorTopology;
39use vmcore::save_restore::ProtobufSaveRestore;
40use vmcore::save_restore::RestoreError;
41use vmcore::save_restore::SaveError;
42use vmcore::save_restore::SavedStateBlob;
43use vmm_core_defs::HaltReason;
44use vp_set::VpSet;
45
/// The control point for managing a partition unit.
///
/// This is the client-side handle: the actual work happens in a
/// `PartitionUnitRunner` that is spawned as a state unit by
/// [`PartitionUnit::new()`], and the methods on this type forward requests
/// to that runner over a channel.
pub struct PartitionUnit {
    /// The spawned state unit wrapping the runner. Used to expose the unit
    /// handle and to recover the runner at teardown.
    handle: SpawnedUnit<PartitionUnitRunner>,
    /// Sender for requests serviced by the runner's event loop.
    req_send: mesh::Sender<PartitionRequest>,
}
51
/// Trait with the minimal methods needed to run the partition.
///
/// Implementations must also support inspection ([`InspectMut`]) and
/// save/restore ([`ProtobufSaveRestore`]), both of which the partition unit
/// forwards to.
#[async_trait]
pub trait VmPartition: 'static + Send + Sync + InspectMut + ProtobufSaveRestore {
    /// Resets the partition.
    ///
    /// Invoked when the partition state unit is reset, before the VPs
    /// themselves are reset.
    fn reset(&mut self) -> anyhow::Result<()>;

    /// Scrubs the VTL state for a partition.
    ///
    /// Invoked before initial registers are set for `vtl` when the partition
    /// has run (or been restored) since it was last reset.
    fn scrub_vtl(&mut self, vtl: Vtl) -> anyhow::Result<()>;

    /// Accepts pages on behalf of the loader.
    ///
    /// `pages` lists each memory range together with the visibility it
    /// should be accepted with.
    fn accept_initial_pages(
        &mut self,
        pages: Vec<(MemoryRange, PageVisibility)>,
    ) -> anyhow::Result<()>;
}
67
/// An object to run the VM partition state unit.
///
/// Owns the partition and its VP set, and tracks the conditions that decide
/// whether the VPs may currently run (see `try_start`).
struct PartitionUnitRunner {
    /// The partition implementation being driven.
    partition: Box<dyn VmPartition>,
    /// The set of virtual processors belonging to the partition.
    vp_set: VpSet,
    /// Whether the state unit is currently started (set in `start`, cleared
    /// in `stop`).
    unit_started: bool,
    /// Number of outstanding temporary stop requests. Incremented by
    /// `StopVps`, decremented by `StartVps`; the VPs only run while this is
    /// zero.
    vp_stop_count: usize,
    /// Set once the VPs have been started or state has been restored, and
    /// cleared by a reset or a VTL scrub. While set, `set_initial_regs`
    /// scrubs the target VTL before applying registers.
    needs_reset: bool,
    /// The first halt reason recorded since the halt was last cleared, or
    /// `None` if the partition is not halted.
    halt_reason: Option<HaltReason>,
    /// Receives halt requests to be processed by the runner.
    halt_request_recv: Receiver<InternalHaltReason>,
    /// Used to notify the client of a halt that was not reported to the
    /// debugger instead.
    client_notify_send: mesh::Sender<HaltReason>,
    /// Receives requests sent through [`PartitionUnit`] and [`StopGuard`].
    req_recv: Receiver<PartitionRequest>,
    /// Copy of the processor topology, exposed through inspect.
    topology: ProcessorTopology,
    /// The most recently applied initial registers, retained so that the
    /// MTRRs can be replayed on request.
    initial_regs: Option<Arc<InitialRegs>>,

    /// Debugger state, only present when built with gdb support.
    #[cfg(feature = "gdb")]
    debugger_state: debug::DebuggerState,
}
85
86impl InspectMut for PartitionUnitRunner {
87    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
88        req.respond()
89            .field(
90                "power_state",
91                self.halt_reason.as_ref().map_or("running", |_| "halted"),
92            )
93            .merge(&self.halt_reason)
94            .merge(&self.vp_set)
95            .field_mut_with("clear_halt", |clear| {
96                // Clear halt if "true" is specified.
97                if let Some(clear) = clear {
98                    match clear.parse::<bool>() {
99                        Ok(x) => {
100                            if x {
101                                self.clear_halt();
102                            }
103                            Ok(x)
104                        }
105                        Err(err) => Err(err),
106                    }
107                } else {
108                    Ok(false)
109                }
110            })
111            .field("topology", &self.topology)
112            .merge(&mut self.partition);
113    }
114}
115
/// Requests sent from [`PartitionUnit`] and [`StopGuard`] to the runner's
/// event loop.
enum PartitionRequest {
    /// Clears the current halt reason; the reply says whether a halt was
    /// actually cleared.
    ClearHalt(Rpc<(), bool>), // TODO: remove this, and use DebugRequest::Resume
    /// Sets the initial register state for the given VTL.
    SetInitialRegs(Rpc<(Vtl, Arc<InitialRegs>), Result<(), InitialRegError>>),
    /// Accepts the given pages with the given visibility on the partition.
    SetInitialPageVisibility(
        Rpc<Vec<(MemoryRange, PageVisibility)>, Result<(), InitialVisibilityError>>,
    ),
    /// Stops the VPs and takes a temporary stop reference; the reply is sent
    /// once the VPs have stopped.
    StopVps(Rpc<(), ()>),
    /// Releases one temporary stop reference and restarts the VPs if nothing
    /// else is keeping them stopped.
    StartVps,
}
125
/// Parameters for [`PartitionUnit::new()`].
pub struct PartitionUnitParams<'a> {
    /// Guest memory objects indexed by position, cloned into the VP set.
    /// When built with gdb support, entry 0 must be present.
    pub vtl_guest_memory: [Option<&'a GuestMemory>; 3],
    /// The processor topology; one VP is added to the partition unit for
    /// each processor it describes.
    pub processor_topology: &'a ProcessorTopology,
    /// Tracks the halt state of VPs.
    pub halt_vps: Arc<Halt>,
    /// The receiver returned from `Halt::new()`.
    pub halt_request_recv: HaltReasonReceiver,
    /// Notified when the partition has been halted (due to a triple fault or
    /// other reason).
    pub client_notify_send: mesh::Sender<HaltReason>,
    /// Optional channel of debugger requests. Must be `None` unless the
    /// build has gdb support enabled.
    pub debugger_rpc: Option<Receiver<vmm_core_defs::debug_rpc::DebugRequest>>,
}
138
/// The halt reason receiver to put in [`PartitionUnitParams`].
///
/// This is an opaque wrapper so that callers can carry the receiver around
/// without being able to read from it themselves.
pub struct HaltReasonReceiver(Receiver<InternalHaltReason>);
141
/// Halt requests delivered to the partition unit runner.
enum InternalHaltReason {
    /// The partition halted for the given reason; recorded and reported to
    /// the debugger or the client.
    Halt(HaltReason),
    /// The MTRRs from the saved initial registers should be re-applied to
    /// VTL0, after which the halt is cleared and the VPs are resumed if
    /// possible.
    ReplayMtrrs,
}
146
/// Error returned by [`PartitionUnit::new()`].
#[derive(Debug, Error)]
pub enum Error {
    /// A debugger channel was supplied but the build has no gdb support.
    #[error("debugging is not supported in this build")]
    DebuggingNotSupported,
    /// Wraps the state unit [`NameInUse`] error.
    #[error(transparent)]
    NameInUse(NameInUse),
    /// Entry 0 of `vtl_guest_memory` was not provided in a build with gdb
    /// support.
    #[error("missing guest memory required for gdb support")]
    MissingGuestMemory,
}
157
/// Error returned by [`PartitionUnit::set_initial_regs()`].
#[derive(Debug, Error)]
pub enum InitialRegError {
    /// Applying the register state to the VPs failed.
    #[error("failed to set registers")]
    RegisterSet(#[source] RegisterSetError),
    /// Scrubbing the target VTL (on the partition or on the VPs) failed
    /// before the registers could be applied.
    #[error("failed to scrub VTL state")]
    ScrubVtl(#[source] anyhow::Error),
}
166
/// Error returned by [`PartitionUnit::set_initial_page_visibility()`].
#[derive(Debug, Error)]
pub enum InitialVisibilityError {
    /// The partition failed to accept the requested pages.
    #[error("failed to set initial page acceptance")]
    PageAcceptance(#[source] anyhow::Error),
}
173
174impl PartitionUnit {
175    /// Creates a new VM partition state unit.
176    ///
177    /// The caller is responsible for launching a thread for each VP and running
178    /// the VP using the returned [`VpRunner`]s.
179    pub fn new(
180        spawner: impl Spawn,
181        builder: UnitBuilder<'_>,
182        partition: impl VmPartition,
183        params: PartitionUnitParams<'_>,
184    ) -> Result<(Self, Vec<VpRunner>), Error> {
185        #[cfg(not(feature = "gdb"))]
186        if params.debugger_rpc.is_some() {
187            return Err(Error::DebuggingNotSupported);
188        }
189
190        let mut vp_set = VpSet::new(params.vtl_guest_memory.map(|m| m.cloned()), params.halt_vps);
191        let vps = params
192            .processor_topology
193            .vps_arch()
194            .map(|vp| vp_set.add(vp))
195            .collect();
196
197        let (req_send, req_recv) = mesh::channel();
198
199        let mut runner = PartitionUnitRunner {
200            partition: Box::new(partition),
201            vp_set,
202            unit_started: false,
203            vp_stop_count: 0,
204            needs_reset: false,
205            halt_reason: None,
206            halt_request_recv: params.halt_request_recv.0,
207            client_notify_send: params.client_notify_send,
208            req_recv,
209            topology: params.processor_topology.clone(),
210            initial_regs: None,
211            #[cfg(feature = "gdb")]
212            debugger_state: debug::DebuggerState::new(
213                params.vtl_guest_memory[0]
214                    .ok_or(Error::MissingGuestMemory)?
215                    .clone(),
216                params.debugger_rpc,
217            ),
218        };
219
220        let handle = builder
221            .spawn(spawner, async |recv| {
222                runner.run(recv).await;
223                runner
224            })
225            .unwrap();
226
227        Ok((Self { handle, req_send }, vps))
228    }
229
230    /// Gets the handle for the partition unit.
231    pub fn unit_handle(&self) -> &UnitHandle {
232        self.handle.handle()
233    }
234
235    /// Tears down the state unit, returning the `client_notify_send` sender
236    /// passed to [`Self::new()`].
237    pub async fn teardown(self) -> mesh::Sender<HaltReason> {
238        let runner = self.handle.remove().await;
239        runner.vp_set.teardown().await;
240        runner.client_notify_send
241    }
242
243    /// Clears the current halt reason from the partition, resuming the VPs if
244    /// they are stopped.
245    pub async fn clear_halt(&mut self) -> bool {
246        self.req_send
247            .call(PartitionRequest::ClearHalt, ())
248            .await
249            .unwrap()
250    }
251
252    /// Temporarily stops the VPs, returning a guard that will resume them when
253    /// dropped.
254    pub async fn temporarily_stop_vps(&mut self) -> StopGuard {
255        self.req_send
256            .call(PartitionRequest::StopVps, ())
257            .await
258            .unwrap();
259
260        StopGuard(self.req_send.clone())
261    }
262
263    /// Sets the register state for the VPs for initial boot.
264    ///
265    /// If the VM has been run before and has not been reset since it last ran,
266    /// the target VTL will be scrubbed first so that the partition state is
267    /// clean.
268    pub async fn set_initial_regs(
269        &mut self,
270        vtl: Vtl,
271        state: Arc<InitialRegs>,
272    ) -> Result<(), InitialRegError> {
273        self.req_send
274            .call(PartitionRequest::SetInitialRegs, (vtl, state))
275            .await
276            .unwrap()
277    }
278
279    pub async fn set_initial_page_visibility(
280        &mut self,
281        vis: Vec<(MemoryRange, PageVisibility)>,
282    ) -> Result<(), InitialVisibilityError> {
283        self.req_send
284            .call(PartitionRequest::SetInitialPageVisibility, vis)
285            .await
286            .unwrap()
287    }
288}
289
290impl PartitionUnitRunner {
291    /// Runs the VM partition, handling state change requests from `recv`.
292    async fn run(&mut self, mut recv: Receiver<StateRequest>) {
293        loop {
294            enum Event {
295                State(Option<StateRequest>),
296                Halt(InternalHaltReason),
297                Request(PartitionRequest),
298                #[cfg(feature = "gdb")]
299                Debug(vmm_core_defs::debug_rpc::DebugRequest),
300            }
301
302            #[cfg(feature = "gdb")]
303            let debug = self.debugger_state.wait_rpc();
304            #[cfg(not(feature = "gdb"))]
305            let debug = std::future::pending();
306
307            let event = futures::select! {  // merge semantics
308                request = recv.next() => Event::State(request),
309                request = self.halt_request_recv.select_next_some() => Event::Halt(request),
310                request = self.req_recv.select_next_some() => Event::Request(request),
311                request = debug.fuse() => {
312                    #[cfg(feature = "gdb")]
313                    {
314                        Event::Debug(request)
315                    }
316                    #[cfg(not(feature = "gdb"))]
317                    {
318                        let _: std::convert::Infallible = request;
319                        unreachable!()
320                    }
321                }
322            };
323
324            match event {
325                Event::State(request) => {
326                    if let Some(request) = request {
327                        request.apply(self).await;
328                    } else {
329                        break;
330                    }
331                }
332                Event::Halt(reason) => {
333                    // Wait for the VPs to stop before reporting this anywhere.
334                    // This is generally good behavior, but it is especially
335                    // necessary because Self::clear_halt() will call
336                    // VpSet::clear_halt(), which relies on the VPs being
337                    // affirmatively stopped.
338                    self.vp_set.stop().await;
339                    self.handle_halt(reason).await;
340                }
341                Event::Request(request) => match request {
342                    PartitionRequest::ClearHalt(rpc) => rpc.handle_sync(|()| self.clear_halt()),
343                    PartitionRequest::SetInitialRegs(rpc) => {
344                        rpc.handle(async |(vtl, state)| self.set_initial_regs(vtl, state).await)
345                            .await
346                    }
347                    PartitionRequest::SetInitialPageVisibility(rpc) => {
348                        rpc.handle(async |vis| self.set_initial_page_visibility(vis).await)
349                            .await
350                    }
351                    PartitionRequest::StopVps(rpc) => {
352                        rpc.handle(async |()| {
353                            self.vp_set.stop().await;
354                            self.vp_stop_count += 1;
355                        })
356                        .await
357                    }
358                    PartitionRequest::StartVps => {
359                        self.vp_stop_count -= 1;
360                        self.try_start();
361                    }
362                },
363                #[cfg(feature = "gdb")]
364                Event::Debug(request) => {
365                    self.handle_gdb(request).await;
366                }
367            }
368        }
369
370        if self.unit_started {
371            self.vp_set.stop().await;
372        }
373    }
374
375    async fn handle_halt(&mut self, reason: InternalHaltReason) {
376        match reason {
377            InternalHaltReason::Halt(reason) => {
378                // Only report the first halt request per boot so that the
379                // client does not have to deal with multiple halt reasons
380                // due to race conditions.
381                if self.halt_reason.is_none() {
382                    self.halt_reason = Some(reason.clone());
383
384                    // Report the halt to the debugger.
385                    #[cfg(feature = "gdb")]
386                    let reported = self.debugger_state.report_halt_to_debugger(&reason);
387                    #[cfg(not(feature = "gdb"))]
388                    let reported = false;
389
390                    // If the debugger is not attached, then report the halt
391                    // to the client.
392                    if !reported {
393                        self.client_notify_send.send(reason);
394                    }
395                } else {
396                    // Clear this specific halt.
397                    self.vp_set.clear_halt();
398                }
399            }
400            InternalHaltReason::ReplayMtrrs => {
401                if let Some(initial_regs) = self.initial_regs.clone() {
402                    if let Err(err) = self
403                        .vp_set
404                        .set_initial_regs(
405                            Vtl::Vtl0,
406                            initial_regs,
407                            vp_set::RegistersToSet::MtrrsOnly,
408                        )
409                        .await
410                    {
411                        tracing::error!(
412                            error = &err as &dyn std::error::Error,
413                            "failed to replay mtrrs, guest may see inconsistent results"
414                        );
415                    }
416                } else {
417                    tracing::warn!("no initial mtrrs to replay");
418                }
419                self.vp_set.clear_halt();
420                self.try_start();
421            }
422        }
423    }
424
425    /// Clears the halt and resumes the VPs if the partition is started. Returns
426    /// `false` if VPs were not already halted.
427    fn clear_halt(&mut self) -> bool {
428        if self.halt_reason.is_some() {
429            self.halt_reason = None;
430            self.vp_set.clear_halt();
431            self.try_start();
432            true
433        } else {
434            false
435        }
436    }
437
438    async fn set_initial_regs(
439        &mut self,
440        vtl: Vtl,
441        state: Arc<InitialRegs>,
442    ) -> Result<(), InitialRegError> {
443        assert!(!self.unit_started || self.vp_stop_count > 0);
444
445        // If this VM has been run before, then automatically scrub the target
446        // VTL state.
447        if self.needs_reset {
448            self.partition
449                .scrub_vtl(vtl)
450                .map_err(InitialRegError::ScrubVtl)?;
451            self.vp_set
452                .scrub(vtl)
453                .await
454                .map_err(InitialRegError::ScrubVtl)?;
455            self.needs_reset = false;
456        }
457
458        self.vp_set
459            .set_initial_regs(vtl, state.clone(), vp_set::RegistersToSet::All)
460            .await
461            .map_err(InitialRegError::RegisterSet)?;
462
463        self.initial_regs = Some(state);
464        Ok(())
465    }
466
467    async fn set_initial_page_visibility(
468        &mut self,
469        visibility: Vec<(MemoryRange, PageVisibility)>,
470    ) -> Result<(), InitialVisibilityError> {
471        assert!(!self.unit_started);
472
473        self.partition
474            .accept_initial_pages(visibility)
475            .map_err(InitialVisibilityError::PageAcceptance)
476    }
477
478    fn try_start(&mut self) {
479        if self.unit_started && self.halt_reason.is_none() && self.vp_stop_count == 0 {
480            self.needs_reset = true;
481            self.vp_set.start();
482        }
483    }
484}
485
/// Guard returned by [`PartitionUnit::temporarily_stop_vps()`].
///
/// Dropping the guard sends a request to the partition unit runner to release
/// this stop and resume the VPs if nothing else is keeping them stopped.
#[must_use = "when dropped, the VPs will be resumed"]
pub struct StopGuard(mesh::Sender<PartitionRequest>);
488
489impl Drop for StopGuard {
490    fn drop(&mut self) {
491        self.0.send(PartitionRequest::StartVps);
492    }
493}
494
495impl StateUnit for PartitionUnitRunner {
496    async fn start(&mut self) {
497        self.unit_started = true;
498        self.try_start();
499    }
500
501    async fn stop(&mut self) {
502        self.vp_set.stop().await;
503        self.unit_started = false;
504
505        // Now that the VM is stopped, flush any guest-initiated
506        // power state change that may have raced with this request.
507        while let Ok(reason) = self.halt_request_recv.try_recv() {
508            self.handle_halt(reason).await;
509        }
510    }
511
512    async fn reset(&mut self) -> anyhow::Result<()> {
513        self.partition.reset()?;
514        self.vp_set.reset().await?;
515        self.clear_halt();
516        self.needs_reset = false;
517        Ok(())
518    }
519
520    async fn save(&mut self) -> Result<Option<SavedStateBlob>, SaveError> {
521        let state = self.save().await?;
522        Ok(Some(SavedStateBlob::new(state)))
523    }
524
525    async fn restore(&mut self, buffer: SavedStateBlob) -> Result<(), RestoreError> {
526        // TODO: restore halted state
527        self.needs_reset = true;
528        self.restore(buffer.parse()?).await?;
529        Ok(())
530    }
531}
532
533mod save_restore {
534    use super::PartitionUnitRunner;
535    use virt::VpIndex;
536    use vmcore::save_restore::RestoreError;
537    use vmcore::save_restore::SaveError;
538
539    mod state {
540        use mesh::payload::Protobuf;
541        use vmcore::save_restore::SavedStateBlob;
542        use vmcore::save_restore::SavedStateRoot;
543
544        #[derive(Protobuf, SavedStateRoot)]
545        #[mesh(package = "partition")]
546        pub struct Partition {
547            #[mesh(1)]
548            pub(super) partition: SavedStateBlob,
549            #[mesh(2)]
550            pub(super) vps: Vec<Vp>,
551            // TODO: save halted state
552        }
553
554        #[derive(Protobuf)]
555        #[mesh(package = "partition")]
556        pub struct Vp {
557            #[mesh(1)]
558            pub vp_index: u32,
559            #[mesh(2)]
560            pub data: SavedStateBlob,
561        }
562    }
563
564    impl PartitionUnitRunner {
565        pub async fn save(&mut self) -> Result<state::Partition, SaveError> {
566            let partition = self.partition.save()?;
567            let vps = self.vp_set.save().await?;
568            let vps = vps
569                .into_iter()
570                .map(|(vp_index, data)| state::Vp {
571                    vp_index: vp_index.index(),
572                    data,
573                })
574                .collect();
575
576            Ok(state::Partition { partition, vps })
577        }
578
579        pub async fn restore(&mut self, state: state::Partition) -> Result<(), RestoreError> {
580            let state::Partition { partition, vps } = state;
581            self.partition.restore(partition)?;
582            self.vp_set
583                .restore(
584                    vps.into_iter()
585                        .map(|state::Vp { vp_index, data }| (VpIndex::new(vp_index), data)),
586                )
587                .await?;
588            Ok(())
589        }
590    }
591}