vmm_core/partition_unit.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! State unit for managing the VM partition and associated virtual processors.

mod debug;
mod vp_set;

pub use vp_set::Halt;
pub use vp_set::RequestYield;
pub use vp_set::RunCancelled;
pub use vp_set::RunnerCanceller;
pub use vp_set::VpRunner;
pub use vp_set::block_on_vp;

use self::vp_set::RegisterSetError;
use async_trait::async_trait;
use futures::FutureExt;
use futures::StreamExt;
use guestmem::GuestMemory;
use hvdef::Vtl;
use inspect::InspectMut;
use memory_range::MemoryRange;
use mesh::Receiver;
use mesh::rpc::Rpc;
use mesh::rpc::RpcSend;
use pal_async::task::Spawn;
use state_unit::NameInUse;
use state_unit::SpawnedUnit;
use state_unit::StateRequest;
use state_unit::StateUnit;
use state_unit::UnitBuilder;
use state_unit::UnitHandle;
use std::sync::Arc;
use thiserror::Error;
use virt::InitialRegs;
use virt::PageVisibility;
use vm_topology::processor::ProcessorTopology;
use vmcore::save_restore::ProtobufSaveRestore;
use vmcore::save_restore::RestoreError;
use vmcore::save_restore::SaveError;
use vmcore::save_restore::SavedStateBlob;
use vmm_core_defs::HaltReason;
use vp_set::VpSet;

/// The control point for managing a partition unit.
pub struct PartitionUnit {
    handle: SpawnedUnit<PartitionUnitRunner>,
    req_send: mesh::Sender<PartitionRequest>,
}

/// Trait with the minimal methods needed to run the partition.
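///
/// A minimal sketch of what an implementation looks like (`NopPartition` is a
/// hypothetical type; the `InspectMut` and `ProtobufSaveRestore` supertrait
/// impls are elided for brevity):
///
/// ```ignore
/// struct NopPartition;
///
/// #[async_trait]
/// impl VmPartition for NopPartition {
///     fn reset(&mut self) -> anyhow::Result<()> {
///         Ok(())
///     }
///
///     fn scrub_vtl(&mut self, _vtl: Vtl) -> anyhow::Result<()> {
///         Ok(())
///     }
///
///     fn accept_initial_pages(
///         &mut self,
///         _pages: Vec<(MemoryRange, PageVisibility)>,
///     ) -> anyhow::Result<()> {
///         Ok(())
///     }
/// }
/// ```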
#[async_trait]
pub trait VmPartition: 'static + Send + Sync + InspectMut + ProtobufSaveRestore {
    /// Resets the partition.
    fn reset(&mut self) -> anyhow::Result<()>;

    /// Scrubs the VTL state for a partition.
    fn scrub_vtl(&mut self, vtl: Vtl) -> anyhow::Result<()>;

    /// Accepts pages on behalf of the loader.
    fn accept_initial_pages(
        &mut self,
        pages: Vec<(MemoryRange, PageVisibility)>,
    ) -> anyhow::Result<()>;
}

/// An object to run the VM partition state unit.
struct PartitionUnitRunner {
    partition: Box<dyn VmPartition>,
    vp_set: VpSet,
    unit_started: bool,
    vp_stop_count: usize,
    needs_reset: bool,
    halt_reason: Option<HaltReason>,
    halt_request_recv: Receiver<InternalHaltReason>,
    client_notify_send: mesh::Sender<HaltReason>,
    req_recv: Receiver<PartitionRequest>,
    topology: ProcessorTopology,
    initial_regs: Option<Arc<InitialRegs>>,

    #[cfg(feature = "gdb")]
    debugger_state: debug::DebuggerState,
}

impl InspectMut for PartitionUnitRunner {
    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
        req.respond()
            .field(
                "power_state",
                self.halt_reason.as_ref().map_or("running", |_| "halted"),
            )
            .merge(&self.halt_reason)
            .merge(&self.vp_set)
            .field_mut_with("clear_halt", |clear| {
                // Clear halt if "true" is specified.
                if let Some(clear) = clear {
                    match clear.parse::<bool>() {
                        Ok(x) => {
                            if x {
                                self.clear_halt();
                            }
                            Ok(x)
                        }
                        Err(err) => Err(err),
                    }
                } else {
                    Ok(false)
                }
            })
            .field("topology", &self.topology)
            .merge(&mut self.partition);
    }
}

enum PartitionRequest {
    ClearHalt(Rpc<(), bool>), // TODO: remove this, and use DebugRequest::Resume
    SetInitialRegs(Rpc<(Vtl, Arc<InitialRegs>), Result<(), InitialRegError>>),
    SetInitialPageVisibility(
        Rpc<Vec<(MemoryRange, PageVisibility)>, Result<(), InitialVisibilityError>>,
    ),
    StopVps(Rpc<(), ()>),
    StartVps,
}

pub struct PartitionUnitParams<'a> {
    pub vtl_guest_memory: [Option<&'a GuestMemory>; 3],
    pub processor_topology: &'a ProcessorTopology,
    /// Tracks the halt state of VPs.
    pub halt_vps: Arc<Halt>,
    /// The receiver returned from `Halt::new()`.
    pub halt_request_recv: HaltReasonReceiver,
    /// Notified when the partition has been halted (due to a triple fault or
    /// other reason).
    pub client_notify_send: mesh::Sender<HaltReason>,
    pub debugger_rpc: Option<Receiver<vmm_core_defs::debug_rpc::DebugRequest>>,
}
/// The halt reason receiver to pass in [`PartitionUnitParams`].
pub struct HaltReasonReceiver(Receiver<InternalHaltReason>);

enum InternalHaltReason {
    Halt(HaltReason),
    ReplayMtrrs,
}

/// Error returned by [`PartitionUnit::new()`].
#[derive(Debug, Error)]
pub enum Error {
    #[error("debugging is not supported in this build")]
    DebuggingNotSupported,
    #[error(transparent)]
    NameInUse(NameInUse),
    #[error("missing guest memory required for gdb support")]
    MissingGuestMemory,
}

/// Error returned by [`PartitionUnit::set_initial_regs()`].
#[derive(Debug, Error)]
pub enum InitialRegError {
    #[error("failed to set registers")]
    RegisterSet(#[source] RegisterSetError),
    #[error("failed to scrub VTL state")]
    ScrubVtl(#[source] anyhow::Error),
}

/// Error returned by [`PartitionUnit::set_initial_page_visibility()`].
#[derive(Debug, Error)]
pub enum InitialVisibilityError {
    #[error("failed to set initial page acceptance")]
    PageAcceptance(#[source] anyhow::Error),
}

impl PartitionUnit {
    /// Creates a new VM partition state unit.
    ///
    /// The caller is responsible for launching a thread for each VP and running
    /// the VP using the returned [`VpRunner`]s.
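    ///
    /// A minimal usage sketch (the `driver`, `builder`, `my_partition`, and
    /// `params` values, and the per-VP `run_vp` routine, are assumed to come
    /// from the surrounding VMM setup):
    ///
    /// ```ignore
    /// let (unit, runners) = PartitionUnit::new(driver, builder, my_partition, params)?;
    /// for runner in runners {
    ///     // Each VP gets a dedicated thread that drives it via its runner.
    ///     std::thread::spawn(move || run_vp(runner));
    /// }
    /// ```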
    pub fn new(
        spawner: impl Spawn,
        builder: UnitBuilder<'_>,
        partition: impl VmPartition,
        params: PartitionUnitParams<'_>,
    ) -> Result<(Self, Vec<VpRunner>), Error> {
        #[cfg(not(feature = "gdb"))]
        if params.debugger_rpc.is_some() {
            return Err(Error::DebuggingNotSupported);
        }

        let mut vp_set = VpSet::new(params.vtl_guest_memory.map(|m| m.cloned()), params.halt_vps);
        let vps = params
            .processor_topology
            .vps_arch()
            .map(|vp| vp_set.add(vp))
            .collect();

        let (req_send, req_recv) = mesh::channel();

        let mut runner = PartitionUnitRunner {
            partition: Box::new(partition),
            vp_set,
            unit_started: false,
            vp_stop_count: 0,
            needs_reset: false,
            halt_reason: None,
            halt_request_recv: params.halt_request_recv.0,
            client_notify_send: params.client_notify_send,
            req_recv,
            topology: params.processor_topology.clone(),
            initial_regs: None,
            #[cfg(feature = "gdb")]
            debugger_state: debug::DebuggerState::new(
                params.vtl_guest_memory[0]
                    .ok_or(Error::MissingGuestMemory)?
                    .clone(),
                params.debugger_rpc,
            ),
        };
        let handle = builder
            .spawn(spawner, async |recv| {
                runner.run(recv).await;
                runner
            })
            .map_err(Error::NameInUse)?;

        Ok((Self { handle, req_send }, vps))
    }

    /// Gets the handle for the partition unit.
    pub fn unit_handle(&self) -> &UnitHandle {
        self.handle.handle()
    }

    /// Tears down the state unit, returning the `client_notify_send` sender
    /// passed to [`Self::new()`].
    pub async fn teardown(self) -> mesh::Sender<HaltReason> {
        let runner = self.handle.remove().await;
        runner.vp_set.teardown().await;
        runner.client_notify_send
    }

    /// Clears the current halt reason from the partition, resuming the VPs if
    /// they are stopped.
    pub async fn clear_halt(&mut self) -> bool {
        self.req_send
            .call(PartitionRequest::ClearHalt, ())
            .await
            .unwrap()
    }

    /// Temporarily stops the VPs, returning a guard that will resume them when
    /// dropped.
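    ///
    /// A short sketch of the intended pattern (the work done while the VPs are
    /// quiesced is hypothetical):
    ///
    /// ```ignore
    /// {
    ///     let _guard = unit.temporarily_stop_vps().await;
    ///     // ... operate on the partition while the VPs are stopped ...
    /// } // dropping the guard resumes the VPs
    /// ```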
    pub async fn temporarily_stop_vps(&mut self) -> StopGuard {
        self.req_send
            .call(PartitionRequest::StopVps, ())
            .await
            .unwrap();

        StopGuard(self.req_send.clone())
    }

    /// Sets the register state for the VPs for initial boot.
    ///
    /// If the VM has been run before and has not been reset since it last ran,
    /// the target VTL will be scrubbed first so that the partition state is
    /// clean.
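    ///
    /// A minimal sketch of boot-time use (the `initial_regs` value is assumed
    /// to come from the loader):
    ///
    /// ```ignore
    /// unit.set_initial_regs(Vtl::Vtl0, initial_regs).await?;
    /// ```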
    pub async fn set_initial_regs(
        &mut self,
        vtl: Vtl,
        state: Arc<InitialRegs>,
    ) -> Result<(), InitialRegError> {
        self.req_send
            .call(PartitionRequest::SetInitialRegs, (vtl, state))
            .await
            .unwrap()
    }

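    /// Sets the initial visibility of guest pages, accepting them on behalf of
    /// the loader.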
    pub async fn set_initial_page_visibility(
        &mut self,
        vis: Vec<(MemoryRange, PageVisibility)>,
    ) -> Result<(), InitialVisibilityError> {
        self.req_send
            .call(PartitionRequest::SetInitialPageVisibility, vis)
            .await
            .unwrap()
    }
}

impl PartitionUnitRunner {
    /// Runs the VM partition, handling state change requests from `recv`.
    async fn run(&mut self, mut recv: Receiver<StateRequest>) {
        loop {
            enum Event {
                State(Option<StateRequest>),
                Halt(InternalHaltReason),
                Request(PartitionRequest),
                #[cfg(feature = "gdb")]
                Debug(vmm_core_defs::debug_rpc::DebugRequest),
            }

            #[cfg(feature = "gdb")]
            let debug = self.debugger_state.wait_rpc();
            #[cfg(not(feature = "gdb"))]
            let debug = std::future::pending();

            let event = futures::select! { // merge semantics
                request = recv.next() => Event::State(request),
                request = self.halt_request_recv.select_next_some() => Event::Halt(request),
                request = self.req_recv.select_next_some() => Event::Request(request),
                request = debug.fuse() => {
                    #[cfg(feature = "gdb")]
                    {
                        Event::Debug(request)
                    }
                    #[cfg(not(feature = "gdb"))]
                    {
                        let _: std::convert::Infallible = request;
                        unreachable!()
                    }
                }
            };

            match event {
                Event::State(request) => {
                    if let Some(request) = request {
                        request.apply(self).await;
                    } else {
                        break;
                    }
                }
                Event::Halt(reason) => {
                    // Wait for the VPs to stop before reporting this anywhere.
                    // This is generally good behavior, but it is especially
                    // necessary because Self::clear_halt() will call
                    // VpSet::clear_halt(), which relies on the VPs being
                    // affirmatively stopped.
                    self.vp_set.stop().await;
                    self.handle_halt(reason).await;
                }
                Event::Request(request) => match request {
                    PartitionRequest::ClearHalt(rpc) => rpc.handle_sync(|()| self.clear_halt()),
                    PartitionRequest::SetInitialRegs(rpc) => {
                        rpc.handle(async |(vtl, state)| self.set_initial_regs(vtl, state).await)
                            .await
                    }
                    PartitionRequest::SetInitialPageVisibility(rpc) => {
                        rpc.handle(async |vis| self.set_initial_page_visibility(vis).await)
                            .await
                    }
                    PartitionRequest::StopVps(rpc) => {
                        rpc.handle(async |()| {
                            self.vp_set.stop().await;
                            self.vp_stop_count += 1;
                        })
                        .await
                    }
                    PartitionRequest::StartVps => {
                        self.vp_stop_count -= 1;
                        self.try_start();
                    }
                },
                #[cfg(feature = "gdb")]
                Event::Debug(request) => {
                    self.handle_gdb(request).await;
                }
            }
        }

        if self.unit_started {
            self.vp_set.stop().await;
        }
    }

    async fn handle_halt(&mut self, reason: InternalHaltReason) {
        match reason {
            InternalHaltReason::Halt(reason) => {
                // Only report the first halt request per boot so that the
                // client does not have to deal with multiple halt reasons
                // due to race conditions.
                if self.halt_reason.is_none() {
                    self.halt_reason = Some(reason.clone());

                    // Report the halt to the debugger.
                    #[cfg(feature = "gdb")]
                    let reported = self.debugger_state.report_halt_to_debugger(&reason);
                    #[cfg(not(feature = "gdb"))]
                    let reported = false;

                    // If the debugger is not attached, then report the halt
                    // to the client.
                    if !reported {
                        self.client_notify_send.send(reason);
                    }
                } else {
                    // Clear this specific halt.
                    self.vp_set.clear_halt();
                }
            }
            InternalHaltReason::ReplayMtrrs => {
                if let Some(initial_regs) = self.initial_regs.clone() {
                    if let Err(err) = self
                        .vp_set
                        .set_initial_regs(
                            Vtl::Vtl0,
                            initial_regs,
                            vp_set::RegistersToSet::MtrrsOnly,
                        )
                        .await
                    {
                        tracing::error!(
                            error = &err as &dyn std::error::Error,
                            "failed to replay mtrrs, guest may see inconsistent results"
                        );
                    }
                } else {
                    tracing::warn!("no initial mtrrs to replay");
                }
                self.vp_set.clear_halt();
                self.try_start();
            }
        }
    }

    /// Clears the halt and resumes the VPs if the partition is started. Returns
    /// `false` if VPs were not already halted.
    fn clear_halt(&mut self) -> bool {
        if self.halt_reason.is_some() {
            self.halt_reason = None;
            self.vp_set.clear_halt();
            self.try_start();
            true
        } else {
            false
        }
    }

    async fn set_initial_regs(
        &mut self,
        vtl: Vtl,
        state: Arc<InitialRegs>,
    ) -> Result<(), InitialRegError> {
        assert!(!self.unit_started || self.vp_stop_count > 0);

        // If this VM has been run before, then automatically scrub the target
        // VTL state.
        if self.needs_reset {
            self.partition
                .scrub_vtl(vtl)
                .map_err(InitialRegError::ScrubVtl)?;
            self.needs_reset = false;
        }

        self.vp_set
            .set_initial_regs(vtl, state.clone(), vp_set::RegistersToSet::All)
            .await
            .map_err(InitialRegError::RegisterSet)?;

        self.initial_regs = Some(state);
        Ok(())
    }

    async fn set_initial_page_visibility(
        &mut self,
        visibility: Vec<(MemoryRange, PageVisibility)>,
    ) -> Result<(), InitialVisibilityError> {
        assert!(!self.unit_started);

        self.partition
            .accept_initial_pages(visibility)
            .map_err(InitialVisibilityError::PageAcceptance)
    }

    fn try_start(&mut self) {
        if self.unit_started && self.halt_reason.is_none() && self.vp_stop_count == 0 {
            self.needs_reset = true;
            self.vp_set.start();
        }
    }
}

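/// Guard returned by [`PartitionUnit::temporarily_stop_vps()`]. The VPs remain
/// stopped until this is dropped.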
#[must_use = "when dropped, the VPs will be resumed"]
pub struct StopGuard(mesh::Sender<PartitionRequest>);

impl Drop for StopGuard {
    fn drop(&mut self) {
        self.0.send(PartitionRequest::StartVps);
    }
}

impl StateUnit for PartitionUnitRunner {
    async fn start(&mut self) {
        self.unit_started = true;
        self.try_start();
    }

    async fn stop(&mut self) {
        self.vp_set.stop().await;
        self.unit_started = false;

        // Now that the VM is stopped, flush any guest-initiated
        // power state change that may have raced with this request.
        while let Ok(reason) = self.halt_request_recv.try_recv() {
            self.handle_halt(reason).await;
        }
    }

    async fn reset(&mut self) -> anyhow::Result<()> {
        self.partition.reset()?;
        self.clear_halt();
        self.needs_reset = false;
        Ok(())
    }

    async fn save(&mut self) -> Result<Option<SavedStateBlob>, SaveError> {
        let state = self.save().await?;
        Ok(Some(SavedStateBlob::new(state)))
    }

    async fn restore(&mut self, buffer: SavedStateBlob) -> Result<(), RestoreError> {
        // TODO: restore halted state
        self.needs_reset = true;
        self.restore(buffer.parse()?).await?;
        Ok(())
    }
}

mod save_restore {
    use super::PartitionUnitRunner;
    use virt::VpIndex;
    use vmcore::save_restore::RestoreError;
    use vmcore::save_restore::SaveError;

    mod state {
        use mesh::payload::Protobuf;
        use vmcore::save_restore::SavedStateBlob;
        use vmcore::save_restore::SavedStateRoot;

        #[derive(Protobuf, SavedStateRoot)]
        #[mesh(package = "partition")]
        pub struct Partition {
            #[mesh(1)]
            pub(super) partition: SavedStateBlob,
            #[mesh(2)]
            pub(super) vps: Vec<Vp>,
            // TODO: save halted state
        }

        #[derive(Protobuf)]
        #[mesh(package = "partition")]
        pub struct Vp {
            #[mesh(1)]
            pub vp_index: u32,
            #[mesh(2)]
            pub data: SavedStateBlob,
        }
    }

    impl PartitionUnitRunner {
        pub async fn save(&mut self) -> Result<state::Partition, SaveError> {
            let partition = self.partition.save()?;
            let vps = self.vp_set.save().await?;
            let vps = vps
                .into_iter()
                .map(|(vp_index, data)| state::Vp {
                    vp_index: vp_index.index(),
                    data,
                })
                .collect();

            Ok(state::Partition { partition, vps })
        }

        pub async fn restore(&mut self, state: state::Partition) -> Result<(), RestoreError> {
            let state::Partition { partition, vps } = state;
            self.partition.restore(partition)?;
            self.vp_set
                .restore(
                    vps.into_iter()
                        .map(|state::Vp { vp_index, data }| (VpIndex::new(vp_index), data)),
                )
                .await?;
            Ok(())
        }
    }
}