guest_crash_device/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implementation of the Underhill guest crash device, used by
5//! `underhill_crash` to send user-mode crash dumps to the host.
6
7#![forbid(unsafe_code)]
8
9pub mod resolver;
10
11use anyhow::anyhow;
12use async_trait::async_trait;
13use get_protocol::crash;
14use get_protocol::crash::CRASHDUMP_GUID;
15use guid::Guid;
16use inspect::Inspect;
17use inspect::InspectMut;
18use mesh::rpc::FailableRpc;
19use mesh::rpc::PendingFailableRpc;
20use mesh::rpc::RpcSend;
21use std::fs::File;
22use std::io::Seek;
23use std::io::SeekFrom;
24use std::io::Write;
25use task_control::Cancelled;
26use task_control::StopTask;
27use vmbus_async::async_dgram::AsyncRecvExt;
28use vmbus_async::pipe::MessagePipe;
29use vmbus_channel::bus::OfferParams;
30use vmbus_channel::channel::ChannelOpenError;
31use vmbus_channel::gpadl_ring::GpadlRingMem;
32use vmbus_channel::simple::SaveRestoreSimpleVmbusDevice;
33use vmbus_channel::simple::SimpleVmbusDevice;
34use vmcore::save_restore::SavedStateNotSupported;
35use zerocopy::FromBytes;
36use zerocopy::Immutable;
37use zerocopy::IntoBytes;
38use zerocopy::KnownLayout;
39
/// The crash device.
///
/// Holds the resources needed to service guest crash dump requests: a channel
/// for obtaining the destination file and the size limit applied to dumps.
#[derive(InspectMut)]
pub struct GuestCrashDevice {
    /// Channel used to request the file that a dump is written to. Each
    /// request carries the receiving end of a oneshot channel and is answered
    /// with the `File` (or a failure).
    #[inspect(skip)]
    request_dump: mesh::Sender<FailableRpc<mesh::OneshotReceiver<()>, File>>,
    /// Maximum dump size in bytes. It is reported to the guest in the dump
    /// config response and used to bound the offset/size of each write.
    max_dump_size: u64,
}
47
/// The internal guest crash channel.
///
/// Created each time the vmbus channel is opened; it pairs the message pipe
/// with the protocol state machine for that channel.
#[derive(InspectMut)]
pub struct GuestCrashChannel {
    /// Message pipe used to exchange crash protocol messages with the guest.
    #[inspect(mut)]
    pipe: GuestCrashPipe,
    /// Current position in the crash dump protocol; starts at
    /// `ProtocolState::Init` when the channel is opened.
    state: ProtocolState,
}
55
/// Thin wrapper around the vmbus message pipe that adds typed send and
/// header-parsing receive helpers for the crash protocol.
#[derive(InspectMut)]
struct GuestCrashPipe {
    /// The underlying vmbus message pipe over the GPADL-backed ring.
    #[inspect(flatten, mut)]
    pipe: MessagePipe<GpadlRingMem>,
}
61
62impl GuestCrashPipe {
63    fn send<T: IntoBytes + Immutable + KnownLayout>(&mut self, data: &T) -> std::io::Result<()> {
64        self.pipe.try_send(data.as_bytes())
65    }
66
67    async fn recv<'a>(&mut self, data: &'a mut [u8]) -> std::io::Result<&'a [u8]> {
68        let n = self.pipe.recv(data).await?;
69        Ok(&data[..n])
70    }
71
72    async fn recv_message<'a>(
73        &mut self,
74        data: &'a mut [u8],
75    ) -> anyhow::Result<(crash::Header, &'a [u8])> {
76        let message = self.recv(data).await?;
77        let header = crash::Header::read_from_prefix(message)
78            .map_err(|_| anyhow!("truncated message"))?
79            .0;
80        Ok((header, message))
81    }
82}
83
/// State of the crash dump protocol on an open channel.
enum ProtocolState {
    /// No dump is in progress. Capability, dump config and dump start
    /// requests are handled in this state.
    Init,
    /// The guest has started a dump.
    DumpRequested {
        /// Activity ID taken from the header of the dump start request; it is
        /// echoed in the responses sent for this dump.
        activity_id: Guid,
        /// Signaled when the guest reports that the dump is complete.
        done: mesh::OneshotSender<()>,
        /// Progress of the dump itself (opening the file or writing to it).
        state: DumpState,
    },
    /// The dump failed (the file could not be opened or a write failed).
    /// Further write requests are answered with a failure status.
    Failed {
        /// Activity ID of the failed dump, used in the failure responses.
        activity_id: Guid,
    },
}
95
/// Progress of a dump that the guest has started.
enum DumpState {
    /// Waiting for the response to the dump file request.
    OpeningFile {
        /// Pending RPC that resolves to the dump file, or to a failure.
        recv: PendingFailableRpc<File>,
    },
    /// The dump file is open and write requests are being processed.
    Writing {
        /// The file that dump data is written to.
        file: File,
        /// `(offset, size)` from the most recent write request while its
        /// payload message has not been received yet; `None` when the next
        /// message is expected to be a request.
        payload: Option<(u64, u32)>,
    },
}
105
impl Inspect for ProtocolState {
    /// Ignores the inspect request: no protocol state is reported yet.
    fn inspect(&self, req: inspect::Request<'_>) {
        req.ignore(); // TODO
    }
}
111
112impl GuestCrashDevice {
113    /// Makes a new crash device.
114    ///
115    /// When the guest requests a crash dump, the device will send a request to
116    /// `request_dump` to retrieve the file to write to. When the dump completes
117    /// successfully, the device will send an empty message to the provided
118    /// oneshot channel.
119    pub fn new(
120        request_dump: mesh::Sender<FailableRpc<mesh::OneshotReceiver<()>, File>>,
121        max_dump_size: u64,
122    ) -> Self {
123        Self {
124            request_dump,
125            max_dump_size,
126        }
127    }
128
129    /// Deconstructs the object, returning the original resources passed to
130    /// `new`.
131    pub fn into_inner(
132        self,
133    ) -> (
134        mesh::Sender<FailableRpc<mesh::OneshotReceiver<()>, File>>,
135        u64,
136    ) {
137        (self.request_dump, self.max_dump_size)
138    }
139}
140
141#[async_trait]
142impl SimpleVmbusDevice for GuestCrashDevice {
143    type SavedState = SavedStateNotSupported;
144    type Runner = GuestCrashChannel;
145
146    fn offer(&self) -> OfferParams {
147        OfferParams {
148            interface_name: "guest_crash".into(),
149            instance_id: CRASHDUMP_GUID,
150            interface_id: CRASHDUMP_GUID,
151            channel_type: vmbus_channel::bus::ChannelType::Pipe { message_mode: true },
152            ..Default::default()
153        }
154    }
155
156    fn inspect(&mut self, req: inspect::Request<'_>, runner: Option<&mut Self::Runner>) {
157        req.respond().merge(self).merge(runner);
158    }
159
160    fn open(
161        &mut self,
162        channel: vmbus_channel::RawAsyncChannel<GpadlRingMem>,
163        _guest_memory: guestmem::GuestMemory,
164    ) -> Result<Self::Runner, ChannelOpenError> {
165        let pipe = MessagePipe::new(channel)?;
166        Ok(GuestCrashChannel {
167            pipe: GuestCrashPipe { pipe },
168            state: ProtocolState::Init,
169        })
170    }
171
172    async fn run(
173        &mut self,
174        stop: &mut StopTask<'_>,
175        runner: &mut Self::Runner,
176    ) -> Result<(), Cancelled> {
177        stop.until_stopped(self.process(runner)).await
178    }
179
180    fn supports_save_restore(
181        &mut self,
182    ) -> Option<
183        &mut dyn SaveRestoreSimpleVmbusDevice<SavedState = Self::SavedState, Runner = Self::Runner>,
184    > {
185        None
186    }
187}
188
189impl GuestCrashDevice {
190    async fn process(&mut self, channel: &mut GuestCrashChannel) {
191        if let Err(err) = self.process_inner(channel).await {
192            tracing::error!(
193                error = err.as_ref() as &dyn std::error::Error,
194                "guest crash failure"
195            );
196        }
197    }
198
199    async fn process_inner(&mut self, channel: &mut GuestCrashChannel) -> anyhow::Result<()> {
200        let mut buffer = vec![0; 16384];
201        loop {
202            channel.pipe.pipe.wait_write_ready(256).await?;
203
204            match &mut channel.state {
205                ProtocolState::Init => {
206                    let (header, _message) = channel.pipe.recv_message(&mut buffer).await?;
207                    match header.message_type {
208                        crash::MessageType::REQUEST_GET_CAPABILITIES_V1 => {
209                            channel.pipe.send(&crash::DumpCapabilitiesResponseV1 {
210                                header: crash::Header {
211                                    message_type: crash::MessageType::RESPONSE_GET_CAPABILITIES_V1,
212                                    ..header
213                                },
214                                capabilities: crash::Capabilities::new().with_linux_config_v1(true),
215                            })?;
216                        }
217                        crash::MessageType::REQUEST_GET_NIX_DUMP_CONFIG_V1 => {
218                            channel.pipe.send(&crash::DumpConfigResponseV1 {
219                                header: crash::Header {
220                                    message_type:
221                                        crash::MessageType::RESPONSE_GET_NIX_DUMP_CONFIG_V1,
222                                    ..header
223                                },
224                                config: crash::ConfigV1 {
225                                    max_dump_size: self.max_dump_size,
226                                    dump_type: crash::DumpType::ELF,
227                                },
228                            })?;
229                        }
230                        crash::MessageType::REQUEST_NIX_DUMP_START_V1 => {
231                            let (send, recv) = mesh::oneshot();
232                            let recv = self.request_dump.call_failable(|x| x, recv);
233                            channel.state = ProtocolState::DumpRequested {
234                                activity_id: header.activity_id,
235                                done: send,
236                                state: DumpState::OpeningFile { recv },
237                            };
238                        }
239                        message_type => anyhow::bail!("invalid message type {message_type:?}"),
240                    }
241                }
242                &mut ProtocolState::DumpRequested {
243                    state: ref mut state @ DumpState::OpeningFile { .. },
244                    activity_id,
245                    ..
246                } => {
247                    let DumpState::OpeningFile { recv } = state else {
248                        unreachable!()
249                    };
250                    let status = match recv.await {
251                        Ok(file) => {
252                            *state = DumpState::Writing {
253                                file,
254                                payload: None,
255                            };
256                            0
257                        }
258                        Err(err) => {
259                            channel.state = ProtocolState::Failed { activity_id };
260                            tracing::error!(
261                                err = &err as &dyn std::error::Error,
262                                "failed to open crash dump file"
263                            );
264                            -1
265                        }
266                    };
267                    channel.pipe.send(&crash::DumpStartResponseV1 {
268                        header: crash::Header {
269                            message_type: crash::MessageType::RESPONSE_NIX_DUMP_START_V1,
270                            activity_id,
271                        },
272                        status,
273                    })?;
274                    continue;
275                }
276                &mut ProtocolState::DumpRequested {
277                    state:
278                        DumpState::Writing {
279                            ref mut file,
280                            ref mut payload,
281                            ..
282                        },
283                    activity_id,
284                    ..
285                } => {
286                    if let Some((offset, size)) = *payload {
287                        // Read the payload message.
288                        let message = channel.pipe.recv(&mut buffer).await?;
289                        if size as usize != message.len() {
290                            anyhow::bail!("size mismatch");
291                        }
292                        if self.max_dump_size < offset || self.max_dump_size - offset < size as u64
293                        {
294                            anyhow::bail!("dump file out of range");
295                        }
296
297                        match file
298                            .seek(SeekFrom::Start(offset))
299                            .and_then(|_| file.write_all(message))
300                        {
301                            Ok(()) => {
302                                *payload = None;
303                            }
304                            Err(err) => {
305                                tracing::error!(
306                                    error = &err as &dyn std::error::Error,
307                                    "failed to write crash data"
308                                );
309                                channel.pipe.send(&crash::DumpWriteResponseV1 {
310                                    header: crash::Header {
311                                        activity_id,
312                                        message_type:
313                                            crash::MessageType::RESPONSE_NIX_DUMP_WRITE_V1,
314                                    },
315                                    status: -1,
316                                })?;
317                                channel.state = ProtocolState::Failed { activity_id };
318                            }
319                        }
320                    } else {
321                        let (header, message) = channel.pipe.recv_message(&mut buffer).await?;
322                        match header.message_type {
323                            crash::MessageType::REQUEST_NIX_DUMP_WRITE_V1 => {
324                                let request = crash::DumpWriteRequestV1::read_from_prefix(message)
325                                    .map_err(|_| anyhow!("truncated message"))? // TODO: zerocopy: anyhow! (https://github.com/microsoft/openvmm/issues/759)
326                                    .0;
327                                *payload = Some((request.offset, request.size));
328                            }
329                            crash::MessageType::REQUEST_NIX_DUMP_COMPLETE_V1 => {
330                                // Notify the VMM that the crash is done being written.
331                                let ProtocolState::DumpRequested { done, .. } =
332                                    std::mem::replace(&mut channel.state, ProtocolState::Init)
333                                else {
334                                    unreachable!()
335                                };
336                                done.send(());
337                            }
338                            message_type => anyhow::bail!("invalid message type {message_type:?}"),
339                        }
340                    }
341                }
342                &mut ProtocolState::Failed { activity_id } => {
343                    let (header, _message) = channel.pipe.recv_message(&mut buffer).await?;
344                    match header.message_type {
345                        crash::MessageType::REQUEST_NIX_DUMP_WRITE_V1 => {
346                            channel.pipe.send(&crash::DumpWriteResponseV1 {
347                                header: crash::Header {
348                                    activity_id,
349                                    message_type: crash::MessageType::RESPONSE_NIX_DUMP_WRITE_V1,
350                                },
351                                status: -1,
352                            })?;
353                        }
354                        crash::MessageType::REQUEST_NIX_DUMP_COMPLETE_V1 => {}
355                        message_type => anyhow::bail!("invalid message type {message_type:?}"),
356                    }
357                }
358            }
359        }
360    }
361}