debug_worker/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! A worker which runs a gdbstub event loop.
5//!
6//! Implements [`DebuggerWorker`], which exposes control of the VM via the GDB
7//! Remote Serial Protocol. This is used to debug the VM's execution when a
8//! guest debugger is not available or practical.
9
10#![expect(missing_docs)]
11#![forbid(unsafe_code)]
12
13mod gdb;
14
15use anyhow::Context;
16use debug_worker_defs::DEBUGGER_WORKER;
17use debug_worker_defs::DebuggerParameters;
18use futures::AsyncReadExt;
19use futures::FutureExt;
20use gdb::VmProxy;
21use gdb::targets::TargetArch;
22use gdb::targets::VmTarget;
23use gdbstub::target::ext::breakpoints::WatchKind;
24use inspect::InspectMut;
25use mesh::message::MeshField;
26use mesh_worker::Worker;
27use mesh_worker::WorkerId;
28use mesh_worker::WorkerRpc;
29use pal_async::local::LocalDriver;
30use pal_async::local::block_with_io;
31use pal_async::socket::Listener;
32use pal_async::socket::PolledSocket;
33use socket2::Socket;
34use std::fmt::Display;
35use std::future::Future;
36use std::io::Write;
37use std::net::TcpListener;
38use std::pin::Pin;
39use vmm_core_defs::debug_rpc::BreakpointType;
40use vmm_core_defs::debug_rpc::DebugRequest;
41use vmm_core_defs::debug_rpc::DebugStopReason;
42
43pub struct DebuggerWorker<T: Listener> {
44    listener: T,
45    state: State<T::Address>,
46    initial_arch: Architecture,
47}
48
49/// The current server state.
50enum State<T> {
51    Listening {
52        vm_proxy: VmProxy,
53    },
54    Connected {
55        remote_addr: T,
56        task: Pin<Box<dyn Future<Output = VmProxy>>>,
57        abort: mesh::OneshotSender<()>,
58    },
59    Invalid,
60}
61
62trait GdbListener: 'static + Send + Listener + Sized + MeshField {
63    const ID: WorkerId<DebuggerParameters<Self>>;
64}
65
66impl GdbListener for TcpListener {
67    const ID: WorkerId<DebuggerParameters<Self>> = DEBUGGER_WORKER;
68}
69
70#[cfg(any(windows, target_os = "linux"))]
71impl GdbListener for vmsocket::VmListener {
72    const ID: WorkerId<DebuggerParameters<Self>> = debug_worker_defs::DEBUGGER_VSOCK_WORKER;
73}
74
75impl<T: GdbListener> Worker for DebuggerWorker<T>
76where
77    T::Address: Display,
78{
79    type Parameters = DebuggerParameters<T>;
80    type State = DebuggerParameters<T>;
81    const ID: WorkerId<Self::Parameters> = T::ID;
82
83    fn new(params: Self::Parameters) -> anyhow::Result<Self> {
84        Ok(Self {
85            listener: params.listener,
86            state: State::Listening {
87                vm_proxy: VmProxy::new(params.req_chan, params.vp_count),
88            },
89            initial_arch: match params.target_arch {
90                debug_worker_defs::TargetArch::X86_64 => Architecture::X86_64,
91                debug_worker_defs::TargetArch::I8086 => Architecture::I8086,
92                debug_worker_defs::TargetArch::Aarch64 => Architecture::Aarch64,
93            },
94        })
95    }
96
97    fn restart(state: Self::State) -> anyhow::Result<Self> {
98        Self::new(state)
99    }
100
101    fn run(self, mut rpc_recv: mesh::Receiver<WorkerRpc<Self::Parameters>>) -> anyhow::Result<()> {
102        block_with_io(async |driver| {
103            tracing::info!(
104                address = %self.listener.local_addr().unwrap(),
105                "gdbstub listening",
106            );
107
108            let listener = PolledSocket::new(&driver, self.listener)?;
109            let mut server = Server {
110                listener,
111                state: self.state,
112                architecture: self.initial_arch,
113            };
114
115            loop {
116                let r = futures::select! { // merge semantics
117                    r = rpc_recv.recv().fuse() => r,
118                    r = server.process(&driver).fuse() => {
119                        r?;
120                        return Ok(())
121                    },
122                };
123                match r {
124                    Ok(message) => match message {
125                        WorkerRpc::Stop => return Ok(()),
126                        WorkerRpc::Inspect(deferred) => deferred.inspect(&mut server),
127                        WorkerRpc::Restart(rpc) => {
128                            let vm_proxy = match server.state {
129                                State::Listening { vm_proxy } => vm_proxy,
130                                State::Connected { task, abort, .. } => {
131                                    drop(abort);
132                                    task.await
133                                }
134                                State::Invalid => unreachable!(),
135                            };
136
137                            let state = {
138                                let (req_chan, vp_count) = vm_proxy.into_params();
139                                DebuggerParameters {
140                                    listener: server.listener.into_inner(),
141                                    req_chan,
142                                    vp_count,
143                                    target_arch: match server.architecture {
144                                        Architecture::X86_64 => {
145                                            debug_worker_defs::TargetArch::X86_64
146                                        }
147                                        Architecture::I8086 => debug_worker_defs::TargetArch::I8086,
148                                        Architecture::Aarch64 => {
149                                            debug_worker_defs::TargetArch::Aarch64
150                                        }
151                                    },
152                                }
153                            };
154                            rpc.complete(Ok(state));
155                            return Ok(());
156                        }
157                    },
158                    Err(_) => return Ok(()),
159                }
160            }
161        })
162    }
163}
164
165struct Server<T: Listener> {
166    listener: PolledSocket<T>,
167    state: State<T::Address>,
168    architecture: Architecture,
169}
170
171#[derive(Debug, Copy, Clone, InspectMut)]
172enum Architecture {
173    #[inspect(rename = "x86_64")]
174    X86_64,
175    #[inspect(rename = "i8086")]
176    I8086,
177    Aarch64,
178}
179
180impl<T: Listener> Server<T>
181where
182    T::Address: Display,
183{
184    /// Runs the state machine forward, either advancing the current connection
185    /// task or waiting for a new connection.
186    async fn process(&mut self, driver: &LocalDriver) -> anyhow::Result<()> {
187        loop {
188            match &mut self.state {
189                State::Listening { .. } => {
190                    // Accept the connection if one is really ready.
191                    let (socket, remote_addr) = self.listener.accept().await?;
192                    let socket = PolledSocket::new(driver, socket.into())?;
193
194                    let architecture = self.architecture;
195                    tracing::info!(address = %remote_addr, ?architecture, "GDB client connected");
196
197                    let mut vm_proxy = if let State::Listening { vm_proxy } =
198                        std::mem::replace(&mut self.state, State::Invalid)
199                    {
200                        vm_proxy
201                    } else {
202                        unreachable!()
203                    };
204
205                    let (abort_send, abort_recv) = mesh::oneshot();
206                    let connection = Box::pin(async move {
207                        let state_machine_fut = async {
208                            match architecture {
209                                Architecture::X86_64 => {
210                                    run_state_machine(
211                                        socket,
212                                        VmTarget::<gdb::arch::x86::X86_64_QEMU>::new(&mut vm_proxy),
213                                    )
214                                    .await
215                                }
216                                Architecture::I8086 => {
217                                    run_state_machine(
218                                        socket,
219                                        VmTarget::<gdb::arch::x86::I8086>::new(&mut vm_proxy),
220                                    )
221                                    .await
222                                }
223                                Architecture::Aarch64 => {
224                                    run_state_machine(
225                                        socket,
226                                        VmTarget::<gdbstub_arch::aarch64::AArch64>::new(
227                                            &mut vm_proxy,
228                                        ),
229                                    )
230                                    .await
231                                }
232                            }
233                        };
234
235                        let res = futures::select! { // race semantics
236                            gdb_res = state_machine_fut.fuse() => Some(gdb_res),
237                            _ = abort_recv.fuse() => None,
238                        };
239
240                        match res {
241                            Some(gdb_res) => {
242                                if let Err(err) = gdb_res {
243                                    tracing::error!(
244                                        error = (&err) as &dyn std::error::Error,
245                                        "gdbstub error"
246                                    );
247                                }
248                            }
249                            None => {
250                                tracing::info!("Aborting existing GDB worker...");
251                            }
252                        }
253
254                        vm_proxy
255                    });
256
257                    self.state = State::Connected {
258                        remote_addr,
259                        task: connection,
260                        abort: abort_send,
261                    };
262                }
263                State::Connected { task, .. } => {
264                    let vm_proxy = task.await;
265                    self.state = State::Listening { vm_proxy };
266                }
267                State::Invalid => unreachable!(),
268            }
269        }
270    }
271}
272
273async fn run_state_machine<T: TargetArch>(
274    socket: PolledSocket<Socket>,
275    mut vm_target: VmTarget<'_, T>,
276) -> Result<(), gdbstub::stub::GdbStubError<anyhow::Error, std::io::Error>> {
277    use gdbstub::common::Signal;
278    use gdbstub::stub::DisconnectReason;
279    use gdbstub::stub::GdbStubError;
280    use gdbstub::stub::MultiThreadStopReason;
281    use gdbstub::stub::state_machine::GdbStubStateMachine;
282
283    vm_target.send_req(DebugRequest::Attach);
284    let (init_break_send, init_break_recv) = mesh::oneshot();
285    vm_target.send_req(DebugRequest::Resume {
286        response: init_break_send,
287    });
288    vm_target.send_req(DebugRequest::Break);
289
290    // Wait for the initial break.
291    let reason = init_break_recv
292        .await
293        .context("failed to wait for initial break")
294        .map_err(GdbStubError::TargetError)?;
295
296    tracing::info!(?reason, "got initial breakpoint");
297
298    let mut gdb =
299        gdbstub::stub::GdbStub::new(SocketConnection(socket)).run_state_machine(&mut vm_target)?;
300
301    let reason = loop {
302        gdb = match gdb {
303            GdbStubStateMachine::Idle(mut gdb) => {
304                // "blocking" read waiting for GDB to send a command
305                let mut b = [0];
306                gdb.borrow_conn()
307                    .0
308                    .read_exact(&mut b)
309                    .await
310                    .map_err(GdbStubError::ConnectionRead)?;
311
312                gdb.incoming_data(&mut vm_target, b[0])?
313            }
314
315            GdbStubStateMachine::Disconnected(gdb) => {
316                break gdb.get_reason();
317            }
318
319            GdbStubStateMachine::CtrlCInterrupt(gdb) => {
320                vm_target.send_req(DebugRequest::Break);
321
322                let stop_reason = Some(MultiThreadStopReason::Signal(Signal::SIGINT));
323                gdb.interrupt_handled(&mut vm_target, stop_reason)?
324            }
325
326            GdbStubStateMachine::Running(mut gdb) => {
327                enum Event {
328                    HaltReason(DebugStopReason),
329                    IncomingData(u8),
330                }
331
332                let stop_chan = vm_target
333                    .take_stop_chan()
334                    .expect("halt chan is set as part of `resume`");
335
336                let mut b = [0];
337                let incoming_data = gdb.borrow_conn().0.read_exact(&mut b);
338
339                let event = futures::select! { // race semantics
340                    r = stop_chan.fuse() => {
341                        let reason = r.map_err(|e| GdbStubError::TargetError(e.into()))?;
342                        Event::HaltReason(reason)
343                    },
344                    _ = incoming_data.fuse() => Event::IncomingData(b[0]),
345                };
346
347                match event {
348                    Event::IncomingData(b) => gdb.incoming_data(&mut vm_target, b)?,
349                    Event::HaltReason(reason) => {
350                        let stop_reason = match reason {
351                            DebugStopReason::Break => MultiThreadStopReason::Signal(Signal::SIGINT),
352                            DebugStopReason::PowerOff => MultiThreadStopReason::Exited(0),
353                            DebugStopReason::Reset => MultiThreadStopReason::Exited(1),
354                            DebugStopReason::TripleFault { vp } => {
355                                MultiThreadStopReason::SignalWithThread {
356                                    tid: vm_target.vp_to_tid(vp),
357                                    signal: Signal::SIGSEGV,
358                                }
359                            }
360                            DebugStopReason::HwBreakpoint { vp, breakpoint } => {
361                                if let Ok(address) = T::Address::try_from(breakpoint.address) {
362                                    match breakpoint.ty {
363                                        BreakpointType::Execute => {
364                                            MultiThreadStopReason::HwBreak(vm_target.vp_to_tid(vp))
365                                        }
366                                        BreakpointType::Invalid => {
367                                            tracing::error!(
368                                                address = breakpoint.address,
369                                                "invalid breakpoint type"
370                                            );
371                                            MultiThreadStopReason::Signal(Signal::SIGINT)
372                                        }
373                                        BreakpointType::Write => MultiThreadStopReason::Watch {
374                                            tid: vm_target.vp_to_tid(vp),
375                                            kind: WatchKind::Write,
376                                            addr: address,
377                                        },
378                                        BreakpointType::ReadOrWrite => {
379                                            MultiThreadStopReason::Watch {
380                                                tid: vm_target.vp_to_tid(vp),
381                                                kind: WatchKind::ReadWrite,
382                                                addr: address,
383                                            }
384                                        }
385                                    }
386                                } else {
387                                    tracing::error!(
388                                        address = breakpoint.address,
389                                        "breakpoint address out of range"
390                                    );
391                                    MultiThreadStopReason::Signal(Signal::SIGINT)
392                                }
393                            }
394                            DebugStopReason::SingleStep { vp } => {
395                                // Work around WinDbg client limitation
396                                MultiThreadStopReason::SignalWithThread {
397                                    tid: vm_target.vp_to_tid(vp),
398                                    signal: Signal::SIGTRAP,
399                                }
400                            }
401                        };
402
403                        gdb.report_stop(&mut vm_target, stop_reason)?
404                    }
405                }
406            }
407        }
408    };
409
410    match reason {
411        DisconnectReason::Disconnect => tracing::info!("GDB Disconnected"),
412        DisconnectReason::TargetExited(status_code) => {
413            tracing::info!(status_code, "Target exited")
414        }
415        DisconnectReason::TargetTerminated(signal) => {
416            tracing::info!(signal = signal.to_string().as_str(), "Target terminated")
417        }
418        DisconnectReason::Kill => tracing::info!("GDB sent a kill command"),
419    }
420
421    Ok(())
422}
423
424impl<T: Listener> InspectMut for Server<T>
425where
426    T::Address: Display,
427{
428    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
429        let mut resp = req.respond();
430        resp.display("local_addr", &self.listener.get().local_addr().unwrap());
431        let state = match &self.state {
432            State::Listening { .. } => "listening",
433            State::Connected { remote_addr, .. } => {
434                resp.display("remote_addr", remote_addr);
435                "connected"
436            }
437            State::Invalid => unreachable!(),
438        };
439        resp.field("state", state)
440            .field_mut("architecture", &mut self.architecture);
441    }
442}
443
444struct SocketConnection(PolledSocket<Socket>);
445
446impl gdbstub::conn::Connection for SocketConnection {
447    type Error = std::io::Error;
448
449    fn write(&mut self, byte: u8) -> Result<(), Self::Error> {
450        self.0.get_mut().write_all(&[byte])
451    }
452
453    fn flush(&mut self) -> Result<(), Self::Error> {
454        self.0.get_mut().flush()
455    }
456}