debug_worker/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! A worker which runs a gdbstub event loop.
5//!
6//! Implements [`DebuggerWorker`], which exposes control of the VM via the GDB
7//! Remote Serial Protocol. This is used to debug the VM's execution when a
8//! guest debugger is not available or practical.
9
10#![expect(missing_docs)]
11
12mod gdb;
13
14use anyhow::Context;
15use debug_worker_defs::DEBUGGER_WORKER;
16use debug_worker_defs::DebuggerParameters;
17use futures::AsyncReadExt;
18use futures::FutureExt;
19use gdb::VmProxy;
20use gdb::targets::TargetArch;
21use gdb::targets::VmTarget;
22use gdbstub::target::ext::breakpoints::WatchKind;
23use inspect::InspectMut;
24use mesh::message::MeshField;
25use mesh_worker::Worker;
26use mesh_worker::WorkerId;
27use mesh_worker::WorkerRpc;
28use pal_async::local::LocalDriver;
29use pal_async::local::block_with_io;
30use pal_async::socket::Listener;
31use pal_async::socket::PolledSocket;
32use socket2::Socket;
33use std::fmt::Display;
34use std::future::Future;
35use std::io::Write;
36use std::net::TcpListener;
37use std::pin::Pin;
38use vmm_core_defs::debug_rpc::BreakpointType;
39use vmm_core_defs::debug_rpc::DebugRequest;
40use vmm_core_defs::debug_rpc::DebugStopReason;
41
42pub struct DebuggerWorker<T: Listener> {
43    listener: T,
44    state: State<T::Address>,
45    initial_arch: Architecture,
46}
47
48/// The current server state.
49enum State<T> {
50    Listening {
51        vm_proxy: VmProxy,
52    },
53    Connected {
54        remote_addr: T,
55        task: Pin<Box<dyn Future<Output = VmProxy>>>,
56        abort: mesh::OneshotSender<()>,
57    },
58    Invalid,
59}
60
61trait GdbListener: 'static + Send + Listener + Sized + MeshField {
62    const ID: WorkerId<DebuggerParameters<Self>>;
63}
64
65impl GdbListener for TcpListener {
66    const ID: WorkerId<DebuggerParameters<Self>> = DEBUGGER_WORKER;
67}
68
69#[cfg(any(windows, target_os = "linux"))]
70impl GdbListener for vmsocket::VmListener {
71    const ID: WorkerId<DebuggerParameters<Self>> = debug_worker_defs::DEBUGGER_VSOCK_WORKER;
72}
73
74impl<T: GdbListener> Worker for DebuggerWorker<T>
75where
76    T::Address: Display,
77{
78    type Parameters = DebuggerParameters<T>;
79    type State = DebuggerParameters<T>;
80    const ID: WorkerId<Self::Parameters> = T::ID;
81
82    fn new(params: Self::Parameters) -> anyhow::Result<Self> {
83        Ok(Self {
84            listener: params.listener,
85            state: State::Listening {
86                vm_proxy: VmProxy::new(params.req_chan, params.vp_count),
87            },
88            initial_arch: match params.target_arch {
89                debug_worker_defs::TargetArch::X86_64 => Architecture::X86_64,
90                debug_worker_defs::TargetArch::I8086 => Architecture::I8086,
91                debug_worker_defs::TargetArch::Aarch64 => Architecture::Aarch64,
92            },
93        })
94    }
95
96    fn restart(state: Self::State) -> anyhow::Result<Self> {
97        Self::new(state)
98    }
99
100    fn run(self, mut rpc_recv: mesh::Receiver<WorkerRpc<Self::Parameters>>) -> anyhow::Result<()> {
101        block_with_io(async |driver| {
102            tracing::info!(
103                address = %self.listener.local_addr().unwrap(),
104                "gdbstub listening",
105            );
106
107            let listener = PolledSocket::new(&driver, self.listener)?;
108            let mut server = Server {
109                listener,
110                state: self.state,
111                architecture: self.initial_arch,
112            };
113
114            loop {
115                let r = futures::select! { // merge semantics
116                    r = rpc_recv.recv().fuse() => r,
117                    r = server.process(&driver).fuse() => {
118                        r?;
119                        return Ok(())
120                    },
121                };
122                match r {
123                    Ok(message) => match message {
124                        WorkerRpc::Stop => return Ok(()),
125                        WorkerRpc::Inspect(deferred) => deferred.inspect(&mut server),
126                        WorkerRpc::Restart(rpc) => {
127                            let vm_proxy = match server.state {
128                                State::Listening { vm_proxy } => vm_proxy,
129                                State::Connected { task, abort, .. } => {
130                                    drop(abort);
131                                    task.await
132                                }
133                                State::Invalid => unreachable!(),
134                            };
135
136                            let state = {
137                                let (req_chan, vp_count) = vm_proxy.into_params();
138                                DebuggerParameters {
139                                    listener: server.listener.into_inner(),
140                                    req_chan,
141                                    vp_count,
142                                    target_arch: match server.architecture {
143                                        Architecture::X86_64 => {
144                                            debug_worker_defs::TargetArch::X86_64
145                                        }
146                                        Architecture::I8086 => debug_worker_defs::TargetArch::I8086,
147                                        Architecture::Aarch64 => {
148                                            debug_worker_defs::TargetArch::Aarch64
149                                        }
150                                    },
151                                }
152                            };
153                            rpc.complete(Ok(state));
154                            return Ok(());
155                        }
156                    },
157                    Err(_) => return Ok(()),
158                }
159            }
160        })
161    }
162}
163
164struct Server<T: Listener> {
165    listener: PolledSocket<T>,
166    state: State<T::Address>,
167    architecture: Architecture,
168}
169
/// Guest architecture presented to the GDB client.
///
/// The value selects which `VmTarget` architecture type a new session is
/// created with. It is exposed mutably through inspect, and is read each time
/// a connection is accepted.
#[derive(Debug, Copy, Clone, InspectMut)]
enum Architecture {
    /// Sessions use `gdb::arch::x86::X86_64_QEMU`.
    #[inspect(rename = "x86_64")]
    X86_64,
    /// Sessions use `gdb::arch::x86::I8086`.
    #[inspect(rename = "i8086")]
    I8086,
    /// Sessions use `gdbstub_arch::aarch64::AArch64`.
    Aarch64,
}
178
179impl<T: Listener> Server<T>
180where
181    T::Address: Display,
182{
183    /// Runs the state machine forward, either advancing the current connection
184    /// task or waiting for a new connection.
185    async fn process(&mut self, driver: &LocalDriver) -> anyhow::Result<()> {
186        loop {
187            match &mut self.state {
188                State::Listening { .. } => {
189                    // Accept the connection if one is really ready.
190                    let (socket, remote_addr) = self.listener.accept().await?;
191                    let socket = PolledSocket::new(driver, socket.into())?;
192
193                    let architecture = self.architecture;
194                    tracing::info!(address = %remote_addr, ?architecture, "GDB client connected");
195
196                    let mut vm_proxy = if let State::Listening { vm_proxy } =
197                        std::mem::replace(&mut self.state, State::Invalid)
198                    {
199                        vm_proxy
200                    } else {
201                        unreachable!()
202                    };
203
204                    let (abort_send, abort_recv) = mesh::oneshot();
205                    let connection = Box::pin(async move {
206                        let state_machine_fut = async {
207                            match architecture {
208                                Architecture::X86_64 => {
209                                    run_state_machine(
210                                        socket,
211                                        VmTarget::<gdb::arch::x86::X86_64_QEMU>::new(&mut vm_proxy),
212                                    )
213                                    .await
214                                }
215                                Architecture::I8086 => {
216                                    run_state_machine(
217                                        socket,
218                                        VmTarget::<gdb::arch::x86::I8086>::new(&mut vm_proxy),
219                                    )
220                                    .await
221                                }
222                                Architecture::Aarch64 => {
223                                    run_state_machine(
224                                        socket,
225                                        VmTarget::<gdbstub_arch::aarch64::AArch64>::new(
226                                            &mut vm_proxy,
227                                        ),
228                                    )
229                                    .await
230                                }
231                            }
232                        };
233
234                        let res = futures::select! { // race semantics
235                            gdb_res = state_machine_fut.fuse() => Some(gdb_res),
236                            _ = abort_recv.fuse() => None,
237                        };
238
239                        match res {
240                            Some(gdb_res) => {
241                                if let Err(err) = gdb_res {
242                                    tracing::error!(
243                                        error = (&err) as &dyn std::error::Error,
244                                        "gdbstub error"
245                                    );
246                                }
247                            }
248                            None => {
249                                tracing::info!("Aborting existing GDB worker...");
250                            }
251                        }
252
253                        vm_proxy
254                    });
255
256                    self.state = State::Connected {
257                        remote_addr,
258                        task: connection,
259                        abort: abort_send,
260                    };
261                }
262                State::Connected { task, .. } => {
263                    let vm_proxy = task.await;
264                    self.state = State::Listening { vm_proxy };
265                }
266                State::Invalid => unreachable!(),
267            }
268        }
269    }
270}
271
272async fn run_state_machine<T: TargetArch>(
273    socket: PolledSocket<Socket>,
274    mut vm_target: VmTarget<'_, T>,
275) -> Result<(), gdbstub::stub::GdbStubError<anyhow::Error, std::io::Error>> {
276    use gdbstub::common::Signal;
277    use gdbstub::stub::DisconnectReason;
278    use gdbstub::stub::GdbStubError;
279    use gdbstub::stub::MultiThreadStopReason;
280    use gdbstub::stub::state_machine::GdbStubStateMachine;
281
282    vm_target.send_req(DebugRequest::Attach);
283    let (init_break_send, init_break_recv) = mesh::oneshot();
284    vm_target.send_req(DebugRequest::Resume {
285        response: init_break_send,
286    });
287    vm_target.send_req(DebugRequest::Break);
288
289    // Wait for the initial break.
290    let reason = init_break_recv
291        .await
292        .context("failed to wait for initial break")
293        .map_err(GdbStubError::TargetError)?;
294
295    tracing::info!(?reason, "got initial breakpoint");
296
297    let mut gdb =
298        gdbstub::stub::GdbStub::new(SocketConnection(socket)).run_state_machine(&mut vm_target)?;
299
300    let reason = loop {
301        gdb = match gdb {
302            GdbStubStateMachine::Idle(mut gdb) => {
303                // "blocking" read waiting for GDB to send a command
304                let mut b = [0];
305                gdb.borrow_conn()
306                    .0
307                    .read_exact(&mut b)
308                    .await
309                    .map_err(GdbStubError::ConnectionRead)?;
310
311                gdb.incoming_data(&mut vm_target, b[0])?
312            }
313
314            GdbStubStateMachine::Disconnected(gdb) => {
315                break gdb.get_reason();
316            }
317
318            GdbStubStateMachine::CtrlCInterrupt(gdb) => {
319                vm_target.send_req(DebugRequest::Break);
320
321                let stop_reason = Some(MultiThreadStopReason::Signal(Signal::SIGINT));
322                gdb.interrupt_handled(&mut vm_target, stop_reason)?
323            }
324
325            GdbStubStateMachine::Running(mut gdb) => {
326                enum Event {
327                    HaltReason(DebugStopReason),
328                    IncomingData(u8),
329                }
330
331                let stop_chan = vm_target
332                    .take_stop_chan()
333                    .expect("halt chan is set as part of `resume`");
334
335                let mut b = [0];
336                let incoming_data = gdb.borrow_conn().0.read_exact(&mut b);
337
338                let event = futures::select! { // race semantics
339                    r = stop_chan.fuse() => {
340                        let reason = r.map_err(|e| GdbStubError::TargetError(e.into()))?;
341                        Event::HaltReason(reason)
342                    },
343                    _ = incoming_data.fuse() => Event::IncomingData(b[0]),
344                };
345
346                match event {
347                    Event::IncomingData(b) => gdb.incoming_data(&mut vm_target, b)?,
348                    Event::HaltReason(reason) => {
349                        let stop_reason = match reason {
350                            DebugStopReason::Break => MultiThreadStopReason::Signal(Signal::SIGINT),
351                            DebugStopReason::PowerOff => MultiThreadStopReason::Exited(0),
352                            DebugStopReason::Reset => MultiThreadStopReason::Exited(1),
353                            DebugStopReason::TripleFault { vp } => {
354                                MultiThreadStopReason::SignalWithThread {
355                                    tid: vm_target.vp_to_tid(vp),
356                                    signal: Signal::SIGSEGV,
357                                }
358                            }
359                            DebugStopReason::HwBreakpoint { vp, breakpoint } => {
360                                if let Ok(address) = T::Address::try_from(breakpoint.address) {
361                                    match breakpoint.ty {
362                                        BreakpointType::Execute => {
363                                            MultiThreadStopReason::HwBreak(vm_target.vp_to_tid(vp))
364                                        }
365                                        BreakpointType::Invalid => {
366                                            tracing::error!(
367                                                address = breakpoint.address,
368                                                "invalid breakpoint type"
369                                            );
370                                            MultiThreadStopReason::Signal(Signal::SIGINT)
371                                        }
372                                        BreakpointType::Write => MultiThreadStopReason::Watch {
373                                            tid: vm_target.vp_to_tid(vp),
374                                            kind: WatchKind::Write,
375                                            addr: address,
376                                        },
377                                        BreakpointType::ReadOrWrite => {
378                                            MultiThreadStopReason::Watch {
379                                                tid: vm_target.vp_to_tid(vp),
380                                                kind: WatchKind::ReadWrite,
381                                                addr: address,
382                                            }
383                                        }
384                                    }
385                                } else {
386                                    tracing::error!(
387                                        address = breakpoint.address,
388                                        "breakpoint address out of range"
389                                    );
390                                    MultiThreadStopReason::Signal(Signal::SIGINT)
391                                }
392                            }
393                            DebugStopReason::SingleStep { vp } => {
394                                // Work around WinDbg client limitation
395                                MultiThreadStopReason::SignalWithThread {
396                                    tid: vm_target.vp_to_tid(vp),
397                                    signal: Signal::SIGTRAP,
398                                }
399                            }
400                        };
401
402                        gdb.report_stop(&mut vm_target, stop_reason)?
403                    }
404                }
405            }
406        }
407    };
408
409    match reason {
410        DisconnectReason::Disconnect => tracing::info!("GDB Disconnected"),
411        DisconnectReason::TargetExited(status_code) => {
412            tracing::info!(status_code, "Target exited")
413        }
414        DisconnectReason::TargetTerminated(signal) => {
415            tracing::info!(signal = signal.to_string().as_str(), "Target terminated")
416        }
417        DisconnectReason::Kill => tracing::info!("GDB sent a kill command"),
418    }
419
420    Ok(())
421}
422
423impl<T: Listener> InspectMut for Server<T>
424where
425    T::Address: Display,
426{
427    fn inspect_mut(&mut self, req: inspect::Request<'_>) {
428        let mut resp = req.respond();
429        resp.display("local_addr", &self.listener.get().local_addr().unwrap());
430        let state = match &self.state {
431            State::Listening { .. } => "listening",
432            State::Connected { remote_addr, .. } => {
433                resp.display("remote_addr", remote_addr);
434                "connected"
435            }
436            State::Invalid => unreachable!(),
437        };
438        resp.field("state", state)
439            .field_mut("architecture", &mut self.architecture);
440    }
441}
442
/// Newtype over the polled client socket so that it can implement gdbstub's
/// `Connection` trait.
struct SocketConnection(PolledSocket<Socket>);
444
445impl gdbstub::conn::Connection for SocketConnection {
446    type Error = std::io::Error;
447
448    fn write(&mut self, byte: u8) -> Result<(), Self::Error> {
449        self.0.get_mut().write_all(&[byte])
450    }
451
452    fn flush(&mut self) -> Result<(), Self::Error> {
453        self.0.get_mut().flush()
454    }
455}