consomme/
lib.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The Consomme user-mode TCP stack.
5//!
6//! This crate implements a user-mode TCP stack designed for use with
7//! virtualization. The guest operating system sends Ethernet frames, and this
8//! crate parses them and distributes the data streams to individual TCP and UDP
9//! sockets.
10//!
11//! The current implementation supports OS-backed TCP and UDP sockets,
12//! essentially causing this stack to act as a NAT implementation, providing
13//! guest OS networking by leveraging the host's network stack.
14//!
15//! This implementation includes a small DHCP server for address assignment.
16
17mod arp;
18mod dhcp;
19#[cfg_attr(unix, path = "dns_unix.rs")]
20#[cfg_attr(windows, path = "dns_windows.rs")]
21mod dns;
22mod icmp;
23mod tcp;
24mod udp;
25mod windows;
26
27use inspect::Inspect;
28use inspect::InspectMut;
29use pal_async::driver::Driver;
30use smoltcp::phy::Checksum;
31use smoltcp::phy::ChecksumCapabilities;
32use smoltcp::wire::DhcpMessageType;
33use smoltcp::wire::EthernetAddress;
34use smoltcp::wire::EthernetFrame;
35use smoltcp::wire::EthernetProtocol;
36use smoltcp::wire::EthernetRepr;
37use smoltcp::wire::IPV4_HEADER_LEN;
38use smoltcp::wire::IpProtocol;
39use smoltcp::wire::Ipv4Address;
40use smoltcp::wire::Ipv4Packet;
41use std::net::SocketAddrV4;
42use std::task::Context;
43use thiserror::Error;
44
45/// A consomme instance.
///
/// Holds the shared endpoint state alongside the state owned by the `tcp`,
/// `udp`, and `icmp` modules. Use [`Consomme::access`] to pair it with a
/// [`Client`] for sending and polling.
46#[derive(InspectMut)]
47pub struct Consomme {
    // Network parameters and the instance-wide byte buffer.
48    state: ConsommeState,
    // State owned by the `tcp` module.
49    tcp: tcp::Tcp,
    // State owned by the `udp` module; inspected mutably.
50    #[inspect(mut)]
51    udp: udp::Udp,
    // State owned by the `icmp` module.
52    icmp: icmp::Icmp,
53}
54
/// State shared by the protocol handlers of a [`Consomme`] instance.
55#[derive(Inspect)]
56struct ConsommeState {
    // The network parameters; handed out mutably by `Consomme::params_mut`.
57    params: ConsommeParams,
    // Byte buffer allocated with 65536 bytes by `Consomme::new`. It is
    // excluded from inspection output. Its users are outside this file
    // section.
58    #[inspect(skip)]
59    buffer: Box<[u8]>,
60}
61
62/// Dynamic networking properties of a consomme endpoint.
///
/// Defaults are produced by [`ConsommeParams::new`]; the gateway address,
/// client address, and netmask can be replaced together with
/// [`ConsommeParams::set_cidr`].
63#[derive(Inspect)]
64pub struct ConsommeParams {
65    /// Current IPv4 network mask.
66    #[inspect(display)]
67    pub net_mask: Ipv4Address,
68    /// Current IPv4 gateway address.
69    #[inspect(display)]
70    pub gateway_ip: Ipv4Address,
71    /// Current gateway MAC address.
72    #[inspect(display)]
73    pub gateway_mac: EthernetAddress,
74    /// Current IPv4 address assigned to the endpoint.
75    #[inspect(display)]
76    pub client_ip: Ipv4Address,
77    /// Current client MAC address.
78    #[inspect(display)]
79    pub client_mac: EthernetAddress,
80    /// Current list of DNS resolvers.
81    #[inspect(with = "|x| inspect::iter_by_index(x).map_value(inspect::AsDisplay)")]
82    pub nameservers: Vec<Ipv4Address>,
83}
84
85/// An error indicating that the CIDR is invalid.
///
/// Returned by [`ConsommeParams::set_cidr`] when it rejects the supplied CIDR
/// string.
86#[derive(Debug, Error)]
87#[error("invalid CIDR")]
88pub struct InvalidCidr;
89
90impl ConsommeParams {
91    /// Create default dynamic network state. The default state is
92    ///     IP address: 10.0.0.2 / 24
93    ///     gateway: 10.0.0.1 with MAC address 52-55-10-0-0-1
94    ///     no DNS resolvers
95    pub fn new() -> Result<Self, Error> {
96        let nameservers = dns::nameservers()?;
97        Ok(Self {
98            gateway_ip: Ipv4Address::new(10, 0, 0, 1),
99            gateway_mac: EthernetAddress([0x52, 0x55, 10, 0, 0, 1]),
100            client_ip: Ipv4Address::new(10, 0, 0, 2),
101            client_mac: EthernetAddress([0x0, 0x0, 0x0, 0x0, 0x1, 0x0]),
102            net_mask: Ipv4Address::new(255, 255, 255, 0),
103            nameservers,
104        })
105    }
106
107    /// Sets the cidr for the network.
108    ///
109    /// Setting, for example, 192.168.0.0/24 will set the gateway to
110    /// 192.168.0.1 and the client IP to 192.168.0.2.
111    pub fn set_cidr(&mut self, cidr: &str) -> Result<(), InvalidCidr> {
112        let cidr: smoltcp::wire::Ipv4Cidr = cidr.parse().map_err(|()| InvalidCidr)?;
113        let base_address = cidr.network().address();
114        self.gateway_ip = base_address;
115        self.gateway_ip.0[3] += 1;
116        self.client_ip = base_address;
117        self.client_ip.0[3] += 2;
118        self.net_mask = cidr.netmask();
119        Ok(())
120    }
121}
122
123/// An accessor for consomme.
///
/// Pairs a mutable borrow of a [`Consomme`] instance with a mutable borrow of
/// a client for the lifetime `'a`. Created by [`Consomme::access`].
124pub struct Access<'a, T> {
    // The consomme instance being operated on.
125    inner: &'a mut Consomme,
    // The client paired with `inner`.
126    client: &'a mut T,
127}
128
129/// A consomme client.
///
/// Implemented by the consumer of the stack: it supplies the IO driver and
/// accepts the packets that consomme hands to it.
130pub trait Client {
131    /// Gets the driver to use for handling new connections.
132    ///
133    /// TODO: generalize connection creation to allow pluggable model (not just
134    /// OS sockets) and remove this.
135    fn driver(&self) -> &dyn Driver;
136
137    /// Transmits a packet to the client.
138    ///
139    /// If `checksum.ipv4`, `checksum.tcp`, or `checksum.udp` are set, then the
140    /// packet contains an IPv4 header, TCP header, and/or UDP header with a
141    /// valid checksum.
142    ///
143    /// TODO:
144    ///
145    /// 1. support >MTU sized packets (RSC/LRO/GRO)
146    /// 2. allow discontiguous data to eliminate the extra copy from the TCP
147    ///    window.
148    fn recv(&mut self, data: &[u8], checksum: &ChecksumState);
149
150    /// Specifies the maximum size for the next call to `recv`.
151    ///
152    /// This is the MTU including the Ethernet frame header. This must be at
153    /// least [`MIN_MTU`].
154    ///
155    /// Return 0 to indicate that there are no buffers available for receiving
156    /// data.
157    fn rx_mtu(&mut self) -> usize;
158}
159
160/// Specifies the checksum state for a packet passed to [`Access::send`] or
/// [`Client::recv`].
///
/// Each field documents its meaning separately for the receive and the send
/// direction.
161#[derive(Debug, Copy, Clone)]
162pub struct ChecksumState {
163    /// On receive, the data has a valid IPv4 header checksum. On send, the
164    /// checksum should be ignored.
165    pub ipv4: bool,
166    /// On receive, the data has a valid TCP checksum. On send, the checksum
167    /// should be ignored.
168    pub tcp: bool,
169    /// On receive, the data has a valid UDP checksum. On send, the checksum
170    /// should be ignored.
171    pub udp: bool,
172    /// The data consists of multiple TCP segments, each with the provided
173    /// segment size.
174    ///
175    /// The IP header's length field may be invalid and should be ignored.
176    pub tso: Option<u16>,
177}
178
179impl ChecksumState {
180    const NONE: Self = Self {
181        ipv4: false,
182        tcp: false,
183        udp: false,
184        tso: None,
185    };
186    const IPV4_ONLY: Self = Self {
187        ipv4: true,
188        tcp: false,
189        udp: false,
190        tso: None,
191    };
192    const TCP4: Self = Self {
193        ipv4: true,
194        tcp: true,
195        udp: false,
196        tso: None,
197    };
198    const UDP4: Self = Self {
199        ipv4: true,
200        tcp: false,
201        udp: true,
202        tso: None,
203    };
204
205    fn caps(&self) -> ChecksumCapabilities {
206        let mut caps = ChecksumCapabilities::default();
207        if self.ipv4 {
208            caps.ipv4 = Checksum::None;
209        }
210        if self.tcp {
211            caps.tcp = Checksum::None;
212        }
213        if self.udp {
214            caps.udp = Checksum::None;
215        }
216        caps
217    }
218}
219
220/// The minimum MTU for receives supported by Consomme (including the Ethernet
221/// frame).
///
/// NOTE(review): 1514 presumably corresponds to a 1500-byte payload plus a
/// 14-byte Ethernet header; confirm.
222pub const MIN_MTU: usize = 1514;
223
/// An IPv4 address and port pair.
///
/// Convertible into `SocketAddrV4` and `socket2::SockAddr`.
224#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
225struct SocketAddress {
226    ip: Ipv4Address,
227    port: u16,
228}
229
230impl From<SocketAddress> for SocketAddrV4 {
231    fn from(addr: SocketAddress) -> Self {
232        Self::new(addr.ip.into(), addr.port)
233    }
234}
235
236impl From<SocketAddress> for socket2::SockAddr {
237    fn from(addr: SocketAddress) -> Self {
238        socket2::SockAddr::from(SocketAddrV4::from(addr))
239    }
240}
241
/// A pair of destination and source socket addresses.
242#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
243struct FourTuple {
244    dst: SocketAddress,
245    src: SocketAddress,
246}
247
248/// The reason a packet was dropped without being handled.
249#[derive(Debug, Error)]
250pub enum DropReason {
251    /// The packet could not be parsed.
252    #[error("packet parsing error")]
253    Packet(#[from] smoltcp::Error),
254    /// The ethertype is unsupported.
255    #[error("unsupported ethertype {0}")]
256    UnsupportedEthertype(EthernetProtocol),
257    /// The IP protocol is unsupported.
258    #[error("unsupported ip protocol {0}")]
259    UnsupportedIpProtocol(IpProtocol),
260    /// The DHCP message type is unsupported.
261    #[error("unsupported dhcp message type {0:?}")]
262    UnsupportedDhcp(DhcpMessageType),
263    /// The ARP type is unsupported.
264    #[error("unsupported arp type")]
265    UnsupportedArp,
266    /// The IPv4 checksum was invalid.
267    #[error("ipv4 checksum failure")]
268    Ipv4Checksum,
269    /// The send buffer is full.
270    #[error("send buffer full")]
271    SendBufferFull,
272    /// There was an IO error.
273    #[error("io error")]
274    Io(#[source] std::io::Error),
275    /// The TCP state is invalid.
276    #[error("bad tcp state")]
277    BadTcpState(#[from] tcp::TcpError),
278    /// Specified port is not bound.
279    #[error("port is not bound")]
280    PortNotBound,
281}
282
283/// An error encountered while creating the initial consomme parameters.
///
/// Returned by [`ConsommeParams::new`].
284#[derive(Debug, Error)]
285pub enum Error {
286    /// Could not get DNS nameserver information.
287    #[error("failed to initialize nameservers")]
288    Dns(#[from] dns::Error),
289}
290
/// The source and destination addresses of an IPv4 packet.
291#[derive(Debug)]
292struct Ipv4Addresses {
293    src_addr: Ipv4Address,
294    dst_addr: Ipv4Address,
295}
296
297impl Consomme {
298    /// Creates a new consomme instance with specified state.
299    pub fn new(params: ConsommeParams) -> Self {
300        Self {
301            state: ConsommeState {
302                params,
303                buffer: Box::new([0; 65536]),
304            },
305            tcp: tcp::Tcp::new(),
306            udp: udp::Udp::new(),
307            icmp: icmp::Icmp::new(),
308        }
309    }
310
311    /// Get access to the parameters to be updated.
312    ///
313    /// FUTURE: add support for updating only the parameters that can be safely
314    /// changed at runtime.
315    pub fn params_mut(&mut self) -> &mut ConsommeParams {
316        &mut self.state.params
317    }
318
319    /// Pairs the client with this instance to operate on the consomme instance.
320    pub fn access<'a, T: Client>(&'a mut self, client: &'a mut T) -> Access<'a, T> {
321        Access {
322            inner: self,
323            client,
324        }
325    }
326}
327
328impl<T: Client> Access<'_, T> {
329    /// Gets the inner consomme object.
330    pub fn get(&self) -> &Consomme {
331        self.inner
332    }
333
334    /// Gets the inner consomme object.
335    pub fn get_mut(&mut self) -> &mut Consomme {
336        self.inner
337    }
338
339    /// Polls for work, transmitting any ready packets to the client.
340    pub fn poll(&mut self, cx: &mut Context<'_>) {
341        self.poll_udp(cx);
342        self.poll_tcp(cx);
343        self.poll_icmp(cx);
344    }
345
346    /// Update all sockets to use the new client's IO driver. This must be
347    /// called if the previous driver is no longer usable or if the client
348    /// otherwise wants existing connections to be polled on a new IO driver.
349    pub fn refresh_driver(&mut self) {
350        self.refresh_tcp_driver();
351        self.refresh_udp_driver();
352    }
353
354    /// Sends an Ethernet frame to the network.
355    ///
356    /// If `checksum.ipv4`, `checksum.tcp`, or `checksum.udp` are set, then
357    /// skips validating the IPv4, TCP, and UDP checksums. Otherwise, these
358    /// checksums are validated as normal and packets with invalid checksums are
359    /// dropped.
360    ///
361    /// If `checksum.tso.is_some()`, then perform TCP segmentation offset on the
362    /// frame. Practically speaking, this means that the frame contains a TCP
363    /// packet with these caveats:
364    ///
365    ///   * The IP header length may be invalid and will be ignored. The TCP
366    ///     packet payload is assumed to end at the end of `data`.
367    ///   * The TCP segment's payload size may be larger than the advertized TCP
368    ///     MSS value.
369    ///
370    /// This allows for sending TCP data that is much larger than the MSS size
371    /// via a single call.
372    ///
373    /// TODO:
374    ///
375    ///   1. allow for discontiguous packets
376    ///   2. allow for packets in guest memory (including lifetime model, if
377    ///      necessary--currently TCP transmits only happen in `poll`, but this
378    ///      may not be necessary. If the underlying socket implementation
379    ///      performs a copy (as the standard kernel socket APIs do), then no
380    ///      lifetime model is necessary, but if an implementation wants
381    ///      zerocopy support then some mechanism to allow the guest memory to
382    ///      be released later will be necessary.
383    pub fn send(&mut self, data: &[u8], checksum: &ChecksumState) -> Result<(), DropReason> {
384        let frame_packet = EthernetFrame::new_unchecked(data);
385        let frame = EthernetRepr::parse(&frame_packet)?;
386        match frame.ethertype {
387            EthernetProtocol::Ipv4 => self.handle_ipv4(&frame, frame_packet.payload(), checksum)?,
388            EthernetProtocol::Arp => self.handle_arp(&frame, frame_packet.payload())?,
389            _ => return Err(DropReason::UnsupportedEthertype(frame.ethertype)),
390        }
391        Ok(())
392    }
393
394    fn handle_ipv4(
395        &mut self,
396        frame: &EthernetRepr,
397        payload: &[u8],
398        checksum: &ChecksumState,
399    ) -> Result<(), DropReason> {
400        let ipv4 = Ipv4Packet::new_unchecked(payload);
401        if payload.len() < IPV4_HEADER_LEN
402            || ipv4.version() != 4
403            || payload.len() < ipv4.header_len().into()
404            || payload.len() < ipv4.total_len().into()
405        {
406            return Err(DropReason::Packet(smoltcp::Error::Malformed));
407        }
408
409        let total_len = if checksum.tso.is_some() {
410            payload.len()
411        } else {
412            ipv4.total_len().into()
413        };
414        if total_len < ipv4.header_len().into() {
415            return Err(DropReason::Packet(smoltcp::Error::Malformed));
416        }
417
418        if ipv4.more_frags() || ipv4.frag_offset() != 0 {
419            return Err(DropReason::Packet(smoltcp::Error::Fragmented));
420        }
421
422        if !checksum.ipv4 && !ipv4.verify_checksum() {
423            return Err(DropReason::Ipv4Checksum);
424        }
425
426        let addresses = Ipv4Addresses {
427            src_addr: ipv4.src_addr(),
428            dst_addr: ipv4.dst_addr(),
429        };
430
431        let inner = &payload[ipv4.header_len().into()..total_len];
432
433        match ipv4.protocol() {
434            IpProtocol::Tcp => self.handle_tcp(&addresses, inner, checksum)?,
435            IpProtocol::Udp => self.handle_udp(frame, &addresses, inner, checksum)?,
436            IpProtocol::Icmp => {
437                self.handle_icmp(frame, &addresses, inner, checksum, ipv4.hop_limit())?
438            }
439            p => return Err(DropReason::UnsupportedIpProtocol(p)),
440        };
441        Ok(())
442    }
443}