Skip to main content

netvsp/
buffers.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Implementation of [`RxBufferAccess`] and friends on top of the receive
5//! buffers.
6
7use crate::rndisprot;
8use guestmem::GuestMemory;
9use guestmem::GuestMemoryError;
10use guestmem::LockedPages;
11use net_backend::BufferAccess;
12use net_backend::L4Protocol;
13use net_backend::RxBufferSegment;
14use net_backend::RxChecksumState;
15use net_backend::RxId;
16use net_backend::RxMetadata;
17use safeatomic::AtomicSliceOps;
18use std::ops::Range;
19use std::sync::Arc;
20use thiserror::Error;
21use vmbus_channel::gpadl::GpadlView;
22use zerocopy::FromZeros;
23use zerocopy::Immutable;
24use zerocopy::IntoBytes;
25use zerocopy::KnownLayout;
26
// Size in bytes of one guest page. Receive buffer offsets are split into a
// page index and an in-page offset using this value.
const PAGE_SIZE: usize = 4096;
// The same page size as a `u32`, for arithmetic on `u32` buffer offsets
// without casting.
const PAGE_SIZE32: u32 = 4096;
29
/// Errors returned when validating or constructing [`GuestBuffers`].
#[derive(Debug, Error)]
pub enum GuestBuffersError {
    /// The MTU is so large that adding the per-suballocation header and
    /// footer reservations to it overflows a `u32`.
    #[error("invalid mtu {mtu}")]
    InvalidMtu { mtu: u32 },
    /// The suballocation size is smaller than what
    /// [`sub_allocation_size_for_mtu`] requires for the MTU.
    #[error("sub_allocation_size {sub_allocation_size} is too small for mtu {mtu}")]
    SubAllocationTooSmall { sub_allocation_size: u32, mtu: u32 },
    /// The GPADL has no first range to take guest page numbers from.
    #[error("GPADL has no ranges")]
    EmptyGpadl,
    /// Locking the GPADL's guest page numbers in guest memory failed.
    #[error("failed to lock guest page numbers")]
    GpnLock(#[source] GuestMemoryError),
}
41
/// A type providing access to the netvsp receive buffer.
///
/// The buffer is divided into fixed-size suballocations of
/// `sub_allocation_size` bytes; suballocation `n` starts at byte offset
/// `n * sub_allocation_size`.
pub struct GuestBuffers {
    /// Guest memory object handed out by `BufferAccess::guest_memory`.
    mem: GuestMemory,
    /// The GPADL the buffer was built from. It is stored but never read here;
    /// presumably it keeps the GPADL mapping alive -- TODO confirm.
    _gpadl: GpadlView,
    /// The pages of `gpns` locked in guest memory; all writes go through
    /// these.
    locked_pages: LockedPages,
    /// Guest page numbers of the first GPADL range, one per page of the
    /// receive buffer, in buffer order.
    gpns: Vec<u64>,
    /// Size in bytes of each suballocation.
    sub_allocation_size: u32,
    /// Number of packet data bytes available in each suballocation.
    mtu: u32,
}
51
/// A per-queue wrapper around guest buffers. The receive buffer is shared
/// across all queues, but they are statically partitioned into per-queue
/// suballocations.
pub struct BufferPool {
    /// The shared receive buffer this queue writes into.
    buffers: Arc<GuestBuffers>,
    /// Number of headers written with VLAN metadata since the counter was
    /// last taken with `take_rx_vlan_count`.
    rx_vlan_count: u64,
}
59
60impl BufferPool {
61    pub fn new(buffers: Arc<GuestBuffers>) -> Self {
62        Self {
63            buffers,
64            rx_vlan_count: 0,
65        }
66    }
67
68    fn offset(&self, id: RxId) -> u32 {
69        id.0 * self.buffers.sub_allocation_size
70    }
71
72    /// Returns and resets the number of RX packets with VLAN metadata
73    /// observed since the last call.
74    pub fn take_rx_vlan_count(&mut self) -> u64 {
75        std::mem::take(&mut self.rx_vlan_count)
76    }
77}
78
79impl GuestBuffers {
80    /// Validates that the GPADL and sub_allocation_size are compatible with the MTU
81    /// without performing any allocations.
82    pub fn validate_config(
83        gpadl: &GpadlView,
84        sub_allocation_size: u32,
85        mtu: u32,
86    ) -> Result<(), GuestBuffersError> {
87        if gpadl.first().is_none() {
88            return Err(GuestBuffersError::EmptyGpadl);
89        }
90        mtu.checked_add(RX_HEADER_LEN)
91            .and_then(|v| v.checked_add(BROKEN_CO_NETVSC_FOOTER_LEN))
92            .ok_or(GuestBuffersError::InvalidMtu { mtu })?;
93        if sub_allocation_size < sub_allocation_size_for_mtu(mtu) {
94            return Err(GuestBuffersError::SubAllocationTooSmall {
95                sub_allocation_size,
96                mtu,
97            });
98        }
99        Ok(())
100    }
101
102    pub fn new(
103        mem: GuestMemory,
104        gpadl: GpadlView,
105        sub_allocation_size: u32,
106        mtu: u32,
107    ) -> Result<Self, GuestBuffersError> {
108        Self::validate_config(&gpadl, sub_allocation_size, mtu)?;
109
110        let gpns = gpadl.first().unwrap().gpns().to_vec();
111        let locked_pages = mem
112            .lock_gpns(false, &gpns)
113            .map_err(GuestBuffersError::GpnLock)?;
114        Ok(Self {
115            mem,
116            _gpadl: gpadl,
117            gpns,
118            sub_allocation_size,
119            locked_pages,
120            mtu,
121        })
122    }
123
124    fn write_at(&self, offset: u32, mut buf: &[u8]) {
125        let mut offset = offset as usize;
126        while !buf.is_empty() {
127            let len = (PAGE_SIZE - offset % PAGE_SIZE).min(buf.len());
128            let (this, next) = buf.split_at(len);
129            self.locked_pages.pages()[offset / PAGE_SIZE][offset % PAGE_SIZE..][..len]
130                .atomic_write(this);
131            buf = next;
132            offset += len;
133        }
134    }
135}
136
// Reserve this many bytes at the start of each suballocation for the RNDIS
// headers.
const RX_HEADER_LEN: u32 = 256;

// The last 36 bytes of each suballocation cannot be used due to a bug in netvsc
// in newer versions of Windows.
const BROKEN_CO_NETVSC_FOOTER_LEN: u32 = 36;

/// Computes the suballocation size needed for the specified MTU.
///
/// The result is the MTU plus the header reservation and the unusable footer.
/// The additions saturate: for an MTU so large that the sum does not fit in a
/// `u32`, this returns `u32::MAX` rather than panicking (debug builds) or
/// wrapping around to a small value that would under-size the suballocation
/// (release builds).
pub const fn sub_allocation_size_for_mtu(mtu: u32) -> u32 {
    RX_HEADER_LEN
        .saturating_add(mtu)
        .saturating_add(BROKEN_CO_NETVSC_FOOTER_LEN)
}
148
149/// Computes the buffer segments for accessing a range of the receive buffer.
150fn compute_buffer_segments(v: &mut Vec<RxBufferSegment>, gpns: &[u64], mut range: Range<u32>) {
151    while !range.is_empty() {
152        let start_page = range.start / PAGE_SIZE32;
153        let start_offset = range.start % PAGE_SIZE32;
154        let max_page = (range.end - 1) / PAGE_SIZE32 + 1;
155        let mut end_page = start_page + 1;
156        while end_page < max_page && gpns[end_page as usize] == gpns[end_page as usize - 1] + 1 {
157            end_page += 1;
158        }
159
160        let gpa = gpns[start_page as usize] * PAGE_SIZE as u64 + start_offset as u64;
161        let end = (end_page * PAGE_SIZE32).min(range.end);
162
163        v.push(RxBufferSegment {
164            gpa,
165            len: (end - range.start),
166        });
167
168        range.start = end;
169    }
170}
171
172impl BufferAccess for BufferPool {
173    fn guest_memory(&self) -> &GuestMemory {
174        &self.buffers.mem
175    }
176
177    fn push_guest_addresses(&self, id: RxId, buf: &mut Vec<RxBufferSegment>) {
178        let offset = self.offset(id);
179        compute_buffer_segments(
180            buf,
181            &self.buffers.gpns,
182            offset + RX_HEADER_LEN..offset + RX_HEADER_LEN + self.buffers.mtu,
183        );
184    }
185
186    fn capacity(&self, _id: RxId) -> u32 {
187        self.buffers.mtu
188    }
189
190    fn write_data(&mut self, id: RxId, data: &[u8]) {
191        self.buffers.write_at(self.offset(id) + RX_HEADER_LEN, data);
192    }
193
194    fn write_header(&mut self, id: RxId, metadata: &RxMetadata) {
195        #[repr(C)]
196        #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)]
197        struct Header {
198            header: rndisprot::MessageHeader,
199            packet: rndisprot::Packet,
200        }
201
202        #[repr(C)]
203        #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)]
204        struct PerPacketInfo {
205            header: rndisprot::PerPacketInfo,
206            payload: u32,
207        }
208
209        let mut ppi_count = 1;
210        let checksum = rndisprot::RxTcpIpChecksumInfo::new_zeroed()
211            .set_ip_checksum_failed(metadata.ip_checksum == RxChecksumState::Bad)
212            .set_ip_checksum_succeeded(metadata.ip_checksum.is_valid())
213            .set_ip_checksum_value_invalid(
214                metadata.ip_checksum == RxChecksumState::ValidatedButWrong,
215            )
216            .set_tcp_checksum_failed(
217                metadata.l4_protocol == L4Protocol::Tcp
218                    && metadata.l4_checksum == RxChecksumState::Bad,
219            )
220            .set_tcp_checksum_succeeded(
221                metadata.l4_protocol == L4Protocol::Tcp && metadata.l4_checksum.is_valid(),
222            )
223            .set_tcp_checksum_value_invalid(
224                metadata.l4_protocol == L4Protocol::Tcp
225                    && metadata.l4_checksum == RxChecksumState::ValidatedButWrong,
226            )
227            .set_udp_checksum_failed(
228                metadata.l4_protocol == L4Protocol::Udp
229                    && metadata.l4_checksum == RxChecksumState::Bad,
230            )
231            .set_udp_checksum_succeeded(
232                metadata.l4_protocol == L4Protocol::Udp && metadata.l4_checksum.is_valid(),
233            );
234        let checksum_ppi = PerPacketInfo {
235            header: rndisprot::PerPacketInfo {
236                size: size_of::<PerPacketInfo>() as u32,
237                typ: rndisprot::PPI_TCP_IP_CHECKSUM,
238                per_packet_information_offset: size_of::<rndisprot::PerPacketInfo>() as u32,
239            },
240            payload: checksum.0,
241        };
242
243        let vlan = if let Some(vlan_info) = metadata.vlan {
244            self.rx_vlan_count += 1;
245            ppi_count += 1;
246
247            Some(PerPacketInfo {
248                header: rndisprot::PerPacketInfo {
249                    size: size_of::<PerPacketInfo>() as u32,
250                    typ: rndisprot::PPI_VLAN,
251                    per_packet_information_offset: size_of::<rndisprot::PerPacketInfo>() as u32,
252                },
253                payload: Into::<rndisprot::EthVlanInfo>::into(vlan_info).into(),
254            })
255        } else {
256            None
257        };
258
259        let header = Header {
260            header: rndisprot::MessageHeader {
261                message_type: rndisprot::MESSAGE_TYPE_PACKET_MSG,
262                // Always claim the full suballocation length to avoid needing
263                // to track this more accurately. This needs to match the
264                // transfer page length but is not otherwise constrained for
265                // packet messages.
266                message_length: self.buffers.sub_allocation_size,
267            },
268            packet: rndisprot::Packet {
269                data_offset: RX_HEADER_LEN - size_of::<rndisprot::MessageHeader>() as u32
270                    + metadata.offset as u32,
271                data_length: metadata.len as u32,
272                oob_data_offset: 0,
273                oob_data_length: 0,
274                num_oob_data_elements: 0,
275                per_packet_info_offset: size_of::<rndisprot::Packet>() as u32,
276                per_packet_info_length: ppi_count * size_of::<PerPacketInfo>() as u32,
277                vc_handle: 0,
278                reserved: 0,
279            },
280        };
281
282        let mut offset = self.offset(id);
283        self.buffers.write_at(offset, header.as_bytes());
284        offset += size_of::<Header>() as u32;
285        self.buffers.write_at(offset, checksum_ppi.as_bytes());
286        offset += size_of::<PerPacketInfo>() as u32;
287        if let Some(vlan_ppi) = vlan {
288            self.buffers.write_at(offset, vlan_ppi.as_bytes());
289        }
290        static_assertions::const_assert!(
291            (size_of::<Header>() + 2 * size_of::<PerPacketInfo>()) < RX_HEADER_LEN as usize
292        );
293    }
294}
295
#[cfg(test)]
mod tests {
    use crate::buffers::GuestBuffers;
    use crate::buffers::GuestBuffersError;
    use crate::buffers::compute_buffer_segments;
    use crate::buffers::sub_allocation_size_for_mtu;
    use guestmem::GuestMemory;
    use net_backend::RxBufferSegment;
    use vmbus_channel::gpadl::GpadlMap;
    use vmbus_core::protocol::GpadlId;
    use vmbus_ring::gparange::GpaRange;
    use vmbus_ring::gparange::MultiPagedRangeBuf;
    use zerocopy::IntoBytes;

    /// Builds the raw range buffer for one GPADL range of `num_pages` pages:
    /// the range header followed by one GPN per page (GPNs `0..num_pages`).
    fn single_range_buffer(num_pages: u32) -> Vec<u64> {
        let hdr = GpaRange {
            len: num_pages * 4096,
            offset: 0,
        };
        std::iter::once(u64::from_le_bytes(hdr.as_bytes().try_into().unwrap()))
            .chain((0..num_pages).map(u64::from))
            .collect()
    }

    /// Verify that inconsistent sub_allocation_size and MTU from saved state
    /// returns an error instead of panicking.
    #[test]
    fn sub_allocation_too_small_for_mtu() {
        let default_mtu = 1514;
        let max_mtu = 9216;
        let sub_alloc_for_default = sub_allocation_size_for_mtu(default_mtu);

        // The sub_allocation for default MTU must be smaller than for max MTU.
        assert!(sub_alloc_for_default < sub_allocation_size_for_mtu(max_mtu));

        // A single 16-page range.
        let ranges = MultiPagedRangeBuf::from_range_buffer(1, single_range_buffer(16)).unwrap();

        // Build a minimal GpadlView (won't be accessed: the check fires first).
        let gpadl_map = GpadlMap::new();
        let gpadl_id = GpadlId(1);
        gpadl_map.add(gpadl_id, ranges);
        let gpadl_view = gpadl_map.view().map(gpadl_id).unwrap();

        let mem = GuestMemory::empty();
        match GuestBuffers::new(mem, gpadl_view, sub_alloc_for_default, max_mtu) {
            Ok(_) => panic!("expected SubAllocationTooSmall, got Ok"),
            Err(GuestBuffersError::SubAllocationTooSmall { .. }) => {}
            Err(e) => panic!("expected SubAllocationTooSmall, got {e}"),
        }
    }

    /// Verify that an MTU near u32::MAX returns InvalidMtu instead of
    /// wrapping the sub_allocation_size calculation.
    #[test]
    fn overflowing_mtu_returns_error() {
        let ranges = MultiPagedRangeBuf::from_range_buffer(1, single_range_buffer(16)).unwrap();

        let gpadl_map = GpadlMap::new();
        let gpadl_id = GpadlId(3);
        gpadl_map.add(gpadl_id, ranges);
        let gpadl_view = gpadl_map.view().map(gpadl_id).unwrap();

        // An MTU of u32::MAX would overflow the sub_allocation_size addition.
        match GuestBuffers::validate_config(&gpadl_view, 1806, u32::MAX) {
            Ok(_) => panic!("expected InvalidMtu, got Ok"),
            Err(GuestBuffersError::InvalidMtu { .. }) => {}
            Err(e) => panic!("expected InvalidMtu, got {e}"),
        }
    }

    /// Verify that a GPADL with zero ranges returns EmptyGpadl instead of
    /// panicking.
    #[test]
    fn empty_gpadl_returns_error() {
        let no_ranges = MultiPagedRangeBuf::from_range_buffer(0, vec![]).unwrap();

        let gpadl_map = GpadlMap::new();
        let gpadl_id = GpadlId(2);
        gpadl_map.add(gpadl_id, no_ranges);
        let gpadl_view = gpadl_map.view().map(gpadl_id).unwrap();

        let mem = GuestMemory::empty();
        match GuestBuffers::new(mem, gpadl_view, 1806, 1514) {
            Ok(_) => panic!("expected EmptyGpadl, got Ok"),
            Err(GuestBuffersError::EmptyGpadl) => {}
            Err(e) => panic!("expected EmptyGpadl, got {e}"),
        }
    }

    /// Verify that byte ranges are split into segments at discontiguous
    /// guest pages and merged across contiguous ones.
    #[test]
    fn test_buffer_segments() {
        fn assert_segments(actual: &[RxBufferSegment], expected: &[(u64, u32)]) {
            assert_eq!(actual.len(), expected.len());
            let actual: Vec<(u64, u32)> = actual.iter().map(|seg| (seg.gpa, seg.len)).collect();
            assert_eq!(actual.as_slice(), expected);
        }

        let gpns = [1, 3, 4, 5, 8];
        let cases = [
            (0x1..0x5, &[(0x1001, 4)][..]),
            (0x1..0x1005, &[(0x1001, 0xfff), (0x3000, 5)]),
            (0x1001..0x2005, &[(0x3001, 0x1004)]),
            (0x1001..0x5000, &[(0x3001, 0x2fff), (0x8000, 0x1000)]),
        ];
        for (range, expected) in cases {
            let mut segments = Vec::new();
            compute_buffer_segments(&mut segments, &gpns, range);
            assert_segments(&segments, expected);
        }
    }
}
415}