netvsp/buffers.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Implementation of [`BufferAccess`] and friends on top of the receive
//! buffers.

use crate::MAX_MTU;
use crate::rndisprot;
use arrayvec::ArrayVec;
use guestmem::GuestMemory;
use guestmem::GuestMemoryError;
use guestmem::LockedPages;
use net_backend::BufferAccess;
use net_backend::L4Protocol;
use net_backend::RxBufferSegment;
use net_backend::RxChecksumState;
use net_backend::RxId;
use net_backend::RxMetadata;
use safeatomic::AtomicSliceOps;
use std::ops::Range;
use std::sync::Arc;
use vmbus_channel::gpadl::GpadlView;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

const PAGE_SIZE: usize = 4096;
const PAGE_SIZE32: u32 = 4096;

/// A type providing access to the netvsp receive buffer.
pub struct GuestBuffers {
    mem: GuestMemory,
    _gpadl: GpadlView,
    locked_pages: LockedPages,
    gpns: Vec<u64>,
    sub_allocation_size: u32,
    mtu: u32,
}

/// A per-queue wrapper around guest buffers. The receive buffer is shared
/// across all queues, but they are statically partitioned into per-queue
/// suballocations.
pub struct BufferPool {
    buffers: Arc<GuestBuffers>,
    buffer_segments: ArrayVec<RxBufferSegment, MAX_RX_SEGMENTS>,
}

impl BufferPool {
    pub fn new(buffers: Arc<GuestBuffers>) -> Self {
        Self {
            buffers,
            buffer_segments: ArrayVec::new(),
        }
    }

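    /// Returns the byte offset of the suballocation for receive buffer `id`
    /// within the shared receive buffer.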
    fn offset(&self, id: RxId) -> u32 {
        id.0 * self.buffers.sub_allocation_size
    }
}

impl GuestBuffers {
    pub fn new(
        mem: GuestMemory,
        gpadl: GpadlView,
        sub_allocation_size: u32,
        mtu: u32,
    ) -> Result<Self, GuestMemoryError> {
        assert!(sub_allocation_size >= sub_allocation_size_for_mtu(mtu));

        let gpns = gpadl.first().unwrap().gpns().to_vec();
        let locked_pages = mem.lock_gpns(false, &gpns)?;
        Ok(Self {
            mem,
            _gpadl: gpadl,
            gpns,
            sub_allocation_size,
            locked_pages,
            mtu,
        })
    }

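    /// Writes `buf` to the receive buffer starting at `offset`, splitting the
    /// write at page boundaries since the locked pages are accessed one page
    /// at a time.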
    fn write_at(&self, offset: u32, mut buf: &[u8]) {
        let mut offset = offset as usize;
        while !buf.is_empty() {
            let len = (PAGE_SIZE - offset % PAGE_SIZE).min(buf.len());
            let (this, next) = buf.split_at(len);
            self.locked_pages.pages()[offset / PAGE_SIZE][offset % PAGE_SIZE..][..len]
                .atomic_write(this);
            buf = next;
            offset += len;
        }
    }
}

// Reserve this many bytes for the RNDIS headers.
const RX_HEADER_LEN: u32 = 256;

// The last 36 bytes of each suballocation cannot be used due to a bug in netvsc
// in newer versions of Windows.
const BROKEN_CO_NETVSC_FOOTER_LEN: u32 = 36;

/// Computes the suballocation size needed for the specified MTU.
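/// For example, an MTU of 1514 bytes requires a suballocation of at least
/// 256 + 1514 + 36 = 1806 bytes.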
pub const fn sub_allocation_size_for_mtu(mtu: u32) -> u32 {
    RX_HEADER_LEN + mtu + BROKEN_CO_NETVSC_FOOTER_LEN
}

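// Worst-case number of segments needed to describe a suballocation: the data
// may start at any offset within a page, and each page it touches may be
// physically discontiguous from the previous one.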
const MAX_RX_SEGMENTS: usize =
    ((sub_allocation_size_for_mtu(MAX_MTU) + (PAGE_SIZE32 - 1) * 2) / PAGE_SIZE32) as usize;

/// Computes the buffer segments needed to access the given byte `range` of the
/// receive buffer described by `gpns`.
fn compute_buffer_segments(
    v: &mut ArrayVec<RxBufferSegment, MAX_RX_SEGMENTS>,
    gpns: &[u64],
    mut range: Range<u32>,
) {
    v.clear();
    while !range.is_empty() {
        let start_page = range.start / PAGE_SIZE32;
        let start_offset = range.start % PAGE_SIZE32;
        let max_page = (range.end - 1) / PAGE_SIZE32 + 1;
        let mut end_page = start_page + 1;
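        // Extend the segment across pages whose GPNs are physically contiguous.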
        while end_page < max_page && gpns[end_page as usize] == gpns[end_page as usize - 1] + 1 {
            end_page += 1;
        }

        let gpa = gpns[start_page as usize] * PAGE_SIZE as u64 + start_offset as u64;
        let end = (end_page * PAGE_SIZE32).min(range.end);

        v.push(RxBufferSegment {
            gpa,
            len: end - range.start,
        });

        range.start = end;
    }
}

impl BufferAccess for BufferPool {
    fn guest_memory(&self) -> &GuestMemory {
        &self.buffers.mem
    }

    fn guest_addresses(&mut self, id: RxId) -> &[RxBufferSegment] {
        let offset = self.offset(id);
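        // Expose only the data portion of the suballocation, after the space
        // reserved for the RNDIS header.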
        compute_buffer_segments(
            &mut self.buffer_segments,
            &self.buffers.gpns,
            offset + RX_HEADER_LEN..offset + RX_HEADER_LEN + self.buffers.mtu,
        );
        &self.buffer_segments
    }

    fn capacity(&self, _id: RxId) -> u32 {
        self.buffers.mtu
    }

    fn write_data(&mut self, id: RxId, data: &[u8]) {
        self.buffers.write_at(self.offset(id) + RX_HEADER_LEN, data);
    }

    fn write_header(&mut self, id: RxId, metadata: &RxMetadata) {
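        // Layout of the RNDIS packet message written at the start of the
        // suballocation: a message header, a packet descriptor, and a
        // per-packet info entry carrying the receive checksum offload results.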
        #[repr(C)]
        #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)]
        struct Header {
            header: rndisprot::MessageHeader,
            packet: rndisprot::Packet,
            per_packet_info: PerPacketInfo,
        }

        #[repr(C)]
        #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)]
        struct PerPacketInfo {
            header: rndisprot::PerPacketInfo,
            checksum: rndisprot::RxTcpIpChecksumInfo,
        }

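        // Translate the backend's checksum validation state into the RNDIS
        // per-packet checksum flags, reporting TCP and UDP results separately.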
        let checksum = rndisprot::RxTcpIpChecksumInfo::new_zeroed()
            .set_ip_checksum_failed(metadata.ip_checksum == RxChecksumState::Bad)
            .set_ip_checksum_succeeded(metadata.ip_checksum.is_valid())
            .set_ip_checksum_value_invalid(
                metadata.ip_checksum == RxChecksumState::ValidatedButWrong,
            )
            .set_tcp_checksum_failed(
                metadata.l4_protocol == L4Protocol::Tcp
                    && metadata.l4_checksum == RxChecksumState::Bad,
            )
            .set_tcp_checksum_succeeded(
                metadata.l4_protocol == L4Protocol::Tcp && metadata.l4_checksum.is_valid(),
            )
            .set_tcp_checksum_value_invalid(
                metadata.l4_protocol == L4Protocol::Tcp
                    && metadata.l4_checksum == RxChecksumState::ValidatedButWrong,
            )
            .set_udp_checksum_failed(
                metadata.l4_protocol == L4Protocol::Udp
                    && metadata.l4_checksum == RxChecksumState::Bad,
            )
            .set_udp_checksum_succeeded(
                metadata.l4_protocol == L4Protocol::Udp && metadata.l4_checksum.is_valid(),
            );

        let header = Header {
            header: rndisprot::MessageHeader {
                message_type: rndisprot::MESSAGE_TYPE_PACKET_MSG,
                // Always claim the full suballocation length to avoid needing
                // to track this more accurately. This needs to match the
                // transfer page length but is not otherwise constrained for
                // packet messages.
                message_length: self.buffers.sub_allocation_size,
            },
            packet: rndisprot::Packet {
                data_offset: RX_HEADER_LEN - size_of::<rndisprot::MessageHeader>() as u32
                    + metadata.offset as u32,
                data_length: metadata.len as u32,
                oob_data_offset: 0,
                oob_data_length: 0,
                num_oob_data_elements: 0,
                per_packet_info_offset: size_of::<rndisprot::Packet>() as u32,
                per_packet_info_length: size_of::<PerPacketInfo>() as u32,
                vc_handle: 0,
                reserved: 0,
            },
            per_packet_info: PerPacketInfo {
                header: rndisprot::PerPacketInfo {
                    size: size_of::<PerPacketInfo>() as u32,
                    typ: rndisprot::PPI_TCP_IP_CHECKSUM,
                    per_packet_information_offset: size_of::<rndisprot::PerPacketInfo>() as u32,
                },
                checksum,
            },
        };

        self.buffers.write_at(self.offset(id), header.as_bytes());
    }
}

#[cfg(test)]
mod tests {
    use crate::buffers::compute_buffer_segments;
    use arrayvec::ArrayVec;
    use net_backend::RxBufferSegment;

    #[test]
    fn test_buffer_segments() {
        fn check(addrs: &[RxBufferSegment], check: &[(u64, u32)]) {
            assert_eq!(addrs.len(), check.len());
            let v: Vec<_> = addrs.iter().map(|range| (range.gpa, range.len)).collect();
            assert_eq!(v.as_slice(), check);
        }

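        // Pages 3, 4, and 5 are physically contiguous, so ranges spanning them
        // coalesce into a single segment.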
        let gpns = [1, 3, 4, 5, 8];
        let cases = [
            (0x1..0x5, &[(0x1001, 4)][..]),
            (0x1..0x1005, &[(0x1001, 0xfff), (0x3000, 5)]),
            (0x1001..0x2005, &[(0x3001, 0x1004)]),
            (0x1001..0x5000, &[(0x3001, 0x2fff), (0x8000, 0x1000)]),
        ];
        for (range, data) in cases {
            let mut v = ArrayVec::new();
            compute_buffer_segments(&mut v, &gpns, range);
            check(&v, data);
        }
    }
}