openhcl_boot/host_params/dt/
dma_hint.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Calculate DMA hint value if not provided by host.
5
6use crate::boot_logger::log;
7use crate::cmdline::Vtl2GpaPoolConfig;
8use crate::cmdline::Vtl2GpaPoolLookupTable;
9use igvm_defs::PAGE_SIZE_4K;
10
11/// Lookup table for VTL2 DMA hint calculation. This table is used to retrofit
12/// dedicated DMA memory for existing configurations known at the time of writing.
13/// Dedicated DMA memory is required for devices that survive OpenHCL servicing
14/// operations (for example, NVMe and MANA devices). Those devices express
15/// their need as the "persistent memory" requirement when they create a DMA client.
16/// Since the amount of dedicated DMA memory cannot be changed at runtime, the amount
17/// of memory set aside must account for the maximum expected usage.
18///
19/// This table derives the maximum expected usage based on:
20/// 1. The number of expected MANA and NVMe devices and,
21/// 2. The amount of DMA each device needs.
22///
23/// To illustrate the second point, an NVMe device with 64 queue pairs will need
24/// the following memory (see `nvme_driver::QueuePair::new` for details):
25/// - Submission queue: 4 pages,
26/// - Completion queue: 1 page,
27/// - Extra memory per queue: 64 pages
28///
29/// If there are 32 VPs, we expect one queue pair per VP, leading to:
/// - Total per NVMe device: 32 * (4 + 1 + 64) = 32 * 69 = 2208 pages = 8.625 MiB
31///
32/// What is not easily derivable from this source base is the max number of devices
33/// expected in any given VTL2 configuration. We derive that manually from external
34/// data sources.
35///
36/// The inputs are the number of VTL0 VPs (vp_count) and the amount of memory
37/// assigned to VTL2 (vtl2_memory_mb). The output is the recommended DMA hint
38/// value (dma_hint_mb).
39///
40/// The table is sorted by VP count, then by assigned memory.
41/// (vp_count, vtl2_memory_mb, dma_hint_mb)
42const LOOKUP_TABLE_RELEASE: &[(u16, u16, u16); 39] = &[
43    (2, 96, 2),
44    (2, 98, 4),
45    (2, 100, 4),
46    (2, 104, 4),
47    (4, 108, 2),
48    (4, 110, 6),
49    (4, 112, 6),
50    (4, 118, 8),
51    (4, 130, 12),
52    (8, 140, 4),
53    (8, 148, 10),
54    (8, 170, 20),
55    (8, 176, 20),
56    (16, 70, 2), // Default manifest is 70MiB. Allocate minimal space for a few NVMe queues.
57    (16, 234, 12),
58    (16, 256, 20), // There is another 16vp/256MB configuration that only requires 18 MB of DMA memory, pick the larger.
59    (16, 268, 38),
60    (16, 282, 54),
61    (24, 420, 66),
62    (32, 404, 22),
63    (32, 516, 36),
64    (32, 538, 74), // There is another 32vp/538MB configuration that only requires 52 MB of DMA memory, pick the larger.
65    (48, 558, 32),
66    (48, 718, 52),
67    (48, 730, 52),
68    (48, 746, 78),
69    (64, 712, 42),
70    (64, 924, 68),
71    (64, 938, 68),
72    (96, 1030, 64),
73    (96, 1042, 114), // There is another 96vp/1042MB configuration that only requires 64 MB of DMA memory, pick the larger.
74    (96, 1058, 114), // There is another 96vp/1058MB configuration that only requires 106 MB of DMA memory, pick the larger.
75    (96, 1340, 102),
76    (96, 1358, 104),
77    (96, 1382, 120),
78    (112, 1566, 288),
79    (128, 1342, 84),
80    (128, 1360, 84),
81    (896, 12912, 516), // Needs to be validated as the vNIC number is unknown. (TODO, as part of network device keepalive support).
82];
83
84/// DEV/TEST ONLY variant of the lookup table above. Since the IGVM manifest specifies additional
85/// VTL2 memory for dev (well above what is required for release configs), allow the heuristics
86/// to still kick in.
87///
88/// These are sized for ~ 3 NVMe devices worth of DMA memory.
89/// 69 pages per NVMe per VP * 3 NVMe devices = 207 pages per VP.
90const LOOKUP_TABLE_DEBUG: &[(u16, u16, u16); 6] = &[
91    (4, 496, 4),
92    (16, 512, 16), // 16 VP, 512 MB VTL2 memory is a "heavy" Hyper-V Petri VM.
93    (32, 1024, 32),
94    (32, 1536, 128), // 32 VP "very heavy", with much extra memory above what is required for dev, allocate lots of memory for DMA.
95    (64, 1024, 64),
96    (128, 1024, 128),
97];
98
99const ONE_MB: u64 = 1024 * 1024;
100
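/// Maximum allowed memory size for DMA hint calculation: (2^32 - 1) MiB, i.e.
/// just under 4 PiB. Sizes below this bound always fit in a `u32` once
/// expressed in MiB.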
102const MAX_DMA_HINT_MEM_SIZE: u64 = 0xFFFFFFFF00000;
103/// Number of 4K pages in 2MiB.
104const PAGES_PER_2MB: u64 = 2 * ONE_MB / PAGE_SIZE_4K;
105// To avoid using floats, scale ratios to 1:1000.
106const RATIO: u32 = 1_000;
107
108/// Round up to next 2MiB.
109fn round_up_to_2mb(pages_4k: u64) -> u64 {
110    (pages_4k + (PAGES_PER_2MB - 1)) & !(PAGES_PER_2MB - 1)
111}
112
113/// Returns calculated DMA hint value, in 4k pages.
114pub fn vtl2_calculate_dma_hint(
115    vtl2_gpa_pool_lookup_table: Vtl2GpaPoolLookupTable,
116    vp_count: usize,
117    mem_size: u64,
118) -> u64 {
119    let mut dma_hint_4k = 0;
120    // Sanity check for the calculated memory size.
121    if mem_size > 0 && mem_size < MAX_DMA_HINT_MEM_SIZE {
122        let mem_size_mb = (mem_size / ONE_MB) as u32;
123        #[cfg(test)]
124        tracing::info!(?vp_count, ?mem_size_mb, "Calculating VTL2 DMA hint",);
125
126        let mut min_vtl2_memory_mb = u16::MAX; // minimum VTL2 memory seen for a given VP count.
127        let mut max_vtl2_memory_mb = 0; // maximum VTL2 memory seen for a given VP count.
128
129        let mut min_ratio_1000th = 100 * RATIO;
130        let mut max_ratio_1000th = RATIO;
131
132        let mut min_vp_count: u16 = 1; // Biggest VP count entry in the table that is less than vp_count.
133        let mut max_vp_count = vp_count as u16; // Smallest VP count entry in the table that is greater than vp_count, or vp_count itself.
134
135        let lookup_table = match vtl2_gpa_pool_lookup_table {
136            Vtl2GpaPoolLookupTable::Release => LOOKUP_TABLE_RELEASE.iter(),
137            Vtl2GpaPoolLookupTable::Debug => LOOKUP_TABLE_DEBUG.iter(),
138        };
139
140        // Take a first loop over the table. Ideally the table contains an exact match
141        // for the given VP count and memory size. If not, gather data for extrapolation.
142        for (vp_lookup, vtl2_memory_mb, dma_hint_mb) in lookup_table.clone() {
143            match (*vp_lookup).cmp(&(vp_count as u16)) {
144                core::cmp::Ordering::Less => {
145                    // Current entry has fewer VPs than requested.
146                    min_vp_count = min_vp_count.max(*vp_lookup);
147                }
148                core::cmp::Ordering::Equal => {
149                    if *vtl2_memory_mb == mem_size_mb as u16 {
150                        // Found exact match.
151                        dma_hint_4k = *dma_hint_mb as u64 * ONE_MB / PAGE_SIZE_4K;
152                        max_vtl2_memory_mb = *vtl2_memory_mb;
153
154                        break;
155                    } else {
156                        // Prepare for possible extrapolation.
157                        min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb);
158                        max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb);
159                        min_ratio_1000th = min_ratio_1000th
160                            .min(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
161                        max_ratio_1000th = max_ratio_1000th
162                            .max(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
163                    }
164                }
165                core::cmp::Ordering::Greater => {
166                    // Current entry has more VPs than requested.
167                    // Update the max VP count based on the table. This will be the vp_count unless
168                    // the biggest vp count in the table is smaller than the supplied vp_count.
169
170                    max_vp_count = max_vp_count.min(*vp_lookup);
171                }
172            }
173        }
174
175        // Take a second pass over the table if no exact match was found
176        // (i.e. unexpected VP count).
177        //
178        // If there was an exact match for VP count but not for memory size in the table,
179        // then we know the min and max ratios for that VP count. But, we also didn't know
180        // at that time if there was not going to be an exact match, now go look up the ratios
181        // for the nearest VP counts as well.
182        if max_vtl2_memory_mb == 0 {
183            #[cfg(test)]
184            tracing::warn!(
185                ?min_vp_count,
186                ?max_vp_count,
187                ?min_vtl2_memory_mb,
188                ?max_vtl2_memory_mb,
189                ?min_ratio_1000th,
190                ?max_ratio_1000th,
191                "Exact match not found, extrapolating DMA hint",
192            );
193            lookup_table
194                .filter(|(vp_lookup, _, _)| {
195                    *vp_lookup == min_vp_count || *vp_lookup == max_vp_count
196                })
197                .for_each(|(_vp_count, vtl2_memory_mb, dma_hint_mb)| {
198                    min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb);
199                    max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb);
200                    min_ratio_1000th =
201                        min_ratio_1000th.min(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
202                    max_ratio_1000th =
203                        max_ratio_1000th.max(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
204                });
205        }
206
207        if dma_hint_4k == 0 {
208            // Didn't find an exact match for vp_count, try to extrapolate.
209            dma_hint_4k = (mem_size_mb as u64 * RATIO as u64 * (ONE_MB / PAGE_SIZE_4K))
210                / ((min_ratio_1000th + max_ratio_1000th) as u64 / 2u64);
211
212            // And then round up to 2MiB.
213            dma_hint_4k = round_up_to_2mb(dma_hint_4k);
214
215            #[cfg(test)]
216            tracing::debug!(
217                ?min_vp_count,
218                ?max_vp_count,
219                ?min_vtl2_memory_mb,
220                ?max_vtl2_memory_mb,
221                ?min_ratio_1000th,
222                ?max_ratio_1000th,
223                ?dma_hint_4k,
224                "Extrapolated VTL2 DMA hint",
225            );
226
227            log!(
228                "Extrapolated VTL2 DMA hint: {} pages ({} MiB) for {} VPs and {} MiB VTL2 memory",
229                dma_hint_4k,
230                dma_hint_4k * PAGE_SIZE_4K / ONE_MB,
231                vp_count,
232                mem_size_mb
233            );
234        } else {
235            log!(
236                "Found exact VTL2 DMA hint: {} pages ({} MiB) for {} VPs and {} MiB VTL2 memory",
237                dma_hint_4k,
238                dma_hint_4k * PAGE_SIZE_4K / ONE_MB,
239                vp_count,
240                mem_size_mb
241            );
242        }
243    }
244
245    dma_hint_4k
246}
247
248// Decide if we will reserve memory for a VTL2 private pool. See `Vtl2GpaPoolConfig` for
249// details.
250pub fn pick_private_pool_size(
251    cmdline: Vtl2GpaPoolConfig,
252    dt: Option<u64>,
253    vp_count: usize,
254    mem_size: u64,
255) -> Option<u64> {
256    match (cmdline, dt) {
257        (Vtl2GpaPoolConfig::Off, _) => {
258            // Command line explicitly disabled the pool.
259            log!("vtl2 gpa pool disabled via command line");
260            None
261        }
262        (Vtl2GpaPoolConfig::Pages(cmd_line_pages), _) => {
263            // Command line specified explicit size, use it.
264            log!(
265                "vtl2 gpa pool enabled via command line with pages: {}",
266                cmd_line_pages
267            );
268            Some(cmd_line_pages)
269        }
270        (Vtl2GpaPoolConfig::Heuristics(table), None)
271        | (Vtl2GpaPoolConfig::Heuristics(table), Some(0)) => {
272            // Nothing more explicit, so use heuristics.
273            log!("vtl2 gpa pool coming from heuristics table: {:?}", table);
274            Some(vtl2_calculate_dma_hint(table, vp_count, mem_size))
275        }
276        (Vtl2GpaPoolConfig::Heuristics(_), Some(dt_page_count)) => {
277            // Command line specified heuristics, and the host specified size via device tree. Use
278            // the DT.
279            log!(
280                "vtl2 gpa pool enabled via device tree with pages: {}",
281                dt_page_count
282            );
283            Some(dt_page_count)
284        }
285    }
286}
287
#[cfg(test)]
mod test {
    use super::*;
    use test_with_tracing::test;

    const ONE_MB: u64 = 0x10_0000;

    /// Converts a size in MiB to the equivalent number of 4K pages.
    fn pages_for_mb(mb: u64) -> u64 {
        mb * ONE_MB / PAGE_SIZE_4K
    }

    /// Spot checks against the release table: exact hits and extrapolations.
    #[test]
    fn test_vtl2_calculate_dma_hint_release() {
        // (vp_count, mem_size in bytes, expected hint in MiB)
        let cases: [(usize, u64, u64); 5] = [
            // Exact table entries.
            (2, 0x620_0000, 4),
            (4, 0x6E0_0000, 6),
            // VP count is in the table, but not with this memory size.
            (112, 0x700_0000, 22),
            // Unusual VP count that is not in the table at all.
            (52, 0x600_0000, 8),
            (52, 0x800_0000, 10),
        ];

        for (vp_count, mem_size, expected_mb) in cases {
            assert_eq!(
                vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, vp_count, mem_size),
                pages_for_mb(expected_mb)
            );
        }
    }

    /// Spot checks against the debug table: exact hits and one extrapolation.
    #[test]
    fn test_vtl2_calculate_dma_hint_debug() {
        // (vp_count, mem_size in MiB, expected hint in MiB)
        let cases: [(usize, u64, u64); 4] = [
            (4, 496, 4),
            (64, 1024, 64),
            (128, 1024, 128),
            // Extrapolate beyond the max memory size listed for this VP count.
            (128, 2048, 256),
        ];

        for (vp_count, mem_size_mb, expected_mb) in cases {
            assert_eq!(
                vtl2_calculate_dma_hint(
                    Vtl2GpaPoolLookupTable::Debug,
                    vp_count,
                    mem_size_mb * ONE_MB
                ),
                pages_for_mb(expected_mb)
            );
        }
    }

    /// Every entry of both tables must be returned verbatim when queried with
    /// its own VP count and memory size.
    #[test]
    fn test_vtl2_calculate_dma_hint_exact_matches() {
        let tables: [(Vtl2GpaPoolLookupTable, &[(u16, u16, u16)]); 2] = [
            (
                Vtl2GpaPoolLookupTable::Release,
                LOOKUP_TABLE_RELEASE.as_slice(),
            ),
            (Vtl2GpaPoolLookupTable::Debug, LOOKUP_TABLE_DEBUG.as_slice()),
        ];

        for (mode, table) in tables {
            for &(vp_count, vtl2_memory_mb, dma_hint_mb) in table {
                let calculated_dma_hint_4k = vtl2_calculate_dma_hint(
                    mode,
                    usize::from(vp_count),
                    u64::from(vtl2_memory_mb) * ONE_MB,
                );
                let expected_dma_hint_4k = pages_for_mb(u64::from(dma_hint_mb));
                assert_eq!(
                    calculated_dma_hint_4k, expected_dma_hint_4k,
                    "Failed exact match test for vp_count={}, vtl2_memory_mb={}",
                    vp_count, vtl2_memory_mb
                );
            }
        }
    }

    /// Checks which source (command line, device tree, heuristics) wins when
    /// picking the private pool size.
    #[test]
    fn test_right_pages_source() {
        const VP_COUNT: usize = 16;
        const MEM_SIZE: u64 = 256 * ONE_MB;

        let heuristic_pages =
            |table: Vtl2GpaPoolLookupTable| vtl2_calculate_dma_hint(table, VP_COUNT, MEM_SIZE);

        // The device tree value used below (1500) must differ from what the
        // heuristics produce, otherwise the cases cannot tell the sources apart.
        // If these assertions fail, the test cases below may need to be updated.
        assert_ne!(heuristic_pages(Vtl2GpaPoolLookupTable::Release), 1500);
        assert_ne!(heuristic_pages(Vtl2GpaPoolLookupTable::Debug), 1500);

        // (command line config, device tree value, expected result)
        let cases: [(Vtl2GpaPoolConfig, Option<u64>, Option<u64>); 8] = [
            (Vtl2GpaPoolConfig::Off, Some(1000), None),
            (Vtl2GpaPoolConfig::Pages(2000), Some(1000), Some(2000)),
            (Vtl2GpaPoolConfig::Pages(2000), None, Some(2000)),
            // Device tree overrides heuristics.
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                Some(1500),
                Some(1500),
            ),
            // A zero or absent device tree value falls back to heuristics.
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Debug),
                Some(0),
                Some(heuristic_pages(Vtl2GpaPoolLookupTable::Debug)),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Debug),
                None,
                Some(heuristic_pages(Vtl2GpaPoolLookupTable::Debug)),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                Some(0),
                Some(heuristic_pages(Vtl2GpaPoolLookupTable::Release)),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                None,
                Some(heuristic_pages(Vtl2GpaPoolLookupTable::Release)),
            ),
        ];

        for (cmdline, dt, expected) in cases {
            let result = pick_private_pool_size(cmdline, dt, VP_COUNT, MEM_SIZE);
            assert_eq!(
                result, expected,
                "Failed pick_private_pool_size test for cmdline={:?}, dt={:?}",
                cmdline, dt
            );
        }
    }
}
openhcl_boot/host_params/dt/dma_hint.rs

openhcl_boot/host_params/dt/
dma_hint.rs