openhcl_boot/host_params/dt/
dma_hint.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Calculate DMA hint value if not provided by host.
5
6use crate::cmdline::Vtl2GpaPoolConfig;
7use crate::cmdline::Vtl2GpaPoolLookupTable;
8use igvm_defs::PAGE_SIZE_4K;
9
/// Lookup table for VTL2 DMA hint calculation. This table is used to retrofit
/// dedicated DMA memory for existing configurations known at the time of writing.
/// Dedicated DMA memory is required for devices that survive OpenHCL servicing
/// operations (for example, NVMe and MANA devices). Those devices express
/// their need as the "persistent memory" requirement when they create a DMA client.
/// Since the amount of dedicated DMA memory cannot be changed at runtime, the amount
/// of memory set aside must account for the maximum expected usage.
///
/// This table derives the maximum expected usage based on:
/// 1. The number of expected MANA and NVMe devices and,
/// 2. The amount of DMA each device needs.
///
/// To illustrate the second point, an NVMe device with 64 queue pairs will need
/// the following memory (see `nvme_driver::QueuePair::new` for details):
/// - Submission queue: 4 pages,
/// - Completion queue: 1 page,
/// - Extra memory per queue: 64 pages
///
/// If there are 32 VPs, we expect one queue pair per VP, leading to:
/// - Total per NVMe device: 32 * (4 + 1 + 64) = 32 * 69 = 2208 pages = 8.625 MiB
///
/// What is not easily derivable from this source base is the max number of devices
/// expected in any given VTL2 configuration. We derive that manually from external
/// data sources.
///
/// The inputs are the number of VTL0 VPs (vp_count) and the amount of memory
/// assigned to VTL2 (vtl2_memory_mb). The output is the recommended DMA hint
/// value (dma_hint_mb). Both memory values are expressed in MiB.
///
/// `vtl2_calculate_dma_hint` uses an entry verbatim when both vp_count and
/// vtl2_memory_mb match; otherwise it extrapolates from the
/// vtl2_memory_mb / dma_hint_mb ratios of the relevant entries. Consequently:
/// - dma_hint_mb must never be 0 (it is used as a divisor), and
/// - each (vp_count, vtl2_memory_mb) pair must appear only once (the first
///   match wins); where several real configurations share a key, the larger
///   DMA requirement is recorded.
///
/// The table is sorted by VP count, then by assigned memory.
/// (vp_count, vtl2_memory_mb, dma_hint_mb)
const LOOKUP_TABLE_RELEASE: &[(u16, u16, u16); 39] = &[
    (2, 96, 2),
    (2, 98, 4),
    (2, 100, 4),
    (2, 104, 4),
    (4, 108, 2),
    (4, 110, 6),
    (4, 112, 6),
    (4, 118, 8),
    (4, 130, 12),
    (8, 140, 4),
    (8, 148, 10),
    (8, 170, 20),
    (8, 176, 20),
    (16, 70, 2), // Default manifest is 70MiB. Allocate minimal space for a few NVMe queues.
    (16, 234, 12),
    (16, 256, 20), // There is another 16vp/256MB configuration that only requires 18 MB of DMA memory, pick the larger.
    (16, 268, 38),
    (16, 282, 54),
    (24, 420, 66),
    (32, 404, 22),
    (32, 516, 36),
    (32, 538, 74), // There is another 32vp/538MB configuration that only requires 52 MB of DMA memory, pick the larger.
    (48, 558, 32),
    (48, 718, 52),
    (48, 730, 52),
    (48, 746, 78),
    (64, 712, 42),
    (64, 924, 68),
    (64, 938, 68),
    (96, 1030, 64),
    (96, 1042, 114), // There is another 96vp/1042MB configuration that only requires 64 MB of DMA memory, pick the larger.
    (96, 1058, 114), // There is another 96vp/1058MB configuration that only requires 106 MB of DMA memory, pick the larger.
    (96, 1340, 102),
    (96, 1358, 104),
    (96, 1382, 120),
    (112, 1566, 288),
    (128, 1342, 84),
    (128, 1360, 84),
    (896, 12912, 516), // Needs to be validated as the vNIC number is unknown. (TODO, as part of network device keepalive support).
];
82
/// DEV/TEST ONLY variant of the lookup table above. Since the IGVM manifest specifies additional
/// VTL2 memory for dev (well above what is required for release configs), allow the heuristics
/// to still kick in.
///
/// These are sized for ~ 3 NVMe devices worth of DMA memory.
/// 69 pages per NVMe per VP * 3 NVMe devices = 207 pages per VP
/// (about 0.81 MiB), which most rows below round up to 1 MiB of DMA per VP.
///
/// Same layout and units as the release table:
/// (vp_count, vtl2_memory_mb, dma_hint_mb), memory values in MiB, sorted by
/// VP count and then by assigned memory. dma_hint_mb must never be 0 because
/// `vtl2_calculate_dma_hint` divides by it when extrapolating.
const LOOKUP_TABLE_DEBUG: &[(u16, u16, u16); 6] = &[
    (4, 496, 4),
    (16, 512, 16), // 16 VP, 512 MB VTL2 memory is a "heavy" Hyper-V Petri VM.
    (32, 1024, 32),
    (32, 1536, 128), // 32 VP "very heavy", with much extra memory above what is required for dev, allocate lots of memory for DMA.
    (64, 1024, 64),
    (128, 1024, 128),
];
97
98const ONE_MB: u64 = 1024 * 1024;
99
100/// Maximum allowed memory size for DMA hint calculation (1 TiB).
101const MAX_DMA_HINT_MEM_SIZE: u64 = 0xFFFFFFFF00000;
102/// Number of 4K pages in 2MiB.
103const PAGES_PER_2MB: u64 = 2 * ONE_MB / PAGE_SIZE_4K;
104// To avoid using floats, scale ratios to 1:1000.
105const RATIO: u32 = 1_000;
106
107/// Round up to next 2MiB.
108fn round_up_to_2mb(pages_4k: u64) -> u64 {
109    (pages_4k + (PAGES_PER_2MB - 1)) & !(PAGES_PER_2MB - 1)
110}
111
112/// Returns calculated DMA hint value, in 4k pages.
113pub fn vtl2_calculate_dma_hint(
114    vtl2_gpa_pool_lookup_table: Vtl2GpaPoolLookupTable,
115    vp_count: usize,
116    mem_size: u64,
117) -> u64 {
118    let mut dma_hint_4k = 0;
119    // Sanity check for the calculated memory size.
120    if mem_size > 0 && mem_size < MAX_DMA_HINT_MEM_SIZE {
121        let mem_size_mb = (mem_size / ONE_MB) as u32;
122        #[cfg(test)]
123        tracing::info!(?vp_count, ?mem_size_mb, "Calculating VTL2 DMA hint",);
124
125        let mut min_vtl2_memory_mb = u16::MAX; // minimum VTL2 memory seen for a given VP count.
126        let mut max_vtl2_memory_mb = 0; // maximum VTL2 memory seen for a given VP count.
127
128        let mut min_ratio_1000th = 100 * RATIO;
129        let mut max_ratio_1000th = RATIO;
130
131        let mut min_vp_count: u16 = 1; // Biggest VP count entry in the table that is less than vp_count.
132        let mut max_vp_count = vp_count as u16; // Smallest VP count entry in the table that is greater than vp_count, or vp_count itself.
133
134        let lookup_table = match vtl2_gpa_pool_lookup_table {
135            Vtl2GpaPoolLookupTable::Release => LOOKUP_TABLE_RELEASE.iter(),
136            Vtl2GpaPoolLookupTable::Debug => LOOKUP_TABLE_DEBUG.iter(),
137        };
138
139        // Take a first loop over the table. Ideally the table contains an exact match
140        // for the given VP count and memory size. If not, gather data for extrapolation.
141        for (vp_lookup, vtl2_memory_mb, dma_hint_mb) in lookup_table.clone() {
142            match (*vp_lookup).cmp(&(vp_count as u16)) {
143                core::cmp::Ordering::Less => {
144                    // Current entry has fewer VPs than requested.
145                    min_vp_count = min_vp_count.max(*vp_lookup);
146                }
147                core::cmp::Ordering::Equal => {
148                    if *vtl2_memory_mb == mem_size_mb as u16 {
149                        // Found exact match.
150                        dma_hint_4k = *dma_hint_mb as u64 * ONE_MB / PAGE_SIZE_4K;
151                        max_vtl2_memory_mb = *vtl2_memory_mb;
152
153                        break;
154                    } else {
155                        // Prepare for possible extrapolation.
156                        min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb);
157                        max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb);
158                        min_ratio_1000th = min_ratio_1000th
159                            .min(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
160                        max_ratio_1000th = max_ratio_1000th
161                            .max(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
162                    }
163                }
164                core::cmp::Ordering::Greater => {
165                    // Current entry has more VPs than requested.
166                    // Update the max VP count based on the table. This will be the vp_count unless
167                    // the biggest vp count in the table is smaller than the supplied vp_count.
168
169                    max_vp_count = max_vp_count.min(*vp_lookup);
170                }
171            }
172        }
173
174        // Take a second pass over the table if no exact match was found
175        // (i.e. unexpected VP count).
176        //
177        // If there was an exact match for VP count but not for memory size in the table,
178        // then we know the min and max ratios for that VP count. But, we also didn't know
179        // at that time if there was not going to be an exact match, now go look up the ratios
180        // for the nearest VP counts as well.
181        if max_vtl2_memory_mb == 0 {
182            #[cfg(test)]
183            tracing::warn!(
184                ?min_vp_count,
185                ?max_vp_count,
186                ?min_vtl2_memory_mb,
187                ?max_vtl2_memory_mb,
188                ?min_ratio_1000th,
189                ?max_ratio_1000th,
190                "Exact match not found, extrapolating DMA hint",
191            );
192            lookup_table
193                .filter(|(vp_lookup, _, _)| {
194                    *vp_lookup == min_vp_count || *vp_lookup == max_vp_count
195                })
196                .for_each(|(_vp_count, vtl2_memory_mb, dma_hint_mb)| {
197                    min_vtl2_memory_mb = min_vtl2_memory_mb.min(*vtl2_memory_mb);
198                    max_vtl2_memory_mb = max_vtl2_memory_mb.max(*vtl2_memory_mb);
199                    min_ratio_1000th =
200                        min_ratio_1000th.min(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
201                    max_ratio_1000th =
202                        max_ratio_1000th.max(*vtl2_memory_mb as u32 * RATIO / *dma_hint_mb as u32);
203                });
204        }
205
206        if dma_hint_4k == 0 {
207            // Didn't find an exact match for vp_count, try to extrapolate.
208            dma_hint_4k = (mem_size_mb as u64 * RATIO as u64 * (ONE_MB / PAGE_SIZE_4K))
209                / ((min_ratio_1000th + max_ratio_1000th) as u64 / 2u64);
210
211            // And then round up to 2MiB.
212            dma_hint_4k = round_up_to_2mb(dma_hint_4k);
213
214            #[cfg(test)]
215            tracing::debug!(
216                ?min_vp_count,
217                ?max_vp_count,
218                ?min_vtl2_memory_mb,
219                ?max_vtl2_memory_mb,
220                ?min_ratio_1000th,
221                ?max_ratio_1000th,
222                ?dma_hint_4k,
223                "Extrapolated VTL2 DMA hint",
224            );
225
226            log::info!(
227                "Extrapolated VTL2 DMA hint: {} pages ({} MiB) for {} VPs and {} MiB VTL2 memory",
228                dma_hint_4k,
229                dma_hint_4k * PAGE_SIZE_4K / ONE_MB,
230                vp_count,
231                mem_size_mb
232            );
233        } else {
234            log::info!(
235                "Found exact VTL2 DMA hint: {} pages ({} MiB) for {} VPs and {} MiB VTL2 memory",
236                dma_hint_4k,
237                dma_hint_4k * PAGE_SIZE_4K / ONE_MB,
238                vp_count,
239                mem_size_mb
240            );
241        }
242    }
243
244    dma_hint_4k
245}
246
247// Decide if we will reserve memory for a VTL2 private pool. See `Vtl2GpaPoolConfig` for
248// details.
249pub fn pick_private_pool_size(
250    cmdline: Vtl2GpaPoolConfig,
251    dt: Option<u64>,
252    vp_count: usize,
253    mem_size: u64,
254) -> Option<u64> {
255    match (cmdline, dt) {
256        (Vtl2GpaPoolConfig::Off, _) => {
257            // Command line explicitly disabled the pool.
258            log::info!("vtl2 gpa pool disabled via command line");
259            None
260        }
261        (Vtl2GpaPoolConfig::Pages(cmd_line_pages), _) => {
262            // Command line specified explicit size, use it.
263            log::info!(
264                "vtl2 gpa pool enabled via command line with pages: {}",
265                cmd_line_pages
266            );
267            Some(cmd_line_pages)
268        }
269        (Vtl2GpaPoolConfig::Heuristics(table), None)
270        | (Vtl2GpaPoolConfig::Heuristics(table), Some(0)) => {
271            // Nothing more explicit, so use heuristics.
272            log::info!("vtl2 gpa pool coming from heuristics table: {:?}", table);
273            Some(vtl2_calculate_dma_hint(table, vp_count, mem_size))
274        }
275        (Vtl2GpaPoolConfig::Heuristics(_), Some(dt_page_count)) => {
276            // Command line specified heuristics, and the host specified size via device tree. Use
277            // the DT.
278            log::info!(
279                "vtl2 gpa pool enabled via device tree with pages: {}",
280                dt_page_count
281            );
282            Some(dt_page_count)
283        }
284    }
285}
286
#[cfg(test)]
mod test {
    // `ONE_MB`, `PAGE_SIZE_4K`, the lookup tables and the functions under test
    // all come from the parent module through this glob import.
    use super::*;
    use test_with_tracing::test;

    /// Spot checks against the release table: exact hits and extrapolation.
    #[test]
    fn test_vtl2_calculate_dma_hint_release() {
        // Exact match: 2 VPs with 98 MiB.
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 2, 0x620_0000),
            4 * ONE_MB / PAGE_SIZE_4K
        );
        // Exact match: 4 VPs with 110 MiB.
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 4, 0x6E0_0000),
            6 * ONE_MB / PAGE_SIZE_4K
        );

        // Test a VP count that is in the table (112) with a memory size that is
        // not (112 MiB): extrapolates from the 112 VP entry's ratio alone.
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 112, 0x700_0000),
            22 * ONE_MB / PAGE_SIZE_4K
        );

        // Test unusual VP count (52 is not in the table): extrapolates from the
        // entries of the nearest smaller VP count (48).
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 52, 0x600_0000),
            8 * ONE_MB / PAGE_SIZE_4K
        );
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 52, 0x800_0000),
            10 * ONE_MB / PAGE_SIZE_4K
        );
    }

    /// Spot checks against the debug table: exact hits and extrapolation.
    #[test]
    fn test_vtl2_calculate_dma_hint_debug() {
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Debug, 4, 496 * ONE_MB),
            4 * ONE_MB / PAGE_SIZE_4K
        );
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Debug, 64, 1024 * ONE_MB),
            64 * ONE_MB / PAGE_SIZE_4K
        );
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Debug, 128, 1024 * ONE_MB),
            128 * ONE_MB / PAGE_SIZE_4K
        );
        // Extrapolate beyond max memory size from LOOKUP_TABLE.
        assert_eq!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Debug, 128, 2048 * ONE_MB),
            256 * ONE_MB / PAGE_SIZE_4K
        );
    }

    /// A memory size of 0, or one that is not below the supported maximum,
    /// yields no hint at all.
    #[test]
    fn test_vtl2_calculate_dma_hint_invalid_mem_size() {
        for mode in [
            Vtl2GpaPoolLookupTable::Release,
            Vtl2GpaPoolLookupTable::Debug,
        ] {
            assert_eq!(vtl2_calculate_dma_hint(mode, 16, 0), 0);
            assert_eq!(vtl2_calculate_dma_hint(mode, 16, MAX_DMA_HINT_MEM_SIZE), 0);
            assert_eq!(vtl2_calculate_dma_hint(mode, 16, u64::MAX), 0);
        }
    }

    /// Every entry of both tables must be returned verbatim when queried with
    /// its own VP count and memory size.
    #[test]
    fn test_vtl2_calculate_dma_hint_exact_matches() {
        for (mode, table) in [
            (Vtl2GpaPoolLookupTable::Release, LOOKUP_TABLE_RELEASE.iter()),
            (Vtl2GpaPoolLookupTable::Debug, LOOKUP_TABLE_DEBUG.iter()),
        ] {
            for (vp_count, vtl2_memory_mb, dma_hint_mb) in table {
                let calculated_dma_hint_4k = vtl2_calculate_dma_hint(
                    mode,
                    *vp_count as usize,
                    (*vtl2_memory_mb as u64) * ONE_MB,
                );
                let expected_dma_hint_4k = (*dma_hint_mb as u64) * ONE_MB / PAGE_SIZE_4K;
                assert_eq!(
                    calculated_dma_hint_4k, expected_dma_hint_4k,
                    "Failed exact match test for vp_count={}, vtl2_memory_mb={}",
                    vp_count, vtl2_memory_mb
                );
            }
        }
    }

    /// Checks which source (command line, device tree or heuristics) decides
    /// the private pool size for each combination of inputs.
    #[test]
    fn test_right_pages_source() {
        // If these assertions fail, the test cases below may need to be updated:
        // the device tree value (1500) must differ from what the heuristics
        // return, otherwise the cases cannot tell the two sources apart.
        assert_ne!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Release, 16, 256 * ONE_MB),
            1500
        );
        assert_ne!(
            vtl2_calculate_dma_hint(Vtl2GpaPoolLookupTable::Debug, 16, 256 * ONE_MB),
            1500
        );

        for (cmdline, dt, expected) in [
            (Vtl2GpaPoolConfig::Off, Some(1000), None),
            (Vtl2GpaPoolConfig::Pages(2000), Some(1000), Some(2000)),
            (Vtl2GpaPoolConfig::Pages(2000), None, Some(2000)),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                Some(1500),
                Some(1500), // Device tree overrides heuristics.
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Debug),
                Some(0),
                Some(vtl2_calculate_dma_hint(
                    Vtl2GpaPoolLookupTable::Debug,
                    16,
                    256 * ONE_MB,
                )),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Debug),
                None,
                Some(vtl2_calculate_dma_hint(
                    Vtl2GpaPoolLookupTable::Debug,
                    16,
                    256 * ONE_MB,
                )),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                Some(0),
                Some(vtl2_calculate_dma_hint(
                    Vtl2GpaPoolLookupTable::Release,
                    16,
                    256 * ONE_MB,
                )),
            ),
            (
                Vtl2GpaPoolConfig::Heuristics(Vtl2GpaPoolLookupTable::Release),
                None,
                Some(vtl2_calculate_dma_hint(
                    Vtl2GpaPoolLookupTable::Release,
                    16,
                    256 * ONE_MB,
                )),
            ),
        ] {
            let result = pick_private_pool_size(cmdline, dt, 16, 256 * ONE_MB);
            assert_eq!(
                result, expected,
                "Failed pick_private_pool_size test for cmdline={:?}, dt={:?}",
                cmdline, dt
            );
        }
    }
}