vm_topology/
memory.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
//! Tools to compute the guest memory layout.
5
6use memory_range::MemoryRange;
7use thiserror::Error;
8
/// Guest page size in bytes; `MemoryLayout::new` requires RAM sizes to be a
/// multiple of this.
const PAGE_SIZE: u64 = 4096;
/// The 4 GiB boundary used to split RAM accounting (below/above 4GB).
const FOUR_GB: u64 = 0x1_0000_0000;
11
/// Represents a page-aligned byte range of memory, with additional metadata.
///
/// Ordering and equality follow the derived lexicographic order: the range
/// first, then the vnode.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "mesh", derive(mesh_protobuf::Protobuf))]
#[cfg_attr(feature = "inspect", derive(inspect::Inspect))]
pub struct MemoryRangeWithNode {
    /// The memory range.
    pub range: MemoryRange,
    /// The virtual NUMA node the range belongs to.
    pub vnode: u32,
}
22
23impl core::fmt::Display for MemoryRangeWithNode {
24    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
25        write!(f, "{}({})", self.range, self.vnode)
26    }
27}
28
/// Describes the memory layout of a guest.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "inspect", derive(inspect::Inspect))]
pub struct MemoryLayout {
    /// Populated RAM ranges, sorted and non-overlapping. `build` guarantees
    /// at least one entry.
    #[cfg_attr(feature = "inspect", inspect(with = "inspect_ranges_with_metadata"))]
    ram: Vec<MemoryRangeWithNode>,
    /// MMIO gap ranges, sorted and disjoint from RAM.
    #[cfg_attr(feature = "inspect", inspect(with = "inspect_ranges"))]
    mmio: Vec<MemoryRange>,
    /// PCI ECAM ranges, sorted and disjoint from everything else.
    #[cfg_attr(feature = "inspect", inspect(with = "inspect_ranges"))]
    pci_ecam: Vec<MemoryRange>,
    /// PCI MMIO ranges, sorted and disjoint from everything else.
    #[cfg_attr(feature = "inspect", inspect(with = "inspect_ranges"))]
    pci_mmio: Vec<MemoryRange>,
    /// The RAM range used by VTL2. This is not present in any of the stats
    /// above.
    vtl2_range: Option<MemoryRange>,
}
45
#[cfg(feature = "inspect")]
/// Inspects a list of plain ranges: each entry is keyed by the range's
/// string form and reports its byte length as a hex field.
fn inspect_ranges(ranges: &[MemoryRange]) -> impl '_ + inspect::Inspect {
    inspect::iter_by_key(ranges.iter().map(|r| {
        let node = inspect::adhoc(|req| {
            req.respond().hex("length", r.len());
        });
        (r.to_string(), node)
    }))
}
57
#[cfg(feature = "inspect")]
/// Inspects a list of RAM ranges: each entry is keyed by the range's string
/// form and reports its byte length and owning vnode as hex fields.
fn inspect_ranges_with_metadata(ranges: &[MemoryRangeWithNode]) -> impl '_ + inspect::Inspect {
    inspect::iter_by_key(ranges.iter().map(|entry| {
        let node = inspect::adhoc(|req| {
            req.respond()
                .hex("length", entry.range.len())
                .hex("vnode", entry.vnode);
        });
        (entry.range.to_string(), node)
    }))
}
71
/// Memory layout creation error.
#[derive(Debug, Error)]
pub enum Error {
    /// Invalid memory size (zero, or not a multiple of the page size).
    #[error("invalid memory size")]
    BadSize,
    /// Invalid MMIO gap configuration.
    #[error("invalid MMIO gap configuration")]
    BadMmioGaps,
    /// Invalid memory ranges: empty, unsorted, overlapping, or no RAM at all.
    #[error("invalid memory or MMIO ranges")]
    BadMemoryRanges,
    /// VTL2 range is below the end of ram, and overlaps.
    #[error("vtl2 range is below end of ram")]
    Vtl2RangeBeforeEndOfRam,
}
88
89fn validate_ranges(ranges: &[MemoryRange]) -> Result<(), Error> {
90    validate_ranges_core(ranges, |x| x)
91}
92
93fn validate_ranges_with_metadata(ranges: &[MemoryRangeWithNode]) -> Result<(), Error> {
94    validate_ranges_core(ranges, |x| &x.range)
95}
96
97/// Ensures everything in a list of ranges is non-empty, in order, and
98/// non-overlapping.
99fn validate_ranges_core<T>(ranges: &[T], getter: impl Fn(&T) -> &MemoryRange) -> Result<(), Error> {
100    if ranges.iter().any(|x| getter(x).is_empty())
101        || !ranges.iter().zip(ranges.iter().skip(1)).all(|(x, y)| {
102            let x = getter(x);
103            let y = getter(y);
104            x <= y && !x.overlaps(y)
105        })
106    {
107        return Err(Error::BadMemoryRanges);
108    }
109
110    Ok(())
111}
112
/// The type backing an address.
///
/// Returned by [`MemoryLayout::probe_address`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum AddressType {
    /// The address describes ram.
    Ram,
    /// The address describes mmio.
    Mmio,
    /// The address describes PCI ECAM.
    PciEcam,
    /// The address describes PCI MMIO.
    PciMmio,
}
125
126impl MemoryLayout {
127    /// Makes a new memory layout for a guest with `ram_size` bytes of memory
128    /// and MMIO gaps at the locations specified by `gaps`.
129    ///
130    /// `ram_size` must be a multiple of the page size. Each mmio and device
131    /// reserved gap must be non-empty, and the gaps must be in order and
132    /// non-overlapping.
133    ///
134    /// `vtl2_range` describes a range of memory reserved for VTL2.
135    /// It is not reported in ram.
136    ///
137    /// All RAM is assigned to NUMA node 0.
138    pub fn new(
139        ram_size: u64,
140        mmio_gaps: &[MemoryRange],
141        pci_ecam_gaps: &[MemoryRange],
142        pci_mmio_gaps: &[MemoryRange],
143        vtl2_range: Option<MemoryRange>,
144    ) -> Result<Self, Error> {
145        if ram_size == 0 || ram_size & (PAGE_SIZE - 1) != 0 {
146            return Err(Error::BadSize);
147        }
148
149        validate_ranges(mmio_gaps)?;
150        validate_ranges(pci_ecam_gaps)?;
151        validate_ranges(pci_mmio_gaps)?;
152
153        let mut combined_gaps = mmio_gaps
154            .iter()
155            .chain(pci_ecam_gaps)
156            .chain(pci_mmio_gaps)
157            .copied()
158            .collect::<Vec<_>>();
159        combined_gaps.sort();
160        validate_ranges(&combined_gaps)?;
161
162        let mut ram = Vec::new();
163        let mut remaining = ram_size;
164        let mut remaining_gaps = combined_gaps.iter().cloned();
165        let mut last_end = 0;
166
167        while remaining > 0 {
168            let (this, next_end) = if let Some(gap) = remaining_gaps.next() {
169                (remaining.min(gap.start() - last_end), gap.end())
170            } else {
171                (remaining, 0)
172            };
173
174            if this > 0 {
175                ram.push(MemoryRangeWithNode {
176                    range: MemoryRange::new(last_end..last_end + this),
177                    vnode: 0,
178                });
179            }
180            remaining -= this;
181            last_end = next_end;
182        }
183
184        Self::build(
185            ram,
186            mmio_gaps.to_vec(),
187            pci_ecam_gaps.to_vec(),
188            pci_mmio_gaps.to_vec(),
189            vtl2_range,
190        )
191    }
192
193    /// Makes a new memory layout for a guest with the given mmio gaps and
194    /// memory ranges.
195    ///
196    /// `memory` and `gaps` ranges must be in sorted order and non-overlapping,
197    /// and describe page aligned ranges.
198    pub fn new_from_ranges(
199        memory: &[MemoryRangeWithNode],
200        gaps: &[MemoryRange],
201    ) -> Result<Self, Error> {
202        validate_ranges_with_metadata(memory)?;
203        validate_ranges(gaps)?;
204        Self::build(memory.to_vec(), gaps.to_vec(), vec![], vec![], None)
205    }
206
207    /// Builds the memory layout.
208    ///
209    /// `ram` must already be known to be sorted.
210    fn build(
211        ram: Vec<MemoryRangeWithNode>,
212        mmio: Vec<MemoryRange>,
213        pci_ecam: Vec<MemoryRange>,
214        pci_mmio: Vec<MemoryRange>,
215        vtl2_range: Option<MemoryRange>,
216    ) -> Result<Self, Error> {
217        let mut all_ranges = ram
218            .iter()
219            .map(|x| &x.range)
220            .chain(&mmio)
221            .chain(&vtl2_range)
222            .chain(&pci_ecam)
223            .chain(&pci_mmio)
224            .copied()
225            .collect::<Vec<_>>();
226
227        all_ranges.sort();
228        validate_ranges(&all_ranges)?;
229
230        if all_ranges
231            .iter()
232            .zip(all_ranges.iter().skip(1))
233            .any(|(x, y)| x.overlaps(y))
234        {
235            return Err(Error::BadMemoryRanges);
236        }
237
238        let last_ram_entry = ram.last().ok_or(Error::BadMemoryRanges)?;
239        let end_of_ram = last_ram_entry.range.end();
240
241        if let Some(range) = vtl2_range {
242            if range.start() < end_of_ram {
243                return Err(Error::Vtl2RangeBeforeEndOfRam);
244            }
245        }
246
247        Ok(Self {
248            ram,
249            mmio,
250            pci_ecam,
251            pci_mmio,
252            vtl2_range,
253        })
254    }
255
256    /// The MMIO gap ranges.
257    pub fn mmio(&self) -> &[MemoryRange] {
258        &self.mmio
259    }
260
261    /// The populated RAM ranges. This does not include the vtl2_range.
262    pub fn ram(&self) -> &[MemoryRangeWithNode] {
263        &self.ram
264    }
265
    /// A special memory range for VTL2, if any. This memory range is treated
    /// like RAM, but is only used to hold VTL2 and is located above ram and
    /// mmio.
    ///
    /// Not included in [`Self::ram`] or the RAM statistics on this type.
    pub fn vtl2_range(&self) -> Option<MemoryRange> {
        self.vtl2_range
    }
272
273    /// The total RAM size in bytes. This is not contiguous.
274    pub fn ram_size(&self) -> u64 {
275        self.ram.iter().map(|r| r.range.len()).sum()
276    }
277
278    /// One past the last byte of RAM.
279    pub fn end_of_ram(&self) -> u64 {
280        // always at least one RAM range
281        self.ram.last().expect("mmio set").range.end()
282    }
283
284    /// The bytes of RAM below 4GB.
285    pub fn ram_below_4gb(&self) -> u64 {
286        self.ram
287            .iter()
288            .filter(|r| r.range.end() < FOUR_GB)
289            .map(|r| r.range.len())
290            .sum()
291    }
292
293    /// The bytes of RAM at or above 4GB.
294    pub fn ram_above_4gb(&self) -> u64 {
295        self.ram
296            .iter()
297            .filter(|r| r.range.end() >= FOUR_GB)
298            .map(|r| r.range.len())
299            .sum()
300    }
301
302    /// The bytes of RAM above the high MMIO gap.
303    ///
304    /// Returns None if there aren't exactly 2 MMIO gaps.
305    pub fn ram_above_high_mmio(&self) -> Option<u64> {
306        if self.mmio.len() != 2 {
307            return None;
308        }
309
310        Some(
311            self.ram
312                .iter()
313                .filter(|r| r.range.start() >= self.mmio[1].end())
314                .map(|r| r.range.len())
315                .sum(),
316        )
317    }
318
319    /// The ending RAM address below 4GB.
320    ///
321    /// Returns None if there is no RAM mapped below 4GB.
322    pub fn max_ram_below_4gb(&self) -> Option<u64> {
323        Some(
324            self.ram
325                .iter()
326                .rev()
327                .find(|r| r.range.end() < FOUR_GB)?
328                .range
329                .end(),
330        )
331    }
332
333    /// One past the last byte of RAM, MMIO, PCI ECAM, or PCI MMIO.
334    pub fn end_of_layout(&self) -> u64 {
335        [
336            self.mmio.last().expect("mmio set").end(),
337            self.end_of_ram(),
338            self.pci_ecam.last().map(|r| r.end()).unwrap_or(0),
339            self.pci_mmio.last().map(|r| r.end()).unwrap_or(0),
340        ]
341        .into_iter()
342        .max()
343        .unwrap()
344    }
345
346    /// Probe a given address to see if it is in the memory layout described by
347    /// `self`. Returns the [`AddressType`] of the address if it is in the
348    /// layout.
349    ///
350    /// This does not check the vtl2_range.
351    pub fn probe_address(&self, address: u64) -> Option<AddressType> {
352        let ranges = self
353            .ram
354            .iter()
355            .map(|r| (&r.range, AddressType::Ram))
356            .chain(self.mmio.iter().map(|r| (r, AddressType::Mmio)))
357            .chain(self.pci_ecam.iter().map(|r| (r, AddressType::PciEcam)))
358            .chain(self.pci_mmio.iter().map(|r| (r, AddressType::PciMmio)));
359
360        for (range, address_type) in ranges {
361            if range.contains_addr(address) {
362                return Some(address_type);
363            }
364        }
365
366        None
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    const KB: u64 = 1024;
    const MB: u64 = 1024 * KB;
    const GB: u64 = 1024 * MB;
    const TB: u64 = 1024 * GB;

    /// RAM carved around two MMIO gaps by `new` must equal the explicit
    /// range list accepted by `new_from_ranges`, and both constructions must
    /// report the same totals.
    #[test]
    fn layout() {
        let mmio = &[
            MemoryRange::new(GB..2 * GB),
            MemoryRange::new(3 * GB..4 * GB),
        ];
        let ram = &[
            MemoryRangeWithNode {
                range: MemoryRange::new(0..GB),
                vnode: 0,
            },
            MemoryRangeWithNode {
                range: MemoryRange::new(2 * GB..3 * GB),
                vnode: 0,
            },
            MemoryRangeWithNode {
                range: MemoryRange::new(4 * GB..TB + 2 * GB),
                vnode: 0,
            },
        ];

        // 1TB of RAM split around the two gaps: [0,1GB), [2GB,3GB), rest high.
        let layout = MemoryLayout::new(TB, mmio, &[], &[], None).unwrap();
        assert_eq!(
            layout.ram(),
            &[
                MemoryRangeWithNode {
                    range: MemoryRange::new(0..GB),
                    vnode: 0
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(2 * GB..3 * GB),
                    vnode: 0
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(4 * GB..TB + 2 * GB),
                    vnode: 0
                },
            ]
        );
        assert_eq!(layout.mmio(), mmio);
        assert_eq!(layout.ram_size(), TB);
        assert_eq!(layout.end_of_ram(), TB + 2 * GB);
        assert_eq!(layout.end_of_layout(), TB + 2 * GB);

        // The same layout built from explicit ranges must be identical.
        let layout = MemoryLayout::new_from_ranges(ram, mmio).unwrap();
        assert_eq!(
            layout.ram(),
            &[
                MemoryRangeWithNode {
                    range: MemoryRange::new(0..GB),
                    vnode: 0
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(2 * GB..3 * GB),
                    vnode: 0
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(4 * GB..TB + 2 * GB),
                    vnode: 0
                },
            ]
        );
        assert_eq!(layout.mmio(), mmio);
        assert_eq!(layout.ram_size(), TB);
        assert_eq!(layout.end_of_ram(), TB + 2 * GB);
        assert_eq!(layout.end_of_layout(), TB + 2 * GB);
    }

    /// Each invalid configuration must be rejected with an error.
    #[test]
    fn bad_layout() {
        // RAM size not page-aligned.
        MemoryLayout::new(TB + 1, &[], &[], &[], None).unwrap_err();
        // MMIO gaps out of order.
        let mmio = &[
            MemoryRange::new(3 * GB..4 * GB),
            MemoryRange::new(GB..2 * GB),
        ];
        MemoryLayout::new(TB, mmio, &[], &[], None).unwrap_err();

        // No RAM at all.
        MemoryLayout::new_from_ranges(&[], mmio).unwrap_err();

        // Unsorted gaps with explicit RAM.
        let ram = &[MemoryRangeWithNode {
            range: MemoryRange::new(0..GB),
            vnode: 0,
        }];
        MemoryLayout::new_from_ranges(ram, mmio).unwrap_err();

        // RAM overlapping an MMIO gap.
        let ram = &[MemoryRangeWithNode {
            range: MemoryRange::new(0..GB + MB),
            vnode: 0,
        }];
        let mmio = &[
            MemoryRange::new(GB..2 * GB),
            MemoryRange::new(3 * GB..4 * GB),
        ];
        MemoryLayout::new_from_ranges(ram, mmio).unwrap_err();

        // PCI ECAM overlapping an MMIO gap.
        let mmio = &[
            MemoryRange::new(GB..2 * GB),
            MemoryRange::new(3 * GB..4 * GB),
        ];
        let pci_ecam = &[MemoryRange::new(GB..GB + MB)];
        MemoryLayout::new(TB, mmio, pci_ecam, &[], None).unwrap_err();

        // PCI MMIO overlapping an MMIO gap.
        let mmio = &[
            MemoryRange::new(GB..2 * GB),
            MemoryRange::new(3 * GB..4 * GB),
        ];
        let pci_mmio = &[MemoryRange::new(GB..GB + MB)];
        MemoryLayout::new(TB, mmio, &[], pci_mmio, None).unwrap_err();

        // PCI ECAM overlapping PCI MMIO.
        let pci_ecam = &[MemoryRange::new(GB..GB + MB)];
        let pci_mmio = &[MemoryRange::new(GB..GB + MB)];
        MemoryLayout::new(TB, &[], pci_ecam, pci_mmio, None).unwrap_err();
    }

    /// PCI ECAM/MMIO gaps must carve RAM like MMIO gaps do, extend the end of
    /// the layout, and be reported by `probe_address`.
    #[test]
    fn pci_ranges() {
        let mmio = &[MemoryRange::new(3 * GB..4 * GB)];
        let pci_ecam = &[MemoryRange::new(2 * TB - GB..2 * TB)];
        let pci_mmio = &[
            MemoryRange::new(2 * GB..3 * GB),
            MemoryRange::new(5 * GB..6 * GB),
        ];

        let layout = MemoryLayout::new(TB, mmio, pci_ecam, pci_mmio, None).unwrap();
        assert_eq!(
            layout.ram(),
            &[
                MemoryRangeWithNode {
                    range: MemoryRange::new(0..2 * GB),
                    vnode: 0,
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(4 * GB..5 * GB),
                    vnode: 0,
                },
                MemoryRangeWithNode {
                    range: MemoryRange::new(6 * GB..TB + 3 * GB),
                    vnode: 0,
                },
            ]
        );
        // The ECAM range ends at 2TB, past the end of RAM.
        assert_eq!(layout.end_of_layout(), 2 * TB);

        assert_eq!(layout.probe_address(2 * GB), Some(AddressType::PciMmio));
        assert_eq!(
            layout.probe_address(2 * GB + MB),
            Some(AddressType::PciMmio)
        );
        assert_eq!(layout.probe_address(5 * GB), Some(AddressType::PciMmio));
        assert_eq!(
            layout.probe_address(5 * GB + MB),
            Some(AddressType::PciMmio)
        );
        assert_eq!(
            layout.probe_address(2 * TB - GB),
            Some(AddressType::PciEcam)
        );
    }

    /// `probe_address` must classify RAM and MMIO addresses (including range
    /// boundaries) and return None for addresses outside the layout.
    #[test]
    fn probe_address() {
        let mmio = &[
            MemoryRange::new(GB..2 * GB),
            MemoryRange::new(3 * GB..4 * GB),
        ];
        let ram = &[
            MemoryRangeWithNode {
                range: MemoryRange::new(0..GB),
                vnode: 0,
            },
            MemoryRangeWithNode {
                range: MemoryRange::new(2 * GB..3 * GB),
                vnode: 0,
            },
            MemoryRangeWithNode {
                range: MemoryRange::new(4 * GB..TB + 2 * GB),
                vnode: 0,
            },
        ];

        let layout = MemoryLayout::new_from_ranges(ram, mmio).unwrap();

        assert_eq!(layout.probe_address(0), Some(AddressType::Ram));
        assert_eq!(layout.probe_address(256), Some(AddressType::Ram));
        assert_eq!(layout.probe_address(2 * GB), Some(AddressType::Ram));
        assert_eq!(layout.probe_address(4 * GB), Some(AddressType::Ram));
        assert_eq!(layout.probe_address(TB), Some(AddressType::Ram));
        assert_eq!(layout.probe_address(TB + 1), Some(AddressType::Ram));

        assert_eq!(layout.probe_address(GB), Some(AddressType::Mmio));
        assert_eq!(layout.probe_address(GB + 123), Some(AddressType::Mmio));
        assert_eq!(layout.probe_address(3 * GB), Some(AddressType::Mmio));

        // Past the end of RAM, nothing is mapped.
        assert_eq!(layout.probe_address(TB + 2 * GB), None);
        assert_eq!(layout.probe_address(TB + 3 * GB), None);
        assert_eq!(layout.probe_address(4 * TB), None);
    }
}
576}