inspect/initiate/
natural_sort.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use core::cmp::Ordering;
5
6/// Compares `a` and `b` with a natural ordering, where numbers within the
7/// strings compare as numbers instead of strings.
8///
9/// For example, foo3 < foo100.
10///
11/// This implementation supports decimal unsigned integers and hexadecimal
12/// unsigned integers prefixed by "0x".
13///
14/// Internal numbers with a alphabetic suffix are treated as strings.
15///
16/// So 3foo > 100foo. But 3_foo < 100_foo.
17///
18/// This is done to avoid parsing non-prefixed hexadecimal data as decimal data,
19/// which can confuse the ordering of GUIDs and similar data.
20pub fn compare(a: impl AsRef<str>, b: impl AsRef<str>) -> Ordering {
21    SegmentIter(a.as_ref().as_bytes()).cmp(SegmentIter(b.as_ref().as_bytes()))
22}
23
24struct SegmentIter<'a>(&'a [u8]);
25
26#[derive(PartialOrd, Ord, PartialEq, Eq)]
27enum Segment<'a> {
28    Dec(u64),
29    Hex(u64),
30    Str(&'a [u8]),
31}
32
33impl<'a> Iterator for SegmentIter<'a> {
34    type Item = Segment<'a>;
35
36    fn next(&mut self) -> Option<Self::Item> {
37        let len = match self.0 {
38            [] => return None,
39            [b'0', b'x', b, ..] if b.is_ascii_hexdigit() => {
40                let (n, len) = parse_prefix(&self.0[2..], 16);
41                if let Some(n) = n {
42                    self.0 = &self.0[2 + len..];
43                    return Some(Segment::Hex(n));
44                } else {
45                    2 + len
46                }
47            }
48            [b'0'..=b'9', ..] => {
49                let (n, len) = parse_prefix(self.0, 16);
50                if let Some(n) = n {
51                    self.0 = &self.0[len..];
52                    return Some(Segment::Dec(n));
53                } else {
54                    len
55                }
56            }
57            _ => {
58                // Skip to the next digit.
59                self.0
60                    .iter()
61                    .position(|b| b.is_ascii_digit())
62                    .unwrap_or(self.0.len())
63            }
64        };
65
66        assert_ne!(len, 0);
67
68        let (seg, rest) = self.0.split_at(len);
69        self.0 = rest;
70        Some(Segment::Str(seg))
71    }
72}
73
74fn parse_prefix(v: &[u8], base: u32) -> (Option<u64>, usize) {
75    let mut n = 0u64;
76    let mut i = 0;
77    while let Some(&d) = v.get(i) {
78        let d = match d {
79            b'0'..=b'9' => d - b'0',
80            b'a'..=b'f' if base == 16 => d - b'a' + 10,
81            b'A'..=b'F' if base == 16 => d - b'A' + 10,
82            x if x.is_ascii_alphabetic() => return (None, i),
83            _ => break,
84        };
85        if let Some(m) = n.checked_mul(base.into()) {
86            n = m + d as u64;
87        } else {
88            break;
89        }
90        i += 1;
91    }
92    (Some(n), i)
93}
94
95#[cfg(test)]
96mod tests {
97    use super::compare;
98    use alloc::vec;
99
100    #[test]
101    fn test_natural_sort() {
102        let mut x = vec![
103            "foo", "foo299", "foo3", "bar_0x5", "bar_0x0f", "bar_0xg", "100foo", "100_foo",
104            "3_foo", "3foo",
105        ];
106        x.sort_by(|x, y| compare(x, y));
107        assert_eq!(
108            x.as_slice(),
109            &[
110                "3_foo", "100_foo", "100foo", "3foo", "bar_0x5", "bar_0x0f", "bar_0xg", "foo",
111                "foo3", "foo299"
112            ]
113        );
114    }
115}