openhcl_boot/
sidecar.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use crate::boot_logger::log;
5use crate::cmdline::SidecarOptions;
6use crate::host_params::MAX_CPU_COUNT;
7use crate::host_params::MAX_NUMA_NODES;
8use crate::host_params::PartitionInfo;
9use crate::host_params::shim_params::IsolationType;
10use crate::host_params::shim_params::ShimParams;
11use crate::memory::AddressSpaceManager;
12use crate::memory::AllocationPolicy;
13use crate::memory::AllocationType;
14use sidecar_defs::SidecarNodeOutput;
15use sidecar_defs::SidecarNodeParams;
16use sidecar_defs::SidecarOutput;
17use sidecar_defs::SidecarParams;
18
/// The maximum size of a sidecar node. This is tuned to ensure that there are
/// enough Linux CPUs to manage all the sidecar VPs.
const MAX_SIDECAR_NODE_SIZE: usize = 32;

// Assert at compile time that there are enough sidecar nodes for the maximum
// number of CPUs, even in the worst case where all NUMA nodes but one have
// one processor each (and the remaining CPUs are chunked on the last node).
const _: () = assert!(
    sidecar_defs::MAX_NODES >= (MAX_NUMA_NODES - 1) + MAX_CPU_COUNT.div_ceil(MAX_SIDECAR_NODE_SIZE)
);
28
/// The resulting sidecar configuration after the sidecar kernel has been
/// successfully started by [`start_sidecar`].
pub struct SidecarConfig<'a> {
    /// The per-node input parameters that were passed to the sidecar kernel.
    pub node_params: &'a [SidecarNodeParams],
    /// The per-node output produced by the sidecar kernel.
    pub nodes: &'a [SidecarNodeOutput],
    /// Reference time captured immediately before calling the sidecar entry
    /// point.
    pub start_reftime: u64,
    /// Reference time captured immediately after the sidecar entry point
    /// returned.
    pub end_reftime: u64,
}
35
impl SidecarConfig<'_> {
    /// Returns an object to be appended to the Linux kernel command line to
    /// configure it properly for sidecar.
    ///
    /// See [`SidecarKernelCommandLine`]'s `Display` implementation for the
    /// exact text that gets emitted.
    pub fn kernel_command_line(&self) -> SidecarKernelCommandLine<'_> {
        SidecarKernelCommandLine(self)
    }
}
43
/// A `Display` adapter that formats the sidecar-specific additions to the
/// Linux kernel command line for a [`SidecarConfig`].
pub struct SidecarKernelCommandLine<'a>(&'a SidecarConfig<'a>);
45
46impl core::fmt::Display for SidecarKernelCommandLine<'_> {
47    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48        // Add something like boot_cpus=0,4,8,12 to the command line so that
49        // Linux boots with the base VP of each sidecar node. Other CPUs will
50        // be brought up by the sidecar kernel.
51        f.write_str("boot_cpus=")?;
52        let mut comma = "";
53        for node in self.0.node_params {
54            write!(f, "{}{}", comma, node.base_vp)?;
55            comma = ",";
56        }
57        Ok(())
58    }
59}
60
/// Starts the sidecar kernel, if it is present in the image and enabled.
///
/// CPUs are partitioned by NUMA node and then into chunks of at most
/// [`MAX_SIDECAR_NODE_SIZE`] processors; VTL2 memory is allocated for each
/// chunk (preferring the chunk's own NUMA node), and control is transferred
/// to the sidecar entry point embedded in the image.
///
/// Returns `None` — with a logged reason — whenever sidecar cannot or should
/// not be used (non-x86_64, isolated partition, disabled via command line or
/// servicing, no x2apic, trivial topology, or out of memory); the caller then
/// boots Linux with all CPUs.
///
/// Panics if the sidecar entry point itself reports failure.
pub fn start_sidecar<'a>(
    p: &ShimParams,
    partition_info: &PartitionInfo,
    address_space: &mut AddressSpaceManager,
    sidecar_params: &'a mut SidecarParams,
    sidecar_output: &'a mut SidecarOutput,
) -> Option<SidecarConfig<'a>> {
    // Sidecar is x86_64-only and not supported on isolated partitions.
    if !cfg!(target_arch = "x86_64") || p.isolation_type != IsolationType::None {
        return None;
    }

    if p.sidecar_size == 0 {
        log!("sidecar: not present in image");
        return None;
    }

    match partition_info.boot_options.sidecar {
        SidecarOptions::DisabledCommandLine => {
            log!("sidecar: disabled via command line");
            return None;
        }
        SidecarOptions::DisabledServicing => {
            log!("sidecar: disabled because this is a servicing restore");
            return None;
        }
        SidecarOptions::Enabled { enable_logging, .. } => {
            sidecar_params.enable_logging = enable_logging;
        }
    }

    // Ensure the host didn't provide an out-of-bounds NUMA node.
    // NOTE(review): the unwrap assumes at least one CPU or VTL2 RAM entry is
    // always present — confirm against the host parameter parsing.
    let max_vnode = partition_info
        .cpus
        .iter()
        .map(|cpu| cpu.vnode)
        .chain(partition_info.vtl2_ram.iter().map(|e| e.vnode))
        .max()
        .unwrap();

    if max_vnode >= MAX_NUMA_NODES as u32 {
        log!("sidecar: NUMA node {max_vnode} too large");
        return None;
    }

    #[cfg(target_arch = "x86_64")]
    if !x86defs::cpuid::VersionAndFeaturesEcx::from(
        safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VersionAndFeatures.0, 0).ecx,
    )
    .x2_apic()
    {
        // Currently, sidecar needs x2apic to communicate with the kernel
        log!("sidecar: x2apic not available; not using sidecar");
        return None;
    }

    // Split the CPUs by NUMA node, and then into chunks of no more than
    // MAX_SIDECAR_NODE_SIZE processors. This is a closure (rather than a
    // stored iterator) because the chunking is iterated multiple times below.
    let cpus_by_node = || {
        partition_info
            .cpus
            .chunk_by(|a, b| a.vnode == b.vnode)
            .flat_map(|cpus| {
                // Divide each NUMA node's CPUs into evenly-sized chunks.
                let chunks = cpus.len().div_ceil(MAX_SIDECAR_NODE_SIZE);
                cpus.chunks(cpus.len().div_ceil(chunks))
            })
    };
    // If every sidecar node would have exactly one CPU, there is nothing for
    // sidecar to manage; let Linux bring up all the CPUs itself.
    if cpus_by_node().all(|cpus_by_node| cpus_by_node.len() == 1) {
        log!("sidecar: all NUMA nodes have one CPU");
        return None;
    }
    let node_count = cpus_by_node().count();

    let mut total_ram;
    {
        // Destructure so that each field is updated by name. `node_count`
        // here shadows the local above with a `&mut` into `sidecar_params`;
        // the shadowing ends with this scope.
        let SidecarParams {
            hypercall_page,
            enable_logging: _,
            node_count,
            nodes,
        } = sidecar_params;

        *hypercall_page = 0;
        #[cfg(target_arch = "x86_64")]
        {
            *hypercall_page = crate::hypercall::hvcall().hypercall_page();
        }

        // Fill in the per-node parameters, allocating memory for each node.
        let mut base_vp = 0;
        total_ram = 0;
        for (cpus, node) in cpus_by_node().zip(nodes) {
            let required_ram = sidecar_defs::required_memory(cpus.len() as u32) as u64;
            // Take some VTL2 RAM for sidecar use. Try to use the same NUMA node
            // as the first CPU.
            let local_vnode = cpus[0].vnode as usize;

            let mem = match address_space.allocate(
                Some(local_vnode as u32),
                required_ram,
                AllocationType::SidecarNode,
                AllocationPolicy::LowMemory,
            ) {
                Some(mem) => mem,
                None => {
                    // Fallback to no numa requirement.
                    match address_space.allocate(
                        None,
                        required_ram,
                        AllocationType::SidecarNode,
                        AllocationPolicy::LowMemory,
                    ) {
                        Some(mem) => {
                            log!(
                                "sidecar: unable to allocate memory for sidecar node on node {local_vnode}, falling back to node {}",
                                mem.vnode
                            );
                            mem
                        }
                        None => {
                            // Out of memory entirely; boot without sidecar.
                            log!("sidecar: not enough memory for sidecar");
                            return None;
                        }
                    }
                }
            };

            *node = SidecarNodeParams {
                memory_base: mem.range.start(),
                memory_size: mem.range.len(),
                base_vp,
                vp_count: cpus.len() as u32,
            };
            base_vp += cpus.len() as u32;
            *node_count += 1;
            total_ram += required_ram;
        }
    }

    // Reinterpret the entry address from the image as a function pointer.
    // SAFETY: the parameter blob is trusted.
    let sidecar_entry: extern "C" fn(&SidecarParams, &mut SidecarOutput) -> bool =
        unsafe { core::mem::transmute(p.sidecar_entry_address) };

    let boot_start_reftime = minimal_rt::reftime::reference_time();
    log!(
        "sidecar starting, {} nodes, {} cpus, {:#x} total bytes",
        node_count,
        partition_info.cpus.len(),
        total_ram
    );
    if !sidecar_entry(sidecar_params, sidecar_output) {
        // Entry returned failure; surface the error string it wrote back.
        panic!(
            "failed to start sidecar: {}",
            core::str::from_utf8(&sidecar_output.error.buf[..sidecar_output.error.len as usize])
                .unwrap()
        );
    }
    let boot_end_reftime = minimal_rt::reftime::reference_time();

    let SidecarOutput { nodes, error: _ } = sidecar_output;
    Some(SidecarConfig {
        start_reftime: boot_start_reftime,
        end_reftime: boot_end_reftime,
        node_params: &sidecar_params.nodes[..node_count],
        nodes: &nodes[..node_count],
    })
}
225}