openhcl_boot/sidecar.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use crate::cmdline::SidecarOptions;
use crate::host_params::MAX_CPU_COUNT;
use crate::host_params::MAX_NUMA_NODES;
use crate::host_params::PartitionInfo;
use crate::host_params::shim_params::IsolationType;
use crate::host_params::shim_params::ShimParams;
use crate::memory::AddressSpaceManager;
use crate::memory::AllocationPolicy;
use crate::memory::AllocationType;
use sidecar_defs::SidecarNodeOutput;
use sidecar_defs::SidecarNodeParams;
use sidecar_defs::SidecarOutput;
use sidecar_defs::SidecarParams;

/// The maximum size of a sidecar node. This is tuned to ensure that there are
/// enough Linux CPUs to manage all the sidecar VPs.
const MAX_SIDECAR_NODE_SIZE: usize = 32;

// Assert that there are enough sidecar nodes for the maximum number of CPUs,
// even in the worst case where all NUMA nodes but one have a single processor.
const _: () = assert!(
    sidecar_defs::MAX_NODES >= (MAX_NUMA_NODES - 1) + MAX_CPU_COUNT.div_ceil(MAX_SIDECAR_NODE_SIZE)
);

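/// The configuration of a started sidecar kernel: the per-node parameters that
/// were passed to it, the per-node outputs it returned, and the reference
/// times captured around its boot.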
pub struct SidecarConfig<'a> {
    pub node_params: &'a [SidecarNodeParams],
    pub nodes: &'a [SidecarNodeOutput],
    pub start_reftime: u64,
    pub end_reftime: u64,
}

impl SidecarConfig<'_> {
    /// Returns an object to be appended to the Linux kernel command line to
    /// configure it properly for sidecar.
    pub fn kernel_command_line(&self) -> SidecarKernelCommandLine<'_> {
        SidecarKernelCommandLine(self)
    }
}

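/// A [`core::fmt::Display`] wrapper that emits the sidecar-related kernel
/// command line arguments for a [`SidecarConfig`].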
pub struct SidecarKernelCommandLine<'a>(&'a SidecarConfig<'a>);

impl core::fmt::Display for SidecarKernelCommandLine<'_> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Add something like boot_cpus=0,4,8,12 to the command line so that
        // Linux boots with the base VP of each sidecar node. Other CPUs will
        // be brought up by the sidecar kernel.
        f.write_str("boot_cpus=")?;
        let mut comma = "";
        for node in self.0.node_params {
            write!(f, "{}{}", comma, node.base_vp)?;
            comma = ",";
        }
        Ok(())
    }
}

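/// Starts the sidecar kernel, if it is present in the image and enabled,
/// allocating memory for each sidecar node and invoking the sidecar entry
/// point.
///
/// Returns `None` if sidecar is missing, disabled, or cannot be used on this
/// partition (for example, if x2apic is unavailable or memory allocation
/// fails).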
pub fn start_sidecar<'a>(
    p: &ShimParams,
    partition_info: &PartitionInfo,
    address_space: &mut AddressSpaceManager,
    sidecar_params: &'a mut SidecarParams,
    sidecar_output: &'a mut SidecarOutput,
) -> Option<SidecarConfig<'a>> {
    if !cfg!(target_arch = "x86_64") || p.isolation_type != IsolationType::None {
        return None;
    }

    if p.sidecar_size == 0 {
        log::info!("sidecar: not present in image");
        return None;
    }

    match partition_info.boot_options.sidecar {
        SidecarOptions::DisabledCommandLine => {
            log::info!("sidecar: disabled via command line");
            return None;
        }
        SidecarOptions::DisabledServicing => {
            log::info!("sidecar: disabled because this is a servicing restore");
            return None;
        }
        SidecarOptions::Enabled { enable_logging, .. } => {
            sidecar_params.enable_logging = enable_logging;
        }
    }

    // Ensure the host didn't provide an out-of-bounds NUMA node.
    let max_vnode = partition_info
        .cpus
        .iter()
        .map(|cpu| cpu.vnode)
        .chain(partition_info.vtl2_ram.iter().map(|e| e.vnode))
        .max()
        .unwrap();

    if max_vnode >= MAX_NUMA_NODES as u32 {
        log::warn!("sidecar: NUMA node {max_vnode} too large");
        return None;
    }

    #[cfg(target_arch = "x86_64")]
    if !x86defs::cpuid::VersionAndFeaturesEcx::from(
        safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VersionAndFeatures.0, 0).ecx,
    )
    .x2_apic()
    {
        // Currently, sidecar needs x2apic to communicate with the kernel.
        log::warn!("sidecar: x2apic not available; not using sidecar");
        return None;
    }

    // Split the CPUs by NUMA node, and then into chunks of no more than
    // MAX_SIDECAR_NODE_SIZE processors.
    let cpus_by_node = || {
        partition_info
            .cpus
            .chunk_by(|a, b| a.vnode == b.vnode)
            .flat_map(|cpus| {
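                // Split this NUMA node's CPUs into roughly equal chunks, each
                // containing at most MAX_SIDECAR_NODE_SIZE CPUs.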
                let chunks = cpus.len().div_ceil(MAX_SIDECAR_NODE_SIZE);
                cpus.chunks(cpus.len().div_ceil(chunks))
            })
    };
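    // If every chunk has exactly one CPU, then every CPU is already a Linux
    // boot CPU, so sidecar would provide no benefit.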
    if cpus_by_node().all(|cpus_by_node| cpus_by_node.len() == 1) {
        log::info!("sidecar: all NUMA nodes have one CPU");
        return None;
    }
    let node_count = cpus_by_node().count();

    let mut total_ram;
    {
        let SidecarParams {
            hypercall_page,
            enable_logging: _,
            node_count,
            nodes,
        } = sidecar_params;

        *hypercall_page = 0;
        #[cfg(target_arch = "x86_64")]
        {
            *hypercall_page = crate::hypercall::hvcall().hypercall_page();
        }

        let mut base_vp = 0;
        total_ram = 0;
        for (cpus, node) in cpus_by_node().zip(nodes) {
            let required_ram = sidecar_defs::required_memory(cpus.len() as u32) as u64;
            // Take some VTL2 RAM for sidecar use. Try to use the same NUMA node
            // as the first CPU.
            let local_vnode = cpus[0].vnode as usize;

            let mem = match address_space.allocate(
                Some(local_vnode as u32),
                required_ram,
                AllocationType::SidecarNode,
                AllocationPolicy::LowMemory,
            ) {
                Some(mem) => mem,
                None => {
                    // Fall back to allocating without a NUMA node requirement.
                    match address_space.allocate(
                        None,
                        required_ram,
                        AllocationType::SidecarNode,
                        AllocationPolicy::LowMemory,
                    ) {
                        Some(mem) => {
                            log::warn!(
                                "sidecar: unable to allocate memory for sidecar node on node {local_vnode}, falling back to node {}",
                                mem.vnode
                            );
                            mem
                        }
                        None => {
                            log::warn!("sidecar: not enough memory for sidecar");
                            return None;
                        }
                    }
                }
            };

            *node = SidecarNodeParams {
                memory_base: mem.range.start(),
                memory_size: mem.range.len(),
                base_vp,
                vp_count: cpus.len() as u32,
            };
            base_vp += cpus.len() as u32;
            *node_count += 1;
            total_ram += required_ram;
        }
    }

    // SAFETY: the parameter blob is trusted.
    let sidecar_entry: extern "C" fn(&SidecarParams, &mut SidecarOutput) -> bool =
        unsafe { core::mem::transmute(p.sidecar_entry_address) };

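    // Capture the reference time before and after starting the sidecar so the
    // elapsed boot time is available in the returned config.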
    let boot_start_reftime = minimal_rt::reftime::reference_time();
    log::info!(
        "sidecar starting, {} nodes, {} cpus, {:#x} total bytes",
        node_count,
        partition_info.cpus.len(),
        total_ram
    );
    if !sidecar_entry(sidecar_params, sidecar_output) {
        panic!(
            "failed to start sidecar: {}",
            core::str::from_utf8(&sidecar_output.error.buf[..sidecar_output.error.len as usize])
                .unwrap()
        );
    }
    let boot_end_reftime = minimal_rt::reftime::reference_time();

    let SidecarOutput { nodes, error: _ } = sidecar_output;
    Some(SidecarConfig {
        start_reftime: boot_start_reftime,
        end_reftime: boot_end_reftime,
        node_params: &sidecar_params.nodes[..node_count],
        nodes: &nodes[..node_count],
    })
}