Skip to main content

openhcl_boot/
sidecar.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use crate::cmdline::SidecarOptions;
5use crate::host_params::MAX_CPU_COUNT;
6use crate::host_params::MAX_NUMA_NODES;
7use crate::host_params::PartitionInfo;
8use crate::host_params::shim_params::IsolationType;
9use crate::host_params::shim_params::ShimParams;
10use crate::memory::AddressSpaceManager;
11use crate::memory::AllocationPolicy;
12use crate::memory::AllocationType;
13use sidecar_defs::SidecarNodeOutput;
14use sidecar_defs::SidecarNodeParams;
15use sidecar_defs::SidecarOutput;
16use sidecar_defs::SidecarParams;
17
/// The maximum size of a sidecar node. This is tuned to ensure that there are
/// enough Linux CPUs to manage all the sidecar VPs.
20const MAX_SIDECAR_NODE_SIZE: usize = 32;
21
22// Assert that there are enough sidecar nodes for the maximum number of CPUs, if
23// all NUMA nodes but one have one processor.
24const _: () = assert!(
25    sidecar_defs::MAX_NODES >= (MAX_NUMA_NODES - 1) + MAX_CPU_COUNT.div_ceil(MAX_SIDECAR_NODE_SIZE)
26);
27
28pub struct SidecarConfig<'a> {
29    pub num_cpus: usize,
30    pub per_cpu_state: &'a sidecar_defs::PerCpuState,
31    pub node_params: &'a [SidecarNodeParams],
32    pub nodes: &'a [SidecarNodeOutput],
33    pub start_reftime: u64,
34    pub end_reftime: u64,
35}
36
37impl SidecarConfig<'_> {
38    /// Returns an object to be appended to the Linux kernel command line to
39    /// configure it properly for sidecar.
40    pub fn kernel_command_line(&self) -> SidecarKernelCommandLine<'_> {
41        SidecarKernelCommandLine(self)
42    }
43}
44
45pub struct SidecarKernelCommandLine<'a>(&'a SidecarConfig<'a>);
46
47impl core::fmt::Display for SidecarKernelCommandLine<'_> {
48    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
49        // Generate boot_cpus= parameter listing CPUs that Linux should start
50        // directly (all others will be managed by sidecar).
51        // When per-CPU overrides are active (servicing restore with outstanding IO),
52        // list every CPU that sidecar should NOT start.
53        // Otherwise, list just the base VP of each sidecar node (default behavior).
54        f.write_str("boot_cpus=")?;
55        let mut comma = "";
56        if self.0.per_cpu_state.per_cpu_state_specified {
57            for (i, &starts) in self.0.per_cpu_state.sidecar_starts_cpu[..self.0.num_cpus]
58                .iter()
59                .enumerate()
60            {
61                if !starts {
62                    write!(f, "{comma}{i}")?;
63                    comma = ",";
64                }
65            }
66        } else {
67            for node in self.0.node_params {
68                write!(f, "{comma}{}", node.base_vp)?;
69                comma = ",";
70            }
71        }
72        Ok(())
73    }
74}
75
76pub fn start_sidecar<'a>(
77    p: &ShimParams,
78    partition_info: &PartitionInfo,
79    address_space: &mut AddressSpaceManager,
80    sidecar_params: &'a mut SidecarParams,
81    sidecar_output: &'a mut SidecarOutput,
82) -> Option<SidecarConfig<'a>> {
83    if !cfg!(target_arch = "x86_64") || p.isolation_type != IsolationType::None {
84        return None;
85    }
86
87    if p.sidecar_size == 0 {
88        log::info!("sidecar: not present in image");
89        return None;
90    }
91
92    match partition_info.boot_options.sidecar {
93        SidecarOptions::DisabledCommandLine => {
94            log::info!("sidecar: disabled via command line");
95            return None;
96        }
97        SidecarOptions::DisabledServicing => {
98            log::info!("sidecar: disabled because this is a servicing restore");
99            return None;
100        }
101        SidecarOptions::Enabled { enable_logging, .. } => {
102            sidecar_params.enable_logging = enable_logging;
103        }
104    }
105
106    // Ensure the host didn't provide an out-of-bounds NUMA node.
107    let max_vnode = partition_info
108        .cpus
109        .iter()
110        .map(|cpu| cpu.vnode)
111        .chain(partition_info.vtl2_ram.iter().map(|e| e.vnode))
112        .max()
113        .unwrap();
114
115    if max_vnode >= MAX_NUMA_NODES as u32 {
116        log::warn!("sidecar: NUMA node {max_vnode} too large");
117        return None;
118    }
119
120    #[cfg(target_arch = "x86_64")]
121    if !x86defs::cpuid::VersionAndFeaturesEcx::from(
122        safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VersionAndFeatures.0, 0).ecx,
123    )
124    .x2_apic()
125    {
126        // Currently, sidecar needs x2apic to communicate with the kernel
127        log::warn!("sidecar: x2apic not available; not using sidecar");
128        return None;
129    }
130
131    // Split the CPUs by NUMA node, and then into chunks of no more than
132    // MAX_SIDECAR_NODE_SIZE processors.
133    let cpus_by_node = || {
134        partition_info
135            .cpus
136            .chunk_by(|a, b| a.vnode == b.vnode)
137            .flat_map(|cpus| {
138                let chunks = cpus.len().div_ceil(MAX_SIDECAR_NODE_SIZE);
139                cpus.chunks(cpus.len().div_ceil(chunks))
140            })
141    };
142    if cpus_by_node().all(|cpus_by_node| cpus_by_node.len() == 1) {
143        log::info!("sidecar: all NUMA nodes have one CPU");
144        return None;
145    }
146    let node_count = cpus_by_node().count();
147
148    let mut total_ram;
149    {
150        let SidecarParams {
151            hypercall_page,
152            enable_logging: _,
153            node_count,
154            nodes,
155            initial_state,
156        } = sidecar_params;
157
158        *hypercall_page = 0;
159        #[cfg(target_arch = "x86_64")]
160        {
161            *hypercall_page = crate::hypercall::hvcall().hypercall_page();
162        }
163
164        let mut base_vp = 0;
165        total_ram = 0;
166        *initial_state = partition_info.sidecar_cpu_overrides.clone();
167        for (cpus, node) in cpus_by_node().zip(nodes) {
168            let required_ram = sidecar_defs::required_memory(cpus.len() as u32) as u64;
169            // Take some VTL2 RAM for sidecar use. Try to use the same NUMA node
170            // as the first CPU.
171            let local_vnode = cpus[0].vnode as usize;
172
173            let mem = match address_space.allocate(
174                Some(local_vnode as u32),
175                required_ram,
176                AllocationType::SidecarNode,
177                AllocationPolicy::LowMemory,
178            ) {
179                Some(mem) => mem,
180                None => {
181                    // Fallback to no numa requirement.
182                    match address_space.allocate(
183                        None,
184                        required_ram,
185                        AllocationType::SidecarNode,
186                        AllocationPolicy::LowMemory,
187                    ) {
188                        Some(mem) => {
189                            log::warn!(
190                                "sidecar: unable to allocate memory for sidecar node on node {local_vnode}, falling back to node {}",
191                                mem.vnode
192                            );
193                            mem
194                        }
195                        None => {
196                            log::warn!("sidecar: not enough memory for sidecar");
197                            return None;
198                        }
199                    }
200                }
201            };
202
203            *node = SidecarNodeParams {
204                memory_base: mem.range.start(),
205                memory_size: mem.range.len(),
206                base_vp,
207                vp_count: cpus.len() as u32,
208            };
209            if initial_state.per_cpu_state_specified {
210                // If per-CPU state is specified, make sure to explicitly state that
211                // sidecar should not start the base vp of this node.
212                // The code that set per_cpu_state_specified should have already ensured that
213                // the array is large enough for any `base_vp` we might have here.
214                initial_state.sidecar_starts_cpu[base_vp as usize] = false;
215                log::info!(
216                    "sidecar: per_cpu_state_specified=true, marking base_vp={} as kernel-started",
217                    base_vp
218                );
219            }
220            base_vp += cpus.len() as u32;
221            *node_count += 1;
222            total_ram += required_ram;
223        }
224    }
225
226    // SAFETY: the parameter blob is trusted.
227    let sidecar_entry: extern "C" fn(&SidecarParams, &mut SidecarOutput) -> bool =
228        unsafe { core::mem::transmute(p.sidecar_entry_address) };
229
230    let boot_start_reftime = minimal_rt::reftime::reference_time();
231    log::info!(
232        "sidecar starting, {} nodes, {} cpus, {:#x} total bytes",
233        node_count,
234        partition_info.cpus.len(),
235        total_ram
236    );
237    if !sidecar_entry(sidecar_params, sidecar_output) {
238        panic!(
239            "failed to start sidecar: {}",
240            core::str::from_utf8(&sidecar_output.error.buf[..sidecar_output.error.len as usize])
241                .unwrap()
242        );
243    }
244    let boot_end_reftime = minimal_rt::reftime::reference_time();
245
246    let SidecarOutput { nodes, error: _ } = sidecar_output;
247    Some(SidecarConfig {
248        num_cpus: partition_info.cpus.len(),
249        start_reftime: boot_start_reftime,
250        end_reftime: boot_end_reftime,
251        node_params: &sidecar_params.nodes[..node_count],
252        nodes: &nodes[..node_count],
253        per_cpu_state: &sidecar_params.initial_state,
254    })
255}