sidecar/arch/x86_64/
init.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Sidecar initialization code. This code runs once, on the BSP, before the
5//! main kernel boots.
6
7use super::AFTER_INIT;
8use super::CommandErrorWriter;
9use super::ENABLE_LOG;
10use super::VSM_CAPABILITIES;
11use super::VTL_RETURN_OFFSET;
12use super::VpGlobals;
13use super::addr_space;
14use super::temporary_map;
15use crate::arch::x86_64::get_hv_vp_register;
16use crate::arch::x86_64::hypercall;
17use crate::arch::x86_64::log;
18use arrayvec::ArrayVec;
19use core::fmt::Display;
20use core::fmt::Write;
21use core::hint::spin_loop;
22use core::mem::MaybeUninit;
23use core::ptr::addr_of;
24use core::ptr::addr_of_mut;
25use core::sync::atomic::AtomicU32;
26use core::sync::atomic::Ordering::Acquire;
27use core::sync::atomic::Ordering::Relaxed;
28use core::sync::atomic::Ordering::Release;
29use hvdef::HvError;
30use hvdef::HvRegisterVsmCodePageOffsets;
31use hvdef::HvX64RegisterName;
32use hvdef::HvX64SegmentRegister;
33use hvdef::HypercallCode;
34use hvdef::hypercall::EnableVpVtlX64;
35use hvdef::hypercall::HvInputVtl;
36use hvdef::hypercall::StartVirtualProcessorX64;
37use memory_range::AlignedSubranges;
38use memory_range::MemoryRange;
39use minimal_rt::arch::hypercall::HYPERCALL_PAGE;
40use minimal_rt::enlightened_panic;
41use sidecar_defs::ControlPage;
42use sidecar_defs::CpuStatus;
43use sidecar_defs::PAGE_SIZE;
44use sidecar_defs::PER_VP_PAGES;
45use sidecar_defs::PER_VP_SHMEM_PAGES;
46use sidecar_defs::SidecarNodeOutput;
47use sidecar_defs::SidecarNodeParams;
48use sidecar_defs::SidecarOutput;
49use sidecar_defs::SidecarParams;
50use sidecar_defs::required_memory;
51use x86defs::Exception;
52use x86defs::GdtEntry;
53use x86defs::IdtAttributes;
54use x86defs::IdtEntry64;
55use x86defs::Pte;
56use zerocopy::FromZeros;
57
// Symbols provided by the assembly stubs and linker script.
unsafe extern "C" {
    /// Page directory entry covering the sidecar image; copied into each AP's
    /// page directory (entry 0) so all APs share the image mapping.
    static IMAGE_PDE: Pte;
    /// IDT entry point for the sidecar request IRQ vector.
    fn irq_entry();
    /// IDT entry point for general protection faults.
    fn exc_gpf();
    /// IDT entry point for page faults.
    fn exc_pf();
}
64
/// The GDT shared by all sidecar CPUs: two null entries, then a flat 64-bit
/// code segment at selector 0x10 (`2 * 8`) and a flat data segment at selector
/// 0x18 (`3 * 8`). These selector values match the ones programmed into the
/// IDT entries and the APs' initial VP context below.
static GDT: [GdtEntry; 4] = {
    let default_data_attributes = x86defs::X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES.as_bits();
    let default_code_attributes = x86defs::X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES.as_bits();
    // An all-zero (null) descriptor, also used as the base for the real ones.
    let zero = GdtEntry {
        limit_low: 0,
        base_low: 0,
        base_middle: 0,
        attr_low: 0,
        attr_high: 0,
        base_high: 0,
    };

    [
        zero,
        zero,
        // Code segment (selector 0x10).
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_code_attributes as u8,
            attr_high: (default_code_attributes >> 8) as u8,
            ..zero
        },
        // Data segment (selector 0x18).
        GdtEntry {
            limit_low: 0xffff,
            attr_low: default_data_attributes as u8,
            attr_high: (default_data_attributes >> 8) as u8,
            ..zero
        },
    ]
};
94
95const IRQ: u8 = 0x20;
96
/// The IDT shared by all sidecar CPUs. Zero-initialized here; the page fault,
/// general protection fault, and IRQ entries are filled in by `init` before
/// any AP starts.
static mut IDT: [IdtEntry64; IRQ as usize + 1] = {
    let zero = IdtEntry64 {
        offset_low: 0,
        selector: 0,
        attributes: IdtAttributes::new(),
        offset_middle: 0,
        offset_high: 0,
        reserved: 0,
    };
    [zero; IRQ as usize + 1]
};
108
/// Errors that can occur during BSP-side initialization (`init`).
enum InitError {
    /// A node was given less memory than `required_memory` reports it needs.
    RequiredMemory { required: u64, actual: u64 },
    /// Failed to read the `VsmCodePageOffsets` VP register.
    GetVsmCodePageOffset(HvError),
    /// Failed to read the `VsmCapabilities` VP register.
    GetVsmCapabilities(HvError),
}
114
115impl Display for InitError {
116    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
117        match self {
118            InitError::RequiredMemory { required, actual } => {
119                write!(
120                    f,
121                    "failed to provide required memory: {:#x}, actual: {:#x}",
122                    required, actual
123                )
124            }
125            InitError::GetVsmCodePageOffset(err) => {
126                write!(f, "failed to get vsm code page offset: {err}")
127            }
128            InitError::GetVsmCapabilities(err) => {
129                write!(f, "failed to get vsm capabilities: {err}")
130            }
131        }
132    }
133}
134
/// Errors that can occur while starting an individual AP
/// (`NodeDefinition::start`).
enum InitVpError {
    /// The `HvCallEnableVpVtl` hypercall failed.
    EnableVtl2(HvError),
    /// The `HvCallStartVirtualProcessor` hypercall failed.
    StartVp(HvError),
}
139
140impl Display for InitVpError {
141    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
142        match self {
143            InitVpError::EnableVtl2(err) => write!(f, "failed to enable vtl2: {err}"),
144            InitVpError::StartVp(err) => write!(f, "failed to start vp: {err}"),
145        }
146    }
147}
148
/// BSP entry point from entry.S. Called with BSS, stack, and page tables
/// initialized, and relocations applied.
///
/// `params` and `output` are the guest physical addresses of the
/// [`SidecarParams`] input page and [`SidecarOutput`] output page. Returns
/// `true` on success; on failure, writes the error text into `output.error`
/// and returns `false`.
#[cfg_attr(not(minimal_rt), expect(dead_code))]
pub extern "C" fn start(params: u64, output: u64) -> bool {
    enlightened_panic::enable_enlightened_panic();

    // Claim three distinct temporary-map slots: one for the params page, one
    // for the output page, and a scratch mapper handed down to `init`.
    let [mut params_mapper, mut output_mapper, mut temp_mapper] = [0, 1, 2].map(|i| {
        // SAFETY: no concurrent accessors to the same index.
        unsafe { temporary_map::Mapper::new(i) }
    });
    // SAFETY: The page is not being concurrently accessed, and it has no
    // invariant requirements.
    let params = unsafe { params_mapper.map::<SidecarParams>(params) };
    // SAFETY: The page is not being concurrently accessed, and it has no
    // invariant requirements.
    let mut output = unsafe { output_mapper.map::<SidecarOutput>(output) };
    match init(&mut temp_mapper, &params, &mut output) {
        Ok(()) => {
            // Release so that all initialization writes are visible to any
            // thread that observes AFTER_INIT as true.
            AFTER_INIT.store(true, Release);
            true
        }
        Err(err) => {
            // Report the failure text back to the loader via the output page.
            let _ = write!(CommandErrorWriter(&mut output.error), "{err}");
            false
        }
    }
}
176
177/// Called on the BSP to initialize all the APs.
178fn init(
179    mapper: &mut temporary_map::Mapper,
180    params: &SidecarParams,
181    output: &mut SidecarOutput,
182) -> Result<(), InitError> {
183    let &SidecarParams {
184        hypercall_page,
185        enable_logging,
186        node_count,
187        ref nodes,
188    } = params;
189
190    ENABLE_LOG.store(enable_logging, Relaxed);
191    let nodes = &nodes[..node_count as usize];
192
193    // Copy the hypercall page locally since the main kernel will move it after
194    // this function returns.
195    {
196        // SAFETY: The page is not being concurrently accessed, and it has
197        // no invariant requirements.
198        let hypercall_page = unsafe { mapper.map::<[u8; 4096]>(hypercall_page) };
199        // SAFETY: no concurrent accessors to the page.
200        unsafe { (&raw mut HYPERCALL_PAGE).copy_from_nonoverlapping(&*hypercall_page, 1) };
201    }
202
203    // Initialize the IDT.
204    {
205        // SAFETY: no concurrent accessors.
206        let idt = unsafe { &mut *addr_of_mut!(IDT) };
207
208        let offset = exc_pf as usize as u64;
209        idt[Exception::PAGE_FAULT.0 as usize] = IdtEntry64 {
210            offset_low: offset as u16,
211            selector: 2 * 8,
212            attributes: IdtAttributes::new().with_present(false).with_gate_type(0xf),
213            offset_middle: (offset >> 16) as u16,
214            offset_high: (offset >> 32) as u32,
215            reserved: 0,
216        };
217
218        let offset = exc_gpf as usize as u64;
219        idt[Exception::GENERAL_PROTECTION_FAULT.0 as usize] = IdtEntry64 {
220            offset_low: offset as u16,
221            selector: 2 * 8,
222            attributes: IdtAttributes::new().with_present(false).with_gate_type(0xf),
223            offset_middle: (offset >> 16) as u16,
224            offset_high: (offset >> 32) as u32,
225            reserved: 0,
226        };
227
228        let offset = irq_entry as usize as u64;
229        idt[IRQ as usize] = IdtEntry64 {
230            offset_low: offset as u16,
231            selector: 2 * 8,
232            attributes: IdtAttributes::new().with_present(true).with_gate_type(0xe),
233            offset_middle: (offset >> 16) as u16,
234            offset_high: (offset >> 32) as u32,
235            reserved: 0,
236        };
237    }
238
239    // Get the byte offset in the hypercall page of the VTL return function.
240    {
241        let value = HvRegisterVsmCodePageOffsets::from(
242            get_hv_vp_register(
243                HvInputVtl::CURRENT_VTL,
244                HvX64RegisterName::VsmCodePageOffsets.into(),
245            )
246            .map_err(InitError::GetVsmCodePageOffset)?
247            .as_u64(),
248        );
249        // SAFETY: no concurrent accessors.
250        unsafe { VTL_RETURN_OFFSET = value.return_offset() }
251    }
252
253    // Get the reported VSM capabilities.
254    {
255        let value = get_hv_vp_register(
256            HvInputVtl::CURRENT_VTL,
257            HvX64RegisterName::VsmCapabilities.into(),
258        )
259        .map_err(InitError::GetVsmCapabilities)?;
260        // SAFETY: no concurrent accessors.
261        unsafe { VSM_CAPABILITIES = value.as_u64().into() }
262    }
263
264    // SAFETY: no concurrent accesses yet.
265    let node_init = unsafe { &mut *addr_of_mut!(NODE_INIT) };
266
267    // Process each node, building the `node_init` array.
268    for (node_index, (node, node_output)) in nodes.iter().zip(&mut output.nodes).enumerate() {
269        let &SidecarNodeParams {
270            memory_base,
271            memory_size,
272            base_vp,
273            vp_count,
274        } = node;
275        let memory = MemoryRange::new(memory_base..memory_base + memory_size);
276
277        log!("node {node_index}: {vp_count} VPs starting at VP {base_vp}, memory {memory}");
278
279        let required = required_memory(vp_count) as u64;
280        if memory_size < required {
281            return Err(InitError::RequiredMemory {
282                required,
283                actual: memory_size,
284            });
285        }
286
287        let (control_page_range, memory) = memory.split_at_offset(PAGE_SIZE as u64);
288        let (shmem_pages, memory) =
289            memory.split_at_offset(vp_count as u64 * PER_VP_SHMEM_PAGES as u64 * PAGE_SIZE as u64);
290
291        *node_output = SidecarNodeOutput {
292            control_page: control_page_range.start(),
293            shmem_pages_base: shmem_pages.start(),
294            shmem_pages_size: shmem_pages.len(),
295        };
296
297        // Initialize the control page.
298        {
299            // SAFETY: The page is not being concurrently accessed, and it has
300            // no invariant requirements.
301            let mut control = unsafe { mapper.map::<ControlPage>(control_page_range.start()) };
302            let ControlPage {
303                index,
304                base_cpu,
305                cpu_count,
306                request_vector,
307                response_cpu,
308                response_vector,
309                needs_attention,
310                reserved,
311                cpu_status,
312            } = &mut *control;
313            *index = (node_index as u32).into();
314            *base_cpu = base_vp.into();
315            *cpu_count = vp_count.into();
316            *request_vector = (IRQ as u32).into();
317            *response_cpu = 0.into();
318            *response_vector = 0.into();
319            *needs_attention = 0.into();
320            reserved.fill(0);
321            cpu_status[0] = CpuStatus::REMOVED.0.into();
322            cpu_status[1..vp_count as usize].fill_with(|| CpuStatus::RUN.0.into());
323            cpu_status[vp_count as usize..].fill_with(|| CpuStatus::REMOVED.0.into());
324        }
325
326        node_init.push(NodeInit {
327            node: NodeDefinition {
328                base_vp,
329                vp_count,
330                control_page_pa: control_page_range.start(),
331                shmem_pages,
332                memory,
333            },
334            next_vp: AtomicU32::new(1), // skip the base VP in each node
335        });
336    }
337
338    // Downgrade the node init array to immutable, then start booting the APs.
339    // Each AP that boots will then start helping boot additional APs.
340    //
341    // SAFETY: no concurrent mutators.
342    let node_init = unsafe { &*addr_of!(NODE_INIT) };
343    start_aps(node_init, mapper);
344
345    // Wait for all the APs to finish starting.
346    {
347        for (node, output) in nodes.iter().zip(&output.nodes) {
348            // SAFETY: The page is not being concurrently accessed, and it has
349            // no invariant requirements.
350            let control = unsafe { mapper.map::<ControlPage>(output.control_page) };
351            for status in &control.cpu_status[0..node.vp_count as usize] {
352                while status.load(Acquire) == CpuStatus::RUN.0 {
353                    spin_loop();
354                }
355            }
356        }
357    }
358
359    Ok(())
360}
361
/// Per-node AP boot state: the node's definition plus a shared counter of the
/// next node-relative VP index to start.
struct NodeInit {
    node: NodeDefinition,
    // Claimed via fetch_add by each CPU running `start_aps`, so the BSP and
    // already-started APs can boot the remaining APs concurrently.
    next_vp: AtomicU32,
}

/// Populated by `init` on the BSP, then read immutably by every CPU running
/// `start_aps`.
static mut NODE_INIT: ArrayVec<NodeInit, { sidecar_defs::MAX_NODES }> = ArrayVec::new_const();
368
369fn start_aps(node_init: &[NodeInit], mapper: &mut temporary_map::Mapper) {
370    for node in node_init {
371        loop {
372            let node_cpu_index = node.next_vp.fetch_add(1, Relaxed);
373            assert!(node_cpu_index != u32::MAX);
374            if node_cpu_index >= node.node.vp_count {
375                break;
376            }
377            match node.node.start(mapper, node_cpu_index) {
378                Ok(()) => {}
379                Err(err) => {
380                    panic!(
381                        "failed to start VP {}: {}",
382                        node.node.base_vp + node_cpu_index,
383                        err
384                    );
385                }
386            }
387        }
388    }
389}
390
/// AP entry point: each freshly started AP first helps boot any remaining
/// APs, then enters the per-VP run loop (which does not return).
///
/// # Safety
/// Must be called as an AP entry point.
unsafe fn ap_init() -> ! {
    // Start any other pending APs.
    {
        // SAFETY: `NODE_INIT` is set before this routine is called.
        let node_init = unsafe { &*addr_of!(NODE_INIT) };
        // SAFETY: nothing else on this CPU is using the temporary map.
        let mut mapper = unsafe { temporary_map::Mapper::new(0) };
        start_aps(node_init, &mut mapper)
    }
    // SAFETY: this is an entry point.
    unsafe { super::vp::ap_entry() }
}
405
/// Layout of one node's sidecar resources, captured by `init` and used to
/// boot each of the node's APs.
struct NodeDefinition {
    // Hypervisor VP index of the node's first VP.
    base_vp: u32,
    // Number of VPs in the node.
    vp_count: u32,
    // Physical address of the node's control page.
    control_page_pa: u64,
    // Shared-memory pages: PER_VP_SHMEM_PAGES pages per VP.
    shmem_pages: MemoryRange,
    // Private memory: PER_VP_PAGES pages per VP (page tables, globals, etc.).
    memory: MemoryRange,
}
413
impl NodeDefinition {
    /// Prepares and boots the AP with node-relative index `node_cpu_index`:
    /// builds its page tables and globals page, then enables VTL2 on the
    /// target VP and starts it via hypercalls with an initial register
    /// context entering at `ap_init`.
    fn start(
        &self,
        mapper: &mut temporary_map::Mapper,
        node_cpu_index: u32,
    ) -> Result<(), InitVpError> {
        let hv_vp_index = self.base_vp + node_cpu_index;

        // This VP's slice of the node's shared-memory pages: the command page
        // first, then the register page.
        let shmem_pages = self.shmem_pages.start()
            + node_cpu_index as u64 * PER_VP_SHMEM_PAGES as u64 * PAGE_SIZE as u64;
        let command_page_pa = shmem_pages;
        let reg_page_pa = shmem_pages + PAGE_SIZE as u64;
        // This VP's slice of the node's private memory.
        let memory_start =
            self.memory.start() + node_cpu_index as u64 * PER_VP_PAGES as u64 * PAGE_SIZE as u64;
        let memory =
            MemoryRange::new(memory_start..memory_start + PER_VP_PAGES as u64 * PAGE_SIZE as u64);

        // Turn the VP's memory into a page-at-a-time allocator; the first
        // four pages back the page-table hierarchy.
        let mut memory = AlignedSubranges::new(memory)
            .with_max_range_len(PAGE_SIZE as u64)
            .map(|r| r.start());
        let pml4_pa = memory.next().unwrap();
        let pdpt_pa = memory.next().unwrap();
        let pd_pa = memory.next().unwrap();
        let pt_pa = memory.next().unwrap();

        // A present, writable PTE pointing at the next-level table.
        let pte_table = |addr| {
            Pte::new()
                .with_address(addr)
                .with_read_write(true)
                .with_present(true)
        };

        {
            // SAFETY: The page is not being concurrently accessed, and it has no
            // invariant requirements.
            let mut pml4 = unsafe { mapper.map::<[Pte; 512]>(pml4_pa) };
            // Only the top PML4 entry (511) is populated.
            pml4[511] = pte_table(pdpt_pa);
        }
        {
            // SAFETY: The page is not being concurrently accessed, and it has no
            // invariant requirements.
            let mut pdpt = unsafe { mapper.map::<Pte>(pdpt_pa) };
            *pdpt = pte_table(pd_pa);
        }
        {
            // SAFETY: The page is not being concurrently accessed, and it has no
            // invariant requirements.
            let mut pd = unsafe { mapper.map::<[Pte; 512]>(pd_pa) };
            // Share the sidecar image mapping (PD entry 0) with this AP.
            // SAFETY: the PTE is not being concurrently modified.
            pd[0] = unsafe { IMAGE_PDE };
            pd[1] = pte_table(pt_pa);
        }
        let globals_pa = {
            // SAFETY: The page is not being concurrently accessed, and it has no
            // invariant requirements.
            let mut pt = unsafe { mapper.map::<[Pte; 512]>(pt_pa) };
            // Fill in the leaf page table (control/command/register pages and
            // remaining per-VP pages); returns the globals page's address.
            addr_space::init_ap(
                &mut pt,
                pt_pa,
                self.control_page_pa,
                command_page_pa,
                reg_page_pa,
                &mut memory,
            )
        };
        {
            // Write the AP's initial globals into its globals page.
            // SAFETY: The page is not being concurrently accessed, and it has no
            // invariant requirements.
            let mut globals = unsafe { mapper.map::<MaybeUninit<VpGlobals>>(globals_pa) };
            globals.write(VpGlobals {
                hv_vp_index,
                node_cpu_index,
                overlays_mapped: false,
                register_page_mapped: false,
            });
        }

        // Build the initial register context: 64-bit mode with paging on,
        // flat segments from the shared GDT/IDT, CR3 pointing at this VP's
        // new page tables, entering at `ap_init`.
        let cs = HvX64SegmentRegister {
            base: 0,
            limit: !0,
            selector: 2 * 8,
            attributes: x86defs::X64_DEFAULT_CODE_SEGMENT_ATTRIBUTES.into(),
        };
        let ds = HvX64SegmentRegister {
            base: 0,
            limit: !0,
            selector: 3 * 8,
            attributes: x86defs::X64_DEFAULT_DATA_SEGMENT_ATTRIBUTES.into(),
        };
        let gdtr = hvdef::HvX64TableRegister {
            base: addr_of!(GDT) as u64,
            limit: size_of_val(&GDT) as u16 - 1,
            pad: [0; 3],
        };
        let idtr = hvdef::HvX64TableRegister {
            base: addr_of!(IDT) as u64,
            // SAFETY: just getting the size
            limit: size_of_val(unsafe { &*addr_of!(IDT) }) as u16 - 1,
            pad: [0; 3],
        };
        let context = hvdef::hypercall::InitialVpContextX64 {
            rip: ap_init as usize as u64,
            rsp: addr_space::stack().end() - 8, // start unaligned to match calling convention
            rflags: x86defs::RFlags::at_reset().into(),
            cs,
            ds,
            es: ds,
            fs: ds,
            gs: ds,
            ss: ds,
            tr: HvX64SegmentRegister {
                base: 0,
                limit: 0xffff,
                selector: 0,
                attributes: x86defs::X64_BUSY_TSS_SEGMENT_ATTRIBUTES.into(),
            },
            ldtr: FromZeros::new_zeroed(),
            idtr,
            gdtr,
            efer: x86defs::X64_EFER_LMA | x86defs::X64_EFER_LME | x86defs::X64_EFER_NXE,
            cr0: x86defs::X64_CR0_PG | x86defs::X64_CR0_PE | x86defs::X64_CR0_NE,
            cr3: pml4_pa,
            cr4: x86defs::X64_CR4_PAE | x86defs::X64_CR4_MCE | x86defs::X64_CR4_FXSR,
            msr_cr_pat: x86defs::X86X_MSR_DEFAULT_PAT,
        };

        // Enable VTL2 on the target VP with the initial context.
        {
            // SAFETY: no concurrent accessors.
            let input_page = unsafe { &mut *addr_space::hypercall_input().cast() };
            let EnableVpVtlX64 {
                partition_id,
                vp_index,
                target_vtl,
                reserved,
                vp_vtl_context,
            } = input_page;

            *partition_id = hvdef::HV_PARTITION_ID_SELF;
            *vp_index = hv_vp_index;
            *target_vtl = hvdef::Vtl::Vtl2.into();
            *vp_vtl_context = context;
            *reserved = [0; 3];
        }
        match hypercall(HypercallCode::HvCallEnableVpVtl, 0) {
            // VtlAlreadyEnabled is tolerated: the VTL may have been enabled
            // previously.
            Ok(()) | Err(HvError::VtlAlreadyEnabled) => {}
            Err(err) => return Err(InitVpError::EnableVtl2(err)),
        }

        // Start the VP running in VTL2 with the same initial context.
        {
            // SAFETY: no concurrent accessors.
            let input_page = unsafe { &mut *addr_space::hypercall_input().cast() };
            let StartVirtualProcessorX64 {
                partition_id,
                vp_index,
                target_vtl,
                rsvd0,
                rsvd1,
                vp_context,
            } = input_page;

            *partition_id = hvdef::HV_PARTITION_ID_SELF;
            *vp_index = hv_vp_index;
            *target_vtl = hvdef::Vtl::Vtl2.into();
            *rsvd0 = 0;
            *rsvd1 = 0;
            *vp_context = context;
        }
        hypercall(HypercallCode::HvCallStartVirtualProcessor, 0).map_err(InitVpError::StartVp)?;

        Ok(())
    }
}