#![cfg(target_os = "linux")]
mod devmsr;
cfg_if::cfg_if!(
if #[cfg(target_arch = "x86_64")] { mod cvm_cpuid;
pub use processor::snp::SnpBacked;
pub use processor::tdx::TdxBacked;
pub use crate::processor::mshv::x64::HypervisorBackedX86 as HypervisorBacked;
use crate::processor::mshv::x64::HypervisorBackedX86Shared as HypervisorBackedShared;
use bitvec::prelude::BitArray;
use bitvec::prelude::Lsb0;
use devmsr::MsrDevice;
use hv1_emulator::hv::ProcessorVtlHv;
use processor::LapicState;
use processor::snp::SnpBackedShared;
use processor::tdx::TdxBackedShared;
use std::arch::x86_64::CpuidResult;
use virt::CpuidLeaf;
use virt::state::StateElement;
use virt::vp::MpState;
type IrrBitmap = BitArray<[u32; 8], Lsb0>;
} else if #[cfg(target_arch = "aarch64")] { pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
use crate::processor::mshv::arm64::HypervisorBackedArm64Shared as HypervisorBackedShared;
use hvdef::HvArm64RegisterName;
}
);
mod processor;
pub use processor::Backing;
pub use processor::UhProcessor;
use anyhow::Context as AnyhowContext;
use bitfield_struct::bitfield;
use bitvec::boxed::BitBox;
use bitvec::vec::BitVec;
use guestmem::GuestMemory;
use hcl::GuestVtl;
use hcl::ioctl::Hcl;
use hcl::ioctl::SetVsmPartitionConfigError;
use hv1_emulator::hv::GlobalHv;
use hv1_emulator::message_queues::MessageQueues;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::SintProxied;
use hv1_structs::VtlArray;
use hvdef::GuestCrashCtl;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvAllArchRegisterName;
use hvdef::HvError;
use hvdef::HvMapGpaFlags;
use hvdef::HvRegisterName;
use hvdef::HvRegisterVsmPartitionConfig;
use hvdef::HvRegisterVsmPartitionStatus;
use hvdef::Vtl;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_EXECUTE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_NONE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_READ_WRITE;
use hvdef::hypercall::HV_INTERCEPT_ACCESS_MASK_WRITE;
use hvdef::hypercall::HostVisibilityType;
use hvdef::hypercall::HvGuestOsId;
use hvdef::hypercall::HvInputVtl;
use hvdef::hypercall::HvInterceptParameters;
use hvdef::hypercall::HvInterceptType;
use inspect::Inspect;
use inspect::InspectMut;
use memory_range::MemoryRange;
use pal::unix::affinity;
use pal::unix::affinity::CpuSet;
use pal_async::driver::Driver;
use pal_async::driver::SpawnDriver;
use pal_uring::IdleControl;
use parking_lot::Mutex;
use parking_lot::RwLock;
use processor::BackingSharedParams;
use processor::SidecarExitReason;
use sidecar_client::NewSidecarClientError;
use std::ops::RangeInclusive;
use std::os::fd::AsRawFd;
use std::sync::Arc;
use std::sync::Weak;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::task::Waker;
use thiserror::Error;
use user_driver::DmaClient;
use virt::CpuidLeafSet;
use virt::IsolationType;
use virt::PartitionCapabilities;
use virt::VpIndex;
use virt::irqcon::IoApicRouting;
use virt::irqcon::MsiRequest;
use virt::x86::apic_software_device::ApicSoftwareDevices;
use virt_support_apic::LocalApicSet;
use vm_topology::memory::MemoryLayout;
use vm_topology::processor::ProcessorTopology;
use vm_topology::processor::TargetVpInfo;
use vmcore::monitor::MonitorPage;
use vmcore::reference_time_source::ReferenceTimeSource;
use vmcore::vmtime::VmTimeSource;
use x86defs::snp::REG_TWEAK_BITMAP_OFFSET;
use x86defs::snp::REG_TWEAK_BITMAP_SIZE;
use x86defs::tdx::TdCallResult;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum Error {
#[error("hcl error")]
Hcl(#[source] hcl::ioctl::Error),
#[error("failed to open sidecar client")]
Sidecar(#[source] NewSidecarClientError),
#[error("failed to install {0:?} intercept: {1:?}")]
InstallIntercept(HvInterceptType, HvError),
#[error("failed to query hypervisor register {0:#x?}")]
Register(HvRegisterName, #[source] HvError),
#[error("failed to set vsm partition config register")]
VsmPartitionConfig(#[source] SetVsmPartitionConfigError),
#[error("failed to create virtual device")]
NewDevice(#[source] virt::x86::apic_software_device::DeviceIdInUse),
#[error("failed to create cpuid tables for cvm")]
#[cfg(guest_arch = "x86_64")]
CvmCpuid(#[source] cvm_cpuid::CpuidResultsError),
#[error("failed to update hypercall msr")]
UpdateHypercallMsr,
#[error("failed to update reference tsc msr")]
UpdateReferenceTsc,
#[error("failed to map overlay page")]
MapOverlay(#[source] std::io::Error),
#[error("failed to allocate shared visibility pages for overlay")]
AllocateSharedVisOverlay(#[source] anyhow::Error),
#[error("failed to open msr device")]
OpenMsr(#[source] std::io::Error),
#[error("cpuid did not contain valid TSC frequency information")]
BadCpuidTsc,
#[error("failed to read tsc frequency")]
ReadTscFrequency(#[source] std::io::Error),
#[error(
"tsc frequency mismatch between hypervisor ({hv}) and hardware {hw}, exceeds allowed error {allowed_error}"
)]
TscFrequencyMismatch {
hv: u64,
hw: u64,
allowed_error: u64,
},
#[error("failed to set vsm partition config: {0:?}")]
FailedToSetL2Ctls(TdCallResult),
#[error("debugging is configured but the binary does not have the gdb feature")]
InvalidDebugConfiguration,
#[error("failed to allocate TLB flush page")]
AllocateTlbFlushPage(#[source] anyhow::Error),
}
// Errors returned when revoking guest VSM support; see
// `UhPartition::revoke_guest_vsm`.
#[derive(Error, Debug)]
#[expect(missing_docs)]
pub enum RevokeGuestVsmError {
#[error("failed to set vsm config")]
SetGuestVsmConfig(#[source] hcl::ioctl::SetGuestVsmConfigError),
// Revocation is rejected once VTL 1 has been enabled.
#[error("VTL 1 is already enabled")]
Vtl1AlreadyEnabled,
}
/// Public handle to an underhill partition: the shared inner state plus the
/// per-VTL interrupt targets handed out to software devices.
#[derive(Inspect)]
pub struct UhPartition {
#[inspect(flatten)]
inner: Arc<UhPartitionInner>,
// One MSI interrupt target per VTL (0 and 1).
#[inspect(skip)]
interrupt_targets: VtlArray<Arc<UhInterruptTarget>, 2>,
}
/// Partition-wide state shared between the partition handle and every VP.
#[derive(Inspect)]
#[inspect(extra = "UhPartitionInner::inspect_extra")]
struct UhPartitionInner {
// Interface to the HCL kernel driver.
#[inspect(skip)]
hcl: Hcl,
// Per-VP shared state, indexed by VP index.
#[inspect(skip)] vps: Vec<UhVpInner>,
irq_routes: virt::irqcon::IrqRoutes,
caps: PartitionCapabilities,
// Requested enter modes; `enter_modes_atomic` mirrors the packed value so
// the run loop can read it without taking the lock (see `inspect_extra`).
#[inspect(skip)] enter_modes: Mutex<EnterModes>,
#[inspect(skip)]
enter_modes_atomic: AtomicU8,
cpuid: CpuidLeafSet,
lower_vtl_memory_layout: MemoryLayout,
// Guest memory accessors, one per VTL.
gm: VtlArray<GuestMemory, 2>,
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[inspect(skip)]
crash_notification_send: mesh::Sender<VtlCrash>,
monitor_page: MonitorPage,
software_devices: Option<ApicSoftwareDevices>,
#[inspect(skip)]
vmtime: VmTimeSource,
isolation: IsolationType,
#[inspect(with = "inspect::AtomicMut")]
no_sidecar_hotplug: AtomicBool,
use_mmio_hypercalls: bool,
backing_shared: BackingShared,
intercept_debug_exceptions: bool,
// Vectors in use by devices; consumed when building the proxy IRR filter
// (see `request_proxy_irr_filter_update` / `fill_device_vectors`).
#[cfg(guest_arch = "x86_64")]
#[inspect(with = "|x| inspect::iter_by_index(x.read().into_inner().map(inspect::AsHex))")]
device_vector_table: RwLock<IrrBitmap>,
}
/// Backing-specific partition state, selected by isolation type.
#[derive(Inspect)]
#[inspect(external_tag)]
enum BackingShared {
// Non-hardware-isolated partitions (None/Vbs) run hypervisor-backed.
Hypervisor(#[inspect(flatten)] HypervisorBackedShared),
#[cfg(guest_arch = "x86_64")]
Snp(#[inspect(flatten)] SnpBackedShared),
#[cfg(guest_arch = "x86_64")]
Tdx(#[inspect(flatten)] TdxBackedShared),
}
impl BackingShared {
/// Constructs the backing-specific shared state matching `isolation`.
fn new(
isolation: IsolationType,
partition_params: &UhPartitionNewParams<'_>,
backing_shared_params: BackingSharedParams,
) -> Result<BackingShared, Error> {
Ok(match isolation {
IsolationType::None | IsolationType::Vbs => {
// CVM state is only meaningful for hardware-isolated backings.
assert!(backing_shared_params.cvm_state.is_none());
BackingShared::Hypervisor(HypervisorBackedShared::new(
partition_params,
backing_shared_params,
)?)
}
#[cfg(guest_arch = "x86_64")]
IsolationType::Snp => BackingShared::Snp(SnpBackedShared::new(
partition_params,
backing_shared_params,
)?),
#[cfg(guest_arch = "x86_64")]
IsolationType::Tdx => BackingShared::Tdx(TdxBackedShared::new(
partition_params,
backing_shared_params,
)?),
#[cfg(not(guest_arch = "x86_64"))]
_ => unreachable!(),
})
}
/// Returns the CVM partition state; `Some` only for SNP/TDX backings.
fn cvm_state(&self) -> Option<&UhCvmPartitionState> {
match self {
BackingShared::Hypervisor(_) => None,
#[cfg(guest_arch = "x86_64")]
BackingShared::Snp(SnpBackedShared { cvm, .. })
| BackingShared::Tdx(TdxBackedShared { cvm, .. }) => Some(cvm),
}
}
/// True when guest VSM is in the `NotPlatformSupported` state for this
/// backing (hypervisor- or CVM-tracked, whichever applies).
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn guest_vsm_disabled(&self) -> bool {
match self {
BackingShared::Hypervisor(h) => {
matches!(*h.guest_vsm.read(), GuestVsmState::NotPlatformSupported)
}
#[cfg(guest_arch = "x86_64")]
BackingShared::Snp(SnpBackedShared { cvm, .. })
| BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
matches!(*cvm.guest_vsm.read(), GuestVsmState::NotPlatformSupported)
}
}
}
/// Returns the untrusted (hypervisor-visible) synic, present only on TDX
/// when configured.
fn untrusted_synic(&self) -> Option<&GlobalSynic> {
match self {
BackingShared::Hypervisor(_) => None,
#[cfg(guest_arch = "x86_64")]
BackingShared::Snp(_) => None,
#[cfg(guest_arch = "x86_64")]
BackingShared::Tdx(s) => s.untrusted_synic.as_ref(),
}
}
}
/// Pair of VTL-enter strategies: `first` is tried first, then `second`.
#[derive(InspectMut, Copy, Clone)]
struct EnterModes {
#[inspect(mut)]
first: EnterMode,
#[inspect(mut)]
second: EnterMode,
}
impl Default for EnterModes {
    /// Defaults to a fast first enter, falling back to idling to VTL0.
    fn default() -> Self {
        let (first, second) = (EnterMode::Fast, EnterMode::IdleToVtl0);
        Self { first, second }
    }
}
impl From<EnterModes> for hcl::protocol::EnterModes {
    /// Packs both modes into the HCL protocol representation.
    fn from(modes: EnterModes) -> Self {
        let packed = Self::new();
        let packed = packed.with_first(modes.first.into());
        packed.with_second(modes.second.into())
    }
}
/// Strategy for entering a lower VTL; mirrors `hcl::protocol::EnterMode`.
#[derive(InspectMut, Copy, Clone)]
enum EnterMode {
Fast,
PlayIdle,
IdleToVtl0,
}
impl From<EnterMode> for hcl::protocol::EnterMode {
    /// Maps each mode onto the corresponding HCL protocol constant.
    fn from(mode: EnterMode) -> Self {
        match mode {
            EnterMode::Fast => Self::FAST,
            EnterMode::IdleToVtl0 => Self::IDLE_TO_VTL0,
            EnterMode::PlayIdle => Self::PLAY_IDLE,
        }
    }
}
/// Per-VP state for hardware-isolated (CVM) partitions.
#[cfg(guest_arch = "x86_64")]
#[derive(Inspect)]
struct UhCvmVpState {
// DMA-allocated pages used for direct overlays.
#[inspect(debug)]
direct_overlay_handle: user_driver::memory::MemoryBlock,
// VTL that the VP last exited from.
exit_vtl: GuestVtl,
// Per-VTL hv emulator state.
hv: VtlArray<ProcessorVtlHv, 2>,
// Per-VTL local APIC state.
lapics: VtlArray<LapicState, 2>,
vtl1_enabled: bool,
}
#[cfg(guest_arch = "x86_64")]
impl UhCvmVpState {
/// Creates per-VP CVM state: allocates the direct overlay pages, adds the
/// VP's APICs to both VTLs' APIC sets, and instantiates per-VTL hv state.
pub(crate) fn new(
cvm_partition: &UhCvmPartitionState,
inner: &UhPartitionInner,
vp_info: &TargetVpInfo,
overlay_pages_required: usize,
) -> Result<Self, Error> {
// Overlay pages must be host-visible, so they come from the shared
// (host-visible) DMA client.
let direct_overlay_handle = cvm_partition
.shared_dma_client
.allocate_dma_buffer(overlay_pages_required * HV_PAGE_SIZE as usize)
.map_err(Error::AllocateSharedVisOverlay)?;
// Architectural reset value for the APIC base MSR.
let apic_base = virt::vp::Apic::at_reset(&inner.caps, vp_info).apic_base;
let lapics = VtlArray::from_fn(|vtl| {
let apic_set = &cvm_partition.lapic[vtl];
let mut lapic = apic_set.add_apic(vp_info);
lapic.set_apic_base(apic_base).unwrap();
// VTL0 APs wait for SIPI; the BSP (and VTL1 vps) start running.
let activity = if vtl == Vtl::Vtl0 && !vp_info.base.is_bsp() {
MpState::WaitForSipi
} else {
MpState::Running
};
LapicState::new(lapic, activity)
});
let hv = VtlArray::from_fn(|vtl| {
cvm_partition
.hv
.add_vp(inner.gm[vtl].clone(), vp_info.base.vp_index, vtl)
});
Ok(Self {
direct_overlay_handle,
exit_vtl: GuestVtl::Vtl0,
hv,
lapics,
vtl1_enabled: false,
})
}
}
/// Partition-wide state shared by all CVM (SNP/TDX) backings.
#[derive(Inspect)]
struct UhCvmPartitionState {
#[cfg(guest_arch = "x86_64")]
#[inspect(skip)]
cpuid: cvm_cpuid::CpuidResults,
// Per-VTL bitmap of VPs whose TLB is locked, one bit per VP.
#[inspect(
with = "|arr| inspect::iter_by_index(arr.iter()).map_value(|bb| inspect::iter_by_index(bb.iter().map(|v| *v)))"
)]
tlb_locked_vps: VtlArray<BitBox<AtomicU64>, 2>,
#[inspect(with = "inspect::iter_by_index")]
vps: Vec<UhCvmVpInner>,
// Accessor for host-visible (shared) memory.
shared_memory: GuestMemory,
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[inspect(skip)]
isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
// Emulated local APIC sets, one per VTL.
lapic: VtlArray<LocalApicSet, 2>,
// Emulated hypervisor state.
hv: GlobalHv,
guest_vsm: RwLock<GuestVsmState<CvmVtl1State>>,
// DMA clients for host-visible vs. private allocations.
shared_dma_client: Arc<dyn DmaClient>,
private_dma_client: Arc<dyn DmaClient>,
hide_isolation: bool,
}
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl UhCvmPartitionState {
    /// Returns the per-VP CVM state for `vp_index`.
    fn vp_inner(&self, vp_index: u32) -> &UhCvmVpInner {
        let idx = vp_index as usize;
        &self.vps[idx]
    }
    /// True when VTL1 is enabled and has requested that lower-VTL startup
    /// be denied.
    fn is_lower_vtl_startup_denied(&self) -> bool {
        match &*self.guest_vsm.read() {
            GuestVsmState::Enabled {
                vtl1:
                    CvmVtl1State {
                        deny_lower_vtl_startup: true,
                        ..
                    },
            } => true,
            _ => false,
        }
    }
}
/// Per-VP CVM state shared across threads.
#[derive(Inspect)]
struct UhCvmVpInner {
// Per-VTL TLB-lock bookkeeping for this VP.
tlb_lock_info: VtlArray<TlbLockInfo, 2>,
// Whether the guest has invoked the enable-VTL1 hypercall for this VP.
vtl1_enable_called: Mutex<bool>,
started: AtomicBool,
// Pending StartVp/EnableVpVtl contexts, one slot per VTL.
#[inspect(with = "|arr| inspect::iter_by_index(arr.iter().map(|v| v.lock().is_some()))")]
hv_start_enable_vtl_vp: VtlArray<Mutex<Option<Box<VpStartEnableVtl>>>, 2>,
}
/// Tri-state tracking of guest VSM: unsupported by the platform, supported
/// but not yet enabled by the guest, or enabled (with VTL1 state `T`).
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
#[derive(Inspect)]
#[inspect(tag = "guest vsm state")]
enum GuestVsmState<T: Inspect> {
NotPlatformSupported,
NotGuestEnabled,
Enabled { vtl1: T },
}
impl<T: Inspect> GuestVsmState<T> {
    /// Initial state from platform support: `NotGuestEnabled` when guest VSM
    /// is available, otherwise `NotPlatformSupported`.
    pub fn from_availability(guest_vsm_available: bool) -> Self {
        match guest_vsm_available {
            true => GuestVsmState::NotGuestEnabled,
            false => GuestVsmState::NotPlatformSupported,
        }
    }
}
/// VTL1 configuration once guest VSM has been enabled on a CVM.
#[derive(Default, Inspect)]
struct CvmVtl1State {
// Whether any VP has enabled VTL1 yet.
enabled_on_any_vp: bool,
zero_memory_on_reset: bool,
deny_lower_vtl_startup: bool,
pub mbec_enabled: bool,
pub shadow_supervisor_stack_enabled: bool,
}
/// Reference time derived from the hardware TSC.
///
/// `tsc_scale` is a 64.64 fixed-point multiplier converting TSC ticks into
/// 100ns units: `(10_000_000 << 64) / tsc_frequency`.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
struct TscReferenceTimeSource {
    tsc_scale: u64,
}
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
impl TscReferenceTimeSource {
    fn new(tsc_frequency: u64) -> Self {
        // 10 MHz = number of 100ns ticks per second; shift by 64 before the
        // division so the quotient is a 64.64 fixed-point ratio.
        const HUNDRED_NS_PER_SEC: u128 = 10_000_000;
        let scale = (HUNDRED_NS_PER_SEC << 64) / tsc_frequency as u128;
        Self {
            tsc_scale: scale as u64,
        }
    }
}
impl ReferenceTimeSource for TscReferenceTimeSource {
/// Current reference time in 100ns units, computed as a 64.64 fixed-point
/// multiply of the raw TSC by `tsc_scale`.
fn now_100ns(&self) -> u64 {
#[cfg(guest_arch = "x86_64")]
{
let tsc = safe_intrinsics::rdtsc();
((self.tsc_scale as u128 * tsc as u128) >> 64) as u64
}
#[cfg(guest_arch = "aarch64")]
{
todo!("AARCH64_TODO");
}
}
fn is_backed_by_tsc(&self) -> bool {
true
}
}
#[cfg(guest_arch = "aarch64")]
impl virt::irqcon::ControlGic for UhPartitionInner {
/// Asserts or deasserts an SPI on the VTL0 GIC via a hypervisor
/// interrupt request; failures are rate-limit logged, not propagated.
fn set_spi_irq(&self, irq_id: u32, high: bool) {
if let Err(err) = self.hcl.request_interrupt(
hvdef::HvInterruptControl::new()
.with_arm64_asserted(high)
.with_interrupt_type(hvdef::HvInterruptType::HvArm64InterruptTypeFixed),
0,
irq_id,
GuestVtl::Vtl0,
) {
tracelimit::warn_ratelimited!(
error = &err as &dyn std::error::Error,
irq = irq_id,
asserted = high,
"failed to request spi"
);
}
}
}
#[cfg(guest_arch = "aarch64")]
impl virt::Aarch64Partition for UhPartition {
/// Returns the GIC controller; only VTL0 is supported here.
fn control_gic(&self, vtl: Vtl) -> Arc<dyn virt::irqcon::ControlGic> {
debug_assert!(vtl == Vtl::Vtl0);
self.inner.clone()
}
}
/// A transferable, not-yet-bound virtual processor; bind it to a thread with
/// `bind_processor` to obtain a runnable `UhProcessor`.
pub struct UhProcessorBox {
partition: Arc<UhPartitionInner>,
vp_info: TargetVpInfo,
}
impl UhProcessorBox {
    /// Returns this processor's VP index.
    pub fn vp_index(&self) -> VpIndex {
        self.vp_info.base.vp_index
    }
    /// Returns the sidecar base CPU for this VP, if it has one.
    pub fn sidecar_base_cpu(&self) -> Option<u32> {
        self.partition
            .hcl
            .sidecar_base_cpu(self.vp_info.base.vp_index.index())
    }
    /// Binds this VP to the current thread and returns the runnable
    /// processor object.
    ///
    /// When `control` is supplied, the calling thread must already be
    /// affinitized to this VP's CPU (asserted below), and the HCL poll file
    /// is wired to the idle control's ring fd.
    pub fn bind_processor<'a, T: Backing>(
        &'a mut self,
        driver: &impl Driver,
        control: Option<&'a mut IdleControl>,
    ) -> Result<UhProcessor<'a, T>, Error> {
        if let Some(control) = &control {
            let vp_index = self.vp_info.base.vp_index;
            let mut current = Default::default();
            affinity::get_current_thread_affinity(&mut current).unwrap();
            // BUGFIX: this assertion previously read `assert_eq!(¤t, ...)`,
            // an HTML-entity mangling of `&current`, which does not compile.
            assert_eq!(&current, CpuSet::new().set(vp_index.index()));
            self.partition
                .hcl
                .set_poll_file(
                    self.partition.vp(vp_index).unwrap().cpu_index,
                    control.ring_fd().as_raw_fd(),
                )
                .map_err(Error::Hcl)?;
        }
        UhProcessor::new(driver, &self.partition, self.vp_info, control)
    }
    /// Records that the sidecar kicked this VP back to the main kernel
    /// because the given task needed to run on it.
    pub fn set_sidecar_exit_due_to_task(&self, task: Arc<str>) {
        self.partition
            .vp(self.vp_info.base.vp_index)
            .unwrap()
            .set_sidecar_exit_reason(SidecarExitReason::TaskRequest(task))
    }
}
/// Per-VP state shared between the VP's run thread and other threads that
/// need to wake or message it.
#[derive(Debug, Inspect)]
struct UhVpInner {
// Packed `WakeReason` bits, one set per VTL (see `WakeReason`).
wake_reasons: AtomicU64,
#[inspect(skip)]
waker: RwLock<Option<Waker>>,
// Per-VTL queues of pending synic messages.
message_queues: VtlArray<MessageQueues, 2>,
#[inspect(skip)]
vp_info: TargetVpInfo,
cpu_index: u32,
sidecar_exit_reason: Mutex<Option<SidecarExitReason>>,
}
impl UhVpInner {
/// Returns this VP's index.
pub fn vp_index(&self) -> VpIndex {
self.vp_info.base.vp_index
}
}
/// Which hypercall requested the pending initial VP context.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
enum InitialVpContextOperation {
StartVp,
EnableVpVtl,
}
/// A pending StartVp/EnableVpVtl request with its initial register context.
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
#[derive(Debug, Inspect)]
struct VpStartEnableVtl {
operation: InitialVpContextOperation,
#[inspect(skip)]
context: hvdef::hypercall::InitialVpContextX64,
}
/// Per-VP, per-VTL TLB lock bookkeeping.
#[derive(Debug, Inspect)]
struct TlbLockInfo {
// VPs blocked waiting on this VP's TLB lock.
#[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
blocked_vps: BitBox<AtomicU64>,
// VPs whose TLB locks this VP is waiting on.
#[inspect(with = "|bb| inspect::iter_by_index(bb.iter().map(|v| *v))")]
blocking_vps: BitBox<AtomicU64>,
// Count of set bits in `blocking_vps` (kept alongside the bitmap).
blocking_vp_count: AtomicU32,
sleeping: AtomicBool,
}
#[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))]
impl TlbLockInfo {
fn new(vp_count: usize) -> Self {
Self {
blocked_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
blocking_vps: BitVec::repeat(false, vp_count).into_boxed_bitslice(),
blocking_vp_count: AtomicU32::new(0),
sleeping: false.into(),
}
}
}
/// Bitfield of reasons a VP was asked to wake; stored packed (one u32 per
/// VTL) in `UhVpInner::wake_reasons`.
#[bitfield(u32)]
#[derive(IntoBytes, Immutable, KnownLayout, FromBytes)]
struct WakeReason {
extint: bool,
message_queues: bool,
hv_start_enable_vtl_vp: bool,
intcon: bool,
update_proxy_irr_filter: bool,
#[bits(27)]
_reserved: u32,
}
impl WakeReason {
    // Single-bit wake-reason masks.
    const EXTINT: Self = Self::new().with_extint(true);
    const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
    const INTCON: Self = Self::new().with_intcon(true);
    #[cfg(guest_arch = "x86_64")]
    const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true);
    #[cfg(guest_arch = "x86_64")]
    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
}
impl UhPartition {
/// Revokes guest VSM support. Fails with `Vtl1AlreadyEnabled` if the
/// guest already enabled VTL 1.
pub fn revoke_guest_vsm(&self) -> Result<(), RevokeGuestVsmError> {
// Shared revocation logic for both the hypervisor-tracked and
// CVM-tracked guest VSM state.
fn revoke<T: Inspect>(vsm_state: &mut GuestVsmState<T>) -> Result<(), RevokeGuestVsmError> {
if matches!(vsm_state, GuestVsmState::Enabled { .. }) {
return Err(RevokeGuestVsmError::Vtl1AlreadyEnabled);
}
*vsm_state = GuestVsmState::NotPlatformSupported;
Ok(())
}
match &self.inner.backing_shared {
BackingShared::Hypervisor(s) => {
revoke(&mut *s.guest_vsm.write())?;
// Also tell the hypervisor to stop advertising guest VSM.
self.inner
.hcl
.set_guest_vsm_partition_config(false)
.map_err(RevokeGuestVsmError::SetGuestVsmConfig)?;
}
#[cfg(guest_arch = "x86_64")]
BackingShared::Snp(SnpBackedShared { cvm, .. })
| BackingShared::Tdx(TdxBackedShared { cvm, .. }) => {
revoke(&mut *cvm.guest_vsm.write())?;
}
};
Ok(())
}
/// Returns the current hypervisor reference time.
pub fn reference_time(&self) -> u64 {
self.inner
.hcl
.reference_time()
.expect("should not fail to get the reference time")
}
}
impl virt::Partition for UhPartition {
// Partition reset is not supported by this backend.
fn supports_reset(&self) -> Option<&dyn virt::ResetPartition<Error = Self::Error>> {
None
}
fn caps(&self) -> &PartitionCapabilities {
&self.inner.caps
}
fn request_msi(&self, vtl: Vtl, request: MsiRequest) {
self.inner
.request_msi(vtl.try_into().expect("higher vtl not configured"), request)
}
fn request_yield(&self, _vp_index: VpIndex) {
unimplemented!()
}
}
impl virt::X86Partition for UhPartition {
fn ioapic_routing(&self) -> Arc<dyn IoApicRouting> {
self.inner.clone()
}
/// Pulses a local interrupt line on the VP's APIC when the APIC is
/// emulated here; otherwise, LINT0 is treated as an extint wake.
fn pulse_lint(&self, vp_index: VpIndex, vtl: Vtl, lint: u8) {
let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
if let Some(apic) = &self.inner.lapic(vtl) {
apic.lint(vp_index, lint.into(), |vp_index| {
self.inner
.vp(vp_index)
.unwrap()
.wake(vtl, WakeReason::INTCON);
});
} else if lint == 0 {
self.inner
.vp(vp_index)
.unwrap()
.wake(vtl, WakeReason::EXTINT);
} else {
unimplemented!()
}
}
}
impl UhPartitionInner {
/// Looks up the shared state for a VP, if the index is in range.
fn vp(&self, index: VpIndex) -> Option<&'_ UhVpInner> {
self.vps.get(index.index() as usize)
}
/// Returns the emulated APIC set for `vtl`; `None` when not a CVM.
fn lapic(&self, vtl: GuestVtl) -> Option<&LocalApicSet> {
self.backing_shared.cvm_state().map(|x| &x.lapic[vtl])
}
/// Returns the emulated hypervisor state; `None` when not a CVM.
fn hv(&self) -> Option<&GlobalHv> {
self.backing_shared.cvm_state().map(|x| &x.hv)
}
/// Records `device_vector` in the device vector table and wakes every
/// other VP so it refreshes its proxy IRR filter.
#[cfg(guest_arch = "x86_64")]
fn request_proxy_irr_filter_update(
&self,
vtl: GuestVtl,
device_vector: u8,
req_vp_index: VpIndex,
) {
tracing::debug!(
?vtl,
device_vector,
req_vp_index = req_vp_index.index(),
"request_proxy_irr_filter_update"
);
{
// Scope the write lock so it is released before waking VPs.
let mut device_vector_table = self.device_vector_table.write();
device_vector_table.set(device_vector as usize, true);
}
// The requesting VP updates its own filter; wake everyone else.
for vp in self.vps.iter() {
if vp.vp_index() != req_vp_index {
vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
}
}
}
/// ORs all device-assigned vectors into `irr_vectors`.
#[cfg(guest_arch = "x86_64")]
fn fill_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
let device_vector_table = self.device_vector_table.read();
for idx in device_vector_table.iter_ones() {
irr_vectors.set(idx, true);
}
}
/// Inspect hook: exposes `enter_modes` mutably; on update, republishes
/// the packed value to `enter_modes_atomic` and wakes all VPs so they
/// pick up the new modes.
fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
let mut wake_vps = false;
resp.field_mut(
"enter_modes",
&mut inspect::adhoc_mut(|req| {
let update = req.is_update();
{
let mut modes = self.enter_modes.lock();
modes.inspect_mut(req);
if update {
self.enter_modes_atomic.store(
hcl::protocol::EnterModes::from(*modes).into(),
Ordering::Relaxed,
);
wake_vps = true;
}
}
}),
);
if wake_vps {
for vp in self.vps.iter() {
vp.wake_vtl2();
}
}
}
/// Reads the VSM partition status register for the current VTL.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn vsm_status(&self) -> Result<HvRegisterVsmPartitionStatus, hcl::ioctl::Error> {
let reg = self.hcl.get_vp_register(
HvAllArchRegisterName::VsmPartitionStatus,
HvInputVtl::CURRENT_VTL,
)?;
Ok(reg.as_u64().into())
}
}
impl virt::Synic for UhPartition {
/// Queues a synic message for the target VP; invalid targets are
/// rate-limit logged and dropped.
fn post_message(&self, vtl: Vtl, vp_index: VpIndex, sint: u8, typ: u32, payload: &[u8]) {
let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
let Some(vp) = self.inner.vp(vp_index) else {
tracelimit::warn_ratelimited!(
vp = vp_index.index(),
"invalid vp target for post_message"
);
return;
};
vp.post_message(
vtl,
sint,
&hvdef::HvMessage::new(hvdef::HvMessageType(typ), 0, payload),
);
}
fn new_guest_event_port(&self) -> Box<dyn vmcore::synic::GuestEventPort> {
Box::new(UhEventPort {
partition: Arc::downgrade(&self.inner),
// Target parameters are filled in later via `GuestEventPort::set`.
params: Default::default(),
})
}
fn prefer_os_events(&self) -> bool {
false
}
// Monitor pages are not used on hardware-isolated partitions.
fn monitor_support(&self) -> Option<&dyn virt::SynicMonitor> {
if self.inner.isolation.is_hardware_isolated() {
None
} else {
Some(self)
}
}
}
impl virt::SynicMonitor for UhPartition {
/// Moves the monitor page: restores full protections on the old page,
/// then restricts the new page to read-only so writes trap. On failure
/// to register the new page, the stored GPA is rolled back to `None`.
fn set_monitor_page(&self, gpa: Option<u64>) -> anyhow::Result<()> {
let old_gpa = self.inner.monitor_page.set_gpa(gpa);
if let Some(old_gpa) = old_gpa {
self.inner
.hcl
.modify_vtl_protection_mask(
MemoryRange::new(old_gpa..old_gpa + HV_PAGE_SIZE),
hvdef::HV_MAP_GPA_PERMISSIONS_ALL,
HvInputVtl::CURRENT_VTL,
)
.context("failed to unregister old monitor page")?;
tracing::debug!(old_gpa, "unregistered monitor page");
}
if let Some(gpa) = gpa {
// Read-only so that guest writes to the page cause intercepts.
let result = self
.inner
.hcl
.modify_vtl_protection_mask(
MemoryRange::new(gpa..gpa + HV_PAGE_SIZE),
HvMapGpaFlags::new().with_readable(true),
HvInputVtl::CURRENT_VTL,
)
.context("failed to register monitor page");
if result.is_err() {
// Roll back the stored GPA so state stays consistent.
self.inner.monitor_page.set_gpa(None);
return result;
}
tracing::debug!(gpa, "registered monitor page");
}
Ok(())
}
fn register_monitor(
&self,
monitor_id: vmcore::monitor::MonitorId,
connection_id: u32,
) -> Box<dyn Send> {
self.inner
.monitor_page
.register_monitor(monitor_id, connection_id)
}
}
impl UhPartitionInner {
/// Returns a callback that injects a synic interrupt into the target
/// VP's emulated APIC, waking the VP if needed.
#[cfg(guest_arch = "x86_64")]
pub(crate) fn synic_interrupt(
&self,
vp_index: VpIndex,
vtl: GuestVtl,
) -> impl '_ + hv1_emulator::RequestInterrupt {
move |vector, auto_eoi| {
self.lapic(vtl).unwrap().synic_interrupt(
vp_index,
vector as u8,
auto_eoi,
|vp_index| self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON),
);
}
}
// No APIC emulation on aarch64; the callback is a no-op.
#[cfg(guest_arch = "aarch64")]
fn synic_interrupt(
&self,
_vp_index: VpIndex,
_vtl: GuestVtl,
) -> impl '_ + hv1_emulator::RequestInterrupt {
move |_, _| {}
}
}
/// A guest event port; holds a weak partition reference so signaling after
/// partition teardown is a no-op.
#[derive(Debug)]
struct UhEventPort {
partition: Weak<UhPartitionInner>,
params: Arc<Mutex<Option<UhEventPortParams>>>,
}
/// Target of an event port: which VP/VTL/SINT/flag to signal.
#[derive(Debug, Copy, Clone)]
struct UhEventPortParams {
vp: VpIndex,
sint: u8,
flag: u16,
vtl: GuestVtl,
}
impl vmcore::synic::GuestEventPort for UhEventPort {
/// Builds an interrupt object that signals this event port's target.
///
/// The signal path prefers the emulated hv synic; if that SINT is
/// proxied, it falls back to the untrusted synic (TDX) or a direct
/// hypervisor signal. Unset params or a dropped partition are no-ops.
fn interrupt(&self) -> vmcore::interrupt::Interrupt {
let partition = self.partition.clone();
let params = self.params.clone();
vmcore::interrupt::Interrupt::from_fn(move || {
let Some(UhEventPortParams {
vp,
sint,
flag,
vtl,
}) = *params.lock()
else {
// Target not configured yet (see `set`).
return;
};
let Some(partition) = partition.upgrade() else {
// Partition has been torn down.
return;
};
tracing::trace!(vp = vp.index(), sint, flag, "signal_event");
if let Some(hv) = partition.hv() {
match hv.synic[vtl].signal_event(
&partition.gm[vtl],
vp,
sint,
flag,
&mut partition.synic_interrupt(vp, vtl),
) {
Ok(_) => {}
Err(SintProxied) => {
tracing::trace!(
vp = vp.index(),
sint,
flag,
"forwarding event to untrusted synic"
);
if let Some(synic) = partition.backing_shared.untrusted_synic() {
synic
.signal_event(
&partition.gm[vtl],
vp,
sint,
flag,
&mut partition.synic_interrupt(vp, vtl),
)
.ok();
} else {
partition.hcl.signal_event_direct(vp.index(), sint, flag)
}
}
}
} else {
// No emulated hv; signal through the hypervisor directly.
partition.hcl.signal_event_direct(vp.index(), sint, flag);
}
})
}
fn clear(&mut self) {
*self.params.lock() = None;
}
/// Sets (or retargets) the VP/SINT/flag this port signals.
fn set(
&mut self,
vtl: Vtl,
vp: u32,
sint: u8,
flag: u16,
) -> Result<(), vmcore::synic::HypervisorError> {
let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
*self.params.lock() = Some(UhEventPortParams {
vp: VpIndex::new(vp),
sint,
flag,
vtl,
});
Ok(())
}
}
impl virt::Hv1 for UhPartition {
type Error = Error;
type Device = virt::x86::apic_software_device::ApicSoftwareDevice;
// Virtual devices are only available when software devices were set up.
fn new_virtual_device(
&self,
) -> Option<&dyn virt::DeviceBuilder<Device = Self::Device, Error = Self::Error>> {
self.inner.software_devices.is_some().then_some(self)
}
}
impl virt::DeviceBuilder for UhPartition {
fn build(&self, vtl: Vtl, device_id: u64) -> Result<Self::Device, Self::Error> {
let vtl = GuestVtl::try_from(vtl).expect("higher vtl not configured");
let device = self
.inner
.software_devices
.as_ref()
.expect("checked in new_virtual_device")
.new_device(self.interrupt_targets[vtl].clone(), device_id)
.map_err(Error::NewDevice)?;
Ok(device)
}
}
impl virt::VtlMemoryProtection for UhPartition {
/// Applies `flags` as the VTL protection mask for the single page at
/// `pfn`.
fn modify_vtl_page_setting(&self, pfn: u64, flags: HvMapGpaFlags) -> anyhow::Result<()> {
let address = pfn << hvdef::HV_PAGE_SHIFT;
self.inner
.hcl
.modify_vtl_protection_mask(
MemoryRange::new(address..address + HV_PAGE_SIZE),
flags,
HvInputVtl::CURRENT_VTL,
)
.context("failed to modify VTL page permissions")
}
}
/// MSI interrupt target bound to a specific VTL of the partition.
struct UhInterruptTarget {
partition: Arc<UhPartitionInner>,
vtl: GuestVtl,
}
impl pci_core::msi::MsiInterruptTarget for UhInterruptTarget {
/// Returns a control that forwards MSI writes to the partition's MSI
/// request path for this target's VTL.
fn new_interrupt(&self) -> Box<dyn pci_core::msi::MsiControl> {
let partition = self.partition.clone();
let vtl = self.vtl;
Box::new(move |address, data| partition.request_msi(vtl, MsiRequest { address, data }))
}
}
impl UhPartitionInner {
/// Delivers an MSI: through the emulated APIC set when present (CVM),
/// otherwise via a hypervisor interrupt request. Hypervisor failures are
/// rate-limit logged, not propagated.
fn request_msi(&self, vtl: GuestVtl, request: MsiRequest) {
if let Some(lapic) = self.lapic(vtl) {
tracing::trace!(?request, "interrupt");
lapic.request_interrupt(request.address, request.data, |vp_index| {
self.vp(vp_index).unwrap().wake(vtl, WakeReason::INTCON)
});
} else {
let (address, data) = request.as_x86();
if let Err(err) = self.hcl.request_interrupt(
request.hv_x86_interrupt_control(),
address.virt_destination().into(),
data.vector().into(),
vtl,
) {
tracelimit::warn_ratelimited!(
error = &err as &dyn std::error::Error,
address = request.address,
data = request.data,
"failed to request msi"
);
}
}
}
}
impl IoApicRouting for UhPartitionInner {
    /// Updates the routing-table entry for I/O APIC input `irq`.
    fn set_irq_route(&self, irq: u8, request: Option<MsiRequest>) {
        self.irq_routes.set_irq_route(irq, request)
    }
    /// Asserts I/O APIC input `irq`, delivering the routed MSI to VTL0.
    fn assert_irq(&self, irq: u8) {
        let deliver = |request| self.request_msi(GuestVtl::Vtl0, request);
        self.irq_routes.assert_irq(irq, deliver)
    }
}
/// Programs the VTL2 VSM partition configuration register from the
/// partition's isolation type and the hypervisor's reported capabilities.
fn set_vtl2_vsm_partition_config(hcl: &Hcl) -> Result<(), Error> {
    let caps = hcl.get_vsm_capabilities().map_err(Error::Hcl)?;
    let isolation = hcl.isolation();
    let hardware_isolated = isolation.is_hardware_isolated();
    let isolated = isolation.is_isolated();
    // Several hypervisor-side features (VTL protection, zero-on-reset, cpuid
    // intercepts) are only enabled when the partition is NOT hardware
    // isolated; capability-gated intercepts follow `caps`.
    let config = HvRegisterVsmPartitionConfig::new()
        .with_default_vtl_protection_mask(0xF)
        .with_enable_vtl_protection(!hardware_isolated)
        .with_zero_memory_on_reset(!hardware_isolated)
        .with_intercept_cpuid_unimplemented(!hardware_isolated)
        .with_intercept_page(caps.intercept_page_available())
        .with_intercept_unrecoverable_exception(true)
        .with_intercept_not_present(caps.intercept_not_present_available() && !isolated)
        .with_intercept_acceptance(isolated)
        .with_intercept_enable_vtl_protection(isolated && !hardware_isolated)
        .with_intercept_system_reset(caps.intercept_system_reset_available());
    hcl.set_vtl2_vsm_partition_config(config)
        .map_err(Error::VsmPartitionConfig)
}
/// Early partition construction parameters (used by `UhProtoPartition::new`).
pub struct UhPartitionNewParams<'a> {
pub isolation: IsolationType,
// When set, isolation is hidden from the guest.
pub hide_isolation: bool,
pub lower_vtl_memory_layout: &'a MemoryLayout,
pub topology: &'a ProcessorTopology,
// Raw CPUID pages for SNP CVMs; required when isolation is Snp.
pub cvm_cpuid_info: Option<&'a [u8]>,
pub snp_secrets: Option<&'a [u8]>,
pub env_cvm_guest_vsm: bool,
// Virtual top of memory, if configured.
pub vtom: Option<u64>,
pub handle_synic: bool,
pub no_sidecar_hotplug: bool,
pub use_mmio_hypercalls: bool,
pub intercept_debug_exceptions: bool,
}
/// Parameters supplied at `UhProtoPartition::build` time.
pub struct UhLateParams<'a> {
// Guest memory accessors, one per VTL.
pub gm: VtlArray<GuestMemory, 2>,
#[cfg(guest_arch = "x86_64")]
pub cpuid: Vec<CpuidLeaf>,
pub crash_notification_send: mesh::Sender<VtlCrash>,
pub vmtime: &'a VmTimeSource,
// Present only for CVM (hardware-isolated) partitions.
pub cvm_params: Option<CvmLateParams>,
}
/// CVM-specific late construction parameters.
pub struct CvmLateParams {
pub shared_gm: GuestMemory,
pub isolated_memory_protector: Arc<dyn ProtectIsolatedMemory>,
// DMA clients for host-visible vs. private allocations.
pub shared_dma_client: Arc<dyn DmaClient>,
pub private_dma_client: Arc<dyn DmaClient>,
}
/// Manages visibility and VTL protections for the memory of an isolated
/// partition. Fallible bulk operations report the error together with the
/// index of the first failing GPN.
pub trait ProtectIsolatedMemory: Send + Sync {
/// Changes host visibility (shared/private) for the given pages.
fn change_host_visibility(
&self,
shared: bool,
gpns: &[u64],
tlb_access: &mut dyn TlbFlushLockAccess,
) -> Result<(), (HvError, usize)>;
/// Queries current host visibility for the given pages.
fn query_host_visibility(
&self,
gpns: &[u64],
host_visibility: &mut [HostVisibilityType],
) -> Result<(), (HvError, usize)>;
/// Default protections applied to VTL0 memory.
fn default_vtl0_protections(&self) -> HvMapGpaFlags;
/// Changes the default protections applied to a VTL's memory.
fn change_default_vtl_protections(
&self,
vtl: GuestVtl,
protections: HvMapGpaFlags,
tlb_access: &mut dyn TlbFlushLockAccess,
) -> Result<(), HvError>;
/// Changes protections on specific pages for a VTL.
fn change_vtl_protections(
&self,
vtl: GuestVtl,
gpns: &[u64],
protections: HvMapGpaFlags,
tlb_access: &mut dyn TlbFlushLockAccess,
) -> Result<(), (HvError, usize)>;
/// Moves the hypercall overlay for `vtl` to the page at `gpn`.
fn change_hypercall_overlay(
&self,
vtl: GuestVtl,
gpn: u64,
tlb_access: &mut dyn TlbFlushLockAccess,
);
/// Removes the hypercall overlay for `vtl`.
fn disable_hypercall_overlay(&self, vtl: GuestVtl, tlb_access: &mut dyn TlbFlushLockAccess);
fn set_vtl1_protections_enabled(&self);
fn vtl1_protections_enabled(&self) -> bool;
}
/// Access to TLB flush and TLB-lock-wait operations, passed to memory
/// protection changes that must be made visible to running VPs.
pub trait TlbFlushLockAccess {
fn flush(&mut self, vtl: GuestVtl);
fn flush_entire(&mut self);
fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl);
}
/// A partially constructed partition: holds everything needed before the
/// late parameters arrive (see `UhProtoPartition::build`).
pub struct UhProtoPartition<'a> {
params: UhPartitionNewParams<'a>,
hcl: Hcl,
// Parsed CVM CPUID results, present only for SNP/TDX.
#[cfg(guest_arch = "x86_64")]
cvm_cpuid: Option<cvm_cpuid::CpuidResults>,
guest_vsm_available: bool,
}
impl<'a> UhProtoPartition<'a> {
/// Creates the prototype partition: opens the HCL driver and sidecar client,
/// restricts the allowed hypercalls, programs the VTL2 VSM partition
/// configuration, and (for x86-64 CVMs) parses the CPUID tables.
pub fn new<T: SpawnDriver>(
    params: UhPartitionNewParams<'a>,
    driver: impl FnMut(u32) -> T,
) -> Result<Self, Error> {
    let hcl_isolation = match params.isolation {
        IsolationType::None => hcl::ioctl::IsolationType::None,
        IsolationType::Vbs => hcl::ioctl::IsolationType::Vbs,
        IsolationType::Snp => hcl::ioctl::IsolationType::Snp,
        IsolationType::Tdx => hcl::ioctl::IsolationType::Tdx,
    };
    let sidecar = sidecar_client::SidecarClient::new(driver).map_err(Error::Sidecar)?;
    let hcl = Hcl::new(hcl_isolation, sidecar).map_err(Error::Hcl)?;
    // Restrict which hypercalls may be issued through the HCL interface.
    let mut allowed_hypercalls = vec![
        hvdef::HypercallCode::HvCallGetVpRegisters,
        hvdef::HypercallCode::HvCallSetVpRegisters,
        hvdef::HypercallCode::HvCallInstallIntercept,
        hvdef::HypercallCode::HvCallTranslateVirtualAddress,
        hvdef::HypercallCode::HvCallPostMessageDirect,
        hvdef::HypercallCode::HvCallSignalEventDirect,
        hvdef::HypercallCode::HvCallModifyVtlProtectionMask,
        hvdef::HypercallCode::HvCallTranslateVirtualAddressEx,
        hvdef::HypercallCode::HvCallCheckSparseGpaPageVtlAccess,
        hvdef::HypercallCode::HvCallAssertVirtualInterrupt,
        hvdef::HypercallCode::HvCallGetVpIndexFromApicId,
        hvdef::HypercallCode::HvCallAcceptGpaPages,
        hvdef::HypercallCode::HvCallModifySparseGpaPageHostVisibility,
    ];
    if params.isolation.is_hardware_isolated() {
        // extend([...]) avoids the temporary Vec the original
        // `extend(vec![...])` allocated.
        allowed_hypercalls.extend([
            hvdef::HypercallCode::HvCallEnablePartitionVtl,
            hvdef::HypercallCode::HvCallRetargetDeviceInterrupt,
            hvdef::HypercallCode::HvCallEnableVpVtl,
        ]);
    }
    if params.use_mmio_hypercalls {
        allowed_hypercalls.extend([
            hvdef::HypercallCode::HvCallMemoryMappedIoRead,
            hvdef::HypercallCode::HvCallMemoryMappedIoWrite,
        ]);
    }
    hcl.set_allowed_hypercalls(allowed_hypercalls.as_slice());
    set_vtl2_vsm_partition_config(&hcl)?;
    #[cfg(guest_arch = "x86_64")]
    let cvm_cpuid = match params.isolation {
        IsolationType::Snp => Some(
            cvm_cpuid::CpuidResults::new(cvm_cpuid::CpuidResultsIsolationType::Snp {
                cpuid_pages: params.cvm_cpuid_info.unwrap(),
            })
            .map_err(Error::CvmCpuid)?,
        ),
        IsolationType::Tdx => Some(
            cvm_cpuid::CpuidResults::new(cvm_cpuid::CpuidResultsIsolationType::Tdx)
                .map_err(Error::CvmCpuid)?,
        ),
        IsolationType::Vbs | IsolationType::None => None,
    };
    // BUGFIX: this call previously passed `¶ms` — an HTML-entity mangling
    // of `&params` — which does not compile.
    let guest_vsm_available = Self::check_guest_vsm_support(
        &hcl,
        &params,
        #[cfg(guest_arch = "x86_64")]
        cvm_cpuid.as_ref(),
    )?;
    Ok(UhProtoPartition {
        hcl,
        params,
        #[cfg(guest_arch = "x86_64")]
        cvm_cpuid,
        guest_vsm_available,
    })
}
pub fn guest_vsm_available(&self) -> bool {
self.guest_vsm_available
}
pub async fn build(
self,
late_params: UhLateParams<'_>,
) -> Result<(UhPartition, Vec<UhProcessorBox>), Error> {
let Self {
mut hcl,
params,
#[cfg(guest_arch = "x86_64")]
cvm_cpuid,
guest_vsm_available,
} = self;
let isolation = params.isolation;
let is_hardware_isolated = isolation.is_hardware_isolated();
if params.intercept_debug_exceptions {
if !cfg!(feature = "gdb") {
return Err(Error::InvalidDebugConfiguration);
}
cfg_if::cfg_if! {
if #[cfg(guest_arch = "x86_64")] {
if isolation != IsolationType::Tdx {
let debug_exception_vector = 0x1;
hcl.register_intercept(
HvInterceptType::HvInterceptTypeException,
HV_INTERCEPT_ACCESS_MASK_EXECUTE,
HvInterceptParameters::new_exception(debug_exception_vector),
)
.map_err(|err| Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err))?;
}
} else {
return Err(Error::InvalidDebugConfiguration);
}
}
}
if !is_hardware_isolated {
if cfg!(guest_arch = "x86_64") {
hcl.register_intercept(
HvInterceptType::HvInterceptTypeX64Msr,
HV_INTERCEPT_ACCESS_MASK_READ_WRITE,
HvInterceptParameters::new_zeroed(),
)
.map_err(|err| {
Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64Msr, err)
})?;
hcl.register_intercept(
HvInterceptType::HvInterceptTypeX64ApicEoi,
HV_INTERCEPT_ACCESS_MASK_WRITE,
HvInterceptParameters::new_zeroed(),
)
.map_err(|err| {
Error::InstallIntercept(HvInterceptType::HvInterceptTypeX64ApicEoi, err)
})?;
} else {
if false {
todo!("AARCH64_TODO");
}
}
}
if isolation == IsolationType::Snp {
hcl.register_intercept(
HvInterceptType::HvInterceptTypeException,
HV_INTERCEPT_ACCESS_MASK_EXECUTE,
HvInterceptParameters::new_exception(0x1D),
)
.map_err(|err| {
Error::InstallIntercept(HvInterceptType::HvInterceptTypeException, err)
})?;
let mut bitmap = [0u8; 64];
if let Some(secrets) = params.snp_secrets {
bitmap.copy_from_slice(
&secrets
[REG_TWEAK_BITMAP_OFFSET..REG_TWEAK_BITMAP_OFFSET + REG_TWEAK_BITMAP_SIZE],
);
}
hcl.set_snp_register_bitmap(bitmap);
}
hcl.add_vps(
params.topology.vp_count(),
late_params
.cvm_params
.as_ref()
.map(|x| &x.private_dma_client),
)
.map_err(Error::Hcl)?;
let vps: Vec<_> = params
.topology
.vps_arch()
.map(|vp_info| {
let cpu_index = vp_info.base.vp_index.index();
UhVpInner::new(cpu_index, vp_info)
})
.collect();
#[cfg(guest_arch = "x86_64")]
let software_devices = {
let res = if !is_hardware_isolated {
hcl.register_intercept(
HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
HV_INTERCEPT_ACCESS_MASK_EXECUTE,
HvInterceptParameters::new_zeroed(),
)
} else {
Ok(())
};
match res {
Ok(()) => Some(ApicSoftwareDevices::new(
params.topology.vps_arch().map(|vp| vp.apic_id).collect(),
)),
Err(HvError::InvalidParameter | HvError::AccessDenied) => None,
Err(err) => {
return Err(Error::InstallIntercept(
HvInterceptType::HvInterceptTypeRetargetInterruptWithUnknownDeviceId,
err,
));
}
}
};
#[cfg(guest_arch = "aarch64")]
let software_devices = None;
#[cfg(guest_arch = "aarch64")]
let (caps, cpuid) = (
virt::aarch64::Aarch64PartitionCapabilities {},
CpuidLeafSet::new(Vec::new()),
);
#[cfg(guest_arch = "x86_64")]
let cpuid = UhPartition::construct_cpuid_results(
&late_params.cpuid,
params.topology,
guest_vsm_available,
params.vtom,
isolation,
params.hide_isolation,
);
#[cfg(guest_arch = "x86_64")]
let caps = UhPartition::construct_capabilities(
params.topology,
&cpuid,
cvm_cpuid.as_ref(),
isolation,
params.hide_isolation,
);
if params.handle_synic && !matches!(isolation, IsolationType::Tdx) {
hcl.register_intercept(
HvInterceptType::HvInterceptTypeUnknownSynicConnection,
HV_INTERCEPT_ACCESS_MASK_EXECUTE,
HvInterceptParameters::new_zeroed(),
)
.expect("registering synic intercept cannot fail");
}
#[cfg(guest_arch = "x86_64")]
let cvm_state = if is_hardware_isolated {
Some(Self::construct_cvm_state(
¶ms,
late_params.cvm_params.unwrap(),
&caps,
cvm_cpuid.unwrap(),
guest_vsm_available,
)?)
} else {
None
};
#[cfg(guest_arch = "aarch64")]
let cvm_state = None;
let enter_modes = EnterModes::default();
let partition = Arc::new(UhPartitionInner {
hcl,
vps,
irq_routes: Default::default(),
caps,
enter_modes: Mutex::new(enter_modes),
enter_modes_atomic: u8::from(hcl::protocol::EnterModes::from(enter_modes)).into(),
gm: late_params.gm,
cpuid,
crash_notification_send: late_params.crash_notification_send,
monitor_page: MonitorPage::new(),
software_devices,
lower_vtl_memory_layout: params.lower_vtl_memory_layout.clone(),
vmtime: late_params.vmtime.clone(),
isolation,
no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
use_mmio_hypercalls: params.use_mmio_hypercalls,
backing_shared: BackingShared::new(
isolation,
¶ms,
BackingSharedParams {
cvm_state,
guest_vsm_available,
},
)?,
#[cfg(guest_arch = "x86_64")]
device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
intercept_debug_exceptions: params.intercept_debug_exceptions,
});
if cfg!(guest_arch = "x86_64") {
partition.manage_io_port_intercept_region(0, !0, true);
}
let vps = params
.topology
.vps_arch()
.map(|vp_info| UhProcessorBox {
partition: partition.clone(),
vp_info,
})
.collect();
Ok((
UhPartition {
inner: partition.clone(),
interrupt_targets: VtlArray::from_fn(|vtl| {
Arc::new(UhInterruptTarget {
partition: partition.clone(),
vtl: vtl.try_into().unwrap(),
})
}),
},
vps,
))
}
}
impl UhPartition {
    /// Returns the guest OS ID registered for VTL0.
    pub fn vtl0_guest_os_id(&self) -> Result<HvGuestOsId, Error> {
        // If an emulated hypervisor is present, it tracks the ID; otherwise
        // read the GuestOsId register through the HCL.
        let id = if let Some(hv) = self.inner.hv() {
            hv.guest_os_id(Vtl::Vtl0)
        } else {
            let reg_value = self
                .inner
                .hcl
                .get_vp_register(HvAllArchRegisterName::GuestOsId, Vtl::Vtl0.into())
                .map_err(Error::Hcl)?;
            HvGuestOsId::from(reg_value.as_u64())
        };
        Ok(id)
    }
    /// Disables interception for the given IO port range so accesses pass
    /// through. The returned handle re-enables interception when dropped.
    ///
    /// # Panics
    /// Panics if the partition is hardware isolated (IO port intercepts are
    /// not managed there).
    pub fn register_host_io_port_fast_path(
        &self,
        range: RangeInclusive<u16>,
    ) -> HostIoPortFastPathHandle {
        assert!(!self.inner.isolation.is_hardware_isolated());
        // `active = false` removes the intercept over the range.
        self.inner
            .manage_io_port_intercept_region(*range.start(), *range.end(), false);
        HostIoPortFastPathHandle {
            // Weak so the handle does not keep the partition alive.
            inner: Arc::downgrade(&self.inner),
            begin: *range.start(),
            end: *range.end(),
        }
    }
    /// Sets the hypervisor PM timer assist port.
    pub fn set_pm_timer_assist(&self, port: Option<u16>) -> Result<(), HvError> {
        self.inner.hcl.set_pm_timer_assist(port)
    }
}
impl UhProtoPartition<'_> {
    /// Determines whether guest VSM (VTL1) can be offered to the guest.
    ///
    /// Returns `Ok(false)` when any prerequisite is missing: the CVM
    /// environment opt-in, SNP RMP-query support, the AccessVsm partition
    /// privilege, or a hypervisor-reported maximum VTL below VTL1.
    fn check_guest_vsm_support(
        hcl: &Hcl,
        params: &UhPartitionNewParams<'_>,
        #[cfg(guest_arch = "x86_64")] cvm_cpuid: Option<&cvm_cpuid::CpuidResults>,
    ) -> Result<bool, Error> {
        match params.isolation {
            IsolationType::None | IsolationType::Vbs => {}
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Tdx => {
                // CVM guest VSM is gated behind an environment opt-in.
                if !params.env_cvm_guest_vsm {
                    return Ok(false);
                }
            }
            #[cfg(guest_arch = "x86_64")]
            IsolationType::Snp => {
                if !params.env_cvm_guest_vsm {
                    return Ok(false);
                }
                // SNP guest VSM also needs RMP query support, advertised in
                // the extended SEV features CPUID leaf.
                let rmp_query = x86defs::cpuid::ExtendedSevFeaturesEax::from(
                    cvm_cpuid
                        .unwrap()
                        .registered_result(x86defs::cpuid::CpuidFunction::ExtendedSevFeatures, 0)
                        .eax,
                )
                .rmp_query();
                if !rmp_query {
                    tracing::info!("rmp query not supported, cannot enable vsm");
                    return Ok(false);
                }
            }
            // The hardware-isolated arms are cfg-gated above, so this is
            // only reachable on builds where they are compiled out.
            #[allow(unreachable_patterns)]
            isolation => panic!("unsupported isolation type {:?}", isolation),
        }
        // Read the partition privilege mask: from the synthetic hypervisor
        // features CPUID leaf on x86-64, or from a VP register on aarch64.
        #[cfg(guest_arch = "x86_64")]
        let privs = {
            let result = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES, 0);
            result.eax as u64 | ((result.ebx as u64) << 32)
        };
        #[cfg(guest_arch = "aarch64")]
        let privs = hcl
            .get_vp_register(
                HvArm64RegisterName::PrivilegesAndFeaturesInfo,
                HvInputVtl::CURRENT_VTL,
            )
            .map_err(Error::Hcl)?
            .as_u64();
        if !hvdef::HvPartitionPrivilege::from(privs).access_vsm() {
            return Ok(false);
        }
        // Finally, the hypervisor must report that at least VTL1 exists.
        let guest_vsm_config = hcl.get_guest_vsm_partition_config().map_err(Error::Hcl)?;
        Ok(guest_vsm_config.maximum_vtl() >= u8::from(GuestVtl::Vtl1))
    }
    /// Builds the partition-wide state used by hardware-isolated (CVM)
    /// guests: per-VP state, TLB lock tracking, emulated local APICs, and
    /// the emulated hypervisor instance.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cvm_state(
        params: &UhPartitionNewParams<'_>,
        late_params: CvmLateParams,
        caps: &PartitionCapabilities,
        cpuid: cvm_cpuid::CpuidResults,
        guest_vsm_available: bool,
    ) -> Result<UhCvmPartitionState, Error> {
        let vp_count = params.topology.vp_count() as usize;
        // Per-VP state; only VP 0 (the BSP) starts in the started state.
        let vps = (0..vp_count)
            .map(|vp_index| UhCvmVpInner {
                tlb_lock_info: VtlArray::from_fn(|_| TlbLockInfo::new(vp_count)),
                vtl1_enable_called: Mutex::new(false),
                started: AtomicBool::new(vp_index == 0),
                hv_start_enable_vtl_vp: VtlArray::from_fn(|_| Mutex::new(None)),
            })
            .collect();
        // Per-VTL bitmap with one bit per VP, all initially unlocked.
        let tlb_locked_vps =
            VtlArray::from_fn(|_| BitVec::repeat(false, vp_count).into_boxed_bitslice());
        // One emulated local APIC set per VTL.
        let lapic = VtlArray::from_fn(|_| {
            LocalApicSet::builder()
                .x2apic_capable(caps.x2apic)
                .hyperv_enlightenments(true)
                .build()
        });
        // The emulated hypervisor's reference time source is derived from
        // the (validated) TSC frequency.
        let tsc_frequency = get_tsc_frequency(params.isolation)?;
        let ref_time = Box::new(TscReferenceTimeSource::new(tsc_frequency));
        let hv = GlobalHv::new(hv1_emulator::hv::GlobalHvParams {
            max_vp_count: params.topology.vp_count(),
            vendor: caps.vendor,
            tsc_frequency,
            ref_time,
        });
        if guest_vsm_available {
            tracing::warn!(
                "Advertising guest vsm as being supported to the guest. This feature is in development, so the guest might crash."
            );
        }
        Ok(UhCvmPartitionState {
            cpuid,
            tlb_locked_vps,
            vps,
            shared_memory: late_params.shared_gm,
            isolated_memory_protector: late_params.isolated_memory_protector,
            lapic,
            hv,
            guest_vsm: RwLock::new(GuestVsmState::from_availability(guest_vsm_available)),
            shared_dma_client: late_params.shared_dma_client,
            private_dma_client: late_params.private_dma_client,
            hide_isolation: params.hide_isolation,
        })
    }
}
impl UhPartition {
    /// Computes the CPUID leaf set exposed to the guest.
    ///
    /// For hardware-isolated guests this includes the synthetic hypervisor
    /// leaves (reporting `IsolationType::None` when `hide_isolation` is
    /// set); the set is then extended with `initial_cpuid`.
    #[cfg(guest_arch = "x86_64")]
    fn construct_cpuid_results(
        initial_cpuid: &[CpuidLeaf],
        topology: &ProcessorTopology<vm_topology::processor::x86::X86Topology>,
        access_vsm: bool,
        vtom: Option<u64>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> CpuidLeafSet {
        let mut cpuid = CpuidLeafSet::new(Vec::new());
        if isolation.is_hardware_isolated() {
            // Reflect this environment's reported hypervisor version.
            let hv_version = safe_intrinsics::cpuid(hvdef::HV_CPUID_FUNCTION_MS_HV_VERSION, 0);
            cpuid.extend(&hv1_emulator::cpuid::hv_cpuid_leaves(
                topology,
                // When hiding isolation, advertise a non-isolated partition.
                if hide_isolation {
                    IsolationType::None
                } else {
                    isolation
                },
                access_vsm,
                [
                    hv_version.eax,
                    hv_version.ebx,
                    hv_version.ecx,
                    hv_version.edx,
                ],
                vtom,
            ));
        }
        cpuid.extend(initial_cpuid);
        cpuid
    }
    /// Derives the partition capabilities from CPUID.
    ///
    /// When `cvm_cpuid` is provided (hardware isolation), leaves are
    /// resolved through the isolation architecture's CPUID results;
    /// otherwise the native CPUID instruction is used. In both cases the
    /// `cpuid` leaf set is consulted with the raw values as the default.
    #[cfg(guest_arch = "x86_64")]
    fn construct_capabilities(
        topology: &ProcessorTopology,
        cpuid: &CpuidLeafSet,
        cvm_cpuid: Option<&cvm_cpuid::CpuidResults>,
        isolation: IsolationType,
        hide_isolation: bool,
    ) -> virt::x86::X86PartitionCapabilities {
        // Both closures must outlive `cpuid_fn`, hence the two bindings.
        let mut native_cpuid_fn;
        let mut cvm_cpuid_fn;
        let cpuid_fn: &mut dyn FnMut(u32, u32) -> [u32; 4] = if let Some(cvm_cpuid) = cvm_cpuid {
            // Query as the BSP with a fixed guest state.
            let bsp = topology.vp_arch(VpIndex::BSP).apic_id;
            cvm_cpuid_fn = move |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = cvm_cpuid.guest_result(
                    x86defs::cpuid::CpuidFunction(leaf),
                    sub_leaf,
                    &cvm_cpuid::CpuidGuestState {
                        xfem: 1,
                        xss: 0,
                        cr4: 0,
                        apic_id: bsp,
                    },
                );
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut cvm_cpuid_fn
        } else {
            native_cpuid_fn = |leaf, sub_leaf| {
                let CpuidResult { eax, ebx, ecx, edx } = safe_intrinsics::cpuid(leaf, sub_leaf);
                cpuid.result(leaf, sub_leaf, &[eax, ebx, ecx, edx])
            };
            &mut native_cpuid_fn
        };
        let mut caps = virt::x86::X86PartitionCapabilities::from_cpuid(topology, cpuid_fn);
        // Sanity-check vtom: it must be advertised exactly when hardware
        // isolation is not hidden, and never otherwise.
        match isolation {
            IsolationType::Tdx => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
                // TDX forces the NXE bit on.
                caps.nxe_forced_on = true;
            }
            IsolationType::Snp => {
                assert_eq!(caps.vtom.is_some(), !hide_isolation);
            }
            _ => {
                assert!(caps.vtom.is_none());
            }
        }
        caps
    }
    /// Performs a host MMIO read via hypercall.
    ///
    /// No-op (leaving `data` unchanged) unless MMIO hypercalls are enabled;
    /// failures are logged (rate-limited) and otherwise ignored.
    pub fn host_mmio_read(&self, addr: u64, data: &mut [u8]) {
        if !self.inner.use_mmio_hypercalls {
            return;
        }
        if let Err(err) = self.inner.hcl.memory_mapped_io_read(addr, data) {
            tracelimit::error_ratelimited!(
                error = &err as &dyn std::error::Error,
                "Failed host MMIO read"
            );
        }
    }
    /// Performs a host MMIO write via hypercall.
    ///
    /// No-op unless MMIO hypercalls are enabled; failures are logged
    /// (rate-limited) and otherwise ignored.
    pub fn host_mmio_write(&self, addr: u64, data: &[u8]) {
        if !self.inner.use_mmio_hypercalls {
            return;
        }
        if let Err(err) = self.inner.hcl.memory_mapped_io_write(addr, data) {
            tracelimit::error_ratelimited!(
                error = &err as &dyn std::error::Error,
                "Failed host MMIO write"
            );
        }
    }
}
/// Returns the TSC frequency in Hz, as reported by the hypervisor via the
/// `HV_X64_MSR_TSC_FREQUENCY` MSR.
///
/// On TDX, the value is cross-checked against the frequency derived from the
/// CPUID core crystal clock leaf; a disagreement beyond the allowed error
/// fails with [`Error::TscFrequencyMismatch`].
#[cfg(guest_arch = "x86_64")]
fn get_tsc_frequency(isolation: IsolationType) -> Result<u64, Error> {
    // The hypervisor-reported frequency is the value we ultimately return.
    let msr = MsrDevice::new(0).map_err(Error::OpenMsr)?;
    let hv_frequency = msr
        .read_msr(hvdef::HV_X64_MSR_TSC_FREQUENCY)
        .map_err(Error::ReadTscFrequency)?;

    // Only TDX provides a trustworthy hardware value to compare against.
    let hw_info = if isolation == IsolationType::Tdx {
        let max_function =
            safe_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VendorAndMaxFunction.0, 0).eax;
        if max_function < x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0 {
            return Err(Error::BadCpuidTsc);
        }
        let leaf = safe_intrinsics::cpuid(
            x86defs::cpuid::CpuidFunction::CoreCrystalClockInformation.0,
            0,
        );
        // TSC Hz = crystal Hz * (numerator / denominator).
        let denominator = leaf.eax;
        let numerator = leaf.ebx;
        let crystal_hz = leaf.ecx;
        if numerator == 0 || denominator == 0 || crystal_hz == 0 {
            return Err(Error::BadCpuidTsc);
        }
        // Tolerate up to 12.5 MHz of disagreement.
        let allowed_error = 12_500_000;
        Some((
            crystal_hz as u64 * numerator as u64 / denominator as u64,
            allowed_error,
        ))
    } else {
        // SNP, VBS, and non-isolated guests: no hardware cross-check.
        None
    };

    if let Some((hw_frequency, allowed_error)) = hw_info {
        let delta = hw_frequency.abs_diff(hv_frequency);
        if delta > allowed_error {
            return Err(Error::TscFrequencyMismatch {
                hv: hv_frequency,
                hw: hw_frequency,
                allowed_error,
            });
        }
    }
    Ok(hv_frequency)
}
impl UhPartitionInner {
    /// Enables (`active`) or disables IO port interception for the inclusive
    /// port range `begin..=end`.
    ///
    /// No-op for hardware-isolated partitions. Prefers a single range
    /// registration and falls back to per-port registration on hypervisors
    /// that reject ranges.
    fn manage_io_port_intercept_region(&self, begin: u16, end: u16, active: bool) {
        if self.isolation.is_hardware_isolated() {
            return;
        }
        // Once a range registration is rejected, remember that and take the
        // per-port slow path for all subsequent calls.
        static SKIP_RANGE: AtomicBool = AtomicBool::new(false);
        let access_type_mask = if active {
            HV_INTERCEPT_ACCESS_MASK_READ_WRITE
        } else {
            HV_INTERCEPT_ACCESS_MASK_NONE
        };
        if !SKIP_RANGE.load(Ordering::Relaxed) {
            match self.hcl.register_intercept(
                HvInterceptType::HvInterceptTypeX64IoPortRange,
                access_type_mask,
                HvInterceptParameters::new_io_port_range(begin..=end),
            ) {
                Ok(()) => return,
                Err(HvError::InvalidParameter) => {
                    // Range intercepts are unsupported; fall through to the
                    // per-port loop below.
                    SKIP_RANGE.store(true, Ordering::Relaxed);
                    tracing::warn!("old hypervisor build; using slow path for intercept ranges");
                }
                Err(err) => {
                    panic!("io port range registration failure: {err:?}");
                }
            }
        }
        // Slow path: register each port in the range individually.
        for port in begin..=end {
            self.hcl
                .register_intercept(
                    HvInterceptType::HvInterceptTypeX64IoPort,
                    access_type_mask,
                    HvInterceptParameters::new_io_port(port),
                )
                .expect("registering io intercept cannot fail");
        }
    }
    /// Returns whether `gpa` falls within any lower-VTL RAM range.
    fn is_gpa_lower_vtl_ram(&self, gpa: u64) -> bool {
        self.lower_vtl_memory_layout
            .ram()
            .iter()
            .any(|m| m.range.contains_addr(gpa))
    }
    /// Returns whether an access to `gpa` is backed by mapped lower-VTL RAM.
    /// Writes to the page containing the monitor page are treated as not
    /// mapped.
    fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
        if self.is_gpa_lower_vtl_ram(gpa) {
            !write || self.monitor_page.gpa() != Some(gpa & !(HV_PAGE_SIZE - 1))
        } else {
            false
        }
    }
    /// Resolves a CPUID query against the partition's leaf set, masking the
    /// AccessVsm privilege out of the hypervisor features leaf when guest
    /// VSM is disabled.
    #[cfg(guest_arch = "x86_64")]
    fn cpuid_result(&self, eax: u32, ecx: u32, default: &[u32; 4]) -> [u32; 4] {
        let r = self.cpuid.result(eax, ecx, default);
        if eax == hvdef::HV_CPUID_FUNCTION_MS_HV_FEATURES {
            let mut features = hvdef::HvFeatures::from_cpuid(r);
            if self.backing_shared.guest_vsm_disabled() {
                features.set_privileges(features.privileges().with_access_vsm(false));
            }
            features.into_cpuid()
        } else {
            r
        }
    }
}
/// Handle returned by [`UhPartition::register_host_io_port_fast_path`].
/// When dropped, interception of the IO port range is re-enabled.
#[must_use]
pub struct HostIoPortFastPathHandle {
    // Weak reference so an outstanding handle does not keep the partition
    // alive.
    inner: Weak<UhPartitionInner>,
    // Inclusive bounds of the fast-pathed IO port range.
    begin: u16,
    end: u16,
}
impl Drop for HostIoPortFastPathHandle {
    /// Re-enables interception of the fast-pathed IO port range. If the
    /// partition has already been dropped, there is nothing to restore.
    fn drop(&mut self) {
        let Some(inner) = self.inner.upgrade() else {
            return;
        };
        inner.manage_io_port_intercept_region(self.begin, self.end, true);
    }
}
/// A crash notification for a VTL: the VP and VTL it was observed on, plus
/// the guest-provided crash control value and parameters.
#[derive(Copy, Clone, Debug)]
pub struct VtlCrash {
    /// The VP on which the crash was reported.
    pub vp_index: VpIndex,
    /// The VTL that crashed.
    pub last_vtl: GuestVtl,
    /// Guest crash control value.
    pub control: GuestCrashCtl,
    /// Guest crash parameters.
    pub parameters: [u64; 5],
}
/// Validates that a set of VTL GPA protection flags is internally
/// consistent.
///
/// Rejects the adjustable flag, kernel/user execute mismatches that MBEC
/// does not permit, any non-read permission without read access, and
/// inconsistent supervisor-shadow-stack configurations.
#[cfg_attr(guest_arch = "aarch64", expect(dead_code))]
fn validate_vtl_gpa_flags(
    flags: HvMapGpaFlags,
    mbec_enabled: bool,
    shadow_supervisor_stack_enabled: bool,
) -> bool {
    // The adjustable flag is never valid here.
    if flags.adjustable() {
        return false;
    }

    let kernel_exec = flags.kernel_executable();
    let user_exec = flags.user_executable();

    // Mismatched kernel/user execute permission requires MBEC, and
    // kernel-executable-but-not-user-executable is never allowed.
    if kernel_exec != user_exec && ((kernel_exec && !user_exec) || !mbec_enabled) {
        return false;
    }

    // Any permission beyond plain read implies read must also be granted.
    let grants_more_than_read = flags.writable()
        || kernel_exec
        || user_exec
        || flags.supervisor_shadow_stack()
        || flags.paging_writability()
        || flags.verify_paging_writability();
    if grants_more_than_read && !flags.readable() {
        return false;
    }

    // Supervisor shadow stack is invalid without some execute permission,
    // or when shadow supervisor stacks are enabled.
    let no_exec = !kernel_exec && !user_exec;
    if flags.supervisor_shadow_stack() && (no_exec || shadow_supervisor_stack_enabled) {
        return false;
    }

    true
}