mod tlb_flush;

use super::BackingPrivate;
use super::BackingSharedParams;
use super::HardwareIsolatedBacking;
use super::UhEmulationState;
use super::UhHypercallHandler;
use super::hardware_cvm;
use super::vp_state;
use super::vp_state::UhVpStateAccess;
use crate::BackingShared;
use crate::GuestVtl;
use crate::IsolationType;
use crate::TlbFlushLockAccess;
use crate::UhCvmPartitionState;
use crate::UhCvmVpState;
use crate::UhPartitionInner;
use crate::UhPartitionNewParams;
use crate::UhProcessor;
use crate::WakeReason;
use crate::get_tsc_frequency;
use cvm_tracing::CVM_ALLOWED;
use cvm_tracing::CVM_CONFIDENTIAL;
use guestmem::GuestMemory;
use hcl::ioctl::ProcessorRunner;
use hcl::ioctl::tdx::Tdx;
use hcl::ioctl::tdx::TdxPrivateRegs;
use hcl::protocol::hcl_intr_offload_flags;
use hcl::protocol::tdx_tdg_vp_enter_exit_info;
use hv1_emulator::hv::ProcessorVtlHv;
use hv1_emulator::synic::GlobalSynic;
use hv1_emulator::synic::ProcessorSynic;
use hv1_hypercall::AsHandler;
use hv1_hypercall::HvRepResult;
use hv1_hypercall::HypercallIo;
use hv1_structs::ProcessorSet;
use hv1_structs::VtlArray;
use hvdef::HV_PAGE_SIZE;
use hvdef::HvError;
use hvdef::HvSynicSimpSiefp;
use hvdef::HvX64PendingExceptionEvent;
use hvdef::HvX64RegisterName;
use hvdef::Vtl;
use hvdef::hypercall::HvFlushFlags;
use hvdef::hypercall::HvGvaRange;
use inspect::Inspect;
use inspect::InspectMut;
use inspect_counters::Counter;
use std::sync::atomic::AtomicU8;
use std::sync::atomic::Ordering;
use thiserror::Error;
use tlb_flush::FLUSH_GVA_LIST_SIZE;
use tlb_flush::TdxFlushState;
use tlb_flush::TdxPartitionFlushState;
use virt::EmulatorMonitorSupport;
use virt::Processor;
use virt::VpHaltReason;
use virt::VpIndex;
use virt::io::CpuIo;
use virt::state::StateElement;
use virt::vp;
use virt::vp::AccessVpState;
use virt::vp::MpState;
use virt::vp::Registers;
use virt::x86::MsrError;
use virt::x86::MsrErrorExt;
use virt::x86::SegmentRegister;
use virt::x86::TableRegister;
use virt_support_apic::ApicClient;
use virt_support_apic::OffloadNotSupported;
use virt_support_x86emu::emulate::EmulatedMemoryOperation;
use virt_support_x86emu::emulate::EmulatorSupport as X86EmulatorSupport;
use virt_support_x86emu::emulate::TranslateMode;
use virt_support_x86emu::emulate::emulate_insn_memory_op;
use virt_support_x86emu::emulate::emulate_io;
use virt_support_x86emu::emulate::emulate_translate_gva;
use virt_support_x86emu::translate::TranslationRegisters;
use vmcore::vmtime::VmTimeAccess;
use x86defs::RFlags;
use x86defs::X64_CR0_ET;
use x86defs::X64_CR0_NE;
use x86defs::X64_CR0_PE;
use x86defs::X64_CR0_PG;
use x86defs::X64_CR4_MCE;
use x86defs::X64_CR4_UMIP;
use x86defs::X64_CR4_VMXE;
use x86defs::X64_EFER_FFXSR;
use x86defs::X64_EFER_LMA;
use x86defs::X64_EFER_LME;
use x86defs::X64_EFER_NXE;
use x86defs::X64_EFER_SVME;
use x86defs::X86X_MSR_EFER;
use x86defs::apic::X2APIC_MSR_BASE;
use x86defs::tdx::TdCallResultCode;
use x86defs::tdx::TdVmCallR10Result;
use x86defs::tdx::TdxGp;
use x86defs::tdx::TdxInstructionInfo;
use x86defs::tdx::TdxL2Ctls;
use x86defs::tdx::TdxVpEnterRaxResult;
use x86defs::vmx::ApicPage;
use x86defs::vmx::ApicRegister;
use x86defs::vmx::CR_ACCESS_TYPE_LMSW;
use x86defs::vmx::CR_ACCESS_TYPE_MOV_TO_CR;
use x86defs::vmx::CrAccessQualification;
use x86defs::vmx::ExitQualificationIo;
use x86defs::vmx::GdtrOrIdtrInstruction;
use x86defs::vmx::GdtrOrIdtrInstructionInfo;
use x86defs::vmx::INTERRUPT_TYPE_EXTERNAL;
use x86defs::vmx::INTERRUPT_TYPE_HARDWARE_EXCEPTION;
use x86defs::vmx::INTERRUPT_TYPE_NMI;
use x86defs::vmx::IO_SIZE_8_BIT;
use x86defs::vmx::IO_SIZE_16_BIT;
use x86defs::vmx::IO_SIZE_32_BIT;
use x86defs::vmx::Interruptibility;
use x86defs::vmx::InterruptionInformation;
use x86defs::vmx::LdtrOrTrInstruction;
use x86defs::vmx::LdtrOrTrInstructionInfo;
use x86defs::vmx::ProcessorControls;
use x86defs::vmx::SecondaryProcessorControls;
use x86defs::vmx::VMX_ENTRY_CONTROL_LONG_MODE_GUEST;
use x86defs::vmx::VMX_FEATURE_CONTROL_LOCKED;
use x86defs::vmx::VmcsField;
use x86defs::vmx::VmxEptExitQualification;
use x86defs::vmx::VmxExit;
use x86defs::vmx::VmxExitBasic;
use x86emu::Gp;
use x86emu::Segment;

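/// MSRs that the guest is allowed to read directly. `BackingPrivate::new` clears
/// the read-intercept bit for each of these in the MSR bitmap of both guest VTLs.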
const MSR_ALLOWED_READ: &[u32] = &[
    x86defs::X86X_MSR_TSC,
    x86defs::X86X_MSR_TSC_AUX,
    X86X_MSR_EFER,
    x86defs::X86X_MSR_STAR,
    x86defs::X86X_MSR_LSTAR,
    x86defs::X86X_MSR_SFMASK,
    x86defs::X86X_MSR_SYSENTER_CS,
    x86defs::X86X_MSR_SYSENTER_ESP,
    x86defs::X86X_MSR_SYSENTER_EIP,
];

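/// MSRs that the guest is allowed to both read and write directly; both intercept
/// bits are cleared in `BackingPrivate::new`. A higher VTL can still re-enable
/// write intercepts for some of these (e.g. the CET SSP MSRs) through
/// `cr_intercept_registration`.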
const MSR_ALLOWED_READ_WRITE: &[u32] = &[
    x86defs::X64_MSR_FS_BASE,
    x86defs::X64_MSR_GS_BASE,
    x86defs::X64_MSR_KERNEL_GS_BASE,
    x86defs::X86X_MSR_SPEC_CTRL,
    x86defs::X86X_MSR_U_CET,
    x86defs::X86X_MSR_S_CET,
    x86defs::X86X_MSR_PL0_SSP,
    x86defs::X86X_MSR_PL1_SSP,
    x86defs::X86X_MSR_PL2_SSP,
    x86defs::X86X_MSR_PL3_SSP,
    x86defs::X86X_MSR_INTERRUPT_SSP_TABLE_ADDR,
    x86defs::X86X_IA32_MSR_XFD,
    x86defs::X86X_IA32_MSR_XFD_ERR,
];

#[derive(Debug, Error)]
#[error("unknown exit {0:#x?}")]
struct UnknownVmxExit(VmxExit);

#[derive(Debug, Error)]
#[error("bad guest state on VP.ENTER")]
struct VmxBadGuestState;

#[derive(Debug, Error)]
#[error("failed to run")]
struct TdxRunVpError(#[source] hcl::ioctl::Error);

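/// Decoded view of the TDG.VP.ENTER exit information. The TDX module reports the
/// VMX exit state through GPRs: RAX holds the entry result, RCX the exit
/// qualification, RDX/R8 the guest linear/physical address for EPT violations,
/// R10/R11 the IDT-vectoring and instruction information, RSI/RDI the CS
/// selector/base, and R12 the CPL.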
#[derive(Debug)]
struct TdxExit<'a>(&'a tdx_tdg_vp_enter_exit_info);

impl TdxExit<'_> {
    fn code(&self) -> TdxVpEnterRaxResult {
        self.0.rax.into()
    }
    fn qualification(&self) -> u64 {
        self.0.rcx
    }
    fn gla(&self) -> Option<u64> {
        if self.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION {
            Some(self.0.rdx)
        } else {
            None
        }
    }
    fn gpa(&self) -> Option<u64> {
        if self.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION {
            Some(self.0.r8)
        } else {
            None
        }
    }
    fn _exit_interruption_info(&self) -> InterruptionInformation {
        (self.0.r9 as u32).into()
    }
    fn _exit_interruption_error_code(&self) -> u32 {
        (self.0.r9 >> 32) as u32
    }
    fn idt_vectoring_info(&self) -> InterruptionInformation {
        (self.0.r10 as u32).into()
    }
    fn idt_vectoring_error_code(&self) -> u32 {
        (self.0.r10 >> 32) as u32
    }
    fn instr_info(&self) -> TdxInstructionInfo {
        self.0.r11.into()
    }
    fn cs(&self) -> SegmentRegister {
        SegmentRegister {
            selector: self.0.rsi as u16,
            base: self.0.rdi,
            limit: (self.0.rsi >> 32) as u32,
            attributes: (self.0.rsi >> 16) as u16,
        }
    }
    fn cpl(&self) -> u8 {
        self.0.r12 as u8 & 3
    }
}

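/// Control registers that are virtualized with a VMCS read shadow. Bits in the
/// guest-owned mask live in the physical guest CR0/CR4; all other bits are tracked
/// in software and surfaced to the guest through the CR read-shadow field.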
#[derive(Debug, Inspect)]
enum ShadowedRegister {
    Cr0,
    Cr4,
}
236
237impl ShadowedRegister {
238 fn name(&self) -> &'static str {
239 match self {
240 Self::Cr0 => "cr0",
241 Self::Cr4 => "cr4",
242 }
243 }
244
245 fn physical_vmcs_field(&self) -> VmcsField {
246 match self {
247 Self::Cr0 => VmcsField::VMX_VMCS_GUEST_CR0,
248 Self::Cr4 => VmcsField::VMX_VMCS_GUEST_CR4,
249 }
250 }
251
252 fn shadow_vmcs_field(&self) -> VmcsField {
253 match self {
254 Self::Cr0 => VmcsField::VMX_VMCS_CR0_READ_SHADOW,
255 Self::Cr4 => VmcsField::VMX_VMCS_CR4_READ_SHADOW,
256 }
257 }
258
259 fn guest_owned_mask(&self) -> u64 {
260 match self {
264 Self::Cr0 => {
265 X64_CR0_ET
266 | x86defs::X64_CR0_MP
267 | x86defs::X64_CR0_EM
268 | x86defs::X64_CR0_TS
269 | x86defs::X64_CR0_WP
270 | x86defs::X64_CR0_AM
271 | X64_CR0_PE
272 | X64_CR0_PG
273 }
274 Self::Cr4 => {
275 x86defs::X64_CR4_VME
276 | x86defs::X64_CR4_PVI
277 | x86defs::X64_CR4_TSD
278 | x86defs::X64_CR4_DE
279 | x86defs::X64_CR4_PSE
280 | x86defs::X64_CR4_PAE
281 | x86defs::X64_CR4_PGE
282 | x86defs::X64_CR4_PCE
283 | x86defs::X64_CR4_FXSR
284 | x86defs::X64_CR4_XMMEXCPT
285 | X64_CR4_UMIP
286 | x86defs::X64_CR4_LA57
287 | x86defs::X64_CR4_RWFSGS
288 | x86defs::X64_CR4_PCIDE
289 | x86defs::X64_CR4_OSXSAVE
290 | x86defs::X64_CR4_SMEP
291 | x86defs::X64_CR4_SMAP
292 | x86defs::X64_CR4_CET
293 }
294 }
295 }
296}
297
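/// A shadowed CR0/CR4 value for one VTL. Writes are validated against
/// `allowed_bits`, guest-owned bits are propagated into the physical VMCS field,
/// and the full value is mirrored into the read-shadow field so guest reads see
/// what was written.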
#[derive(Inspect)]
struct VirtualRegister {
    register: ShadowedRegister,
    vtl: GuestVtl,
    shadow_value: u64,
    allowed_bits: u64,
}
313
314impl VirtualRegister {
315 fn new(reg: ShadowedRegister, vtl: GuestVtl, initial_value: u64, allowed_bits: u64) -> Self {
316 Self {
317 register: reg,
318 vtl,
319 shadow_value: initial_value,
320 allowed_bits,
321 }
322 }
323
324 fn write<'a>(
328 &mut self,
329 value: u64,
330 runner: &mut ProcessorRunner<'a, Tdx<'a>>,
331 ) -> Result<(), vp_state::Error> {
332 tracing::trace!(?self.register, value, "write virtual register");
333
334 if value & !self.allowed_bits != 0 {
335 return Err(vp_state::Error::InvalidValue(
336 value,
337 self.register.name(),
338 "disallowed bit set",
339 ));
340 }
341
342 let old_physical_reg = runner.read_vmcs64(self.vtl, self.register.physical_vmcs_field());
345
346 tracing::trace!(old_physical_reg, "old_physical_reg");
347
348 let guest_owned_mask = self.register.guest_owned_mask();
349 if (old_physical_reg ^ value) & guest_owned_mask != 0 {
350 let new_physical_reg =
351 (old_physical_reg & !guest_owned_mask) | (value & guest_owned_mask);
352
353 tracing::trace!(new_physical_reg, "new_physical_reg");
354
355 runner.write_vmcs64(
356 self.vtl,
357 self.register.physical_vmcs_field(),
358 !0,
359 new_physical_reg,
360 );
361 }
362
363 self.shadow_value = value;
364 runner.write_vmcs64(self.vtl, self.register.shadow_vmcs_field(), !0, value);
365 Ok(())
366 }
367
368 fn read<'a>(&self, runner: &ProcessorRunner<'a, Tdx<'a>>) -> u64 {
369 let physical_reg = runner.read_vmcs64(self.vtl, self.register.physical_vmcs_field());
370
371 let guest_owned_mask = self.register.guest_owned_mask();
374 (self.shadow_value & !self.register.guest_owned_mask()) | (physical_reg & guest_owned_mask)
375 }
376}
377
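/// Timer service that arms the TDX module's per-L2 TSC deadline on behalf of the
/// lower VTLs. `tsc_scale_100ns` is a 64.64 fixed-point multiplier computed in
/// `TdxBackedShared::new` as `(tsc_frequency << 64) / 10_000_000`, so
/// `ref_time_to_tsc` converts 100ns reference-time units into TSC ticks as
/// `(ref_time * scale) >> 64` (e.g. a hypothetical 2 GHz TSC yields 200 ticks per
/// 100ns unit).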
struct TdxTscDeadlineService {
    tsc_scale_100ns: u128,
}

impl TdxTscDeadlineService {
    fn ref_time_to_tsc(&self, ref_time: u64) -> u64 {
        ((ref_time as u128 * self.tsc_scale_100ns) >> 64) as u64
    }

    fn is_before(ref_time: u64, ref_time_last: u64) -> bool {
        let delta = ref_time.wrapping_sub(ref_time_last);
        (delta as i64) < 0
    }
}
414
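// Hardware-virtualized timer path: deadlines are written into the runner's
// `tdx_l2_tsc_deadline_state` so the TDX module arms the TSC deadline itself,
// rather than falling back to the software `VmTimeGuestTimer`.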
415impl hardware_cvm::HardwareIsolatedGuestTimer<TdxBacked> for TdxTscDeadlineService {
416 fn is_hardware_virtualized(&self) -> bool {
417 true
418 }
419
420 fn update_deadline(
424 &self,
425 vp: &mut UhProcessor<'_, TdxBacked>,
426 ref_time_now: u64,
427 ref_time_next: u64,
428 ) {
429 let vp_state = vp
430 .backing
431 .tsc_deadline_state
432 .as_mut()
433 .expect("TdxTscDeadlineService requires tsc_deadline_state");
434
435 if vp_state
437 .deadline_100ns
438 .is_none_or(|last| Self::is_before(ref_time_next, last))
439 {
440 vp_state.deadline_100ns = Some(ref_time_next);
442
443 let state = vp.runner.tdx_l2_tsc_deadline_state_mut();
444 if vp_state
445 .last_deadline_100ns
446 .is_none_or(|last| last != ref_time_next)
447 {
448 let ref_time_from_now = ref_time_next.saturating_sub(ref_time_now);
449 let tsc_delta = self.ref_time_to_tsc(ref_time_from_now);
450 let deadline = safe_intrinsics::rdtsc().wrapping_add(tsc_delta);
451
452 state.deadline = deadline;
453 state.update_deadline = 1;
454
455 tracing::trace!(
456 ref_time_from_now,
457 tsc_delta,
458 deadline,
459 "updating deadline for TDX L2-VM TSC deadline timer"
460 );
461 } else {
462 state.update_deadline = 0;
463 }
464 }
465 }
466
467 fn clear_deadline(&self, vp: &mut UhProcessor<'_, TdxBacked>) {
469 let vp_state = vp
470 .backing
471 .tsc_deadline_state
472 .as_mut()
473 .expect("TdxTscDeadlineService requires tsc_deadline_state");
474
475 vp_state.deadline_100ns = None;
476
477 let state = vp.runner.tdx_l2_tsc_deadline_state_mut();
478 state.update_deadline = 0;
479 }
480
481 fn sync_deadline_state(&self, vp: &mut UhProcessor<'_, TdxBacked>) {
483 let vp_state = vp
484 .backing
485 .tsc_deadline_state
486 .as_mut()
487 .expect("TdxTscDeadlineService requires tsc_deadline_state");
488
489 vp_state.last_deadline_100ns = vp_state.deadline_100ns;
490 }
491}
492
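/// Per-VP bookkeeping for the hardware TSC deadline timer: the deadline currently
/// requested (in 100ns reference time) and the deadline that was in effect for the
/// previous VP.ENTER, used to avoid reprogramming an unchanged deadline.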
#[derive(Inspect, Default)]
struct TdxTscDeadline {
    #[inspect(hex)]
    deadline_100ns: Option<u64>,
    #[inspect(hex)]
    last_deadline_100ns: Option<u64>,
}
504
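/// Per-processor backing state for TDX-isolated partitions.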
#[derive(InspectMut)]
pub struct TdxBacked {
    #[inspect(mut)]
    vtls: VtlArray<TdxVtl, 2>,

    untrusted_synic: Option<ProcessorSynic>,
    #[inspect(hex, iter_by_index)]
    eoi_exit_bitmap: [u64; 4],

    #[inspect(skip)]
    flush_page: user_driver::memory::MemoryBlock,

    #[inspect(flatten)]
    cvm: UhCvmVpState,

    #[inspect(flatten)]
    tsc_deadline_state: Option<TdxTscDeadline>,
}
526
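/// Per-guest-VTL processor state: the virtualized EFER/CR0/CR4, pending
/// interruption information, TDX private registers, TLB flush state, and
/// enter/exit statistics.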
#[derive(InspectMut)]
struct TdxVtl {
    efer: u64,
    cr0: VirtualRegister,
    cr4: VirtualRegister,

    msr_cstar: u64,

    tpr_threshold: u8,
    #[inspect(skip)]
    processor_controls: ProcessorControls,
    #[inspect(skip)]
    interruption_information: InterruptionInformation,
    exception_error_code: u32,
    interruption_set: bool,

    #[inspect(mut)]
    private_regs: TdxPrivateRegs,

    flush_state: TdxFlushState,

    enter_stats: EnterStats,
    exit_stats: ExitStats,
}
556
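/// Register state (segment registers and CR0) cached for the instruction emulator.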
#[derive(Default)]
pub struct TdxEmulationCache {
    segs: [Option<SegmentRegister>; 6],
    cr0: Option<u64>,
}
562
#[derive(Inspect, Default)]
struct EnterStats {
    success: Counter,
    host_routed_async: Counter,
    l2_exit_pending_intr: Counter,
    pending_intr: Counter,
    host_routed_td_vmcall: Counter,
}
571
#[derive(Inspect, Default)]
struct ExitStats {
    io: Counter,
    msr_read: Counter,
    msr_write: Counter,
    ept_violation: Counter,
    cpuid: Counter,
    cr_access: Counter,
    xsetbv: Counter,
    tpr_below_threshold: Counter,
    interrupt_window: Counter,
    nmi_window: Counter,
    vmcall: Counter,
    smi_intr: Counter,
    wbinvd: Counter,
    hw_interrupt: Counter,
    tdcall: Counter,
    hlt: Counter,
    pause: Counter,
    needs_interrupt_reinject: Counter,
    exception: Counter,
    descriptor_table: Counter,
    timer_expired: Counter,
}
596
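/// Indices of the per-VP direct overlay pages (the SIMP and SIEFP synic pages);
/// `Count` is the number of shared pages required per CPU.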
enum UhDirectOverlay {
    Sipp,
    Sifp,
    Count,
}
602
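// Hardware-isolation glue used by the shared CVM code paths: TLB flush lock
// access, pending-event queries, CR0/CR4 reads, intercept message construction,
// and secure-register intercept registration on behalf of VTL1.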
603impl HardwareIsolatedBacking for TdxBacked {
604 fn cvm_state(&self) -> &UhCvmVpState {
605 &self.cvm
606 }
607
608 fn cvm_state_mut(&mut self) -> &mut UhCvmVpState {
609 &mut self.cvm
610 }
611
612 fn cvm_partition_state(shared: &Self::Shared) -> &UhCvmPartitionState {
613 &shared.cvm
614 }
615
616 fn switch_vtl(this: &mut UhProcessor<'_, Self>, _source_vtl: GuestVtl, target_vtl: GuestVtl) {
617 this.backing.cvm_state_mut().exit_vtl = target_vtl;
621 }
622
623 fn translation_registers(
624 &self,
625 this: &UhProcessor<'_, Self>,
626 vtl: GuestVtl,
627 ) -> TranslationRegisters {
628 let cr0 = this.backing.vtls[vtl].cr0.read(&this.runner);
629 let cr4 = this.backing.vtls[vtl].cr4.read(&this.runner);
630 let efer = this.backing.vtls[vtl].efer;
631 let cr3 = this.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR3);
632 let ss = this.read_segment(vtl, TdxSegmentReg::Ss).into();
633 let rflags = this.backing.vtls[vtl].private_regs.rflags;
634
635 TranslationRegisters {
636 cr0,
637 cr4,
638 efer,
639 cr3,
640 ss,
641 rflags,
642 encryption_mode: this.partition.caps.vtom.map_or(
643 virt_support_x86emu::translate::EncryptionMode::None,
644 virt_support_x86emu::translate::EncryptionMode::Vtom,
645 ),
646 }
647 }
648
649 fn tlb_flush_lock_access<'a>(
650 vp_index: Option<VpIndex>,
651 partition: &'a UhPartitionInner,
652 shared: &'a Self::Shared,
653 ) -> impl TlbFlushLockAccess + 'a {
654 TdxTlbLockFlushAccess {
655 vp_index,
656 partition,
657 shared,
658 }
659 }
660
661 fn pending_event_vector(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> Option<u8> {
662 let event_inject = this.backing.vtls[vtl].interruption_information;
663 if event_inject.valid() {
664 Some(event_inject.vector())
665 } else {
666 None
667 }
668 }
669
670 fn set_pending_exception(
671 this: &mut UhProcessor<'_, Self>,
672 vtl: GuestVtl,
673 event: HvX64PendingExceptionEvent,
674 ) {
675 let new_intr = InterruptionInformation::new()
676 .with_valid(true)
677 .with_deliver_error_code(event.deliver_error_code())
678 .with_vector(event.vector().try_into().unwrap())
679 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION);
680
681 this.backing.vtls[vtl].interruption_information = new_intr;
682 this.backing.vtls[vtl].exception_error_code = event.error_code();
683 }
684
685 fn cr0(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
686 this.read_cr0(vtl)
687 }
688
689 fn cr4(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
690 this.read_cr4(vtl)
691 }
692
693 fn intercept_message_state(
694 this: &UhProcessor<'_, Self>,
695 vtl: GuestVtl,
696 include_optional_state: bool,
697 ) -> super::InterceptMessageState {
698 let exit = TdxExit(this.runner.tdx_vp_enter_exit_info());
699 let backing_vtl = &this.backing.vtls[vtl];
700 let shared_gps = this.runner.tdx_enter_guest_gps();
701
702 super::InterceptMessageState {
703 instruction_length_and_cr8: exit.instr_info().length() as u8,
704 cpl: exit.cpl(),
705 efer_lma: backing_vtl.efer & X64_EFER_LMA != 0,
706 cs: exit.cs().into(),
707 rip: backing_vtl.private_regs.rip,
708 rflags: backing_vtl.private_regs.rflags,
709 rax: shared_gps[TdxGp::RAX],
710 rdx: shared_gps[TdxGp::RDX],
711 optional: if include_optional_state {
712 Some(super::InterceptMessageOptionalState {
713 ds: this.read_segment(vtl, TdxSegmentReg::Ds).into(),
714 es: this.read_segment(vtl, TdxSegmentReg::Es).into(),
715 })
716 } else {
717 None
718 },
719 rcx: shared_gps[TdxGp::RCX],
720 rsi: shared_gps[TdxGp::RSI],
721 rdi: shared_gps[TdxGp::RDI],
722 }
723 }
724
725 fn cr_intercept_registration(
726 this: &mut UhProcessor<'_, Self>,
727 intercept_control: hvdef::HvRegisterCrInterceptControl,
728 ) {
729 let vtl = GuestVtl::Vtl0;
731 let intercept_masks = &this
732 .backing
733 .cvm_state()
734 .vtl1
735 .as_ref()
736 .unwrap()
737 .reg_intercept;
738
739 this.runner.write_vmcs64(
741 vtl,
742 VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK,
743 !0,
744 this.shared.cr_guest_host_mask(ShadowedRegister::Cr0)
745 | if intercept_control.cr0_write() {
746 intercept_masks.cr0_mask
747 } else {
748 0
749 },
750 );
751 this.runner.write_vmcs64(
752 vtl,
753 VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK,
754 !0,
755 this.shared.cr_guest_host_mask(ShadowedRegister::Cr4)
756 | if intercept_control.cr4_write() {
757 intercept_masks.cr4_mask
758 } else {
759 0
760 },
761 );
762
763 let intercept_tables = intercept_control.gdtr_write()
765 | intercept_control.idtr_write()
766 | intercept_control.ldtr_write()
767 | intercept_control.tr_write();
768 this.runner.write_vmcs32(
769 vtl,
770 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
771 SecondaryProcessorControls::new()
772 .with_descriptor_table_exiting(true)
773 .into_bits(),
774 SecondaryProcessorControls::new()
775 .with_descriptor_table_exiting(intercept_tables)
776 .into_bits(),
777 );
778
779 this.runner.set_msr_bit(
783 vtl,
784 x86defs::X86X_MSR_S_CET,
785 true,
786 intercept_control.msr_scet_write(),
787 );
788 this.runner.set_msr_bit(
789 vtl,
790 x86defs::X86X_MSR_PL0_SSP,
791 true,
792 intercept_control.msr_pls_ssp_write(),
793 );
794 this.runner.set_msr_bit(
795 vtl,
796 x86defs::X86X_MSR_PL1_SSP,
797 true,
798 intercept_control.msr_pls_ssp_write(),
799 );
800 this.runner.set_msr_bit(
801 vtl,
802 x86defs::X86X_MSR_PL2_SSP,
803 true,
804 intercept_control.msr_pls_ssp_write(),
805 );
806 this.runner.set_msr_bit(
807 vtl,
808 x86defs::X86X_MSR_PL3_SSP,
809 true,
810 intercept_control.msr_pls_ssp_write(),
811 );
812 this.runner.set_msr_bit(
813 vtl,
814 x86defs::X86X_MSR_INTERRUPT_SSP_TABLE_ADDR,
815 true,
816 intercept_control.msr_pls_ssp_write(),
817 );
818 }
819
820 fn is_interrupt_pending(
821 this: &mut UhProcessor<'_, Self>,
822 vtl: GuestVtl,
823 check_rflags: bool,
824 dev: &impl CpuIo,
825 ) -> bool {
826 let backing_vtl = &this.backing.vtls[vtl];
827 if backing_vtl.interruption_information.valid()
828 && backing_vtl.interruption_information.interruption_type() == INTERRUPT_TYPE_NMI
829 {
830 return true;
831 }
832
833 let (vector, ppr) = if this.backing.cvm.lapics[vtl].lapic.is_offloaded() {
834 let vector = backing_vtl.private_regs.rvi;
835 let ppr = std::cmp::max(
836 backing_vtl.private_regs.svi.into(),
837 this.runner.tdx_apic_page(vtl).tpr.value,
838 );
839 (vector, ppr)
840 } else {
841 let lapic = &mut this.backing.cvm.lapics[vtl].lapic;
842 let vector = lapic.next_irr().unwrap_or(0);
843 let ppr = lapic
844 .access(&mut TdxApicClient {
845 partition: this.partition,
846 apic_page: this.runner.tdx_apic_page_mut(vtl),
847 dev,
848 vmtime: &this.vmtime,
849 vtl,
850 })
851 .get_ppr();
852 (vector, ppr)
853 };
854 let vector_priority = (vector as u32) >> 4;
855 let ppr_priority = ppr >> 4;
856
857 if vector_priority <= ppr_priority {
858 return false;
859 }
860
861 if check_rflags && !RFlags::from_bits(backing_vtl.private_regs.rflags).interrupt_enable() {
862 return false;
863 }
864
865 let interruptibility: Interruptibility = this
866 .runner
867 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
868 .into();
869
870 if interruptibility.blocked_by_sti() || interruptibility.blocked_by_movss() {
871 return false;
872 }
873
874 true
875 }
876
877 fn untrusted_synic_mut(&mut self) -> Option<&mut ProcessorSynic> {
878 self.untrusted_synic.as_mut()
879 }
880
881 fn update_deadline(this: &mut UhProcessor<'_, Self>, ref_time_now: u64, next_ref_time: u64) {
882 this.shared
883 .guest_timer
884 .update_deadline(this, ref_time_now, next_ref_time);
885 }
886
887 fn clear_deadline(this: &mut UhProcessor<'_, Self>) {
888 this.shared.guest_timer.clear_deadline(this);
889 }
890}
891
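/// Partition-wide TDX state: the shared CVM state, the optional untrusted synic,
/// per-VTL TLB flush tracking, the VTL each VP is currently running (2 while a VP
/// is back in VTL2), the CR4 bits the guest may set, and the guest timer
/// implementation.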
#[derive(Inspect)]
pub struct TdxBackedShared {
    #[inspect(flatten)]
    pub(crate) cvm: UhCvmPartitionState,
    pub(crate) untrusted_synic: Option<GlobalSynic>,
    flush_state: VtlArray<TdxPartitionFlushState, 2>,
    #[inspect(iter_by_index)]
    active_vtl: Vec<AtomicU8>,
    cr4_allowed_bits: u64,
    #[inspect(skip)]
    guest_timer: Box<dyn hardware_cvm::HardwareIsolatedGuestTimer<TdxBacked>>,
}
910
911impl TdxBackedShared {
912 pub(crate) fn new(
913 partition_params: &UhPartitionNewParams<'_>,
914 params: BackingSharedParams<'_>,
915 ) -> Result<Self, crate::Error> {
916 let untrusted_synic = (partition_params.handle_synic && !partition_params.hide_isolation)
922 .then(|| GlobalSynic::new(partition_params.topology.vp_count()));
923
924 let cr4_fixed1 = params.hcl.read_vmx_cr4_fixed1();
926 let cr4_allowed_bits =
927 (ShadowedRegister::Cr4.guest_owned_mask() | X64_CR4_MCE) & cr4_fixed1;
928
929 let cvm = params.cvm_state.unwrap();
930
931 let guest_timer: Box<dyn hardware_cvm::HardwareIsolatedGuestTimer<TdxBacked>> =
933 match params.lower_vtl_timer_virt_available {
934 true => {
935 let tsc_frequency = get_tsc_frequency(IsolationType::Tdx).unwrap();
938 const NUM_100NS_IN_SEC: u128 = 10_000_000;
939 let tsc_scale_100ns = ((tsc_frequency as u128) << 64) / NUM_100NS_IN_SEC;
940
941 tracing::info!(CVM_ALLOWED, "enabling TDX L2-VM TSC deadline timer service");
942
943 Box::new(TdxTscDeadlineService { tsc_scale_100ns })
944 }
945 false => {
946 Box::new(hardware_cvm::VmTimeGuestTimer)
948 }
949 };
950
951 Ok(Self {
952 untrusted_synic,
953 flush_state: VtlArray::from_fn(|_| TdxPartitionFlushState::new()),
954 cvm,
955 active_vtl: std::iter::repeat_n(2, partition_params.topology.vp_count() as usize)
957 .map(AtomicU8::new)
958 .collect(),
959 cr4_allowed_bits,
960 guest_timer,
961 })
962 }
963
964 fn cr_guest_host_mask(&self, reg: ShadowedRegister) -> u64 {
966 match reg {
967 ShadowedRegister::Cr0 => {
968 !ShadowedRegister::Cr0.guest_owned_mask() | X64_CR0_PE | X64_CR0_PG
969 }
970 ShadowedRegister::Cr4 => {
971 !(ShadowedRegister::Cr4.guest_owned_mask() & self.cr4_allowed_bits)
972 }
973 }
974 }
975}
976
977impl TdxBacked {
978 pub fn shared_pages_required_per_cpu() -> u64 {
981 UhDirectOverlay::Count as u64
982 }
983}
984
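// Overlay-page access used when mapping the untrusted synic's SIMP/SIEFP pages:
// the pages are simply locked in VTL0 guest memory; no VTL protections are
// checked or applied.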
985struct UntrustedSynicVtlProts<'a>(&'a GuestMemory);
989
990impl hv1_emulator::VtlProtectAccess for UntrustedSynicVtlProts<'_> {
991 fn check_modify_and_lock_overlay_page(
992 &mut self,
993 gpn: u64,
994 _check_perms: hvdef::HvMapGpaFlags,
995 _new_perms: Option<hvdef::HvMapGpaFlags>,
996 ) -> Result<guestmem::LockedPages, HvError> {
997 self.0
998 .lock_gpns(false, &[gpn])
999 .map_err(|_| HvError::OperationFailed)
1000 }
1001
1002 fn unlock_overlay_page(&mut self, _gpn: u64) -> Result<(), HvError> {
1003 Ok(())
1004 }
1005}
1006
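// Per-VP backing implementation: VMCS and MSR-bitmap setup in `new`, direct
// overlay (SIMP/SIEFP) registration and APIC offload enablement in `init`, and
// the entry point into the VP run loop in `run_vp`.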
1007#[expect(private_interfaces)]
1008impl BackingPrivate for TdxBacked {
1009 type HclBacking<'tdx> = Tdx<'tdx>;
1010 type Shared = TdxBackedShared;
1011 type EmulationCache = TdxEmulationCache;
1012
1013 fn shared(shared: &BackingShared) -> &Self::Shared {
1014 let BackingShared::Tdx(shared) = shared else {
1015 unreachable!()
1016 };
1017 shared
1018 }
1019
1020 fn new(
1021 params: super::BackingParams<'_, '_, Self>,
1022 shared: &TdxBackedShared,
1023 ) -> Result<Self, crate::Error> {
1024 params.runner.write_vmcs32(
1032 GuestVtl::Vtl0,
1033 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
1034 SecondaryProcessorControls::new()
1035 .with_mode_based_execute_control(true)
1036 .into(),
1037 SecondaryProcessorControls::new()
1038 .with_mode_based_execute_control(true)
1039 .into(),
1040 );
1041
1042 let controls = TdxL2Ctls::new()
1043 .with_enable_shared_ept(!shared.cvm.hide_isolation)
1045 .with_enable_tdvmcall(shared.untrusted_synic.is_none() && !shared.cvm.hide_isolation);
1047
1048 params
1049 .runner
1050 .set_l2_ctls(GuestVtl::Vtl0, controls)
1051 .map_err(crate::Error::FailedToSetL2Ctls)?;
1052
1053 for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
1054 let initial_cr0 = params
1057 .runner
1058 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0);
1059 assert_eq!(initial_cr0, X64_CR0_PE | X64_CR0_NE);
1060
1061 params.runner.write_vmcs64(
1065 vtl,
1066 VmcsField::VMX_VMCS_CR0_READ_SHADOW,
1067 !0,
1068 X64_CR0_PE | X64_CR0_NE,
1069 );
1070 params.runner.write_vmcs64(
1071 vtl,
1072 VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK,
1073 !0,
1074 shared.cr_guest_host_mask(ShadowedRegister::Cr0),
1075 );
1076
1077 let initial_cr4 = params
1078 .runner
1079 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4);
1080 assert_eq!(initial_cr4, X64_CR4_MCE | X64_CR4_VMXE);
1081
1082 params
1083 .runner
1084 .write_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_READ_SHADOW, !0, 0);
1085 params.runner.write_vmcs64(
1086 vtl,
1087 VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK,
1088 !0,
1089 shared.cr_guest_host_mask(ShadowedRegister::Cr4),
1090 );
1091
1092 for msr in MSR_ALLOWED_READ {
1096 params.runner.set_msr_bit(vtl, *msr, false, false);
1097 }
1098 for msr in MSR_ALLOWED_READ_WRITE {
1099 params.runner.set_msr_bit(vtl, *msr, false, false);
1100 params.runner.set_msr_bit(vtl, *msr, true, false);
1101 }
1102
1103 if params.partition.intercept_debug_exceptions {
1105 if cfg!(feature = "gdb") {
1106 let initial_exception_bitmap = params
1107 .runner
1108 .read_vmcs32(vtl, VmcsField::VMX_VMCS_EXCEPTION_BITMAP);
1109
1110 let exception_bitmap =
1111 initial_exception_bitmap | (1 << x86defs::Exception::DEBUG.0);
1112
1113 params.runner.write_vmcs32(
1114 vtl,
1115 VmcsField::VMX_VMCS_EXCEPTION_BITMAP,
1116 !0,
1117 exception_bitmap,
1118 );
1119 } else {
1120 return Err(super::Error::InvalidDebugConfiguration);
1121 }
1122 }
1123 }
1124
1125 let flush_page = shared
1126 .cvm
1127 .private_dma_client
1128 .allocate_dma_buffer(HV_PAGE_SIZE as usize)
1129 .map_err(crate::Error::AllocateTlbFlushPage)?;
1130
1131 let untrusted_synic = shared
1132 .untrusted_synic
1133 .as_ref()
1134 .map(|synic| synic.add_vp(params.vp_info.base.vp_index));
1135
1136 Ok(Self {
1137 vtls: VtlArray::from_fn(|vtl| {
1138 let vtl: GuestVtl = vtl.try_into().unwrap();
1139 TdxVtl {
1140 efer: params
1141 .runner
1142 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER),
1143 cr0: VirtualRegister::new(
1144 ShadowedRegister::Cr0,
1145 vtl,
1146 params
1147 .runner
1148 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0),
1149 !0,
1150 ),
1151 cr4: VirtualRegister::new(
1152 ShadowedRegister::Cr4,
1153 vtl,
1154 params
1155 .runner
1156 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4),
1157 shared.cr4_allowed_bits,
1158 ),
1159 msr_cstar: 0,
1160 tpr_threshold: 0,
1161 processor_controls: params
1162 .runner
1163 .read_vmcs32(vtl, VmcsField::VMX_VMCS_PROCESSOR_CONTROLS)
1164 .into(),
1165 interruption_information: Default::default(),
1166 exception_error_code: 0,
1167 interruption_set: false,
1168 flush_state: TdxFlushState::new(),
1169 private_regs: TdxPrivateRegs::new(vtl),
1170 enter_stats: Default::default(),
1171 exit_stats: Default::default(),
1172 }
1173 }),
1174 untrusted_synic,
1175 eoi_exit_bitmap: [0; 4],
1176 flush_page,
1177 cvm: UhCvmVpState::new(
1178 &shared.cvm,
1179 params.partition,
1180 params.vp_info,
1181 UhDirectOverlay::Count as usize,
1182 )?,
1183 tsc_deadline_state: shared
1184 .guest_timer
1185 .is_hardware_virtualized()
1186 .then(TdxTscDeadline::default),
1187 })
1188 }
1189
1190 type StateAccess<'p, 'a>
1191 = UhVpStateAccess<'a, 'p, Self>
1192 where
1193 Self: 'a + 'p,
1194 'p: 'a;
1195
1196 fn access_vp_state<'a, 'p>(
1197 this: &'a mut UhProcessor<'p, Self>,
1198 vtl: GuestVtl,
1199 ) -> Self::StateAccess<'p, 'a> {
1200 UhVpStateAccess::new(this, vtl)
1201 }
1202
1203 fn init(this: &mut UhProcessor<'_, Self>) {
1204 let pfns = &this.backing.cvm.direct_overlay_handle.pfns();
1207 let reg = |gpn| {
1208 u64::from(
1209 HvSynicSimpSiefp::new()
1210 .with_base_gpn(gpn)
1211 .with_enabled(true),
1212 )
1213 };
1214
1215 let values: &[(HvX64RegisterName, u64); 2] = &[
1216 (
1217 HvX64RegisterName::Sifp,
1218 reg(pfns[UhDirectOverlay::Sifp as usize]),
1219 ),
1220 (
1221 HvX64RegisterName::Sipp,
1222 reg(pfns[UhDirectOverlay::Sipp as usize]),
1223 ),
1224 ];
1225
1226 let reg_count = if let Some(synic) = &mut this.backing.untrusted_synic {
1227 let prot_access = &mut UntrustedSynicVtlProts(&this.partition.gm[GuestVtl::Vtl0]);
1228
1229 synic
1230 .set_simp(reg(pfns[UhDirectOverlay::Sipp as usize]), prot_access)
1231 .unwrap();
1232 synic
1233 .set_siefp(reg(pfns[UhDirectOverlay::Sifp as usize]), prot_access)
1234 .unwrap();
1235 1
1239 } else {
1240 2
1241 };
1242
1243 this.runner
1244 .set_vp_registers_hvcall(Vtl::Vtl0, &values[..reg_count])
1245 .expect("set_vp_registers hypercall for direct overlays should succeed");
1246
1247 this.set_apic_offload(GuestVtl::Vtl0, true);
1249 this.backing.cvm.lapics[GuestVtl::Vtl0]
1250 .lapic
1251 .enable_offload();
1252
1253 this.set_apic_offload(GuestVtl::Vtl1, false);
1255
1256 for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
1261 let registers = Registers::at_reset(&this.partition.caps, &this.inner.vp_info);
1262
1263 let mut state = this.access_state(vtl.into());
1264 state
1265 .set_registers(®isters)
1266 .expect("Resetting to architectural state should succeed");
1267
1268 state.commit().expect("committing state should succeed");
1269 }
1270
1271 *this.runner.fx_state_mut() =
1274 vp::Xsave::at_reset(&this.partition.caps, &this.inner.vp_info).fxsave();
1275 }
1276
1277 async fn run_vp(
1278 this: &mut UhProcessor<'_, Self>,
1279 dev: &impl CpuIo,
1280 _stop: &mut virt::StopVp<'_>,
1281 ) -> Result<(), VpHaltReason> {
1282 this.run_vp_tdx(dev).await
1283 }
1284
1285 fn poll_apic(this: &mut UhProcessor<'_, Self>, vtl: GuestVtl, scan_irr: bool) {
1286 if !this.try_poll_apic(vtl, scan_irr) {
1287 tracing::info!(CVM_ALLOWED, "disabling APIC offload due to auto EOI");
1288 let page = this.runner.tdx_apic_page_mut(vtl);
1289 let (irr, isr) = pull_apic_offload(page);
1290
1291 this.backing.cvm.lapics[vtl]
1292 .lapic
1293 .disable_offload(&irr, &isr);
1294 this.set_apic_offload(vtl, false);
1295 this.try_poll_apic(vtl, false);
1296 }
1297 }
1298
1299 fn request_extint_readiness(_this: &mut UhProcessor<'_, Self>) {
1300 unreachable!("extint managed through software apic")
1301 }
1302
1303 fn request_untrusted_sint_readiness(this: &mut UhProcessor<'_, Self>, sints: u16) {
1304 if let Some(synic) = &mut this.backing.untrusted_synic {
1305 synic.request_sint_readiness(sints);
1306 } else {
1307 tracelimit::error_ratelimited!(CVM_ALLOWED, "untrusted synic is not configured");
1308 }
1309 }
1310
1311 fn hv(&self, vtl: GuestVtl) -> Option<&ProcessorVtlHv> {
1312 Some(&self.cvm.hv[vtl])
1313 }
1314
1315 fn hv_mut(&mut self, vtl: GuestVtl) -> Option<&mut ProcessorVtlHv> {
1316 Some(&mut self.cvm.hv[vtl])
1317 }
1318
1319 fn handle_vp_start_enable_vtl_wake(this: &mut UhProcessor<'_, Self>, vtl: GuestVtl) {
1320 this.hcvm_handle_vp_start_enable_vtl(vtl)
1321 }
1322
1323 fn vtl1_inspectable(this: &UhProcessor<'_, Self>) -> bool {
1324 this.hcvm_vtl1_inspectable()
1325 }
1326
1327 fn process_interrupts(
1328 this: &mut UhProcessor<'_, Self>,
1329 scan_irr: VtlArray<bool, 2>,
1330 first_scan_irr: &mut bool,
1331 dev: &impl CpuIo,
1332 ) -> bool {
1333 this.cvm_process_interrupts(scan_irr, first_scan_irr, dev)
1334 }
1335}
1336
1337impl UhProcessor<'_, TdxBacked> {
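    /// Polls the local APIC for this VTL and pushes any new state into the
    /// virtual APIC page. Returns `false` if the software APIC reports
    /// `OffloadNotSupported` while pushing state, in which case `poll_apic`
    /// disables offload and polls again.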
1338 fn try_poll_apic(&mut self, vtl: GuestVtl, scan_irr: bool) -> bool {
1341 let mut scan = TdxApicScanner {
1342 processor_controls: self.backing.vtls[vtl]
1343 .processor_controls
1344 .with_nmi_window_exiting(false)
1345 .with_interrupt_window_exiting(false),
1346 vp: self,
1347 tpr_threshold: 0,
1348 };
1349
1350 hardware_cvm::apic::poll_apic_core(&mut scan, vtl, scan_irr);
1352
1353 let TdxApicScanner {
1354 vp: _,
1355 processor_controls: new_processor_controls,
1356 tpr_threshold: new_tpr_threshold,
1357 } = scan;
1358
1359 if self.backing.cvm.lapics[vtl].activity != MpState::WaitForSipi
1361 && self.backing.vtls[vtl].tpr_threshold != new_tpr_threshold
1362 {
1363 tracing::trace!(new_tpr_threshold, ?vtl, "setting tpr threshold");
1364 self.runner.write_vmcs32(
1365 vtl,
1366 VmcsField::VMX_VMCS_TPR_THRESHOLD,
1367 !0,
1368 new_tpr_threshold.into(),
1369 );
1370 self.backing.vtls[vtl].tpr_threshold = new_tpr_threshold;
1371 }
1372
1373 if self.backing.vtls[vtl].processor_controls != new_processor_controls {
1374 tracing::trace!(?new_processor_controls, ?vtl, "requesting window change");
1375 self.runner.write_vmcs32(
1376 vtl,
1377 VmcsField::VMX_VMCS_PROCESSOR_CONTROLS,
1378 !0,
1379 new_processor_controls.into(),
1380 );
1381 self.backing.vtls[vtl].processor_controls = new_processor_controls;
1382 }
1383
1384 if vtl == GuestVtl::Vtl0 {
1386 let mut update_rvi = false;
1387 let r: Result<(), OffloadNotSupported> = self.backing.cvm.lapics[vtl]
1388 .lapic
1389 .push_to_offload(|irr, isr, tmr| {
1390 let apic_page = self.runner.tdx_apic_page_mut(vtl);
1391
1392 for (((irr, page_irr), isr), page_isr) in irr
1393 .iter()
1394 .zip(&mut apic_page.irr)
1395 .zip(isr)
1396 .zip(&mut apic_page.isr)
1397 {
1398 page_irr.value |= *irr;
1399 page_isr.value |= *isr;
1400 }
1401
1402 let svi = top_vector(&apic_page.isr);
1404 self.backing.vtls[vtl].private_regs.svi = svi;
1405 update_rvi = true;
1406
1407 let fields = [
1409 VmcsField::VMX_VMCS_EOI_EXIT_0,
1410 VmcsField::VMX_VMCS_EOI_EXIT_1,
1411 VmcsField::VMX_VMCS_EOI_EXIT_2,
1412 VmcsField::VMX_VMCS_EOI_EXIT_3,
1413 ];
1414 for ((&field, eoi_exit), (i, tmr)) in fields
1415 .iter()
1416 .zip(&mut self.backing.eoi_exit_bitmap)
1417 .zip(tmr.chunks_exact(2).enumerate())
1418 {
1419 let tmr = tmr[0] as u64 | ((tmr[1] as u64) << 32);
1420 if *eoi_exit != tmr {
1421 self.runner.write_vmcs64(vtl, field, !0, tmr);
1422 *eoi_exit = tmr;
1423 self.runner.proxy_irr_exit_mut_vtl0()[i * 2] = tmr as u32;
1429 self.runner.proxy_irr_exit_mut_vtl0()[i * 2 + 1] = (tmr >> 32) as u32;
1430 }
1431 }
1432 });
1433
1434 if let Err(OffloadNotSupported) = r {
1435 return false;
1438 }
1439
1440 if update_rvi {
1441 let page = self.runner.tdx_apic_page_mut(vtl);
1442 let rvi = top_vector(&page.irr);
1443 self.backing.vtls[vtl].private_regs.rvi = rvi;
1444 }
1445 }
1446
1447 if (self.backing.cvm.lapics[vtl].activity != MpState::Running)
1449 && self.backing.cvm.lapics[vtl].lapic.is_offloaded()
1450 && self.backing.vtls[vtl].private_regs.rvi != 0
1451 {
1452 self.backing.cvm.lapics[vtl].activity = MpState::Running;
1465 }
1466
1467 true
1468 }
1469
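    /// Runs `f` with APIC offload temporarily disabled: IRR/ISR are pulled out of
    /// the virtual APIC page into the software APIC first, and offload is
    /// re-enabled afterwards.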
1470 fn access_apic_without_offload<R>(
1471 &mut self,
1472 vtl: GuestVtl,
1473 f: impl FnOnce(&mut Self) -> R,
1474 ) -> R {
1475 let offloaded = self.backing.cvm.lapics[vtl].lapic.is_offloaded();
1476 if offloaded {
1477 let (irr, isr) = pull_apic_offload(self.runner.tdx_apic_page_mut(vtl));
1478 self.backing.cvm.lapics[vtl]
1479 .lapic
1480 .disable_offload(&irr, &isr);
1481 }
1482 let r = f(self);
1483 if offloaded {
1484 self.backing.cvm.lapics[vtl].lapic.enable_offload();
1485 }
1486 r
1487 }
1488
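    /// Enables or disables hardware APIC virtualization for a VTL. When enabled,
    /// only the TPR, EOI, and SELF_IPI x2APIC MSRs are left unintercepted and
    /// virtual interrupt delivery is turned on; when enabling, any pending
    /// external-interrupt injection request is cleared.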
1489 fn set_apic_offload(&mut self, vtl: GuestVtl, offload: bool) {
1490 let offload_bitmap = if offload {
1492 (1 << x86defs::apic::ApicRegister::TPR.0)
1493 | (1 << x86defs::apic::ApicRegister::EOI.0)
1494 | (1 << x86defs::apic::ApicRegister::SELF_IPI.0)
1495 } else {
1496 0
1497 };
1498 for offset in [0, 0x100] {
1500 self.runner
1501 .write_msr_bitmap(vtl, offset + X2APIC_MSR_BASE / 64, !0, !offload_bitmap);
1502 }
1503
1504 self.runner.write_vmcs32(
1506 vtl,
1507 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
1508 SecondaryProcessorControls::new()
1509 .with_virtual_interrupt_delivery(true)
1510 .into(),
1511 SecondaryProcessorControls::new()
1512 .with_virtual_interrupt_delivery(offload)
1513 .into(),
1514 );
1515
1516 if offload
1518 && self.backing.vtls[vtl]
1519 .interruption_information
1520 .interruption_type()
1521 == INTERRUPT_TYPE_EXTERNAL
1522 {
1523 self.backing.vtls[vtl]
1524 .interruption_information
1525 .set_valid(false);
1526 }
1527 }
1528}
1529
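/// Adapter handed to the shared hardware-CVM APIC polling code; it accumulates
/// the interrupt/NMI window and TPR-threshold changes that `try_poll_apic` then
/// writes back to the VMCS.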
1530struct TdxApicScanner<'a, 'b> {
1531 vp: &'a mut UhProcessor<'b, TdxBacked>,
1532 processor_controls: ProcessorControls,
1533 tpr_threshold: u8,
1534}
1535
1536impl<'b> hardware_cvm::apic::ApicBacking<'b, TdxBacked> for TdxApicScanner<'_, 'b> {
1537 fn vp(&mut self) -> &mut UhProcessor<'b, TdxBacked> {
1538 self.vp
1539 }
1540
1541 fn handle_interrupt(&mut self, vtl: GuestVtl, vector: u8) {
1542 if self.vp.backing.cvm.lapics[vtl].activity == MpState::Idle {
1544 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1545 }
1546 if self.vp.backing.vtls[vtl].interruption_information.valid()
1550 && self.vp.backing.vtls[vtl]
1551 .interruption_information
1552 .interruption_type()
1553 != INTERRUPT_TYPE_EXTERNAL
1554 {
1555 self.processor_controls.set_interrupt_window_exiting(true);
1556 return;
1557 }
1558
1559 let interruptibility: Interruptibility = self
1561 .vp
1562 .runner
1563 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
1564 .into();
1565
1566 let rflags = RFlags::from(self.vp.backing.vtls[vtl].private_regs.rflags);
1567 if !rflags.interrupt_enable()
1568 || interruptibility.blocked_by_sti()
1569 || interruptibility.blocked_by_movss()
1570 {
1571 self.processor_controls.set_interrupt_window_exiting(true);
1572 return;
1573 }
1574
1575 let priority = vector >> 4;
1576 let apic = self.vp.runner.tdx_apic_page(vtl);
1577 if (apic.tpr.value as u8 >> 4) >= priority {
1578 self.tpr_threshold = priority;
1579 return;
1580 }
1581
1582 self.vp.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
1583 .with_valid(true)
1584 .with_vector(vector)
1585 .with_interruption_type(INTERRUPT_TYPE_EXTERNAL);
1586
1587 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1588 }
1589
1590 fn handle_nmi(&mut self, vtl: GuestVtl) {
1591 if self.vp.backing.cvm.lapics[vtl].activity == MpState::Idle {
1594 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1595 }
1596 if self.vp.backing.vtls[vtl].interruption_information.valid()
1600 && self.vp.backing.vtls[vtl]
1601 .interruption_information
1602 .interruption_type()
1603 != INTERRUPT_TYPE_EXTERNAL
1604 {
1605 self.processor_controls.set_nmi_window_exiting(true);
1606 return;
1607 }
1608
1609 let interruptibility: Interruptibility = self
1610 .vp
1611 .runner
1612 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
1613 .into();
1614
1615 if interruptibility.blocked_by_nmi()
1616 || interruptibility.blocked_by_sti()
1617 || interruptibility.blocked_by_movss()
1618 {
1619 self.processor_controls.set_nmi_window_exiting(true);
1620 return;
1621 }
1622
1623 self.vp.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
1624 .with_valid(true)
1625 .with_vector(2)
1626 .with_interruption_type(INTERRUPT_TYPE_NMI);
1627
1628 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1629 }
1630
1631 fn handle_sipi(&mut self, vtl: GuestVtl, cs: SegmentRegister) {
1632 self.vp.write_segment(vtl, TdxSegmentReg::Cs, cs).unwrap();
1633 self.vp.backing.vtls[vtl].private_regs.rip = 0;
1634 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1635 }
1636}
1637
1638impl UhProcessor<'_, TdxBacked> {
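    /// Runs the VP once: programs any pending interruption into the VMCS, sets
    /// the halt and interrupt-offload flags, enters the guest through the TDX
    /// module, then records enter statistics and dispatches the resulting VMX
    /// exit.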
1639 async fn run_vp_tdx(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason> {
1640 let next_vtl = self.backing.cvm.exit_vtl;
1641
1642 if self.backing.vtls[next_vtl].interruption_information.valid() {
1643 tracing::trace!(
1644 vector = self.backing.vtls[next_vtl]
1645 .interruption_information
1646 .vector(),
1647 vp_index = self.vp_index().index(),
1648 ?next_vtl,
1649 "injecting interrupt"
1650 );
1651
1652 self.runner.write_vmcs32(
1653 next_vtl,
1654 VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO,
1655 !0,
1656 self.backing.vtls[next_vtl].interruption_information.into(),
1657 );
1658 if self.backing.vtls[next_vtl]
1659 .interruption_information
1660 .deliver_error_code()
1661 {
1662 self.runner.write_vmcs32(
1663 next_vtl,
1664 VmcsField::VMX_VMCS_ENTRY_EXCEPTION_ERROR_CODE,
1665 !0,
1666 self.backing.vtls[next_vtl].exception_error_code,
1667 );
1668 }
1669 self.backing.vtls[next_vtl].interruption_set = true;
1670 } else if self.backing.vtls[next_vtl].interruption_set {
1671 self.runner
1672 .write_vmcs32(next_vtl, VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO, !0, 0);
1673 self.backing.vtls[next_vtl].interruption_set = false;
1674 }
1675
1676 self.shared.active_vtl[self.vp_index().index() as usize]
1690 .store(next_vtl as u8, Ordering::SeqCst);
1691
1692 self.do_tlb_flush(next_vtl);
1693 self.unlock_tlb_lock(Vtl::Vtl2);
1694 let tlb_halt = self.should_halt_for_tlb_unlock(next_vtl);
1695
1696 let activity = self.backing.cvm.lapics[next_vtl].activity;
1700 let kernel_known_state =
1701 matches!(activity, MpState::Running | MpState::Halted | MpState::Idle);
1702 let halted_other = tlb_halt || !kernel_known_state;
1703
1704 self.runner
1705 .set_halted(activity != MpState::Running || tlb_halt);
1706
1707 let offload_enabled = self.backing.cvm.lapics[next_vtl].lapic.can_offload_irr()
1719 && !self.backing.vtls[next_vtl].interruption_information.valid()
1720 && self.backing.vtls[next_vtl]
1721 .private_regs
1722 .vp_entry_flags
1723 .invd_translations()
1724 == 0;
1725 let x2apic_enabled = self.backing.cvm.lapics[next_vtl].lapic.x2apic_enabled();
1726
1727 let offload_flags = hcl_intr_offload_flags::new()
1728 .with_offload_intr_inject(offload_enabled)
1729 .with_offload_x2apic(offload_enabled && x2apic_enabled)
1730 .with_halted_other(halted_other)
1731 .with_halted_hlt(activity == MpState::Halted)
1732 .with_halted_idle(activity == MpState::Idle);
1733
1734 *self.runner.offload_flags_mut() = offload_flags;
1735
1736 self.runner
1737 .write_private_regs(&self.backing.vtls[next_vtl].private_regs);
1738
1739 let has_intercept = self
1740 .runner
1741 .run()
1742 .map_err(|e| dev.fatal_error(TdxRunVpError(e).into()))?;
1743
1744 self.shared.active_vtl[self.vp_index().index() as usize].store(2, Ordering::Relaxed);
1748
1749 let entered_from_vtl = next_vtl;
1750 self.runner
1751 .read_private_regs(&mut self.backing.vtls[entered_from_vtl].private_regs);
1752
1753 self.shared.guest_timer.sync_deadline_state(self);
1755
1756 if offload_enabled && kernel_known_state {
1758 let offload_flags = self.runner.offload_flags_mut();
1759
1760 self.backing.cvm.lapics[entered_from_vtl].activity =
1761 match (offload_flags.halted_hlt(), offload_flags.halted_idle()) {
1762 (false, false) => MpState::Running,
1763 (true, false) => MpState::Halted,
1764 (false, true) => MpState::Idle,
1765 (true, true) => {
1766 tracelimit::warn_ratelimited!(
1767 CVM_ALLOWED,
1768 "Kernel indicates VP is both halted and idle!"
1769 );
1770 activity
1771 }
1772 };
1773 }
1774
1775 if !has_intercept {
1776 return Ok(());
1777 }
1778
1779 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
1780
1781 if exit_info.code().tdx_exit() >= TdCallResultCode::PENDING_INTERRUPT {
1783 self.backing.vtls[entered_from_vtl]
1784 .enter_stats
1785 .pending_intr
1786 .increment();
1787 return Ok(());
1788 }
1789
1790 self.backing.vtls[entered_from_vtl]
1792 .private_regs
1793 .vp_entry_flags
1794 .set_invd_translations(0);
1795
1796 let stat = match exit_info.code().tdx_exit() {
1798 TdCallResultCode::SUCCESS => {
1799 &mut self.backing.vtls[entered_from_vtl].enter_stats.success
1800 }
1801 TdCallResultCode::L2_EXIT_HOST_ROUTED_ASYNC => {
1802 &mut self.backing.vtls[entered_from_vtl]
1803 .enter_stats
1804 .host_routed_async
1805 }
1806 TdCallResultCode::L2_EXIT_PENDING_INTERRUPT => {
1807 &mut self.backing.vtls[entered_from_vtl]
1808 .enter_stats
1809 .l2_exit_pending_intr
1810 }
1811 TdCallResultCode::L2_EXIT_HOST_ROUTED_TDVMCALL => {
1812 assert_eq!(
1818 exit_info.code().vmx_exit(),
1819 VmxExit::new().with_basic_reason(VmxExitBasic::TDCALL)
1820 );
1821 &mut self.backing.vtls[entered_from_vtl]
1822 .enter_stats
1823 .host_routed_td_vmcall
1824 }
1825 _ => panic!("unexpected tdx exit code {:?}", exit_info.code()),
1826 };
1827
1828 stat.increment();
1829 self.handle_vmx_exit(dev, entered_from_vtl).await?;
1830 Ok(())
1831 }
1832
1833 async fn handle_vmx_exit(
1834 &mut self,
1835 dev: &impl CpuIo,
1836 intercepted_vtl: GuestVtl,
1837 ) -> Result<(), VpHaltReason> {
1838 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
1839
1840 let vmx_exit = exit_info.code().vmx_exit();
1842 if vmx_exit.vm_enter_failed() {
1843 return Err(self.handle_vm_enter_failed(dev, intercepted_vtl, vmx_exit));
1844 }
1845
1846 let next_interruption = exit_info.idt_vectoring_info();
1847
1848 if self.backing.vtls[intercepted_vtl]
1850 .interruption_information
1851 .valid()
1852 && (!next_interruption.valid()
1853 || self.backing.vtls[intercepted_vtl]
1854 .interruption_information
1855 .interruption_type()
1856 != next_interruption.interruption_type())
1857 {
1858 match self.backing.vtls[intercepted_vtl]
1859 .interruption_information
1860 .interruption_type()
1861 {
1862 INTERRUPT_TYPE_EXTERNAL
1863 if !self.backing.cvm.lapics[intercepted_vtl]
1864 .lapic
1865 .is_offloaded() =>
1866 {
1867 tracing::trace!(
1869 vector = self.backing.vtls[intercepted_vtl]
1870 .interruption_information
1871 .vector(),
1872 "acknowledging interrupt"
1873 );
1874 self.backing.cvm.lapics[intercepted_vtl]
1875 .lapic
1876 .acknowledge_interrupt(
1877 self.backing.vtls[intercepted_vtl]
1878 .interruption_information
1879 .vector(),
1880 );
1881 }
1882 INTERRUPT_TYPE_NMI => {
1883 tracing::debug!("acknowledging NMI");
1885 self.backing.cvm.lapics[intercepted_vtl].nmi_pending = false;
1886 }
1887 _ => {}
1888 }
1889 }
1890
1891 if self.backing.cvm.lapics[intercepted_vtl]
1892 .lapic
1893 .is_offloaded()
1894 {
1895 if next_interruption.valid() {
1908 tracing::debug!(
1909 ?next_interruption,
1910 vp_index = self.vp_index().index(),
1911 "exit requires reinjecting interrupt"
1912 );
1913 self.backing.vtls[intercepted_vtl].interruption_information = next_interruption;
1914 self.backing.vtls[intercepted_vtl].exception_error_code =
1915 exit_info.idt_vectoring_error_code();
1916 self.backing.vtls[intercepted_vtl]
1917 .exit_stats
1918 .needs_interrupt_reinject
1919 .increment();
1920 } else {
1921 self.backing.vtls[intercepted_vtl].interruption_information = Default::default();
1922 }
1923 } else {
1924 if next_interruption.valid()
1928 && !matches!(
1929 next_interruption.interruption_type(),
1930 INTERRUPT_TYPE_EXTERNAL | INTERRUPT_TYPE_NMI
1931 )
1932 {
1933 self.backing.vtls[intercepted_vtl].interruption_information = next_interruption;
1934 self.backing.vtls[intercepted_vtl].exception_error_code =
1935 exit_info.idt_vectoring_error_code();
1936 } else {
1937 self.backing.vtls[intercepted_vtl].interruption_information = Default::default();
1938 }
1939 }
1940
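        // Dispatch on the basic VMX exit reason; each arm evaluates to the
        // exit-statistics counter for that kind of exit.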
1941 let mut breakpoint_debug_exception = false;
1942 let stat = match vmx_exit.basic_reason() {
1943 VmxExitBasic::IO_INSTRUCTION => {
1944 let io_qual = ExitQualificationIo::from(exit_info.qualification() as u32);
1945
1946 let len = match io_qual.access_size() {
1947 IO_SIZE_8_BIT => 1,
1948 IO_SIZE_16_BIT => 2,
1949 IO_SIZE_32_BIT => 4,
1950 _ => panic!(
1951 "tdx module returned invalid io instr size {}",
1952 io_qual.access_size()
1953 ),
1954 };
1955
1956 let port_access_protected = self.cvm_try_protect_io_port_access(
1957 intercepted_vtl,
1958 io_qual.port(),
1959 io_qual.is_in(),
1960 len,
1961 io_qual.is_string(),
1962 io_qual.rep_prefix(),
1963 );
1964
1965 if !port_access_protected {
1966 if io_qual.is_string() || io_qual.rep_prefix() {
1967 self.emulate(
1972 dev,
1973 self.backing.vtls[intercepted_vtl]
1974 .interruption_information
1975 .valid(),
1976 intercepted_vtl,
1977 TdxEmulationCache::default(),
1978 )
1979 .await?;
1980 } else {
1981 let mut rax = self.runner.tdx_enter_guest_gps()[TdxGp::RAX];
1982 emulate_io(
1983 self.inner.vp_info.base.vp_index,
1984 !io_qual.is_in(),
1985 io_qual.port(),
1986 &mut rax,
1987 len,
1988 dev,
1989 )
1990 .await;
1991 self.runner.tdx_enter_guest_gps_mut()[TdxGp::RAX] = rax;
1992
1993 self.advance_to_next_instruction(intercepted_vtl);
1994 }
1995 }
1996
1997 &mut self.backing.vtls[intercepted_vtl].exit_stats.io
1998 }
1999 VmxExitBasic::MSR_READ => {
2000 let msr = self.runner.tdx_enter_guest_gps()[TdxGp::RCX] as u32;
2001
2002 let result = self.backing.cvm.lapics[intercepted_vtl]
2003 .lapic
2004 .access(&mut TdxApicClient {
2005 partition: self.partition,
2006 vmtime: &self.vmtime,
2007 apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
2008 dev,
2009 vtl: intercepted_vtl,
2010 })
2011 .msr_read(msr)
2012 .or_else_if_unknown(|| self.read_msr_cvm(msr, intercepted_vtl))
2013 .or_else_if_unknown(|| self.read_msr_tdx(msr, intercepted_vtl));
2014
2015 let value = match result {
2016 Ok(v) => Some(v),
2017 Err(MsrError::Unknown) => {
2018 tracelimit::warn_ratelimited!(CVM_ALLOWED, msr, "unknown tdx vm msr read");
2019 Some(0)
2020 }
2021 Err(MsrError::InvalidAccess) => None,
2022 };
2023
2024 let inject_gp = if let Some(value) = value {
2025 let gps = self.runner.tdx_enter_guest_gps_mut();
2026 gps[TdxGp::RAX] = (value as u32).into();
2027 gps[TdxGp::RDX] = ((value >> 32) as u32).into();
2028 false
2029 } else {
2030 true
2031 };
2032
2033 if inject_gp {
2034 self.inject_gpf(intercepted_vtl);
2035 } else {
2036 self.advance_to_next_instruction(intercepted_vtl);
2037 }
2038 &mut self.backing.vtls[intercepted_vtl].exit_stats.msr_read
2039 }
2040 VmxExitBasic::MSR_WRITE => {
2041 let gps = self.runner.tdx_enter_guest_gps();
2042 let msr = gps[TdxGp::RCX] as u32;
2043 let value =
2044 (gps[TdxGp::RAX] as u32 as u64) | ((gps[TdxGp::RDX] as u32 as u64) << 32);
2045
2046 if !self.cvm_try_protect_msr_write(intercepted_vtl, msr) {
2047 let result = self.backing.cvm.lapics[intercepted_vtl]
2048 .lapic
2049 .access(&mut TdxApicClient {
2050 partition: self.partition,
2051 vmtime: &self.vmtime,
2052 apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
2053 dev,
2054 vtl: intercepted_vtl,
2055 })
2056 .msr_write(msr, value)
2057 .or_else_if_unknown(|| self.write_msr_cvm(msr, value, intercepted_vtl))
2058 .or_else_if_unknown(|| self.write_msr_tdx(msr, value, intercepted_vtl))
2059 .or_else_if_unknown(|| {
2060 if MSR_ALLOWED_READ_WRITE.contains(&msr) {
2062 unreachable!("intercepted a write to MSR {msr}, configured for passthrough by default, that wasn't registered for intercepts by a higher VTL");
2063 }
2064 Err(MsrError::Unknown)
2065 });
2066
2067 let inject_gp = match result {
2068 Ok(()) => false,
2069 Err(MsrError::Unknown) => {
2070 tracelimit::warn_ratelimited!(
2071 CVM_ALLOWED,
2072 msr,
2073 "unknown tdx vm msr write"
2074 );
2075 tracelimit::warn_ratelimited!(
2076 CVM_CONFIDENTIAL,
2077 value,
2078 "unknown tdx vm msr write"
2079 );
2080 false
2081 }
2082 Err(MsrError::InvalidAccess) => true,
2083 };
2084
2085 if inject_gp {
2086 self.inject_gpf(intercepted_vtl);
2087 } else {
2088 self.advance_to_next_instruction(intercepted_vtl);
2089 }
2090 }
2091 &mut self.backing.vtls[intercepted_vtl].exit_stats.msr_write
2092 }
2093 VmxExitBasic::CPUID => {
2094 let gps = self.runner.tdx_enter_guest_gps();
2095 let leaf = gps[TdxGp::RAX] as u32;
2096 let subleaf = gps[TdxGp::RCX] as u32;
2097 let [eax, ebx, ecx, edx] = self.cvm_cpuid_result(intercepted_vtl, leaf, subleaf);
2098 let gps = self.runner.tdx_enter_guest_gps_mut();
2099 gps[TdxGp::RAX] = eax.into();
2100 gps[TdxGp::RBX] = ebx.into();
2101 gps[TdxGp::RCX] = ecx.into();
2102 gps[TdxGp::RDX] = edx.into();
2103 self.advance_to_next_instruction(intercepted_vtl);
2104 &mut self.backing.vtls[intercepted_vtl].exit_stats.cpuid
2105 }
2106 VmxExitBasic::VMCALL_INSTRUCTION => {
2107 if exit_info.cpl() != 0 {
2108 self.inject_gpf(intercepted_vtl);
2109 } else {
2110 let is_64bit = self.long_mode(intercepted_vtl);
2111 let guest_memory = &self.partition.gm[intercepted_vtl];
2112 let handler = UhHypercallHandler {
2113 trusted: !self.cvm_partition().hide_isolation,
2114 vp: &mut *self,
2115 bus: dev,
2116 intercepted_vtl,
2117 };
2118
2119 UhHypercallHandler::TDX_DISPATCHER.dispatch(
2120 guest_memory,
2121 hv1_hypercall::X64RegisterIo::new(handler, is_64bit),
2122 );
2123 }
2124 &mut self.backing.vtls[intercepted_vtl].exit_stats.vmcall
2125 }
2126 VmxExitBasic::HLT_INSTRUCTION => {
2127 self.backing.cvm.lapics[intercepted_vtl].activity = MpState::Halted;
2128 self.clear_interrupt_shadow(intercepted_vtl);
2129 self.advance_to_next_instruction(intercepted_vtl);
2130 &mut self.backing.vtls[intercepted_vtl].exit_stats.hlt
2131 }
2132 VmxExitBasic::CR_ACCESS => {
2133 let qual = CrAccessQualification::from(exit_info.qualification());
2134 let cr;
2135 let value;
2136 match qual.access_type() {
2137 CR_ACCESS_TYPE_MOV_TO_CR => {
2138 cr = qual.cr();
2139 value = self.runner.tdx_enter_guest_gps()[qual.gp_register() as usize];
2140 }
2141 CR_ACCESS_TYPE_LMSW => {
2142 cr = 0;
2143 let cr0 = self.backing.vtls[intercepted_vtl].cr0.read(&self.runner);
2144 value = (qual.lmsw_source_data() as u64 & 0xf) | (cr0 & !0xf);
2146 }
2147 access_type => unreachable!("not registered for cr access type {access_type}"),
2148 }
2149
2150 let cr = match cr {
2151 0 => HvX64RegisterName::Cr0,
2152 4 => HvX64RegisterName::Cr4,
2153 _ => unreachable!("not registered for cr{cr} accesses"),
2154 };
2155
2156 if !self.cvm_try_protect_secure_register_write(intercepted_vtl, cr, value) {
2157 let r = match cr {
2158 HvX64RegisterName::Cr0 => self.backing.vtls[intercepted_vtl]
2159 .cr0
2160 .write(value, &mut self.runner),
2161 HvX64RegisterName::Cr4 => self.backing.vtls[intercepted_vtl]
2162 .cr4
2163 .write(value, &mut self.runner),
2164 _ => unreachable!(),
2165 };
2166 if r.is_ok() {
2167 self.update_execution_mode(intercepted_vtl);
2168 self.advance_to_next_instruction(intercepted_vtl);
2169 } else {
2170 tracelimit::warn_ratelimited!(
2171 CVM_ALLOWED,
2172 ?cr,
2173 value,
2174 "failed to write cr"
2175 );
2176 self.inject_gpf(intercepted_vtl);
2177 }
2178 }
2179 &mut self.backing.vtls[intercepted_vtl].exit_stats.cr_access
2180 }
2181 VmxExitBasic::XSETBV => {
2182 let gps = self.runner.tdx_enter_guest_gps();
2183 if let Some(value) =
2184 hardware_cvm::validate_xsetbv_exit(hardware_cvm::XsetbvExitInput {
2185 rax: gps[TdxGp::RAX],
2186 rcx: gps[TdxGp::RCX],
2187 rdx: gps[TdxGp::RDX],
2188 cr4: self.backing.vtls[intercepted_vtl].cr4.read(&self.runner),
2189 cpl: exit_info.cpl(),
2190 })
2191 {
2192 if !self.cvm_try_protect_secure_register_write(
2193 intercepted_vtl,
2194 HvX64RegisterName::Xfem,
2195 value,
2196 ) {
2197 self.runner
2198 .set_vp_register(intercepted_vtl, HvX64RegisterName::Xfem, value.into())
2199 .unwrap();
2200 self.advance_to_next_instruction(intercepted_vtl);
2201 }
2202 } else {
2203 self.inject_gpf(intercepted_vtl);
2204 }
2205 &mut self.backing.vtls[intercepted_vtl].exit_stats.xsetbv
2206 }
2207 VmxExitBasic::WBINVD_INSTRUCTION => {
2208 let no_invalidate = exit_info.qualification() != 0;
2210 if no_invalidate {
2211 self.runner.tdx_vp_state_flags_mut().set_wbnoinvd(true);
2212 } else {
2213 self.runner.tdx_vp_state_flags_mut().set_wbinvd(true);
2214 }
2215
2216 self.advance_to_next_instruction(intercepted_vtl);
2217 &mut self.backing.vtls[intercepted_vtl].exit_stats.wbinvd
2218 }
2219 VmxExitBasic::EPT_VIOLATION => {
2220 let gpa = exit_info.gpa().expect("is EPT exit");
2221 let ept_info = VmxEptExitQualification::from(exit_info.qualification());
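// If this EPT violation interrupted an IRET that had already unmasked NMIs, and there is
// no interruption to reinject, restore NMI blocking so it is not lost when the instruction
// is restarted.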
2222 if !next_interruption.valid() && ept_info.nmi_unmasking_due_to_iret() {
2225 let mask = Interruptibility::new().with_blocked_by_nmi(true);
2226 let value = Interruptibility::new().with_blocked_by_nmi(true);
2227 let old_interruptibility: Interruptibility = self
2228 .runner
2229 .write_vmcs32(
2230 intercepted_vtl,
2231 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
2232 mask.into(),
2233 value.into(),
2234 )
2235 .into();
2236 assert!(!old_interruptibility.blocked_by_nmi());
2237 } else {
2238 let is_write = ept_info.access_mask() & 0b10 != 0;
2239 if self.check_mem_fault(intercepted_vtl, gpa, is_write, ept_info) {
2240 self.emulate(
2241 dev,
2242 self.backing.vtls[intercepted_vtl]
2243 .interruption_information
2244 .valid(),
2245 intercepted_vtl,
2246 TdxEmulationCache::default(),
2247 )
2248 .await?;
2249 }
2250 }
2251
2252 &mut self.backing.vtls[intercepted_vtl].exit_stats.ept_violation
2253 }
2254 VmxExitBasic::TPR_BELOW_THRESHOLD => {
2255 &mut self.backing.vtls[intercepted_vtl]
2257 .exit_stats
2258 .tpr_below_threshold
2259 }
2260 VmxExitBasic::INTERRUPT_WINDOW => {
2261 &mut self.backing.vtls[intercepted_vtl]
2263 .exit_stats
2264 .interrupt_window
2265 }
2266 VmxExitBasic::NMI_WINDOW => {
2267 &mut self.backing.vtls[intercepted_vtl].exit_stats.nmi_window
2269 }
2270 VmxExitBasic::HW_INTERRUPT => {
2271 if cfg!(feature = "gdb") {
2272 let debug_regs = self
2274 .access_state(intercepted_vtl.into())
2275 .debug_regs()
2276 .expect("register query should not fail");
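// DR6 bits 0-3 record which hardware breakpoint fired; any of them means a breakpoint debug exception.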
2277 breakpoint_debug_exception = debug_regs.dr6.trailing_zeros() < 4;
2280 }
2281 &mut self.backing.vtls[intercepted_vtl].exit_stats.hw_interrupt
2282 }
2283 VmxExitBasic::SMI_INTR => &mut self.backing.vtls[intercepted_vtl].exit_stats.smi_intr,
2284 VmxExitBasic::PAUSE_INSTRUCTION => {
2285 &mut self.backing.vtls[intercepted_vtl].exit_stats.pause
2286 }
2287 VmxExitBasic::TDCALL => {
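// tdvmcalls are handled only when the untrusted synic is in use (and only from VTL0).
// If isolation is hidden from the guest, a TDCALL is unexpected: inject #UD.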
2288 if self.backing.untrusted_synic.is_some() {
2291 assert_eq!(intercepted_vtl, GuestVtl::Vtl0);
2292 self.handle_tdvmcall(dev, intercepted_vtl);
2293 } else if self.cvm_partition().hide_isolation {
2294 self.backing.vtls[intercepted_vtl].interruption_information =
2296 InterruptionInformation::new()
2297 .with_valid(true)
2298 .with_vector(x86defs::Exception::INVALID_OPCODE.0)
2299 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION);
2300 }
2301 &mut self.backing.vtls[intercepted_vtl].exit_stats.tdcall
2302 }
2303 VmxExitBasic::EXCEPTION => {
2304 tracing::trace!(
2305 "Caught Exception: {:?}",
2306 exit_info._exit_interruption_info()
2307 );
2308 if cfg!(feature = "gdb") {
2309 breakpoint_debug_exception = true;
2310 }
2311 &mut self.backing.vtls[intercepted_vtl].exit_stats.exception
2312 }
2313 VmxExitBasic::TRIPLE_FAULT => {
2314 return Err(VpHaltReason::TripleFault {
2315 vtl: intercepted_vtl.into(),
2316 });
2317 }
2318 VmxExitBasic::GDTR_OR_IDTR => {
2319 let info = GdtrOrIdtrInstructionInfo::from(exit_info.instr_info().info());
2320 tracing::trace!("Intercepted GDT or IDT instruction: {:?}", info);
2321 let reg = match info.instruction() {
2322 GdtrOrIdtrInstruction::Sidt | GdtrOrIdtrInstruction::Lidt => {
2323 HvX64RegisterName::Idtr
2324 }
2325 GdtrOrIdtrInstruction::Sgdt | GdtrOrIdtrInstruction::Lgdt => {
2326 HvX64RegisterName::Gdtr
2327 }
2328 };
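// Emulate loads only when the register write is not claimed by secure register protection;
// stores are always emulated here.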
2329 if (info.instruction().is_load()
2331 && !self.cvm_try_protect_secure_register_write(intercepted_vtl, reg, 0))
2332 || !info.instruction().is_load()
2333 {
2334 self.emulate_gdtr_or_idtr(intercepted_vtl, dev).await?;
2335 }
2336 &mut self.backing.vtls[intercepted_vtl]
2337 .exit_stats
2338 .descriptor_table
2339 }
2340 VmxExitBasic::LDTR_OR_TR => {
2341 let info = LdtrOrTrInstructionInfo::from(exit_info.instr_info().info());
2342 tracing::trace!("Intercepted LDT or TR instruction: {:?}", info);
2343 let reg = match info.instruction() {
2344 LdtrOrTrInstruction::Sldt | LdtrOrTrInstruction::Lldt => {
2345 HvX64RegisterName::Ldtr
2346 }
2347 LdtrOrTrInstruction::Str | LdtrOrTrInstruction::Ltr => HvX64RegisterName::Tr,
2348 };
2349 if (info.instruction().is_load()
2351 && !self.cvm_try_protect_secure_register_write(intercepted_vtl, reg, 0))
2352 || !info.instruction().is_load()
2353 {
2354 self.emulate_ldtr_or_tr(intercepted_vtl, dev).await?;
2355 }
2356 &mut self.backing.vtls[intercepted_vtl]
2357 .exit_stats
2358 .descriptor_table
2359 }
2360 VmxExitBasic::TIMER_EXPIRED => {
2361 &mut self.backing.vtls[intercepted_vtl].exit_stats.timer_expired
2363 }
2364 _ => {
2365 return Err(dev.fatal_error(UnknownVmxExit(exit_info.code().vmx_exit()).into()));
2366 }
2367 };
2368 stat.increment();
2369
2370 if cfg!(feature = "gdb") && breakpoint_debug_exception {
2373 self.handle_debug_exception(dev, intercepted_vtl)?;
2374 }
2375
2376 Ok(())
2377 }
2378
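/// Logs a detailed snapshot of the VP's registers and key VMCS fields, used when diagnosing a failed VP.ENTER.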
2379 fn trace_processor_state(&self, vtl: GuestVtl) {
2381 let raw_exit = self.runner.tdx_vp_enter_exit_info();
2382 tracing::error!(CVM_CONFIDENTIAL, ?raw_exit, "raw tdx vp enter exit info");
2383
2384 let gprs = self.runner.tdx_enter_guest_gps();
2385 tracing::error!(CVM_CONFIDENTIAL, ?gprs, "guest gpr list");
2386
2387 let TdxPrivateRegs {
2388 rflags,
2389 rip,
2390 rsp,
2391 ssp,
2392 rvi,
2393 svi,
2394 msr_kernel_gs_base,
2395 msr_star,
2396 msr_lstar,
2397 msr_sfmask,
2398 msr_xss,
2399 msr_tsc_aux,
2400 vp_entry_flags,
2401 } = self.backing.vtls[vtl].private_regs;
2402 tracing::error!(
2403 CVM_CONFIDENTIAL,
2404 rflags,
2405 rip,
2406 rsp,
2407 ssp,
2408 rvi,
2409 svi,
2410 msr_kernel_gs_base,
2411 msr_star,
2412 msr_lstar,
2413 msr_sfmask,
2414 msr_xss,
2415 msr_tsc_aux,
2416 ?vp_entry_flags,
2417 "private registers"
2418 );
2419
2420 let physical_cr0 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0);
2421 let shadow_cr0 = self
2422 .runner
2423 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR0_READ_SHADOW);
2424 let cr0_guest_host_mask: u64 = self
2425 .runner
2426 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK);
2427 tracing::error!(
2428 CVM_CONFIDENTIAL,
2429 physical_cr0,
2430 shadow_cr0,
2431 cr0_guest_host_mask,
2432 "cr0 values"
2433 );
2434
2435 let physical_cr4 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4);
2436 let shadow_cr4 = self
2437 .runner
2438 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_READ_SHADOW);
2439 let cr4_guest_host_mask = self
2440 .runner
2441 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK);
2442 tracing::error!(
2443 CVM_CONFIDENTIAL,
2444 physical_cr4,
2445 shadow_cr4,
2446 cr4_guest_host_mask,
2447 "cr4 values"
2448 );
2449
2450 let cr3 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR3);
2451 tracing::error!(CVM_CONFIDENTIAL, cr3, "cr3");
2452
2453 let cached_efer = self.backing.vtls[vtl].efer;
2454 let vmcs_efer = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER);
2455 let entry_controls = self
2456 .runner
2457 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_CONTROLS);
2458 tracing::error!(CVM_CONFIDENTIAL, cached_efer, vmcs_efer, "efer");
2459 tracing::error!(CVM_CONFIDENTIAL, entry_controls, "entry controls");
2460
2461 let cs = self.read_segment(vtl, TdxSegmentReg::Cs);
2462 let ds = self.read_segment(vtl, TdxSegmentReg::Ds);
2463 let es = self.read_segment(vtl, TdxSegmentReg::Es);
2464 let fs = self.read_segment(vtl, TdxSegmentReg::Fs);
2465 let gs = self.read_segment(vtl, TdxSegmentReg::Gs);
2466 let ss = self.read_segment(vtl, TdxSegmentReg::Ss);
2467 let tr = self.read_segment(vtl, TdxSegmentReg::Tr);
2468 let ldtr = self.read_segment(vtl, TdxSegmentReg::Ldtr);
2469
2470 tracing::error!(
2471 CVM_CONFIDENTIAL,
2472 ?cs,
2473 ?ds,
2474 ?es,
2475 ?fs,
2476 ?gs,
2477 ?ss,
2478 ?tr,
2479 ?ldtr,
2480 "segment values"
2481 );
2482
2483 let exception_bitmap = self
2484 .runner
2485 .read_vmcs32(vtl, VmcsField::VMX_VMCS_EXCEPTION_BITMAP);
2486 tracing::error!(CVM_CONFIDENTIAL, exception_bitmap, "exception bitmap");
2487
2488 let cached_processor_controls = self.backing.vtls[vtl].processor_controls;
2489 let vmcs_processor_controls = ProcessorControls::from(
2490 self.runner
2491 .read_vmcs32(vtl, VmcsField::VMX_VMCS_PROCESSOR_CONTROLS),
2492 );
2493 let vmcs_secondary_processor_controls = SecondaryProcessorControls::from(
2494 self.runner
2495 .read_vmcs32(vtl, VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS),
2496 );
2497 tracing::error!(
2498 CVM_CONFIDENTIAL,
2499 ?cached_processor_controls,
2500 ?vmcs_processor_controls,
2501 ?vmcs_secondary_processor_controls,
2502 "processor controls"
2503 );
2504
2505 if cached_processor_controls != vmcs_processor_controls {
2506 tracing::error!(CVM_ALLOWED, "BUGBUG: processor controls mismatch");
2507 }
2508
2509 let cached_tpr_threshold = self.backing.vtls[vtl].tpr_threshold;
2510 let vmcs_tpr_threshold = self
2511 .runner
2512 .read_vmcs32(vtl, VmcsField::VMX_VMCS_TPR_THRESHOLD);
2513 tracing::error!(
2514 CVM_CONFIDENTIAL,
2515 cached_tpr_threshold,
2516 vmcs_tpr_threshold,
2517 "tpr threshold"
2518 );
2519
2520 let cached_eoi_exit_bitmap = self.backing.eoi_exit_bitmap;
2521 let vmcs_eoi_exit_bitmap = {
2522 let fields = [
2523 VmcsField::VMX_VMCS_EOI_EXIT_0,
2524 VmcsField::VMX_VMCS_EOI_EXIT_1,
2525 VmcsField::VMX_VMCS_EOI_EXIT_2,
2526 VmcsField::VMX_VMCS_EOI_EXIT_3,
2527 ];
2528 fields
2529 .iter()
2530 .map(|field| self.runner.read_vmcs64(vtl, *field))
2531 .collect::<Vec<_>>()
2532 };
2533 tracing::error!(
2534 CVM_CONFIDENTIAL,
2535 ?cached_eoi_exit_bitmap,
2536 ?vmcs_eoi_exit_bitmap,
2537 "eoi exit bitmap"
2538 );
2539
2540 let cached_interrupt_information = self.backing.vtls[vtl].interruption_information;
2541 let cached_interruption_set = self.backing.vtls[vtl].interruption_set;
2542 let vmcs_interrupt_information = self
2543 .runner
2544 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO);
2545 let vmcs_entry_exception_code = self
2546 .runner
2547 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_EXCEPTION_ERROR_CODE);
2548 tracing::error!(
2549 CVM_CONFIDENTIAL,
2550 ?cached_interrupt_information,
2551 cached_interruption_set,
2552 vmcs_interrupt_information,
2553 vmcs_entry_exception_code,
2554 "interrupt information"
2555 );
2556
2557 let guest_interruptibility = self
2558 .runner
2559 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY);
2560 tracing::error!(
2561 CVM_CONFIDENTIAL,
2562 guest_interruptibility,
2563 "guest interruptibility"
2564 );
2565
2566 let vmcs_sysenter_cs = self
2567 .runner
2568 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR);
2569 let vmcs_sysenter_esp = self
2570 .runner
2571 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR);
2572 let vmcs_sysenter_eip = self
2573 .runner
2574 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR);
2575 tracing::error!(
2576 CVM_CONFIDENTIAL,
2577 vmcs_sysenter_cs,
2578 vmcs_sysenter_esp,
2579 vmcs_sysenter_eip,
2580 "sysenter values"
2581 );
2582
2583 let vmcs_pat = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT);
2584 tracing::error!(CVM_CONFIDENTIAL, vmcs_pat, "guest PAT");
2585 }
2586
2587 fn handle_vm_enter_failed(
2588 &self,
2589 dev: &impl CpuIo,
2590 vtl: GuestVtl,
2591 vmx_exit: VmxExit,
2592 ) -> VpHaltReason {
2593 assert!(vmx_exit.vm_enter_failed());
2594 match vmx_exit.basic_reason() {
2595 VmxExitBasic::BAD_GUEST_STATE => {
2596 tracing::error!(CVM_ALLOWED, "VP.ENTER failed with bad guest state");
2599 self.trace_processor_state(vtl);
2600
2601 dev.fatal_error(VmxBadGuestState.into())
2602 }
2603 _ => dev.fatal_error(UnknownVmxExit(vmx_exit).into()),
2604 }
2605 }
2606
2607 fn advance_to_next_instruction(&mut self, vtl: GuestVtl) {
2608 let instr_info = TdxExit(self.runner.tdx_vp_enter_exit_info()).instr_info();
2609 let rip = &mut self.backing.vtls[vtl].private_regs.rip;
2610 *rip = rip.wrapping_add(instr_info.length().into());
2611 }
2612
2613 fn clear_interrupt_shadow(&mut self, vtl: GuestVtl) {
2614 let mask = Interruptibility::new().with_blocked_by_sti(true);
2615 let value = Interruptibility::new().with_blocked_by_sti(false);
2616 self.runner.write_vmcs32(
2617 vtl,
2618 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
2619 mask.into(),
2620 value.into(),
2621 );
2622 }
2623
2624 fn inject_gpf(&mut self, vtl: GuestVtl) {
2625 self.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
2626 .with_valid(true)
2627 .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0)
2628 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION)
2629 .with_deliver_error_code(true);
2630 self.backing.vtls[vtl].exception_error_code = 0;
2631 }
2632
2633 fn handle_tdvmcall(&mut self, dev: &impl CpuIo, intercepted_vtl: GuestVtl) {
2634 let regs = self.runner.tdx_enter_guest_gps();
2635 if regs[TdxGp::R10] == 0 {
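// R10 == 0 selects an architecturally-defined tdvmcall subfunction (identified by R11);
// only MSR reads and writes are supported here.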
2636 let result = match VmxExitBasic(regs[TdxGp::R11] as u16) {
2638 VmxExitBasic::MSR_WRITE => {
2639 let msr = regs[TdxGp::R12] as u32;
2640 let value = regs[TdxGp::R13];
2641 match self.write_tdvmcall_msr(msr, value, intercepted_vtl) {
2642 Ok(()) => {
2643 tracing::debug!(msr, value, "tdvmcall msr write");
2644 TdVmCallR10Result::SUCCESS
2645 }
2646 Err(err) => {
2647 tracelimit::warn_ratelimited!(
2648 CVM_ALLOWED,
2649 msr,
2650 ?err,
2651 "failed tdvmcall msr write"
2652 );
2653 tracelimit::warn_ratelimited!(
2654 CVM_CONFIDENTIAL,
2655 value,
2656 "failed tdvmcall msr write"
2657 );
2658 TdVmCallR10Result::OPERAND_INVALID
2659 }
2660 }
2661 }
2662 VmxExitBasic::MSR_READ => {
2663 let msr = regs[TdxGp::R12] as u32;
2664 match self.read_tdvmcall_msr(msr, intercepted_vtl) {
2665 Ok(value) => {
2666 tracing::debug!(msr, value, "tdvmcall msr read");
2667 self.runner.tdx_enter_guest_gps_mut()[TdxGp::R11] = value;
2668 TdVmCallR10Result::SUCCESS
2669 }
2670 Err(err) => {
2671 tracelimit::warn_ratelimited!(
2672 CVM_ALLOWED,
2673 msr,
2674 ?err,
2675 "failed tdvmcall msr read"
2676 );
2677 TdVmCallR10Result::OPERAND_INVALID
2678 }
2679 }
2680 }
2681 subfunction => {
2682 tracelimit::warn_ratelimited!(
2683 CVM_ALLOWED,
2684 ?subfunction,
2685 "architectural vmcall not supported"
2686 );
2687 TdVmCallR10Result::OPERAND_INVALID
2688 }
2689 };
2690 self.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = result.0;
2691 self.backing.vtls[intercepted_vtl].private_regs.rip = self.backing.vtls
2692 [intercepted_vtl]
2693 .private_regs
2694 .rip
2695 .wrapping_add(4);
2696 } else {
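// Otherwise this is a Hyper-V hypercall issued over tdvmcall. These are always untrusted,
// so dispatch them against shared memory with the restricted tdcall dispatcher.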
2697 let guest_memory = &self.shared.cvm.shared_memory;
2704 let handler = UhHypercallHandler {
2705 vp: &mut *self,
2706 bus: dev,
2707 trusted: false,
2708 intercepted_vtl,
2709 };
2710
2711 UhHypercallHandler::TDCALL_DISPATCHER.dispatch(guest_memory, TdHypercall(handler));
2712 }
2713 }
2714
2715 fn read_tdvmcall_msr(&mut self, msr: u32, intercepted_vtl: GuestVtl) -> Result<u64, MsrError> {
2716 match msr {
2717 msr @ (hvdef::HV_X64_MSR_GUEST_OS_ID | hvdef::HV_X64_MSR_VP_INDEX) => {
2718 self.backing.cvm.hv[intercepted_vtl].msr_read(msr)
2719 }
2720 _ => self
2721 .backing
2722 .untrusted_synic
2723 .as_mut()
2724 .unwrap()
2725 .read_nontimer_msr(msr),
2726 }
2727 }
2728
2729 fn write_tdvmcall_msr(
2730 &mut self,
2731 msr: u32,
2732 value: u64,
2733 intercepted_vtl: GuestVtl,
2734 ) -> Result<(), MsrError> {
2735 match msr {
2736 hvdef::HV_X64_MSR_GUEST_OS_ID => {
2737 self.backing.cvm.hv[intercepted_vtl].msr_write_guest_os_id(value)
2738 }
2739 _ => {
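// All other MSRs belong to the untrusted synic, which must be present for tdvmcalls to reach this point.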
2740 self.backing
2744 .untrusted_synic
2745 .as_mut()
2746 .unwrap()
2747 .write_nontimer_msr(
2748 msr,
2749 value,
2750 &mut UntrustedSynicVtlProts(&self.partition.gm[GuestVtl::Vtl0]),
2751 )?;
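// Mirror SINT configuration to the hypervisor so that it can deliver messages for the untrusted synic directly.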
2752 if matches!(msr, hvdef::HV_X64_MSR_SINT0..=hvdef::HV_X64_MSR_SINT15) {
2755 if let Err(err) = self.runner.set_vp_register(
2756 intercepted_vtl,
2757 HvX64RegisterName(
2758 HvX64RegisterName::Sint0.0 + (msr - hvdef::HV_X64_MSR_SINT0),
2759 ),
2760 value.into(),
2761 ) {
2762 tracelimit::warn_ratelimited!(
2763 CVM_ALLOWED,
2764 error = &err as &dyn std::error::Error,
2765 "failed to set sint register"
2766 );
2767 }
2768 }
2769 }
2770 }
2771
2772 Ok(())
2773 }
2774
2775 fn read_msr_tdx(&mut self, msr: u32, vtl: GuestVtl) -> Result<u64, MsrError> {
2776 match msr {
2783 x86defs::X86X_IA32_MSR_PLATFORM_ID => {
2785 Ok(0)
2793 }
2794
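// MTRRCAP: no variable ranges, no fixed ranges, write-combining supported (bit 10).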
2795 x86defs::X86X_MSR_MTRR_CAP => {
2796 Ok(0x400)
2799 }
2800 x86defs::X86X_MSR_MTRR_DEF_TYPE => {
2801 Ok(0)
2805 }
2806 x86defs::X86X_MSR_CSTAR => Ok(self.backing.vtls[vtl].msr_cstar),
2807 x86defs::X86X_MSR_MCG_CAP => Ok(0),
2808 x86defs::X86X_MSR_MCG_STATUS => Ok(0),
2809 x86defs::X86X_MSR_MC_UPDATE_PATCH_LEVEL => Ok(0xFFFFFFFF),
2810 x86defs::X86X_MSR_XSS => Ok(self.backing.vtls[vtl].private_regs.msr_xss),
2811 x86defs::X86X_IA32_MSR_MISC_ENABLE => Ok(hv1_emulator::x86::MISC_ENABLE.into()),
2812 x86defs::X86X_IA32_MSR_FEATURE_CONTROL => Ok(VMX_FEATURE_CONTROL_LOCKED),
2813 x86defs::X86X_MSR_CR_PAT => {
2814 let pat = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT);
2815 Ok(pat)
2816 }
2817
2818 x86defs::X86X_MSR_MISC_FEATURE_ENABLES
2821 | x86defs::X86X_MSR_PLATFORM_INFO
2822 | x86defs::X86X_MSR_PPIN_CTL
2823 | x86defs::X86X_IA32_MSR_SMI_COUNT
2824 | x86defs::X86X_MSR_UMWAIT_CONTROL
2825 | x86defs::X86X_AMD_MSR_DE_CFG
2826 | x86defs::X86X_IA32_MSR_RAPL_POWER_UNIT
2827 | x86defs::X86X_IA32_MSR_PKG_ENERGY_STATUS
2828 | x86defs::X86X_IA32_MSR_DRAM_ENERGY_STATUS
2829 | x86defs::X86X_IA32_MSR_PP0_ENERGY_STATUS => Ok(0),
2830
2831 hvdef::HV_X64_MSR_GUEST_IDLE => {
2832 self.backing.cvm.lapics[vtl].activity = MpState::Idle;
2833 self.clear_interrupt_shadow(vtl);
2834 Ok(0)
2835 }
2836 X86X_MSR_EFER => Ok(self.backing.vtls[vtl].efer),
2837
2838 _ => Err(MsrError::Unknown),
2839 }
2840 }
2841
2842 fn write_msr_tdx(&mut self, msr: u32, value: u64, vtl: GuestVtl) -> Result<(), MsrError> {
2843 let state = &mut self.backing.vtls[vtl].private_regs;
2844
2845 match msr {
2846 X86X_MSR_EFER => {
2847 self.write_efer(vtl, value)
2848 .map_err(|_| MsrError::InvalidAccess)?;
2849 self.update_execution_mode(vtl);
2850 }
2851 x86defs::X86X_MSR_STAR => state.msr_star = value,
2852 x86defs::X86X_MSR_CSTAR => self.backing.vtls[vtl].msr_cstar = value,
2853 x86defs::X86X_MSR_LSTAR => state.msr_lstar = value,
2854 x86defs::X86X_MSR_SFMASK => state.msr_sfmask = value,
2855 x86defs::X86X_MSR_TSC_AUX => state.msr_tsc_aux = value,
2856 x86defs::X86X_MSR_SYSENTER_CS => {
2857 self.runner.write_vmcs32(
2858 vtl,
2859 VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR,
2860 !0,
2861 value as u32,
2862 );
2863 }
2864 x86defs::X86X_MSR_SYSENTER_EIP => {
2865 self.runner.write_vmcs64(
2866 vtl,
2867 VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR,
2868 !0,
2869 value,
2870 );
2871 }
2872 x86defs::X86X_MSR_SYSENTER_ESP => {
2873 self.runner.write_vmcs64(
2874 vtl,
2875 VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR,
2876 !0,
2877 value,
2878 );
2879 }
2880 x86defs::X86X_MSR_XSS => state.msr_xss = value,
2881 x86defs::X86X_MSR_MC_UPDATE_PATCH_LEVEL => {
2882 if value != 0 {
2884 return Err(MsrError::InvalidAccess);
2885 }
2886 }
2887 x86defs::X86X_IA32_MSR_MISC_ENABLE => {}
2888 x86defs::X86X_MSR_CR_PAT => {
2889 self.runner
2890 .write_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT, !0, value);
2891 }
2892
2893 x86defs::X86X_MSR_MCG_STATUS => {
2894 if x86defs::X86xMcgStatusRegister::from(value).reserved0() != 0 {
2896 return Err(MsrError::InvalidAccess);
2897 }
2898 }
2899
2900 x86defs::X86X_MSR_MTRR_DEF_TYPE => {}
2902
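// Ignore microcode update triggers.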
2903 x86defs::X86X_MSR_BIOS_UPDT_TRIG => {}
2906
2907 x86defs::X86X_MSR_MISC_FEATURE_ENABLES
2910 | x86defs::X86X_MSR_PLATFORM_INFO
2911 | x86defs::X86X_MSR_PPIN_CTL
2912 | x86defs::X86X_IA32_MSR_SMI_COUNT
2913 | x86defs::X86X_MSR_UMWAIT_CONTROL
2914 | x86defs::X86X_AMD_MSR_DE_CFG
2915 | x86defs::X86X_IA32_MSR_RAPL_POWER_UNIT
2916 | x86defs::X86X_IA32_MSR_PKG_ENERGY_STATUS
2917 | x86defs::X86X_IA32_MSR_DRAM_ENERGY_STATUS
2918 | x86defs::X86X_IA32_MSR_PP0_ENERGY_STATUS => {}
2919
2920 _ => return Err(MsrError::Unknown),
2921 }
2922
2923 Ok(())
2924 }
2925
2926 fn write_segment(
2927 &mut self,
2928 vtl: GuestVtl,
2929 seg: TdxSegmentReg,
2930 reg: SegmentRegister,
2931 ) -> Result<(), vp_state::Error> {
2932 self.runner
2934 .write_vmcs16(vtl, seg.selector(), !0, reg.selector);
2935 self.runner.write_vmcs64(vtl, seg.base(), !0, reg.base);
2936 self.runner.write_vmcs32(vtl, seg.limit(), !0, reg.limit);
2937
2938 let mut attributes = x86defs::vmx::VmxSegmentAttributes::from(reg.attributes as u32);
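// The VMX access-rights format has an explicit 'unusable' (null) flag; set it when the segment is not present.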
2940 attributes.set_null(!attributes.present());
2941
2942 self.runner
2943 .write_vmcs32(vtl, seg.attributes(), !0, attributes.into());
2944
2945 Ok(())
2946 }
2947
2948 fn read_segment(&self, vtl: GuestVtl, seg: TdxSegmentReg) -> SegmentRegister {
2949 let selector = self.runner.read_vmcs16(vtl, seg.selector());
2950 let base = self.runner.read_vmcs64(vtl, seg.base());
2951 let limit = self.runner.read_vmcs32(vtl, seg.limit());
2952 let attributes = self.runner.read_vmcs32(vtl, seg.attributes());
2953
2954 SegmentRegister {
2955 selector,
2956 base,
2957 limit,
2958 attributes: attributes as u16,
2959 }
2960 }
2961
2962 fn long_mode(&self, vtl: GuestVtl) -> bool {
2963 let backing = &self.backing.vtls[vtl];
2964 backing.cr0.read(&self.runner) & X64_CR0_PE != 0 && backing.efer & X64_EFER_LMA != 0
2965 }
2966}
2967
2968impl<T: CpuIo> X86EmulatorSupport for UhEmulationState<'_, '_, T, TdxBacked> {
2969 fn vp_index(&self) -> VpIndex {
2970 self.vp.vp_index()
2971 }
2972
2973 fn flush(&mut self) {
2974 }
2976
2977 fn vendor(&self) -> x86defs::cpuid::Vendor {
2978 self.vp.partition.caps.vendor
2979 }
2980
2981 fn gp(&mut self, reg: Gp) -> u64 {
2982 self.vp.runner.tdx_enter_guest_gps()[reg as usize]
2983 }
2984
2985 fn set_gp(&mut self, reg: Gp, v: u64) {
2986 self.vp.runner.tdx_enter_guest_gps_mut()[reg as usize] = v;
2987 }
2988
2989 fn xmm(&mut self, index: usize) -> u128 {
2990 u128::from_ne_bytes(self.vp.runner.fx_state().xmm[index])
2991 }
2992
2993 fn set_xmm(&mut self, index: usize, v: u128) {
2994 self.vp.runner.fx_state_mut().xmm[index] = v.to_ne_bytes();
2995 }
2996
2997 fn rip(&mut self) -> u64 {
2998 self.vp.backing.vtls[self.vtl].private_regs.rip
2999 }
3000
3001 fn set_rip(&mut self, v: u64) {
3002 self.vp.backing.vtls[self.vtl].private_regs.rip = v;
3003 }
3004
3005 fn segment(&mut self, index: Segment) -> x86defs::SegmentRegister {
3006 let tdx_segment_index = match index {
3007 Segment::CS => TdxSegmentReg::Cs,
3008 Segment::ES => TdxSegmentReg::Es,
3009 Segment::SS => TdxSegmentReg::Ss,
3010 Segment::DS => TdxSegmentReg::Ds,
3011 Segment::FS => TdxSegmentReg::Fs,
3012 Segment::GS => TdxSegmentReg::Gs,
3013 };
3014 let reg = match tdx_segment_index {
3015 TdxSegmentReg::Cs => self.cache.segs[index as usize]
3016 .get_or_insert_with(|| TdxExit(self.vp.runner.tdx_vp_enter_exit_info()).cs()),
3017 _ => self.cache.segs[index as usize]
3018 .get_or_insert_with(|| self.vp.read_segment(self.vtl, tdx_segment_index)),
3019 };
3020 (*reg).into()
3021 }
3022
3023 fn efer(&mut self) -> u64 {
3024 self.vp.backing.vtls[self.vtl].efer
3025 }
3026
3027 fn cr0(&mut self) -> u64 {
3028 let reg = self
3029 .cache
3030 .cr0
3031 .get_or_insert_with(|| self.vp.backing.vtls[self.vtl].cr0.read(&self.vp.runner));
3032 *reg
3033 }
3034
3035 fn rflags(&mut self) -> RFlags {
3036 self.vp.backing.vtls[self.vtl].private_regs.rflags.into()
3037 }
3038
3039 fn set_rflags(&mut self, v: RFlags) {
3040 self.vp.backing.vtls[self.vtl].private_regs.rflags = v.into();
3041 }
3042
3043 fn instruction_bytes(&self) -> &[u8] {
3044 &[]
3045 }
3046
3047 fn physical_address(&self) -> Option<u64> {
3048 TdxExit(self.vp.runner.tdx_vp_enter_exit_info()).gpa()
3049 }
3050
3051 fn initial_gva_translation(
3052 &mut self,
3053 ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
3054 let exit_info = TdxExit(self.vp.runner.tdx_vp_enter_exit_info());
3055 let ept_info = VmxEptExitQualification::from(exit_info.qualification());
3056
3057 if exit_info.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION
3058 && ept_info.gva_valid()
3059 {
3060 Some(virt_support_x86emu::emulate::InitialTranslation {
3061 gva: exit_info.gla().expect("already validated EPT exit"),
3062 gpa: exit_info.gpa().expect("already validated EPT exit"),
3063 translate_mode: match ept_info.access_mask() {
3064 0x1 => TranslateMode::Read,
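// An access that is both read and write (mask 0x3) is treated as a write.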
3065 0x2 | 0x3 => TranslateMode::Write,
3078 0x4 => TranslateMode::Execute,
3079 _ => panic!("unexpected ept access mask 0x{:x}", ept_info.access_mask()),
3080 },
3081 })
3082 } else {
3083 None
3084 }
3085 }
3086
3087 fn interruption_pending(&self) -> bool {
3088 self.interruption_pending
3089 }
3090
3091 fn check_vtl_access(
3092 &mut self,
3093 _gpa: u64,
3094 _mode: TranslateMode,
3095 ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError> {
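// Nothing to check here; VTL access protections for hardware-isolated VMs are enforced elsewhere.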
3096 Ok(())
3098 }
3099
3100 fn translate_gva(
3101 &mut self,
3102 gva: u64,
3103 mode: TranslateMode,
3104 ) -> Result<
3105 virt_support_x86emu::emulate::EmuTranslateResult,
3106 virt_support_x86emu::emulate::EmuTranslateError,
3107 > {
3108 emulate_translate_gva(self, gva, mode)
3109 }
3110
3111 fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
3112 assert!(event_info.reg_0.event_pending());
3113 assert_eq!(
3114 event_info.reg_0.event_type(),
3115 hvdef::HV_X64_PENDING_EVENT_EXCEPTION
3116 );
3117 assert!(!self.interruption_pending);
3118
3119 TdxBacked::set_pending_exception(
3122 self.vp,
3123 self.vtl,
3124 HvX64PendingExceptionEvent::from(event_info.reg_0.into_bits()),
3125 );
3126 }
3127
3128 fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
3129 let vtom = self.vp.partition.caps.vtom.unwrap_or(0);
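// Mask off the VTOM bit: the shared (above-VTOM) alias maps the same underlying memory as the private GPA.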
3132 debug_assert!(vtom == 0 || vtom.is_power_of_two());
3133 self.vp.partition.is_gpa_mapped(gpa & !vtom, write)
3134 }
3135
3136 fn lapic_base_address(&self) -> Option<u64> {
3137 self.vp.backing.cvm.lapics[self.vtl].lapic.base_address()
3138 }
3139
3140 fn lapic_read(&mut self, address: u64, data: &mut [u8]) {
3141 self.vp.backing.cvm.lapics[self.vtl]
3142 .lapic
3143 .access(&mut TdxApicClient {
3144 partition: self.vp.partition,
3145 dev: self.devices,
3146 vmtime: &self.vp.vmtime,
3147 apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
3148 vtl: self.vtl,
3149 })
3150 .mmio_read(address, data);
3151 }
3152
3153 fn lapic_write(&mut self, address: u64, data: &[u8]) {
3154 self.vp.backing.cvm.lapics[self.vtl]
3155 .lapic
3156 .access(&mut TdxApicClient {
3157 partition: self.vp.partition,
3158 dev: self.devices,
3159 vmtime: &self.vp.vmtime,
3160 apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
3161 vtl: self.vtl,
3162 })
3163 .mmio_write(address, data);
3164 }
3165
3166 fn monitor_support(&self) -> Option<&dyn EmulatorMonitorSupport> {
3167 Some(self)
3168 }
3169}
3170
3171#[derive(Debug)]
3172enum TdxSegmentReg {
3173 Es,
3174 Cs,
3175 Ss,
3176 Ds,
3177 Fs,
3178 Gs,
3179 Ldtr,
3180 Tr,
3181}
3182
3183impl TdxSegmentReg {
3184 fn selector(&self) -> VmcsField {
3186 match self {
3187 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_SELECTOR,
3188 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_SELECTOR,
3189 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_SELECTOR,
3190 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_SELECTOR,
3191 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_SELECTOR,
3192 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_SELECTOR,
3193 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_SELECTOR,
3194 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_SELECTOR,
3195 }
3196 }
3197
3198 fn base(&self) -> VmcsField {
3200 match self {
3201 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_BASE,
3202 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_BASE,
3203 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_BASE,
3204 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_BASE,
3205 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_BASE,
3206 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_BASE,
3207 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_BASE,
3208 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_BASE,
3209 }
3210 }
3211
3212 fn limit(&self) -> VmcsField {
3214 match self {
3215 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_LIMIT,
3216 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_LIMIT,
3217 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_LIMIT,
3218 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_LIMIT,
3219 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_LIMIT,
3220 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_LIMIT,
3221 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_LIMIT,
3222 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_LIMIT,
3223 }
3224 }
3225
3226 fn attributes(&self) -> VmcsField {
3228 match self {
3229 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_AR,
3230 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_AR,
3231 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_AR,
3232 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_AR,
3233 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_AR,
3234 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_AR,
3235 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_AR,
3236 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_AR,
3237 }
3238 }
3239}
3240
3241#[derive(Debug)]
3242enum TdxTableReg {
3243 Idtr,
3244 Gdtr,
3245}
3246
3247impl TdxTableReg {
3248 fn base_code(&self) -> VmcsField {
3249 match self {
3250 Self::Idtr => VmcsField::VMX_VMCS_GUEST_IDTR_BASE,
3251 Self::Gdtr => VmcsField::VMX_VMCS_GUEST_GDTR_BASE,
3252 }
3253 }
3254
3255 fn limit_code(&self) -> VmcsField {
3256 match self {
3257 Self::Idtr => VmcsField::VMX_VMCS_GUEST_IDTR_LIMIT,
3258 Self::Gdtr => VmcsField::VMX_VMCS_GUEST_GDTR_LIMIT,
3259 }
3260 }
3261}
3262
3263impl UhProcessor<'_, TdxBacked> {
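/// Writes the guest EFER for the given VTL, rejecting values that are invalid for TDX
/// (SVME or FFXSR set, NXE clear), and keeps the VMCS copy in sync.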
3264 fn write_efer(&mut self, vtl: GuestVtl, efer: u64) -> Result<(), vp_state::Error> {
3270 if efer & (X64_EFER_SVME | X64_EFER_FFXSR) != 0 {
3271 return Err(vp_state::Error::InvalidValue(
3272 efer,
3273 "EFER",
3274 "SVME or FFXSR set",
3275 ));
3276 }
3277
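// TDX requires EFER.NXE to remain set.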
3278 if efer & X64_EFER_NXE == 0 {
3280 return Err(vp_state::Error::InvalidValue(efer, "EFER", "NXE not set"));
3281 }
3282
3283 if self.backing.vtls[vtl].efer != efer {
3285 self.backing.vtls[vtl].efer = efer;
3286 self.runner
3287 .write_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER, !0, efer);
3288 }
3289
3290 Ok(())
3291 }
3292
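/// Reads the guest-visible CR0 for the given VTL.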
3293 fn read_cr0(&self, vtl: GuestVtl) -> u64 {
3296 self.backing.vtls[vtl].cr0.read(&self.runner)
3297 }
3298
3299 fn write_cr0(&mut self, vtl: GuestVtl, value: u64) -> Result<(), vp_state::Error> {
3301 self.backing.vtls[vtl]
3302 .cr0
3303 .write(value | X64_CR0_ET, &mut self.runner)
3304 }
3305
3306 fn read_cr4(&self, vtl: GuestVtl) -> u64 {
3307 self.backing.vtls[vtl].cr4.read(&self.runner)
3308 }
3309
3310 fn write_cr4(&mut self, vtl: GuestVtl, value: u64) -> Result<(), vp_state::Error> {
3311 self.backing.vtls[vtl].cr4.write(value, &mut self.runner)
3312 }
3313
3314 fn write_table_register(&mut self, vtl: GuestVtl, table: TdxTableReg, reg: TableRegister) {
3315 self.runner
3316 .write_vmcs64(vtl, table.base_code(), !0, reg.base);
3317 self.runner
3318 .write_vmcs32(vtl, table.limit_code(), !0, reg.limit.into());
3319 }
3320
3321 fn read_table_register(&self, vtl: GuestVtl, table: TdxTableReg) -> TableRegister {
3322 let base = self.runner.read_vmcs64(vtl, table.base_code());
3323 let limit = self.runner.read_vmcs32(vtl, table.limit_code());
3324
3325 TableRegister {
3326 base,
3327 limit: limit as u16,
3328 }
3329 }
3330
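/// Recomputes EFER.LMA from EFER.LME and CR0.PG and updates the VM-entry long-mode control to match.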
3331 fn update_execution_mode(&mut self, vtl: GuestVtl) {
3333 let lme = self.backing.vtls[vtl].efer & X64_EFER_LME == X64_EFER_LME;
3334 let pg = self.read_cr0(vtl) & X64_CR0_PG == X64_CR0_PG;
3335 let efer_lma = self.backing.vtls[vtl].efer & X64_EFER_LMA == X64_EFER_LMA;
3336 let lma = lme && pg;
3337
3338 if lma != efer_lma {
3339 let new_efer = self.backing.vtls[vtl].efer ^ X64_EFER_LMA;
3341 self.write_efer(vtl, new_efer)
3342 .expect("EFER was valid before, it should still be valid");
3343 }
3344
3345 self.runner.write_vmcs32(
3346 vtl,
3347 VmcsField::VMX_VMCS_ENTRY_CONTROLS,
3348 VMX_ENTRY_CONTROL_LONG_MODE_GUEST,
3349 if lma {
3350 VMX_ENTRY_CONTROL_LONG_MODE_GUEST
3351 } else {
3352 0
3353 },
3354 );
3355 }
3356
3357 async fn emulate_gdtr_or_idtr(
3358 &mut self,
3359 vtl: GuestVtl,
3360 dev: &impl CpuIo,
3361 ) -> Result<(), VpHaltReason> {
3362 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
3363 assert_eq!(
3364 exit_info.code().vmx_exit().basic_reason(),
3365 VmxExitBasic::GDTR_OR_IDTR
3366 );
3367 let instr_info = GdtrOrIdtrInstructionInfo::from(exit_info.instr_info().info());
3368
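// LGDT/LIDT require CPL 0; SGDT/SIDT fault when CPL > 0 and CR4.UMIP is set.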
3369 if (instr_info.instruction().is_load() && exit_info.cpl() != 0)
3372 || (!instr_info.instruction().is_load()
3373 && exit_info.cpl() > 0
3374 && self.read_cr4(vtl) & X64_CR4_UMIP != 0)
3375 {
3376 self.inject_gpf(vtl);
3377 return Ok(());
3378 }
3379
3380 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3381 exit_info.qualification(),
3382 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3383 (!instr_info.index_register_invalid()).then_some(instr_info.index_register()),
3384 instr_info.scaling(),
3385 instr_info.address_size(),
3386 instr_info.segment_register(),
3387 );
3388
3389 let gm = &self.partition.gm[vtl];
3390 let interruption_pending = self.backing.vtls[vtl].interruption_information.valid();
3391 let len = 2 + if self.long_mode(vtl) { 8 } else { 4 };
3392 let mut buf = [0u8; 10];
3393
3394 match instr_info.instruction() {
3395 GdtrOrIdtrInstruction::Sidt | GdtrOrIdtrInstruction::Sgdt => {
3396 let table = self.read_table_register(
3397 vtl,
3398 if matches!(instr_info.instruction(), GdtrOrIdtrInstruction::Sidt) {
3399 TdxTableReg::Idtr
3400 } else {
3401 TdxTableReg::Gdtr
3402 },
3403 );
3404 buf[..2].copy_from_slice(&table.limit.to_le_bytes());
3405 buf[2..].copy_from_slice(&table.base.to_le_bytes());
3406 let mut emulation_state = UhEmulationState {
3407 vp: &mut *self,
3408 interruption_pending,
3409 devices: dev,
3410 vtl,
3411 cache: TdxEmulationCache::default(),
3412 };
3413 emulate_insn_memory_op(
3414 &mut emulation_state,
3415 gm,
3416 dev,
3417 gva,
3418 segment,
3419 x86emu::AlignmentMode::Unaligned,
3420 EmulatedMemoryOperation::Write(&buf[..len]),
3421 )
3422 .await?;
3423 }
3424
3425 GdtrOrIdtrInstruction::Lgdt | GdtrOrIdtrInstruction::Lidt => {
3426 let mut emulation_state = UhEmulationState {
3427 vp: &mut *self,
3428 interruption_pending,
3429 devices: dev,
3430 vtl,
3431 cache: TdxEmulationCache::default(),
3432 };
3433 emulate_insn_memory_op(
3434 &mut emulation_state,
3435 gm,
3436 dev,
3437 gva,
3438 segment,
3439 x86emu::AlignmentMode::Unaligned,
3440 EmulatedMemoryOperation::Read(&mut buf[..len]),
3441 )
3442 .await?;
3443 let table = TableRegister {
3444 limit: u16::from_le_bytes(buf[..2].try_into().unwrap()),
3445 base: u64::from_le_bytes(buf[2..len].try_into().unwrap()),
3446 };
3447 self.write_table_register(
3448 vtl,
3449 if matches!(instr_info.instruction(), GdtrOrIdtrInstruction::Lidt) {
3450 TdxTableReg::Idtr
3451 } else {
3452 TdxTableReg::Gdtr
3453 },
3454 table,
3455 );
3456 }
3457 }
3458
3459 self.advance_to_next_instruction(vtl);
3460 Ok(())
3461 }
3462
3463 async fn emulate_ldtr_or_tr(
3464 &mut self,
3465 vtl: GuestVtl,
3466 dev: &impl CpuIo,
3467 ) -> Result<(), VpHaltReason> {
3468 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
3469 assert_eq!(
3470 exit_info.code().vmx_exit().basic_reason(),
3471 VmxExitBasic::LDTR_OR_TR
3472 );
3473 let instr_info = LdtrOrTrInstructionInfo::from(exit_info.instr_info().info());
3474
3475 if (instr_info.instruction().is_load() && exit_info.cpl() != 0)
3478 || (!instr_info.instruction().is_load()
3479 && exit_info.cpl() > 0
3480 && self.read_cr4(vtl) & X64_CR4_UMIP != 0)
3481 {
3482 self.inject_gpf(vtl);
3483 return Ok(());
3484 }
3485
3486 let gm = &self.partition.gm[vtl];
3487 let interruption_pending = self.backing.vtls[vtl].interruption_information.valid();
3488
3489 match instr_info.instruction() {
3490 LdtrOrTrInstruction::Sldt | LdtrOrTrInstruction::Str => {
3491 let value = self.runner.read_vmcs16(
3492 vtl,
3493 if matches!(instr_info.instruction(), LdtrOrTrInstruction::Sldt) {
3494 TdxSegmentReg::Ldtr
3495 } else {
3496 TdxSegmentReg::Tr
3497 }
3498 .selector(),
3499 );
3500
3501 if instr_info.memory_or_register() {
3502 let gps = self.runner.tdx_enter_guest_gps_mut();
3503 gps[instr_info.register_1() as usize] = value.into();
3504 } else {
3505 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3506 exit_info.qualification(),
3507 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3508 (!instr_info.index_register_invalid())
3509 .then_some(instr_info.index_register()),
3510 instr_info.scaling(),
3511 instr_info.address_size(),
3512 instr_info.segment_register(),
3513 );
3514 let mut emulation_state = UhEmulationState {
3515 vp: &mut *self,
3516 interruption_pending,
3517 devices: dev,
3518 vtl,
3519 cache: TdxEmulationCache::default(),
3520 };
3521 emulate_insn_memory_op(
3522 &mut emulation_state,
3523 gm,
3524 dev,
3525 gva,
3526 segment,
3527 x86emu::AlignmentMode::Standard,
3528 EmulatedMemoryOperation::Write(&value.to_le_bytes()),
3529 )
3530 .await?;
3531 }
3532 }
3533
3534 LdtrOrTrInstruction::Lldt | LdtrOrTrInstruction::Ltr => {
3535 let value = if instr_info.memory_or_register() {
3536 let gps = self.runner.tdx_enter_guest_gps();
3537 gps[instr_info.register_1() as usize] as u16
3538 } else {
3539 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3540 exit_info.qualification(),
3541 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3542 (!instr_info.index_register_invalid())
3543 .then_some(instr_info.index_register()),
3544 instr_info.scaling(),
3545 instr_info.address_size(),
3546 instr_info.segment_register(),
3547 );
3548 let mut emulation_state = UhEmulationState {
3549 vp: &mut *self,
3550 interruption_pending,
3551 devices: dev,
3552 vtl,
3553 cache: TdxEmulationCache::default(),
3554 };
3555 let mut buf = [0u8; 2];
3556 emulate_insn_memory_op(
3557 &mut emulation_state,
3558 gm,
3559 dev,
3560 gva,
3561 segment,
3562 x86emu::AlignmentMode::Standard,
3563 EmulatedMemoryOperation::Read(&mut buf),
3564 )
3565 .await?;
3566 u16::from_le_bytes(buf)
3567 };
3568 self.runner.write_vmcs16(
3569 vtl,
3570 if matches!(instr_info.instruction(), LdtrOrTrInstruction::Lldt) {
3571 TdxSegmentReg::Ldtr
3572 } else {
3573 TdxSegmentReg::Tr
3574 }
3575 .selector(),
3576 !0,
3577 value,
3578 );
3579 }
3580 }
3581
3582 self.advance_to_next_instruction(vtl);
3583 Ok(())
3584 }
3585
3586 fn compute_gva_for_table_access_emulation(
3587 &self,
3588 qualification: u64,
3589 base_reg: Option<u8>,
3590 index_reg: Option<u8>,
3591 scaling: u8,
3592 address_size: u8,
3593 segment_register: u8,
3594 ) -> (u64, Segment) {
3595 let gps = self.runner.tdx_enter_guest_gps();
3596
3597 let mut gva = qualification;
3599 if let Some(base_register) = base_reg {
3600 gva += gps[base_register as usize];
3601 }
3602 if let Some(index_register) = index_reg {
3603 gva += gps[index_register as usize] << scaling;
3604 }
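// Truncate the GVA to the instruction's address size: 0 = 16-bit, 1 = 32-bit, 2 = 64-bit.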
3605 match address_size {
3606 0 => gva &= 0xFFFF,
3608 1 => gva &= 0xFFFFFFFF,
3610 2 => {}
3612 _ => unreachable!(),
3613 }
3614
3615 let segment = match segment_register {
3616 0 => Segment::ES,
3617 1 => Segment::CS,
3618 2 => Segment::SS,
3619 3 => Segment::DS,
3620 4 => Segment::FS,
3621 5 => Segment::GS,
3622 _ => unreachable!(),
3623 };
3624
3625 (gva, segment)
3626 }
3627}
3628
3629struct TdxApicClient<'a, T> {
3630 partition: &'a UhPartitionInner,
3631 apic_page: &'a mut ApicPage,
3632 dev: &'a T,
3633 vmtime: &'a VmTimeAccess,
3634 vtl: GuestVtl,
3635}
3636
3637impl<T: CpuIo> ApicClient for TdxApicClient<'_, T> {
3638 fn cr8(&mut self) -> u32 {
3639 self.apic_page.tpr.value >> 4
3640 }
3641
3642 fn set_cr8(&mut self, value: u32) {
3643 self.apic_page.tpr.value = value << 4;
3644 }
3645
3646 fn set_apic_base(&mut self, _value: u64) {
3647 }
3649
3650 fn wake(&mut self, vp_index: VpIndex) {
3651 self.partition.vps[vp_index.index() as usize].wake(self.vtl, WakeReason::INTCON);
3652 }
3653
3654 fn eoi(&mut self, vector: u8) {
3655 self.dev.handle_eoi(vector.into())
3656 }
3657
3658 fn now(&mut self) -> vmcore::vmtime::VmTime {
3659 self.vmtime.now()
3660 }
3661
3662 fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]) {
3663 pull_apic_offload(self.apic_page)
3664 }
3665}
3666
3667fn pull_apic_offload(page: &mut ApicPage) -> ([u32; 8], [u32; 8]) {
3668 let mut irr = [0; 8];
3669 let mut isr = [0; 8];
3670 for (((irr, page_irr), isr), page_isr) in irr
3671 .iter_mut()
3672 .zip(page.irr.iter_mut())
3673 .zip(isr.iter_mut())
3674 .zip(page.isr.iter_mut())
3675 {
3676 *irr = std::mem::take(&mut page_irr.value);
3677 *isr = std::mem::take(&mut page_isr.value);
3678 }
3679 (irr, isr)
3680}
3681
3682impl<T> hv1_hypercall::X64RegisterState for UhHypercallHandler<'_, '_, T, TdxBacked> {
3683 fn rip(&mut self) -> u64 {
3684 self.vp.backing.vtls[self.intercepted_vtl].private_regs.rip
3685 }
3686
3687 fn set_rip(&mut self, rip: u64) {
3688 self.vp.backing.vtls[self.intercepted_vtl].private_regs.rip = rip;
3689 }
3690
3691 fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
3692 let gps = self.vp.runner.tdx_enter_guest_gps();
3693 match n {
3694 hv1_hypercall::X64HypercallRegister::Rax => gps[TdxGp::RAX],
3695 hv1_hypercall::X64HypercallRegister::Rcx => gps[TdxGp::RCX],
3696 hv1_hypercall::X64HypercallRegister::Rdx => gps[TdxGp::RDX],
3697 hv1_hypercall::X64HypercallRegister::Rbx => gps[TdxGp::RBX],
3698 hv1_hypercall::X64HypercallRegister::Rsi => gps[TdxGp::RSI],
3699 hv1_hypercall::X64HypercallRegister::Rdi => gps[TdxGp::RDI],
3700 hv1_hypercall::X64HypercallRegister::R8 => gps[TdxGp::R8],
3701 }
3702 }
3703
3704 fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
3705 let gps = self.vp.runner.tdx_enter_guest_gps_mut();
3706 match n {
3707 hv1_hypercall::X64HypercallRegister::Rax => gps[TdxGp::RAX] = value,
3708 hv1_hypercall::X64HypercallRegister::Rcx => gps[TdxGp::RCX] = value,
3709 hv1_hypercall::X64HypercallRegister::Rdx => gps[TdxGp::RDX] = value,
3710 hv1_hypercall::X64HypercallRegister::Rbx => gps[TdxGp::RBX] = value,
3711 hv1_hypercall::X64HypercallRegister::Rsi => gps[TdxGp::RSI] = value,
3712 hv1_hypercall::X64HypercallRegister::Rdi => gps[TdxGp::RDI] = value,
3713 hv1_hypercall::X64HypercallRegister::R8 => gps[TdxGp::R8] = value,
3714 }
3715 }
3716
3717 fn xmm(&mut self, n: usize) -> u128 {
3719 u128::from_ne_bytes(self.vp.runner.fx_state().xmm[n])
3720 }
3721
3722 fn set_xmm(&mut self, n: usize, value: u128) {
3723 self.vp.runner.fx_state_mut().xmm[n] = value.to_ne_bytes();
3724 }
3725}
3726
3727impl<T: CpuIo> UhHypercallHandler<'_, '_, T, TdxBacked> {
3728 const TDX_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
3729 Self,
3730 [
3731 hv1_hypercall::HvModifySparseGpaPageHostVisibility,
3732 hv1_hypercall::HvQuerySparseGpaPageHostVisibility,
3733 hv1_hypercall::HvX64StartVirtualProcessor,
3734 hv1_hypercall::HvGetVpIndexFromApicId,
3735 hv1_hypercall::HvRetargetDeviceInterrupt,
3736 hv1_hypercall::HvFlushVirtualAddressList,
3737 hv1_hypercall::HvFlushVirtualAddressListEx,
3738 hv1_hypercall::HvFlushVirtualAddressSpace,
3739 hv1_hypercall::HvFlushVirtualAddressSpaceEx,
3740 hv1_hypercall::HvPostMessage,
3741 hv1_hypercall::HvSignalEvent,
3742 hv1_hypercall::HvExtQueryCapabilities,
3743 hv1_hypercall::HvGetVpRegisters,
3744 hv1_hypercall::HvSetVpRegisters,
3745 hv1_hypercall::HvEnablePartitionVtl,
3746 hv1_hypercall::HvX64EnableVpVtl,
3747 hv1_hypercall::HvVtlCall,
3748 hv1_hypercall::HvVtlReturn,
3749 hv1_hypercall::HvModifyVtlProtectionMask,
3750 hv1_hypercall::HvX64TranslateVirtualAddress,
3751 hv1_hypercall::HvSendSyntheticClusterIpi,
3752 hv1_hypercall::HvSendSyntheticClusterIpiEx,
3753 hv1_hypercall::HvInstallIntercept,
3754 hv1_hypercall::HvAssertVirtualInterrupt,
3755 ]
3756 );
3757
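// Hypercalls accepted directly over tdvmcall: only the synic message and event hypercalls
// needed by the untrusted synic.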
3758 const TDCALL_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
3762 Self,
3763 [hv1_hypercall::HvPostMessage, hv1_hypercall::HvSignalEvent],
3764 );
3765}
3766
3767impl AccessVpState for UhVpStateAccess<'_, '_, TdxBacked> {
3768 type Error = vp_state::Error;
3769
3770 fn caps(&self) -> &virt::x86::X86PartitionCapabilities {
3771 &self.vp.partition.caps
3772 }
3773
3774 fn commit(&mut self) -> Result<(), Self::Error> {
3775 Ok(())
3776 }
3777
3778 fn registers(&mut self) -> Result<Registers, Self::Error> {
3779 let gps = self.vp.runner.tdx_enter_guest_gps();
3780
3781 let cs = self.vp.read_segment(self.vtl, TdxSegmentReg::Cs);
3782 let ds = self.vp.read_segment(self.vtl, TdxSegmentReg::Ds);
3783 let es = self.vp.read_segment(self.vtl, TdxSegmentReg::Es);
3784 let fs = self.vp.read_segment(self.vtl, TdxSegmentReg::Fs);
3785 let gs = self.vp.read_segment(self.vtl, TdxSegmentReg::Gs);
3786 let ss = self.vp.read_segment(self.vtl, TdxSegmentReg::Ss);
3787 let tr = self.vp.read_segment(self.vtl, TdxSegmentReg::Tr);
3788 let ldtr = self.vp.read_segment(self.vtl, TdxSegmentReg::Ldtr);
3789
3790 let gdtr = self.vp.read_table_register(self.vtl, TdxTableReg::Gdtr);
3791 let idtr = self.vp.read_table_register(self.vtl, TdxTableReg::Idtr);
3792
3793 let cr0 = self.vp.read_cr0(self.vtl);
3794 let cr2 = self.vp.runner.cr2();
3795 let cr3 = self
3796 .vp
3797 .runner
3798 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_CR3);
3799 let cr4 = self.vp.read_cr4(self.vtl);
3800
3801 let cr8 = self.vp.runner.tdx_apic_page(self.vtl).tpr.value >> 4;
3802
3803 let efer = self.vp.backing.vtls[self.vtl].efer;
3804
3805 Ok(Registers {
3806 rax: gps[TdxGp::RAX],
3807 rcx: gps[TdxGp::RCX],
3808 rdx: gps[TdxGp::RDX],
3809 rbx: gps[TdxGp::RBX],
3810 rsp: self.vp.backing.vtls[self.vtl].private_regs.rsp,
3811 rbp: gps[TdxGp::RBP],
3812 rsi: gps[TdxGp::RSI],
3813 rdi: gps[TdxGp::RDI],
3814 r8: gps[TdxGp::R8],
3815 r9: gps[TdxGp::R9],
3816 r10: gps[TdxGp::R10],
3817 r11: gps[TdxGp::R11],
3818 r12: gps[TdxGp::R12],
3819 r13: gps[TdxGp::R13],
3820 r14: gps[TdxGp::R14],
3821 r15: gps[TdxGp::R15],
3822 rip: self.vp.backing.vtls[self.vtl].private_regs.rip,
3823 rflags: self.vp.backing.vtls[self.vtl].private_regs.rflags,
3824 cs,
3825 ds,
3826 es,
3827 fs,
3828 gs,
3829 ss,
3830 tr,
3831 ldtr,
3832 gdtr,
3833 idtr,
3834 cr0,
3835 cr2,
3836 cr3,
3837 cr4,
3838 cr8: cr8.into(),
3839 efer,
3840 })
3841 }
3842
3843 fn set_registers(&mut self, value: &Registers) -> Result<(), Self::Error> {
3844 let Registers {
3845 rax,
3846 rcx,
3847 rdx,
3848 rbx,
3849 rsp,
3850 rbp,
3851 rsi,
3852 rdi,
3853 r8,
3854 r9,
3855 r10,
3856 r11,
3857 r12,
3858 r13,
3859 r14,
3860 r15,
3861 rip,
3862 rflags,
3863 cs,
3864 ds,
3865 es,
3866 fs,
3867 gs,
3868 ss,
3869 tr,
3870 ldtr,
3871 gdtr,
3872 idtr,
3873 cr0,
3874 cr2,
3875 cr3,
3876 cr4,
3877 cr8,
3878 efer,
3879 } = value;
3880
3881 let gps = self.vp.runner.tdx_enter_guest_gps_mut();
3882 gps[TdxGp::RAX] = *rax;
3883 gps[TdxGp::RCX] = *rcx;
3884 gps[TdxGp::RDX] = *rdx;
3885 gps[TdxGp::RBX] = *rbx;
3886 self.vp.backing.vtls[self.vtl].private_regs.rsp = *rsp;
3887 gps[TdxGp::RBP] = *rbp;
3888 gps[TdxGp::RSI] = *rsi;
3889 gps[TdxGp::RDI] = *rdi;
3890 gps[TdxGp::R8] = *r8;
3891 gps[TdxGp::R9] = *r9;
3892 gps[TdxGp::R10] = *r10;
3893 gps[TdxGp::R11] = *r11;
3894 gps[TdxGp::R12] = *r12;
3895 gps[TdxGp::R13] = *r13;
3896 gps[TdxGp::R14] = *r14;
3897 gps[TdxGp::R15] = *r15;
3898 self.vp.backing.vtls[self.vtl].private_regs.rip = *rip;
3899 self.vp.backing.vtls[self.vtl].private_regs.rflags = *rflags;
3901
3902 self.vp.write_segment(self.vtl, TdxSegmentReg::Cs, *cs)?;
3904 self.vp.write_segment(self.vtl, TdxSegmentReg::Ds, *ds)?;
3905 self.vp.write_segment(self.vtl, TdxSegmentReg::Es, *es)?;
3906 self.vp.write_segment(self.vtl, TdxSegmentReg::Fs, *fs)?;
3907 self.vp.write_segment(self.vtl, TdxSegmentReg::Gs, *gs)?;
3908 self.vp.write_segment(self.vtl, TdxSegmentReg::Ss, *ss)?;
3909 self.vp.write_segment(self.vtl, TdxSegmentReg::Tr, *tr)?;
3910 self.vp
3911 .write_segment(self.vtl, TdxSegmentReg::Ldtr, *ldtr)?;
3912
3913 self.vp
3915 .write_table_register(self.vtl, TdxTableReg::Gdtr, *gdtr);
3916 self.vp
3917 .write_table_register(self.vtl, TdxTableReg::Idtr, *idtr);
3918
3919 self.vp.write_cr0(self.vtl, *cr0)?;
3920
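// CR2 is not a VMCS field; the runner saves and restores it around VP.ENTER.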
3921 self.vp.runner.set_cr2(*cr2);
3924
3925 self.vp
3926 .runner
3927 .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_CR3, !0, *cr3);
3928
3929 self.vp.write_cr4(self.vtl, *cr4)?;
3930
3931 self.vp.runner.tdx_apic_page_mut(self.vtl).tpr.value = (*cr8 << 4) as u32;
3932
3933 self.vp.write_efer(self.vtl, *efer)?;
3934
3935 self.vp.update_execution_mode(self.vtl);
3937
3938 Ok(())
3939 }
3940
3941 fn activity(&mut self) -> Result<vp::Activity, Self::Error> {
3942 let lapic = &self.vp.backing.cvm.lapics[self.vtl];
3943 let interruptibility: Interruptibility = self
3944 .vp
3945 .runner
3946 .read_vmcs32(self.vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
3947 .into();
3948 Ok(vp::Activity {
3949 mp_state: lapic.activity,
3950 nmi_pending: lapic.nmi_pending,
3951 nmi_masked: interruptibility.blocked_by_nmi(),
3952 interrupt_shadow: interruptibility.blocked_by_sti()
3953 || interruptibility.blocked_by_movss(),
pending_event: None,
pending_interruption: None,
})
3957 }
3958
3959 fn set_activity(&mut self, value: &vp::Activity) -> Result<(), Self::Error> {
3960 let &vp::Activity {
3961 mp_state,
3962 nmi_pending,
3963 nmi_masked,
3964 interrupt_shadow,
pending_event: _,
pending_interruption: _,
} = value;
3968 self.vp.backing.cvm.lapics[self.vtl].activity = mp_state;
3969 self.vp.backing.cvm.lapics[self.vtl].nmi_pending = nmi_pending;
3970 let interruptibility = Interruptibility::new()
3971 .with_blocked_by_movss(interrupt_shadow)
3972 .with_blocked_by_nmi(nmi_masked);
3973 self.vp.runner.write_vmcs32(
3974 self.vtl,
3975 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
3976 !0,
3977 interruptibility.into(),
3978 );
3979 Ok(())
3980 }
3981
3982 fn xsave(&mut self) -> Result<vp::Xsave, Self::Error> {
3983 Err(vp_state::Error::Unimplemented("xsave"))
3985 }
3986
3987 fn set_xsave(&mut self, _value: &vp::Xsave) -> Result<(), Self::Error> {
3988 Err(vp_state::Error::Unimplemented("xsave"))
3990 }
3991
3992 fn apic(&mut self) -> Result<vp::Apic, Self::Error> {
3993 self.vp.access_apic_without_offload(self.vtl, |vp| {
3994 Ok(vp.backing.cvm.lapics[self.vtl].lapic.save())
3995 })
3996 }
3997
3998 fn set_apic(&mut self, value: &vp::Apic) -> Result<(), Self::Error> {
3999 self.vp.access_apic_without_offload(self.vtl, |vp| {
4000 vp.backing.cvm.lapics[self.vtl]
4001 .lapic
4002 .restore(value)
4003 .map_err(vp_state::Error::InvalidApicBase)?;
4004
4005 Ok(())
4006 })
4007 }
4008
4009 fn xcr(&mut self) -> Result<vp::Xcr0, Self::Error> {
4010 Ok(vp::Xcr0 {
4011 value: self
4012 .vp
4013 .runner
4014 .get_vp_register(self.vtl, HvX64RegisterName::Xfem)
4015 .unwrap()
4016 .as_u64(),
4017 })
4018 }
4019
4020 fn set_xcr(&mut self, _value: &vp::Xcr0) -> Result<(), Self::Error> {
4021 Err(vp_state::Error::Unimplemented("xcr"))
4022 }
4023
4024 fn xss(&mut self) -> Result<vp::Xss, Self::Error> {
4025 Ok(vp::Xss {
4026 value: self.vp.backing.vtls[self.vtl].private_regs.msr_xss,
4027 })
4028 }
4029
4030 fn set_xss(&mut self, value: &vp::Xss) -> Result<(), Self::Error> {
4031 self.vp.backing.vtls[self.vtl].private_regs.msr_xss = value.value;
4032 Ok(())
4033 }
4034
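// MTRRs are not tracked for TDX guests: report a zeroed configuration and accept writes without storing them.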
4035 fn mtrrs(&mut self) -> Result<vp::Mtrrs, Self::Error> {
4036 Ok(vp::Mtrrs {
msr_mtrr_def_type: 0,
fixed: [0; 11],
variable: [0; 16],
})
4041 }
4042
4043 fn set_mtrrs(&mut self, _value: &vp::Mtrrs) -> Result<(), Self::Error> {
4044 Ok(())
4046 }
4047
4048 fn pat(&mut self) -> Result<vp::Pat, Self::Error> {
4049 let msr_cr_pat = self
4050 .vp
4051 .runner
4052 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_PAT);
4053 Ok(vp::Pat { value: msr_cr_pat })
4054 }
4055
4056 fn set_pat(&mut self, value: &vp::Pat) -> Result<(), Self::Error> {
4057 self.vp
4058 .runner
4059 .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_PAT, !0, value.value);
4060 Ok(())
4061 }
4062
4063 fn virtual_msrs(&mut self) -> Result<vp::VirtualMsrs, Self::Error> {
4064 let state = &self.vp.backing.vtls[self.vtl].private_regs;
4065
4066 let sysenter_cs = self
4067 .vp
4068 .runner
4069 .read_vmcs32(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR)
4070 .into();
4071 let sysenter_eip = self
4072 .vp
4073 .runner
4074 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR);
4075 let sysenter_esp = self
4076 .vp
4077 .runner
4078 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR);
4079
4080 Ok(vp::VirtualMsrs {
4081 kernel_gs_base: state.msr_kernel_gs_base,
4082 sysenter_cs,
4083 sysenter_eip,
4084 sysenter_esp,
4085 star: state.msr_star,
4086 lstar: state.msr_lstar,
4087 cstar: self.vp.backing.vtls[self.vtl].msr_cstar,
4088 sfmask: state.msr_sfmask,
4089 })
4090 }
4091
4092 fn set_virtual_msrs(&mut self, value: &vp::VirtualMsrs) -> Result<(), Self::Error> {
4093 let &vp::VirtualMsrs {
4094 kernel_gs_base,
4095 sysenter_cs,
4096 sysenter_eip,
4097 sysenter_esp,
4098 star,
4099 lstar,
4100 cstar,
4101 sfmask,
4102 } = value;
4103
4104 let state = &mut self.vp.backing.vtls[self.vtl].private_regs;
4105 state.msr_kernel_gs_base = kernel_gs_base;
4106 state.msr_star = star;
4107 state.msr_lstar = lstar;
4108 state.msr_sfmask = sfmask;
4109
4110 self.vp.runner.write_vmcs32(
4111 self.vtl,
4112 VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR,
4113 !0,
4114 sysenter_cs as u32,
4115 );
4116 self.vp.runner.write_vmcs64(
4117 self.vtl,
4118 VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR,
4119 !0,
4120 sysenter_eip,
4121 );
4122 self.vp.runner.write_vmcs64(
4123 self.vtl,
4124 VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR,
4125 !0,
4126 sysenter_esp,
4127 );
4128
4129 self.vp.backing.vtls[self.vtl].msr_cstar = cstar;
4130
4131 Ok(())
4132 }
4133
4134 fn debug_regs(&mut self) -> Result<vp::DebugRegisters, Self::Error> {
4135 let mut values = [0u64.into(); 5];
4136 self.vp
4137 .runner
4138 .get_vp_registers(
4139 self.vtl,
4140 &[
4141 HvX64RegisterName::Dr0,
4142 HvX64RegisterName::Dr1,
4143 HvX64RegisterName::Dr2,
4144 HvX64RegisterName::Dr3,
4145 HvX64RegisterName::Dr6,
4146 ],
4147 &mut values,
4148 )
4149 .map_err(vp_state::Error::GetRegisters)?;
4150
4151 let dr7 = self
4152 .vp
4153 .runner
4154 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_DR7);
4155
4156 Ok(vp::DebugRegisters {
4157 dr0: values[0].as_u64(),
4158 dr1: values[1].as_u64(),
4159 dr2: values[2].as_u64(),
4160 dr3: values[3].as_u64(),
4161 dr6: values[4].as_u64(),
4162 dr7,
4163 })
4164 }

    fn set_debug_regs(&mut self, value: &vp::DebugRegisters) -> Result<(), Self::Error> {
        let &vp::DebugRegisters {
            dr0,
            dr1,
            dr2,
            dr3,
            dr6,
            dr7,
        } = value;
        self.vp
            .runner
            .set_vp_registers(
                self.vtl,
                [
                    (HvX64RegisterName::Dr0, dr0),
                    (HvX64RegisterName::Dr1, dr1),
                    (HvX64RegisterName::Dr2, dr2),
                    (HvX64RegisterName::Dr3, dr3),
                    (HvX64RegisterName::Dr6, dr6),
                ],
            )
            .map_err(vp_state::Error::SetRegisters)?;

        self.vp
            .runner
            .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_DR7, !0, dr7);

        Ok(())
    }

    fn tsc(&mut self) -> Result<vp::Tsc, Self::Error> {
        Err(vp_state::Error::Unimplemented("tsc"))
    }

    fn set_tsc(&mut self, _value: &vp::Tsc) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("tsc"))
    }

    fn tsc_aux(&mut self) -> Result<vp::TscAux, Self::Error> {
        Ok(vp::TscAux {
            value: self.vp.backing.vtls[self.vtl].private_regs.msr_tsc_aux,
        })
    }

    fn set_tsc_aux(&mut self, value: &vp::TscAux) -> Result<(), Self::Error> {
        self.vp.backing.vtls[self.vtl].private_regs.msr_tsc_aux = value.value;
        Ok(())
    }

    fn cet(&mut self) -> Result<vp::Cet, Self::Error> {
        Err(vp_state::Error::Unimplemented("cet"))
    }

    fn set_cet(&mut self, _value: &vp::Cet) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("cet"))
    }

    fn cet_ss(&mut self) -> Result<vp::CetSs, Self::Error> {
        Err(vp_state::Error::Unimplemented("cet_ss"))
    }

    fn set_cet_ss(&mut self, _value: &vp::CetSs) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("cet_ss"))
    }

    fn synic_msrs(&mut self) -> Result<vp::SyntheticMsrs, Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_msrs"))
    }

    fn set_synic_msrs(&mut self, _value: &vp::SyntheticMsrs) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_msrs"))
    }

    fn synic_message_page(&mut self) -> Result<vp::SynicMessagePage, Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_message_page"))
    }

    fn set_synic_message_page(&mut self, _value: &vp::SynicMessagePage) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_message_page"))
    }

    fn synic_event_flags_page(&mut self) -> Result<vp::SynicEventFlagsPage, Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
    }

    fn set_synic_event_flags_page(
        &mut self,
        _value: &vp::SynicEventFlagsPage,
    ) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
    }

    fn synic_message_queues(&mut self) -> Result<vp::SynicMessageQueues, Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_message_queues"))
    }

    fn set_synic_message_queues(
        &mut self,
        _value: &vp::SynicMessageQueues,
    ) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_message_queues"))
    }

    fn synic_timers(&mut self) -> Result<vp::SynicTimers, Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_timers"))
    }

    fn set_synic_timers(&mut self, _value: &vp::SynicTimers) -> Result<(), Self::Error> {
        Err(vp_state::Error::Unimplemented("synic_timers"))
    }
}

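/// Returns the highest vector whose bit is set in a 256-bit APIC register
/// array (e.g. the IRR or ISR, laid out as eight 32-bit registers), or 0 if no
/// bits are set.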
fn top_vector(reg: &[ApicRegister; 8]) -> u8 {
    reg.iter()
        .enumerate()
        .rev()
        .find_map(|(i, r)| {
            (r.value != 0).then(|| (i as u32 * 32 + (31 - r.value.leading_zeros())) as u8)
        })
        .unwrap_or(0)
}

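/// Adapter that implements the hypercall register I/O conventions on top of
/// the guest GP and XMM state exchanged with TDG.VP.ENTER: R10 holds the
/// hypercall control value (cleared on completion), R11 receives the result,
/// and RDX/R8 carry the input/output GPAs.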
struct TdHypercall<'a, 'b, T>(UhHypercallHandler<'a, 'b, T, TdxBacked>);

impl<'a, 'b, T> AsHandler<UhHypercallHandler<'a, 'b, T, TdxBacked>> for TdHypercall<'a, 'b, T> {
    fn as_handler(&mut self) -> &mut UhHypercallHandler<'a, 'b, T, TdxBacked> {
        &mut self.0
    }
}

impl<T> HypercallIo for TdHypercall<'_, '_, T> {
    fn advance_ip(&mut self) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = 0;
        self.0.vp.backing.vtls[self.0.intercepted_vtl]
            .private_regs
            .rip = self.0.vp.backing.vtls[self.0.intercepted_vtl]
            .private_regs
            .rip
            .wrapping_add(4);
    }

    fn retry(&mut self, control: u64) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = control;
        self.set_result(hvdef::hypercall::HypercallOutput::from(HvError::Timeout).into());
    }

    fn control(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::R10]
    }

    fn input_gpa(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::RDX]
    }

    fn output_gpa(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::R8]
    }

    fn fast_register_pair_count(&mut self) -> usize {
        7
    }

    fn extended_fast_hypercalls_ok(&mut self) -> bool {
        false
    }

    fn fast_input(&mut self, buf: &mut [[u64; 2]], _output_register_pairs: usize) -> usize {
        self.fast_regs(0, buf);
        buf.len()
    }

    fn fast_output(&mut self, _starting_pair_index: usize, buf: &[[u64; 2]]) {
        assert!(buf.is_empty());
    }

    fn vtl_input(&mut self) -> u64 {
        unreachable!()
    }

    fn set_result(&mut self, n: u64) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R11] = n;
    }

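    // Fast hypercall data: register pair 0 is carried in RDX/R8, and each
    // subsequent pair comes from one of the guest's XMM registers.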
    fn fast_regs(&mut self, starting_pair_index: usize, buf: &mut [[u64; 2]]) {
        let regs = self.0.vp.runner.tdx_enter_guest_gps();
        let fx_state = self.0.vp.runner.fx_state();
        for (i, [low, high]) in buf.iter_mut().enumerate() {
            let index = i + starting_pair_index;
            if index == 0 {
                *low = regs[TdxGp::RDX];
                *high = regs[TdxGp::R8];
            } else {
                let value = u128::from_ne_bytes(fx_state.xmm[index - 1]);
                *low = value as u64;
                *high = (value >> 64) as u64;
            }
        }
    }
}

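// VTL switch support: RIP is advanced via the standard x64 register I/O
// helper, and malformed switch requests are reported by injecting #UD into the
// intercepted VTL.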
impl<T> hv1_hypercall::VtlSwitchOps for UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn advance_ip(&mut self) {
        let long_mode = self.vp.long_mode(self.intercepted_vtl);
        let mut io = hv1_hypercall::X64RegisterIo::new(self, long_mode);
        io.advance_ip();
    }

    fn inject_invalid_opcode_fault(&mut self) {
        self.vp.backing.vtls[self.intercepted_vtl].interruption_information =
            InterruptionInformation::new()
                .with_valid(true)
                .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION)
                .with_vector(x86defs::Exception::INVALID_OPCODE.0);
    }
}

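// The non-extended HvCallFlushVirtualAddressList is handled by forwarding to
// the extended (Ex) implementation below.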
impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressList for UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn flush_virtual_address_list(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
        gva_ranges: &[HvGvaRange],
    ) -> HvRepResult {
        hv1_hypercall::FlushVirtualAddressListEx::flush_virtual_address_list_ex(
            self,
            processor_set,
            flags,
            gva_ranges,
        )
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressListEx
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_list_ex(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
        gva_ranges: &[HvGvaRange],
    ) -> HvRepResult {
        self.hcvm_validate_flush_inputs(processor_set, flags, true)
            .map_err(|e| (e, 0))?;

        let vtl = self.intercepted_vtl;
        let flush_state = &self.vp.shared.flush_state[vtl];

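        // Queue the individual ranges for flushing; if they cannot all be
        // recorded, fall back to flushing the entire address space (non-global
        // mappings only, if that is all the guest asked for).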
        if let Err(()) = Self::add_ranges_to_tlb_flush_list(
            flush_state,
            gva_ranges,
            flags.use_extended_range_format(),
        ) {
            if flags.non_global_mappings_only() {
                flush_state
                    .flush_entire_non_global_counter
                    .fetch_add(1, Ordering::Relaxed);
            } else {
                flush_state
                    .flush_entire_counter
                    .fetch_add(1, Ordering::Relaxed);
            }
        }

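        // Wake the targeted processors so they observe the pending flush; a
        // request for all processors wakes every VP.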
        TdxTlbLockFlushAccess {
            vp_index: Some(self.vp.vp_index()),
            partition: self.vp.partition,
            shared: self.vp.shared,
        }
        .wake_processors_for_tlb_flush(vtl, (!flags.all_processors()).then_some(processor_set));

        self.vp.set_wait_for_tlb_locks(vtl);

        Ok(())
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressSpace
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_space(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
    ) -> hvdef::HvResult<()> {
        hv1_hypercall::FlushVirtualAddressSpaceEx::flush_virtual_address_space_ex(
            self,
            processor_set,
            flags,
        )
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressSpaceEx
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_space_ex(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
    ) -> hvdef::HvResult<()> {
        self.hcvm_validate_flush_inputs(processor_set, flags, false)?;
        let vtl = self.intercepted_vtl;

        let flush_state = &self.vp.shared.flush_state[vtl];

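        // An address-space flush is recorded as a pending flush-entire for the
        // target VTL, with non-global-only requests tracked separately.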
        if flags.non_global_mappings_only() {
            flush_state
                .flush_entire_non_global_counter
                .fetch_add(1, Ordering::Relaxed);
        } else {
            flush_state
                .flush_entire_counter
                .fetch_add(1, Ordering::Relaxed);
        }

        TdxTlbLockFlushAccess {
            vp_index: Some(self.vp.vp_index()),
            partition: self.vp.partition,
            shared: self.vp.shared,
        }
        .wake_processors_for_tlb_flush(vtl, (!flags.all_processors()).then_some(processor_set));

        self.vp.set_wait_for_tlb_locks(vtl);

        Ok(())
    }
}

impl<T: CpuIo> UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn add_ranges_to_tlb_flush_list(
        flush_state: &TdxPartitionFlushState,
        gva_ranges: &[HvGvaRange],
        use_extended_range_format: bool,
    ) -> Result<(), ()> {
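        // Requests with more ranges than FLUSH_GVA_LIST_SIZE cannot be
        // buffered in the flush list; tell the caller to fall back to a full
        // flush.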
        if gva_ranges.len() > FLUSH_GVA_LIST_SIZE {
            return Err(());
        }

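        // Extended-format entries describing large pages are not queued
        // individually either; they likewise force a full flush.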
        if use_extended_range_format
            && gva_ranges
                .iter()
                .any(|range| range.as_extended().large_page())
        {
            return Err(());
        }

        flush_state
            .gva_list
            .write()
            .extend(gva_ranges.iter().copied());

        Ok(())
    }
}

impl TdxTlbLockFlushAccess<'_> {
    fn wake_processors_for_tlb_flush(
        &mut self,
        target_vtl: GuestVtl,
        processor_set: Option<ProcessorSet<'_>>,
    ) {
        match processor_set {
            Some(processors) => {
                self.wake_processors_for_tlb_flush_inner(target_vtl, processors);
            }
            None => self.wake_processors_for_tlb_flush_inner(
                target_vtl,
                0..(self.partition.vps.len() as u32),
            ),
        }
    }

    fn wake_processors_for_tlb_flush_inner(
        &mut self,
        target_vtl: GuestVtl,
        processors: impl IntoIterator<Item = u32>,
    ) {
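        // Ensure the flush-state updates made by the caller are visible before
        // sampling each VP's currently active VTL below; VPs running the
        // target VTL are kicked so they pick up the pending flush.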
        std::sync::atomic::fence(Ordering::SeqCst);
        self.partition.hcl.kick_cpus(
            processors.into_iter().filter(|&vp| {
                self.shared.active_vtl[vp as usize].load(Ordering::Relaxed) == target_vtl as u8
            }),
            true,
            true,
        );
    }
}

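/// Access to TLB flush and TLB-lock state for a TDX partition, either on
/// behalf of a specific VP (`vp_index` is `Some`) or from a context not tied
/// to any VP.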
struct TdxTlbLockFlushAccess<'a> {
    vp_index: Option<VpIndex>,
    partition: &'a UhPartitionInner,
    shared: &'a TdxBackedShared,
}

impl TlbFlushLockAccess for TdxTlbLockFlushAccess<'_> {
    fn flush(&mut self, vtl: GuestVtl) {
        self.shared.flush_state[vtl]
            .flush_entire_counter
            .fetch_add(1, Ordering::Relaxed);

        self.wake_processors_for_tlb_flush(vtl, None);
        self.set_wait_for_tlb_locks(vtl);
    }

    fn flush_entire(&mut self) {
        for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
            self.shared.flush_state[vtl]
                .flush_entire_counter
                .fetch_add(1, Ordering::Relaxed);
        }
        for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
            self.wake_processors_for_tlb_flush(vtl, None);
            self.set_wait_for_tlb_locks(vtl);
        }
    }

    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl) {
        if let Some(vp_index) = self.vp_index {
            hardware_cvm::tlb_lock::TlbLockAccess {
                vp_index,
                cvm_partition: &self.shared.cvm,
            }
            .set_wait_for_tlb_locks(vtl);
        }
    }
}

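// Save/restore of TDX VP state is not supported; `SavedStateNotSupported`
// makes that explicit at the type level.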
mod save_restore {
    use super::TdxBacked;
    use super::UhProcessor;
    use vmcore::save_restore::RestoreError;
    use vmcore::save_restore::SaveError;
    use vmcore::save_restore::SaveRestore;
    use vmcore::save_restore::SavedStateNotSupported;

    impl SaveRestore for UhProcessor<'_, TdxBacked> {
        type SavedState = SavedStateNotSupported;

        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
            Err(SaveError::NotSupported)
        }

        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
            match state {}
        }
    }
}