1mod tlb_flush;
7
8use super::BackingPrivate;
9use super::BackingSharedParams;
10use super::HardwareIsolatedBacking;
11use super::UhEmulationState;
12use super::UhHypercallHandler;
13use super::UhRunVpError;
14use super::hardware_cvm;
15use super::vp_state;
16use super::vp_state::UhVpStateAccess;
17use crate::BackingShared;
18use crate::GuestVtl;
19use crate::TlbFlushLockAccess;
20use crate::UhCvmPartitionState;
21use crate::UhCvmVpState;
22use crate::UhPartitionInner;
23use crate::UhPartitionNewParams;
24use crate::UhProcessor;
25use crate::WakeReason;
26use cvm_tracing::CVM_ALLOWED;
27use cvm_tracing::CVM_CONFIDENTIAL;
28use guestmem::GuestMemory;
29use hcl::ioctl::ProcessorRunner;
30use hcl::ioctl::tdx::Tdx;
31use hcl::ioctl::tdx::TdxPrivateRegs;
32use hcl::protocol::hcl_intr_offload_flags;
33use hcl::protocol::tdx_tdg_vp_enter_exit_info;
34use hv1_emulator::hv::ProcessorVtlHv;
35use hv1_emulator::synic::GlobalSynic;
36use hv1_emulator::synic::ProcessorSynic;
37use hv1_hypercall::AsHandler;
38use hv1_hypercall::HvRepResult;
39use hv1_hypercall::HypercallIo;
40use hv1_structs::ProcessorSet;
41use hv1_structs::VtlArray;
42use hvdef::HV_PAGE_SIZE;
43use hvdef::HvError;
44use hvdef::HvSynicSimpSiefp;
45use hvdef::HvX64PendingExceptionEvent;
46use hvdef::HvX64RegisterName;
47use hvdef::Vtl;
48use hvdef::hypercall::HvFlushFlags;
49use hvdef::hypercall::HvGvaRange;
50use inspect::Inspect;
51use inspect::InspectMut;
52use inspect_counters::Counter;
53use std::sync::atomic::AtomicU8;
54use std::sync::atomic::Ordering;
55use tlb_flush::FLUSH_GVA_LIST_SIZE;
56use tlb_flush::TdxFlushState;
57use tlb_flush::TdxPartitionFlushState;
58use virt::Processor;
59use virt::VpHaltReason;
60use virt::VpIndex;
61use virt::io::CpuIo;
62use virt::state::StateElement;
63use virt::vp;
64use virt::vp::AccessVpState;
65use virt::vp::MpState;
66use virt::vp::Registers;
67use virt::x86::MsrError;
68use virt::x86::MsrErrorExt;
69use virt::x86::SegmentRegister;
70use virt::x86::TableRegister;
71use virt_support_apic::ApicClient;
72use virt_support_apic::OffloadNotSupported;
73use virt_support_x86emu::emulate::EmulatedMemoryOperation;
74use virt_support_x86emu::emulate::EmulatorSupport as X86EmulatorSupport;
75use virt_support_x86emu::emulate::TranslateMode;
76use virt_support_x86emu::emulate::emulate_insn_memory_op;
77use virt_support_x86emu::emulate::emulate_io;
78use virt_support_x86emu::emulate::emulate_translate_gva;
79use virt_support_x86emu::translate::TranslationRegisters;
80use vm_topology::memory::AddressType;
81use vmcore::vmtime::VmTimeAccess;
82use x86defs::RFlags;
83use x86defs::X64_CR0_ET;
84use x86defs::X64_CR0_NE;
85use x86defs::X64_CR0_PE;
86use x86defs::X64_CR0_PG;
87use x86defs::X64_CR4_MCE;
88use x86defs::X64_CR4_UMIP;
89use x86defs::X64_CR4_VMXE;
90use x86defs::X64_EFER_FFXSR;
91use x86defs::X64_EFER_LMA;
92use x86defs::X64_EFER_LME;
93use x86defs::X64_EFER_NXE;
94use x86defs::X64_EFER_SVME;
95use x86defs::X86X_MSR_EFER;
96use x86defs::apic::X2APIC_MSR_BASE;
97use x86defs::tdx::TdCallResultCode;
98use x86defs::tdx::TdVmCallR10Result;
99use x86defs::tdx::TdxGp;
100use x86defs::tdx::TdxInstructionInfo;
101use x86defs::tdx::TdxL2Ctls;
102use x86defs::tdx::TdxVpEnterRaxResult;
103use x86defs::vmx::ApicPage;
104use x86defs::vmx::ApicRegister;
105use x86defs::vmx::CR_ACCESS_TYPE_LMSW;
106use x86defs::vmx::CR_ACCESS_TYPE_MOV_TO_CR;
107use x86defs::vmx::CrAccessQualification;
108use x86defs::vmx::ExitQualificationIo;
109use x86defs::vmx::GdtrOrIdtrInstruction;
110use x86defs::vmx::GdtrOrIdtrInstructionInfo;
111use x86defs::vmx::INTERRUPT_TYPE_EXTERNAL;
112use x86defs::vmx::INTERRUPT_TYPE_HARDWARE_EXCEPTION;
113use x86defs::vmx::INTERRUPT_TYPE_NMI;
114use x86defs::vmx::IO_SIZE_8_BIT;
115use x86defs::vmx::IO_SIZE_16_BIT;
116use x86defs::vmx::IO_SIZE_32_BIT;
117use x86defs::vmx::Interruptibility;
118use x86defs::vmx::InterruptionInformation;
119use x86defs::vmx::LdtrOrTrInstruction;
120use x86defs::vmx::LdtrOrTrInstructionInfo;
121use x86defs::vmx::ProcessorControls;
122use x86defs::vmx::SecondaryProcessorControls;
123use x86defs::vmx::VMX_ENTRY_CONTROL_LONG_MODE_GUEST;
124use x86defs::vmx::VMX_FEATURE_CONTROL_LOCKED;
125use x86defs::vmx::VmcsField;
126use x86defs::vmx::VmxEptExitQualification;
127use x86defs::vmx::VmxExit;
128use x86defs::vmx::VmxExitBasic;
129use x86emu::Gp;
130use x86emu::Segment;
131
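// MSRs the guest may read directly without a VM exit. The MSR bitmap is opened
// up from this list when the VP backing is created (see `TdxBacked::new`).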
132const MSR_ALLOWED_READ: &[u32] = &[
134 x86defs::X86X_MSR_TSC,
135 x86defs::X86X_MSR_TSC_AUX,
136 X86X_MSR_EFER,
137 x86defs::X86X_MSR_STAR,
138 x86defs::X86X_MSR_LSTAR,
139 x86defs::X86X_MSR_SFMASK,
140 x86defs::X86X_MSR_SYSENTER_CS,
141 x86defs::X86X_MSR_SYSENTER_ESP,
142 x86defs::X86X_MSR_SYSENTER_EIP,
143];
144
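// MSRs the guest may read and write directly without a VM exit. Writes to some
// of these are re-intercepted only if a higher VTL registers for them (see
// `cr_intercept_registration`).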
145const MSR_ALLOWED_READ_WRITE: &[u32] = &[
147 x86defs::X64_MSR_FS_BASE,
148 x86defs::X64_MSR_GS_BASE,
149 x86defs::X64_MSR_KERNEL_GS_BASE,
150 x86defs::X86X_MSR_SPEC_CTRL,
151 x86defs::X86X_MSR_U_CET,
152 x86defs::X86X_MSR_S_CET,
153 x86defs::X86X_MSR_PL0_SSP,
154 x86defs::X86X_MSR_PL1_SSP,
155 x86defs::X86X_MSR_PL2_SSP,
156 x86defs::X86X_MSR_PL3_SSP,
157 x86defs::X86X_MSR_INTERRUPT_SSP_TABLE_ADDR,
158 x86defs::X86X_IA32_MSR_XFD,
159 x86defs::X86X_IA32_MSR_XFD_ERR,
160];
161
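/// Typed view over the raw TDG.VP.ENTER exit information, which the TDX module
/// returns in general-purpose registers (exit reason in RAX, qualification in
/// RCX, guest linear/physical address, vectoring info, CS, CPL, ...).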
162#[derive(Debug)]
163struct TdxExit<'a>(&'a tdx_tdg_vp_enter_exit_info);
164
165impl TdxExit<'_> {
166 fn code(&self) -> TdxVpEnterRaxResult {
167 self.0.rax.into()
168 }
169 fn qualification(&self) -> u64 {
170 self.0.rcx
171 }
172 fn gla(&self) -> Option<u64> {
173 if self.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION {
175 Some(self.0.rdx)
176 } else {
177 None
178 }
179 }
180 fn gpa(&self) -> Option<u64> {
181 if self.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION {
183 Some(self.0.r8)
184 } else {
185 None
186 }
187 }
188 fn _exit_interruption_info(&self) -> InterruptionInformation {
189 (self.0.r9 as u32).into()
190 }
191 fn _exit_interruption_error_code(&self) -> u32 {
192 (self.0.r9 >> 32) as u32
193 }
194 fn idt_vectoring_info(&self) -> InterruptionInformation {
195 (self.0.r10 as u32).into()
196 }
197 fn idt_vectoring_error_code(&self) -> u32 {
198 (self.0.r10 >> 32) as u32
199 }
200 fn instr_info(&self) -> TdxInstructionInfo {
201 self.0.r11.into()
202 }
203 fn cs(&self) -> SegmentRegister {
204 SegmentRegister {
205 selector: self.0.rsi as u16,
206 base: self.0.rdi,
207 limit: (self.0.rsi >> 32) as u32,
208 attributes: (self.0.rsi >> 16) as u16,
209 }
210 }
211 fn cpl(&self) -> u8 {
212 self.0.r12 as u8 & 3
213 }
214}
215
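/// Control registers whose guest-visible value is virtualized through a VMCS
/// read shadow: bits in `guest_owned_mask` live in the physical register, the
/// remaining bits are emulated via the shadow value.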
216#[derive(Debug, Inspect)]
218enum ShadowedRegister {
219 Cr0,
220 Cr4,
221}
222
223impl ShadowedRegister {
224 fn name(&self) -> &'static str {
225 match self {
226 Self::Cr0 => "cr0",
227 Self::Cr4 => "cr4",
228 }
229 }
230
231 fn physical_vmcs_field(&self) -> VmcsField {
232 match self {
233 Self::Cr0 => VmcsField::VMX_VMCS_GUEST_CR0,
234 Self::Cr4 => VmcsField::VMX_VMCS_GUEST_CR4,
235 }
236 }
237
238 fn shadow_vmcs_field(&self) -> VmcsField {
239 match self {
240 Self::Cr0 => VmcsField::VMX_VMCS_CR0_READ_SHADOW,
241 Self::Cr4 => VmcsField::VMX_VMCS_CR4_READ_SHADOW,
242 }
243 }
244
245 fn guest_owned_mask(&self) -> u64 {
246 match self {
250 Self::Cr0 => {
251 X64_CR0_ET
252 | x86defs::X64_CR0_MP
253 | x86defs::X64_CR0_EM
254 | x86defs::X64_CR0_TS
255 | x86defs::X64_CR0_WP
256 | x86defs::X64_CR0_AM
257 | X64_CR0_PE
258 | X64_CR0_PG
259 }
260 Self::Cr4 => {
261 x86defs::X64_CR4_VME
262 | x86defs::X64_CR4_PVI
263 | x86defs::X64_CR4_TSD
264 | x86defs::X64_CR4_DE
265 | x86defs::X64_CR4_PSE
266 | x86defs::X64_CR4_PAE
267 | x86defs::X64_CR4_PGE
268 | x86defs::X64_CR4_PCE
269 | x86defs::X64_CR4_FXSR
270 | x86defs::X64_CR4_XMMEXCPT
271 | X64_CR4_UMIP
272 | x86defs::X64_CR4_LA57
273 | x86defs::X64_CR4_RWFSGS
274 | x86defs::X64_CR4_PCIDE
275 | x86defs::X64_CR4_OSXSAVE
276 | x86defs::X64_CR4_SMEP
277 | x86defs::X64_CR4_SMAP
278 | x86defs::X64_CR4_CET
279 }
280 }
281 }
282}
283
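/// A shadowed control register (CR0 or CR4) for one VTL. Writes are validated
/// against `allowed_bits`, guest-owned bits are pushed to the physical VMCS
/// field, and the full value is kept in the read shadow. A rough sketch of the
/// recombination performed by `read`:
///
/// ```ignore
/// // Illustrative only.
/// let value = (shadow & !guest_owned_mask) | (physical & guest_owned_mask);
/// ```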
284#[derive(Inspect)]
289struct VirtualRegister {
290 register: ShadowedRegister,
292 vtl: GuestVtl,
294 shadow_value: u64,
296 allowed_bits: u64,
298}
299
300impl VirtualRegister {
301 fn new(reg: ShadowedRegister, vtl: GuestVtl, initial_value: u64, allowed_bits: u64) -> Self {
302 Self {
303 register: reg,
304 vtl,
305 shadow_value: initial_value,
306 allowed_bits,
307 }
308 }
309
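    /// Writes the virtual register: rejects values with disallowed bits,
    /// updates the guest-owned bits of the physical VMCS register if they
    /// changed, and always refreshes the read shadow.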
310 fn write<'a>(
314 &mut self,
315 value: u64,
316 runner: &mut ProcessorRunner<'a, Tdx<'a>>,
317 ) -> Result<(), vp_state::Error> {
318 tracing::trace!(?self.register, value, "write virtual register");
319
320 if value & !self.allowed_bits != 0 {
321 return Err(vp_state::Error::InvalidValue(
322 value,
323 self.register.name(),
324 "disallowed bit set",
325 ));
326 }
327
328 let old_physical_reg = runner.read_vmcs64(self.vtl, self.register.physical_vmcs_field());
331
332 tracing::trace!(old_physical_reg, "old_physical_reg");
333
334 let guest_owned_mask = self.register.guest_owned_mask();
335 if (old_physical_reg ^ value) & guest_owned_mask != 0 {
336 let new_physical_reg =
337 (old_physical_reg & !guest_owned_mask) | (value & guest_owned_mask);
338
339 tracing::trace!(new_physical_reg, "new_physical_reg");
340
341 runner.write_vmcs64(
342 self.vtl,
343 self.register.physical_vmcs_field(),
344 !0,
345 new_physical_reg,
346 );
347 }
348
349 self.shadow_value = value;
350 runner.write_vmcs64(self.vtl, self.register.shadow_vmcs_field(), !0, value);
351 Ok(())
352 }
353
354 fn read<'a>(&self, runner: &ProcessorRunner<'a, Tdx<'a>>) -> u64 {
355 let physical_reg = runner.read_vmcs64(self.vtl, self.register.physical_vmcs_field());
356
        let guest_owned_mask = self.register.guest_owned_mask();
        (self.shadow_value & !guest_owned_mask) | (physical_reg & guest_owned_mask)
361 }
362}
363
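/// TDX-specific per-VP backing state: per-VTL register and VMCS shadows, the
/// optional untrusted (proxy) synic, the EOI exit bitmap, the TLB flush page,
/// and the common CVM VP state.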
364#[derive(InspectMut)]
366pub struct TdxBacked {
367 #[inspect(mut)]
368 vtls: VtlArray<TdxVtl, 2>,
369
370 untrusted_synic: Option<ProcessorSynic>,
371 #[inspect(hex, iter_by_index)]
372 eoi_exit_bitmap: [u64; 4],
373
374 #[inspect(skip)]
376 flush_page: user_driver::memory::MemoryBlock,
377
378 #[inspect(flatten)]
379 cvm: UhCvmVpState,
380}
381
382#[derive(InspectMut)]
383struct TdxVtl {
384 efer: u64,
386 cr0: VirtualRegister,
388 cr4: VirtualRegister,
390
391 msr_cstar: u64,
393
394 tpr_threshold: u8,
395 #[inspect(skip)]
396 processor_controls: ProcessorControls,
397 #[inspect(skip)]
398 interruption_information: InterruptionInformation,
399 exception_error_code: u32,
400 interruption_set: bool,
401
402 #[inspect(mut)]
403 private_regs: TdxPrivateRegs,
404
405 flush_state: TdxFlushState,
407
408 enter_stats: EnterStats,
409 exit_stats: ExitStats,
410}
411
412#[derive(Default)]
413pub struct TdxEmulationCache {
414 segs: [Option<SegmentRegister>; 6],
415 cr0: Option<u64>,
416}
417
418#[derive(Inspect, Default)]
419struct EnterStats {
420 success: Counter,
421 host_routed_async: Counter,
422 l2_exit_pending_intr: Counter,
423 pending_intr: Counter,
424 host_routed_td_vmcall: Counter,
425}
426
427#[derive(Inspect, Default)]
428struct ExitStats {
429 io: Counter,
430 msr_read: Counter,
431 msr_write: Counter,
432 ept_violation: Counter,
433 cpuid: Counter,
434 cr_access: Counter,
435 xsetbv: Counter,
436 tpr_below_threshold: Counter,
437 interrupt_window: Counter,
438 nmi_window: Counter,
439 vmcall: Counter,
440 smi_intr: Counter,
441 wbinvd: Counter,
442 hw_interrupt: Counter,
443 tdcall: Counter,
444 hlt: Counter,
445 pause: Counter,
446 needs_interrupt_reinject: Counter,
447 exception: Counter,
448 descriptor_table: Counter,
449}
450
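/// Indices of the per-VP direct overlay pages (the synic message page and
/// event flags page); `Count` is the number of pages to allocate.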
451enum UhDirectOverlay {
452 Sipp,
453 Sifp,
454 Count,
455}
456
457impl HardwareIsolatedBacking for TdxBacked {
458 fn cvm_state(&self) -> &UhCvmVpState {
459 &self.cvm
460 }
461
462 fn cvm_state_mut(&mut self) -> &mut UhCvmVpState {
463 &mut self.cvm
464 }
465
466 fn cvm_partition_state(shared: &Self::Shared) -> &UhCvmPartitionState {
467 &shared.cvm
468 }
469
470 fn switch_vtl(this: &mut UhProcessor<'_, Self>, _source_vtl: GuestVtl, target_vtl: GuestVtl) {
471 this.backing.cvm_state_mut().exit_vtl = target_vtl;
475 }
476
477 fn translation_registers(
478 &self,
479 this: &UhProcessor<'_, Self>,
480 vtl: GuestVtl,
481 ) -> TranslationRegisters {
482 let cr0 = this.backing.vtls[vtl].cr0.read(&this.runner);
483 let cr4 = this.backing.vtls[vtl].cr4.read(&this.runner);
484 let efer = this.backing.vtls[vtl].efer;
485 let cr3 = this.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR3);
486 let ss = this.read_segment(vtl, TdxSegmentReg::Ss).into();
487 let rflags = this.backing.vtls[vtl].private_regs.rflags;
488
489 TranslationRegisters {
490 cr0,
491 cr4,
492 efer,
493 cr3,
494 ss,
495 rflags,
496 encryption_mode: this.partition.caps.vtom.map_or(
497 virt_support_x86emu::translate::EncryptionMode::None,
498 virt_support_x86emu::translate::EncryptionMode::Vtom,
499 ),
500 }
501 }
502
503 fn tlb_flush_lock_access<'a>(
504 vp_index: Option<VpIndex>,
505 partition: &'a UhPartitionInner,
506 shared: &'a Self::Shared,
507 ) -> impl TlbFlushLockAccess + 'a {
508 TdxTlbLockFlushAccess {
509 vp_index,
510 partition,
511 shared,
512 }
513 }
514
515 fn pending_event_vector(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> Option<u8> {
516 let event_inject = this.backing.vtls[vtl].interruption_information;
517 if event_inject.valid() {
518 Some(event_inject.vector())
519 } else {
520 None
521 }
522 }
523
524 fn set_pending_exception(
525 this: &mut UhProcessor<'_, Self>,
526 vtl: GuestVtl,
527 event: HvX64PendingExceptionEvent,
528 ) {
529 let new_intr = InterruptionInformation::new()
530 .with_valid(true)
531 .with_deliver_error_code(event.deliver_error_code())
532 .with_vector(event.vector().try_into().unwrap())
533 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION);
534
535 this.backing.vtls[vtl].interruption_information = new_intr;
536 this.backing.vtls[vtl].exception_error_code = event.error_code();
537 }
538
539 fn cr0(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
540 this.read_cr0(vtl)
541 }
542
543 fn cr4(this: &UhProcessor<'_, Self>, vtl: GuestVtl) -> u64 {
544 this.read_cr4(vtl)
545 }
546
547 fn intercept_message_state(
548 this: &UhProcessor<'_, Self>,
549 vtl: GuestVtl,
550 include_optional_state: bool,
551 ) -> super::InterceptMessageState {
552 let exit = TdxExit(this.runner.tdx_vp_enter_exit_info());
553 let backing_vtl = &this.backing.vtls[vtl];
554 let shared_gps = this.runner.tdx_enter_guest_gps();
555
556 super::InterceptMessageState {
557 instruction_length_and_cr8: exit.instr_info().length() as u8,
558 cpl: exit.cpl(),
559 efer_lma: backing_vtl.efer & X64_EFER_LMA != 0,
560 cs: exit.cs().into(),
561 rip: backing_vtl.private_regs.rip,
562 rflags: backing_vtl.private_regs.rflags,
563 rax: shared_gps[TdxGp::RAX],
564 rdx: shared_gps[TdxGp::RDX],
565 optional: if include_optional_state {
566 Some(super::InterceptMessageOptionalState {
567 ds: this.read_segment(vtl, TdxSegmentReg::Ds).into(),
568 es: this.read_segment(vtl, TdxSegmentReg::Es).into(),
569 })
570 } else {
571 None
572 },
573 rcx: shared_gps[TdxGp::RCX],
574 rsi: shared_gps[TdxGp::RSI],
575 rdi: shared_gps[TdxGp::RDI],
576 }
577 }
578
579 fn cr_intercept_registration(
580 this: &mut UhProcessor<'_, Self>,
581 intercept_control: hvdef::HvRegisterCrInterceptControl,
582 ) {
583 let vtl = GuestVtl::Vtl0;
585 let intercept_masks = &this
586 .backing
587 .cvm_state()
588 .vtl1
589 .as_ref()
590 .unwrap()
591 .reg_intercept;
592
593 this.runner.write_vmcs64(
595 vtl,
596 VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK,
597 !0,
598 this.shared.cr_guest_host_mask(ShadowedRegister::Cr0)
599 | if intercept_control.cr0_write() {
600 intercept_masks.cr0_mask
601 } else {
602 0
603 },
604 );
605 this.runner.write_vmcs64(
606 vtl,
607 VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK,
608 !0,
609 this.shared.cr_guest_host_mask(ShadowedRegister::Cr4)
610 | if intercept_control.cr4_write() {
611 intercept_masks.cr4_mask
612 } else {
613 0
614 },
615 );
616
617 let intercept_tables = intercept_control.gdtr_write()
619 | intercept_control.idtr_write()
620 | intercept_control.ldtr_write()
621 | intercept_control.tr_write();
622 this.runner.write_vmcs32(
623 vtl,
624 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
625 SecondaryProcessorControls::new()
626 .with_descriptor_table_exiting(true)
627 .into_bits(),
628 SecondaryProcessorControls::new()
629 .with_descriptor_table_exiting(intercept_tables)
630 .into_bits(),
631 );
632
633 this.runner.set_msr_bit(
637 vtl,
638 x86defs::X86X_MSR_S_CET,
639 true,
640 intercept_control.msr_scet_write(),
641 );
642 this.runner.set_msr_bit(
643 vtl,
644 x86defs::X86X_MSR_PL0_SSP,
645 true,
646 intercept_control.msr_pls_ssp_write(),
647 );
648 this.runner.set_msr_bit(
649 vtl,
650 x86defs::X86X_MSR_PL1_SSP,
651 true,
652 intercept_control.msr_pls_ssp_write(),
653 );
654 this.runner.set_msr_bit(
655 vtl,
656 x86defs::X86X_MSR_PL2_SSP,
657 true,
658 intercept_control.msr_pls_ssp_write(),
659 );
660 this.runner.set_msr_bit(
661 vtl,
662 x86defs::X86X_MSR_PL3_SSP,
663 true,
664 intercept_control.msr_pls_ssp_write(),
665 );
666 this.runner.set_msr_bit(
667 vtl,
668 x86defs::X86X_MSR_INTERRUPT_SSP_TABLE_ADDR,
669 true,
670 intercept_control.msr_pls_ssp_write(),
671 );
672 }
673
674 fn is_interrupt_pending(
675 this: &mut UhProcessor<'_, Self>,
676 vtl: GuestVtl,
677 check_rflags: bool,
678 dev: &impl CpuIo,
679 ) -> bool {
680 let backing_vtl = &this.backing.vtls[vtl];
681 if backing_vtl.interruption_information.valid()
682 && backing_vtl.interruption_information.interruption_type() == INTERRUPT_TYPE_NMI
683 {
684 return true;
685 }
686
687 let (vector, ppr) = if this.backing.cvm.lapics[vtl].lapic.is_offloaded() {
688 let vector = backing_vtl.private_regs.rvi;
689 let ppr = std::cmp::max(
690 backing_vtl.private_regs.svi.into(),
691 this.runner.tdx_apic_page(vtl).tpr.value,
692 );
693 (vector, ppr)
694 } else {
695 let lapic = &mut this.backing.cvm.lapics[vtl].lapic;
696 let vector = lapic.next_irr().unwrap_or(0);
697 let ppr = lapic
698 .access(&mut TdxApicClient {
699 partition: this.partition,
700 apic_page: this.runner.tdx_apic_page_mut(vtl),
701 dev,
702 vmtime: &this.vmtime,
703 vtl,
704 })
705 .get_ppr();
706 (vector, ppr)
707 };
708 let vector_priority = (vector as u32) >> 4;
709 let ppr_priority = ppr >> 4;
710
711 if vector_priority <= ppr_priority {
712 return false;
713 }
714
715 if check_rflags && !RFlags::from_bits(backing_vtl.private_regs.rflags).interrupt_enable() {
716 return false;
717 }
718
719 let interruptibility: Interruptibility = this
720 .runner
721 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
722 .into();
723
724 if interruptibility.blocked_by_sti() || interruptibility.blocked_by_movss() {
725 return false;
726 }
727
728 true
729 }
730
731 fn untrusted_synic_mut(&mut self) -> Option<&mut ProcessorSynic> {
732 self.untrusted_synic.as_mut()
733 }
734}
735
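/// Partition-wide state shared by all TDX VPs: the common CVM state, the
/// optional untrusted synic, per-VTL TLB flush state, each VP's currently
/// active VTL (2 meaning the VP is running in VTL 2, i.e. in the paravisor),
/// and the CR4 bits the guest is allowed to set.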
736#[derive(Inspect)]
738pub struct TdxBackedShared {
739 #[inspect(flatten)]
740 pub(crate) cvm: UhCvmPartitionState,
741 pub(crate) untrusted_synic: Option<GlobalSynic>,
745 flush_state: VtlArray<TdxPartitionFlushState, 2>,
746 #[inspect(iter_by_index)]
747 active_vtl: Vec<AtomicU8>,
748 cr4_allowed_bits: u64,
750}
751
752impl TdxBackedShared {
753 pub(crate) fn new(
754 partition_params: &UhPartitionNewParams<'_>,
755 params: BackingSharedParams<'_>,
756 ) -> Result<Self, crate::Error> {
757 let untrusted_synic = (partition_params.handle_synic && !partition_params.hide_isolation)
763 .then(|| GlobalSynic::new(partition_params.topology.vp_count()));
764
765 let cr4_fixed1 = params.hcl.read_vmx_cr4_fixed1();
767 let cr4_allowed_bits =
768 (ShadowedRegister::Cr4.guest_owned_mask() | X64_CR4_MCE) & cr4_fixed1;
769
770 Ok(Self {
771 untrusted_synic,
772 flush_state: VtlArray::from_fn(|_| TdxPartitionFlushState::new()),
773 cvm: params.cvm_state.unwrap(),
774 active_vtl: std::iter::repeat_n(2, partition_params.topology.vp_count() as usize)
776 .map(AtomicU8::new)
777 .collect(),
778 cr4_allowed_bits,
779 })
780 }
781
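    /// Computes the VMCS guest/host mask for a shadowed register; a set bit
    /// means a guest write to that bit is intercepted. CR0.PE and CR0.PG are
    /// always intercepted, and for CR4 only bits that are both guest-ownable
    /// and architecturally allowed are left to the guest.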
782 fn cr_guest_host_mask(&self, reg: ShadowedRegister) -> u64 {
784 match reg {
785 ShadowedRegister::Cr0 => {
786 !ShadowedRegister::Cr0.guest_owned_mask() | X64_CR0_PE | X64_CR0_PG
787 }
788 ShadowedRegister::Cr4 => {
789 !(ShadowedRegister::Cr4.guest_owned_mask() & self.cr4_allowed_bits)
790 }
791 }
792 }
793}
794
795impl TdxBacked {
796 pub fn shared_pages_required_per_cpu() -> u64 {
799 UhDirectOverlay::Count as u64
800 }
801}
802
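/// Overlay-page access helper used when programming the untrusted synic pages:
/// it locks the requested GPN and makes no VTL protection changes.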
803struct UntrustedSynicVtlProts<'a>(&'a GuestMemory);
807
808impl hv1_emulator::VtlProtectAccess for UntrustedSynicVtlProts<'_> {
809 fn check_modify_and_lock_overlay_page(
810 &mut self,
811 gpn: u64,
812 _check_perms: hvdef::HvMapGpaFlags,
813 _new_perms: Option<hvdef::HvMapGpaFlags>,
814 ) -> Result<guestmem::LockedPages, HvError> {
815 self.0
816 .lock_gpns(false, &[gpn])
817 .map_err(|_| HvError::OperationFailed)
818 }
819
820 fn unlock_overlay_page(&mut self, _gpn: u64) -> Result<(), HvError> {
821 Ok(())
822 }
823}
824
825#[expect(private_interfaces)]
826impl BackingPrivate for TdxBacked {
827 type HclBacking<'tdx> = Tdx<'tdx>;
828 type Shared = TdxBackedShared;
829 type EmulationCache = TdxEmulationCache;
830
831 fn shared(shared: &BackingShared) -> &Self::Shared {
832 let BackingShared::Tdx(shared) = shared else {
833 unreachable!()
834 };
835 shared
836 }
837
838 fn new(
839 params: super::BackingParams<'_, '_, Self>,
840 shared: &TdxBackedShared,
841 ) -> Result<Self, crate::Error> {
842 params.runner.write_vmcs32(
850 GuestVtl::Vtl0,
851 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
852 SecondaryProcessorControls::new()
853 .with_mode_based_execute_control(true)
854 .into(),
855 SecondaryProcessorControls::new()
856 .with_mode_based_execute_control(true)
857 .into(),
858 );
859
860 for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
861 let controls = TdxL2Ctls::new()
862 .with_enable_shared_ept(!shared.cvm.hide_isolation)
864 .with_enable_tdvmcall(
866 shared.untrusted_synic.is_none() && !shared.cvm.hide_isolation,
867 );
868
869 params
870 .runner
871 .set_l2_ctls(vtl, controls)
872 .map_err(crate::Error::FailedToSetL2Ctls)?;
873
874 let initial_cr0 = params
877 .runner
878 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0);
879 assert_eq!(initial_cr0, X64_CR0_PE | X64_CR0_NE);
880
881 params.runner.write_vmcs64(
885 vtl,
886 VmcsField::VMX_VMCS_CR0_READ_SHADOW,
887 !0,
888 X64_CR0_PE | X64_CR0_NE,
889 );
890 params.runner.write_vmcs64(
891 vtl,
892 VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK,
893 !0,
894 shared.cr_guest_host_mask(ShadowedRegister::Cr0),
895 );
896
897 let initial_cr4 = params
898 .runner
899 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4);
900 assert_eq!(initial_cr4, X64_CR4_MCE | X64_CR4_VMXE);
901
902 params
903 .runner
904 .write_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_READ_SHADOW, !0, 0);
905 params.runner.write_vmcs64(
906 vtl,
907 VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK,
908 !0,
909 shared.cr_guest_host_mask(ShadowedRegister::Cr4),
910 );
911
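            // Open up the allow-listed MSRs in the MSR bitmap so the guest can
            // access them without exiting: read-only for MSR_ALLOWED_READ,
            // read/write for MSR_ALLOWED_READ_WRITE.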
912 for msr in MSR_ALLOWED_READ {
916 params.runner.set_msr_bit(vtl, *msr, false, false);
917 }
918 for msr in MSR_ALLOWED_READ_WRITE {
919 params.runner.set_msr_bit(vtl, *msr, false, false);
920 params.runner.set_msr_bit(vtl, *msr, true, false);
921 }
922
923 if params.partition.intercept_debug_exceptions {
925 if cfg!(feature = "gdb") {
926 let initial_exception_bitmap = params
927 .runner
928 .read_vmcs32(vtl, VmcsField::VMX_VMCS_EXCEPTION_BITMAP);
929
930 let exception_bitmap =
931 initial_exception_bitmap | (1 << x86defs::Exception::DEBUG.0);
932
933 params.runner.write_vmcs32(
934 vtl,
935 VmcsField::VMX_VMCS_EXCEPTION_BITMAP,
936 !0,
937 exception_bitmap,
938 );
939 } else {
940 return Err(super::Error::InvalidDebugConfiguration);
941 }
942 }
943 }
944
945 let flush_page = shared
946 .cvm
947 .private_dma_client
948 .allocate_dma_buffer(HV_PAGE_SIZE as usize)
949 .map_err(crate::Error::AllocateTlbFlushPage)?;
950
951 let untrusted_synic = shared
952 .untrusted_synic
953 .as_ref()
954 .map(|synic| synic.add_vp(params.vp_info.base.vp_index));
955
956 Ok(Self {
957 vtls: VtlArray::from_fn(|vtl| {
958 let vtl: GuestVtl = vtl.try_into().unwrap();
959 TdxVtl {
960 efer: params
961 .runner
962 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER),
963 cr0: VirtualRegister::new(
964 ShadowedRegister::Cr0,
965 vtl,
966 params
967 .runner
968 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0),
969 !0,
970 ),
971 cr4: VirtualRegister::new(
972 ShadowedRegister::Cr4,
973 vtl,
974 params
975 .runner
976 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4),
977 shared.cr4_allowed_bits,
978 ),
979 msr_cstar: 0,
980 tpr_threshold: 0,
981 processor_controls: params
982 .runner
983 .read_vmcs32(vtl, VmcsField::VMX_VMCS_PROCESSOR_CONTROLS)
984 .into(),
985 interruption_information: Default::default(),
986 exception_error_code: 0,
987 interruption_set: false,
988 flush_state: TdxFlushState::new(),
989 private_regs: TdxPrivateRegs::new(vtl),
990 enter_stats: Default::default(),
991 exit_stats: Default::default(),
992 }
993 }),
994 untrusted_synic,
995 eoi_exit_bitmap: [0; 4],
996 flush_page,
997 cvm: UhCvmVpState::new(
998 &shared.cvm,
999 params.partition,
1000 params.vp_info,
1001 UhDirectOverlay::Count as usize,
1002 )?,
1003 })
1004 }
1005
1006 type StateAccess<'p, 'a>
1007 = UhVpStateAccess<'a, 'p, Self>
1008 where
1009 Self: 'a + 'p,
1010 'p: 'a;
1011
1012 fn access_vp_state<'a, 'p>(
1013 this: &'a mut UhProcessor<'p, Self>,
1014 vtl: GuestVtl,
1015 ) -> Self::StateAccess<'p, 'a> {
1016 UhVpStateAccess::new(this, vtl)
1017 }
1018
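    // One-time VP initialization: point the hypervisor (or the local untrusted
    // synic) at the direct-overlay SIMP/SIEFP pages, enable APIC offload for
    // VTL0 (but not VTL1), and reset architectural register and FPU state for
    // both VTLs.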
1019 fn init(this: &mut UhProcessor<'_, Self>) {
1020 let pfns = &this.backing.cvm.direct_overlay_handle.pfns();
1023 let reg = |gpn| {
1024 u64::from(
1025 HvSynicSimpSiefp::new()
1026 .with_base_gpn(gpn)
1027 .with_enabled(true),
1028 )
1029 };
1030
1031 let values: &[(HvX64RegisterName, u64); 2] = &[
1032 (
1033 HvX64RegisterName::Sifp,
1034 reg(pfns[UhDirectOverlay::Sifp as usize]),
1035 ),
1036 (
1037 HvX64RegisterName::Sipp,
1038 reg(pfns[UhDirectOverlay::Sipp as usize]),
1039 ),
1040 ];
1041
1042 let reg_count = if let Some(synic) = &mut this.backing.untrusted_synic {
1043 let prot_access = &mut UntrustedSynicVtlProts(&this.partition.gm[GuestVtl::Vtl0]);
1044
1045 synic
1046 .set_simp(reg(pfns[UhDirectOverlay::Sipp as usize]), prot_access)
1047 .unwrap();
1048 synic
1049 .set_siefp(reg(pfns[UhDirectOverlay::Sifp as usize]), prot_access)
1050 .unwrap();
1051 1
1055 } else {
1056 2
1057 };
1058
1059 this.runner
1060 .set_vp_registers_hvcall(Vtl::Vtl0, &values[..reg_count])
1061 .expect("set_vp_registers hypercall for direct overlays should succeed");
1062
1063 this.set_apic_offload(GuestVtl::Vtl0, true);
1065 this.backing.cvm.lapics[GuestVtl::Vtl0]
1066 .lapic
1067 .enable_offload();
1068
1069 this.set_apic_offload(GuestVtl::Vtl1, false);
1071
1072 for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
1077 let registers = Registers::at_reset(&this.partition.caps, &this.inner.vp_info);
1078
1079 let mut state = this.access_state(vtl.into());
1080 state
1081 .set_registers(®isters)
1082 .expect("Resetting to architectural state should succeed");
1083
1084 state.commit().expect("committing state should succeed");
1085 }
1086
1087 *this.runner.fx_state_mut() =
1090 vp::Xsave::at_reset(&this.partition.caps, &this.inner.vp_info).fxsave();
1091 }
1092
1093 async fn run_vp(
1094 this: &mut UhProcessor<'_, Self>,
1095 dev: &impl CpuIo,
1096 _stop: &mut virt::StopVp<'_>,
1097 ) -> Result<(), VpHaltReason<UhRunVpError>> {
1098 this.run_vp_tdx(dev).await
1099 }
1100
1101 fn poll_apic(
1102 this: &mut UhProcessor<'_, Self>,
1103 vtl: GuestVtl,
1104 scan_irr: bool,
1105 ) -> Result<(), UhRunVpError> {
1106 if !this.try_poll_apic(vtl, scan_irr)? {
1107 tracing::info!(CVM_ALLOWED, "disabling APIC offload due to auto EOI");
1108 let page = this.runner.tdx_apic_page_mut(vtl);
1109 let (irr, isr) = pull_apic_offload(page);
1110
1111 this.backing.cvm.lapics[vtl]
1112 .lapic
1113 .disable_offload(&irr, &isr);
1114 this.set_apic_offload(vtl, false);
1115 this.try_poll_apic(vtl, false)?;
1116 }
1117
1118 Ok(())
1119 }
1120
1121 fn request_extint_readiness(_this: &mut UhProcessor<'_, Self>) {
1122 unreachable!("extint managed through software apic")
1123 }
1124
1125 fn request_untrusted_sint_readiness(this: &mut UhProcessor<'_, Self>, sints: u16) {
1126 if let Some(synic) = &mut this.backing.untrusted_synic {
1127 synic.request_sint_readiness(sints);
1128 } else {
1129 tracelimit::error_ratelimited!(CVM_ALLOWED, "untrusted synic is not configured");
1130 }
1131 }
1132
1133 fn hv(&self, vtl: GuestVtl) -> Option<&ProcessorVtlHv> {
1134 Some(&self.cvm.hv[vtl])
1135 }
1136
1137 fn hv_mut(&mut self, vtl: GuestVtl) -> Option<&mut ProcessorVtlHv> {
1138 Some(&mut self.cvm.hv[vtl])
1139 }
1140
1141 fn handle_vp_start_enable_vtl_wake(
1142 this: &mut UhProcessor<'_, Self>,
1143 vtl: GuestVtl,
1144 ) -> Result<(), UhRunVpError> {
1145 this.hcvm_handle_vp_start_enable_vtl(vtl)
1146 }
1147
1148 fn vtl1_inspectable(this: &UhProcessor<'_, Self>) -> bool {
1149 this.hcvm_vtl1_inspectable()
1150 }
1151
1152 fn process_interrupts(
1153 this: &mut UhProcessor<'_, Self>,
1154 scan_irr: VtlArray<bool, 2>,
1155 first_scan_irr: &mut bool,
1156 dev: &impl CpuIo,
1157 ) -> Result<bool, VpHaltReason<UhRunVpError>> {
1158 this.cvm_process_interrupts(scan_irr, first_scan_irr, dev)
1159 }
1160}
1161
1162impl UhProcessor<'_, TdxBacked> {
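    /// Polls the APIC and synchronizes interrupt state with the offloaded APIC
    /// page. Returns `Ok(false)` if offloading has to be abandoned (the lapic
    /// reported `OffloadNotSupported`, e.g. for an auto-EOI interrupt) so the
    /// caller can disable offload and rescan in software.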
1163 fn try_poll_apic(&mut self, vtl: GuestVtl, scan_irr: bool) -> Result<bool, UhRunVpError> {
1166 let mut scan = TdxApicScanner {
1167 processor_controls: self.backing.vtls[vtl]
1168 .processor_controls
1169 .with_nmi_window_exiting(false)
1170 .with_interrupt_window_exiting(false),
1171 vp: self,
1172 tpr_threshold: 0,
1173 };
1174
1175 hardware_cvm::apic::poll_apic_core(&mut scan, vtl, scan_irr)?;
1177
1178 let TdxApicScanner {
1179 vp: _,
1180 processor_controls: new_processor_controls,
1181 tpr_threshold: new_tpr_threshold,
1182 } = scan;
1183
1184 if self.backing.cvm.lapics[vtl].activity != MpState::WaitForSipi
1186 && self.backing.vtls[vtl].tpr_threshold != new_tpr_threshold
1187 {
1188 tracing::trace!(new_tpr_threshold, ?vtl, "setting tpr threshold");
1189 self.runner.write_vmcs32(
1190 vtl,
1191 VmcsField::VMX_VMCS_TPR_THRESHOLD,
1192 !0,
1193 new_tpr_threshold.into(),
1194 );
1195 self.backing.vtls[vtl].tpr_threshold = new_tpr_threshold;
1196 }
1197
1198 if self.backing.vtls[vtl].processor_controls != new_processor_controls {
1199 tracing::trace!(?new_processor_controls, ?vtl, "requesting window change");
1200 self.runner.write_vmcs32(
1201 vtl,
1202 VmcsField::VMX_VMCS_PROCESSOR_CONTROLS,
1203 !0,
1204 new_processor_controls.into(),
1205 );
1206 self.backing.vtls[vtl].processor_controls = new_processor_controls;
1207 }
1208
1209 if vtl == GuestVtl::Vtl0 {
1211 let mut update_rvi = false;
1212 let r: Result<(), OffloadNotSupported> = self.backing.cvm.lapics[vtl]
1213 .lapic
1214 .push_to_offload(|irr, isr, tmr| {
1215 let apic_page = self.runner.tdx_apic_page_mut(vtl);
1216
1217 for (((irr, page_irr), isr), page_isr) in irr
1218 .iter()
1219 .zip(&mut apic_page.irr)
1220 .zip(isr)
1221 .zip(&mut apic_page.isr)
1222 {
1223 page_irr.value |= *irr;
1224 page_isr.value |= *isr;
1225 }
1226
1227 let svi = top_vector(&apic_page.isr);
1229 self.backing.vtls[vtl].private_regs.svi = svi;
1230 update_rvi = true;
1231
1232 let fields = [
1234 VmcsField::VMX_VMCS_EOI_EXIT_0,
1235 VmcsField::VMX_VMCS_EOI_EXIT_1,
1236 VmcsField::VMX_VMCS_EOI_EXIT_2,
1237 VmcsField::VMX_VMCS_EOI_EXIT_3,
1238 ];
1239 for ((&field, eoi_exit), (i, tmr)) in fields
1240 .iter()
1241 .zip(&mut self.backing.eoi_exit_bitmap)
1242 .zip(tmr.chunks_exact(2).enumerate())
1243 {
1244 let tmr = tmr[0] as u64 | ((tmr[1] as u64) << 32);
1245 if *eoi_exit != tmr {
1246 self.runner.write_vmcs64(vtl, field, !0, tmr);
1247 *eoi_exit = tmr;
1248 self.runner.proxy_irr_exit_mut_vtl0()[i * 2] = tmr as u32;
1254 self.runner.proxy_irr_exit_mut_vtl0()[i * 2 + 1] = (tmr >> 32) as u32;
1255 }
1256 }
1257 });
1258
1259 if let Err(OffloadNotSupported) = r {
1260 return Ok(false);
1263 }
1264
1265 if update_rvi {
1266 let page = self.runner.tdx_apic_page_mut(vtl);
1267 let rvi = top_vector(&page.irr);
1268 self.backing.vtls[vtl].private_regs.rvi = rvi;
1269 }
1270 }
1271
1272 if (self.backing.cvm.lapics[vtl].activity != MpState::Running)
1274 && self.backing.cvm.lapics[vtl].lapic.is_offloaded()
1275 && self.backing.vtls[vtl].private_regs.rvi != 0
1276 {
1277 self.backing.cvm.lapics[vtl].activity = MpState::Running;
1290 }
1291
1292 Ok(true)
1293 }
1294
1295 fn access_apic_without_offload<R>(
1296 &mut self,
1297 vtl: GuestVtl,
1298 f: impl FnOnce(&mut Self) -> R,
1299 ) -> R {
1300 let offloaded = self.backing.cvm.lapics[vtl].lapic.is_offloaded();
1301 if offloaded {
1302 let (irr, isr) = pull_apic_offload(self.runner.tdx_apic_page_mut(vtl));
1303 self.backing.cvm.lapics[vtl]
1304 .lapic
1305 .disable_offload(&irr, &isr);
1306 }
1307 let r = f(self);
1308 if offloaded {
1309 self.backing.cvm.lapics[vtl].lapic.enable_offload();
1310 }
1311 r
1312 }
1313
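    /// Enables or disables APIC offload for a VTL: adjusts the x2APIC MSR
    /// intercept bitmap (TPR/EOI/SELF_IPI pass through when offloaded), toggles
    /// virtual interrupt delivery, and drops any pending external-interrupt
    /// injection when enabling offload since the hardware APIC will deliver it.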
1314 fn set_apic_offload(&mut self, vtl: GuestVtl, offload: bool) {
1315 let offload_bitmap = if offload {
1317 (1 << x86defs::apic::ApicRegister::TPR.0)
1318 | (1 << x86defs::apic::ApicRegister::EOI.0)
1319 | (1 << x86defs::apic::ApicRegister::SELF_IPI.0)
1320 } else {
1321 0
1322 };
1323 for offset in [0, 0x100] {
1325 self.runner
1326 .write_msr_bitmap(vtl, offset + X2APIC_MSR_BASE / 64, !0, !offload_bitmap);
1327 }
1328
1329 self.runner.write_vmcs32(
1331 vtl,
1332 VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS,
1333 SecondaryProcessorControls::new()
1334 .with_virtual_interrupt_delivery(true)
1335 .into(),
1336 SecondaryProcessorControls::new()
1337 .with_virtual_interrupt_delivery(offload)
1338 .into(),
1339 );
1340
1341 if offload
1343 && self.backing.vtls[vtl]
1344 .interruption_information
1345 .interruption_type()
1346 == INTERRUPT_TYPE_EXTERNAL
1347 {
1348 self.backing.vtls[vtl]
1349 .interruption_information
1350 .set_valid(false);
1351 }
1352 }
1353}
1354
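/// Adapter used by the common hardware-CVM APIC polling code: it accumulates
/// the interrupt/NMI window and TPR-threshold changes requested during a scan
/// so `try_poll_apic` can write them back to the VMCS once.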
1355struct TdxApicScanner<'a, 'b> {
1356 vp: &'a mut UhProcessor<'b, TdxBacked>,
1357 processor_controls: ProcessorControls,
1358 tpr_threshold: u8,
1359}
1360
1361impl<'b> hardware_cvm::apic::ApicBacking<'b, TdxBacked> for TdxApicScanner<'_, 'b> {
1362 fn vp(&mut self) -> &mut UhProcessor<'b, TdxBacked> {
1363 self.vp
1364 }
1365
1366 fn handle_interrupt(&mut self, vtl: GuestVtl, vector: u8) -> Result<(), UhRunVpError> {
1367 if self.vp.backing.cvm.lapics[vtl].activity == MpState::Idle {
1369 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1370 }
1371 if self.vp.backing.vtls[vtl].interruption_information.valid()
1375 && self.vp.backing.vtls[vtl]
1376 .interruption_information
1377 .interruption_type()
1378 != INTERRUPT_TYPE_EXTERNAL
1379 {
1380 self.processor_controls.set_interrupt_window_exiting(true);
1381 return Ok(());
1382 }
1383
1384 let interruptibility: Interruptibility = self
1386 .vp
1387 .runner
1388 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
1389 .into();
1390
1391 let rflags = RFlags::from(self.vp.backing.vtls[vtl].private_regs.rflags);
1392 if !rflags.interrupt_enable()
1393 || interruptibility.blocked_by_sti()
1394 || interruptibility.blocked_by_movss()
1395 {
1396 self.processor_controls.set_interrupt_window_exiting(true);
1397 return Ok(());
1398 }
1399
1400 let priority = vector >> 4;
1401 let apic = self.vp.runner.tdx_apic_page(vtl);
1402 if (apic.tpr.value as u8 >> 4) >= priority {
1403 self.tpr_threshold = priority;
1404 return Ok(());
1405 }
1406
1407 self.vp.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
1408 .with_valid(true)
1409 .with_vector(vector)
1410 .with_interruption_type(INTERRUPT_TYPE_EXTERNAL);
1411
1412 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1413 Ok(())
1414 }
1415
1416 fn handle_nmi(&mut self, vtl: GuestVtl) -> Result<(), UhRunVpError> {
1417 if self.vp.backing.cvm.lapics[vtl].activity == MpState::Idle {
1420 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1421 }
1422 if self.vp.backing.vtls[vtl].interruption_information.valid()
1426 && self.vp.backing.vtls[vtl]
1427 .interruption_information
1428 .interruption_type()
1429 != INTERRUPT_TYPE_EXTERNAL
1430 {
1431 self.processor_controls.set_nmi_window_exiting(true);
1432 return Ok(());
1433 }
1434
1435 let interruptibility: Interruptibility = self
1436 .vp
1437 .runner
1438 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
1439 .into();
1440
1441 if interruptibility.blocked_by_nmi()
1442 || interruptibility.blocked_by_sti()
1443 || interruptibility.blocked_by_movss()
1444 {
1445 self.processor_controls.set_nmi_window_exiting(true);
1446 return Ok(());
1447 }
1448
1449 self.vp.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
1450 .with_valid(true)
1451 .with_vector(2)
1452 .with_interruption_type(INTERRUPT_TYPE_NMI);
1453
1454 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1455 Ok(())
1456 }
1457
1458 fn handle_sipi(&mut self, vtl: GuestVtl, cs: SegmentRegister) -> Result<(), UhRunVpError> {
1459 self.vp.write_segment(vtl, TdxSegmentReg::Cs, cs).unwrap();
1460 self.vp.backing.vtls[vtl].private_regs.rip = 0;
1461 self.vp.backing.cvm.lapics[vtl].activity = MpState::Running;
1462
1463 Ok(())
1464 }
1465}
1466
1467impl UhProcessor<'_, TdxBacked> {
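    /// Runs the VP for one entry: programs any pending event injection,
    /// performs TLB flush and VTL bookkeeping, enters the guest via
    /// TDG.VP.ENTER, and then dispatches the resulting exit.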
1468 async fn run_vp_tdx(&mut self, dev: &impl CpuIo) -> Result<(), VpHaltReason<UhRunVpError>> {
1469 let next_vtl = self.backing.cvm.exit_vtl;
1470
1471 if self.backing.vtls[next_vtl].interruption_information.valid() {
1472 tracing::trace!(
1473 vector = self.backing.vtls[next_vtl]
1474 .interruption_information
1475 .vector(),
1476 vp_index = self.vp_index().index(),
1477 ?next_vtl,
1478 "injecting interrupt"
1479 );
1480
1481 self.runner.write_vmcs32(
1482 next_vtl,
1483 VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO,
1484 !0,
1485 self.backing.vtls[next_vtl].interruption_information.into(),
1486 );
1487 if self.backing.vtls[next_vtl]
1488 .interruption_information
1489 .deliver_error_code()
1490 {
1491 self.runner.write_vmcs32(
1492 next_vtl,
1493 VmcsField::VMX_VMCS_ENTRY_EXCEPTION_ERROR_CODE,
1494 !0,
1495 self.backing.vtls[next_vtl].exception_error_code,
1496 );
1497 }
1498 self.backing.vtls[next_vtl].interruption_set = true;
1499 } else if self.backing.vtls[next_vtl].interruption_set {
1500 self.runner
1501 .write_vmcs32(next_vtl, VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO, !0, 0);
1502 self.backing.vtls[next_vtl].interruption_set = false;
1503 }
1504
1505 self.shared.active_vtl[self.vp_index().index() as usize]
1519 .store(next_vtl as u8, Ordering::SeqCst);
1520
1521 self.do_tlb_flush(next_vtl);
1522 self.unlock_tlb_lock(Vtl::Vtl2);
1523 let tlb_halt = self.should_halt_for_tlb_unlock(next_vtl);
1524
1525 let activity = self.backing.cvm.lapics[next_vtl].activity;
1529 let kernel_known_state =
1530 matches!(activity, MpState::Running | MpState::Halted | MpState::Idle);
1531 let halted_other = tlb_halt || !kernel_known_state;
1532
1533 self.runner
1534 .set_halted(activity != MpState::Running || tlb_halt);
1535
        // Interrupt injection may be offloaded to the kernel only when no event
        // is already queued for injection and no translation invalidation is
        // still pending for this entry.
        let offload_enabled = self.backing.cvm.lapics[next_vtl].lapic.can_offload_irr()
            && !self.backing.vtls[next_vtl].interruption_information.valid()
            && self.backing.vtls[next_vtl]
                .private_regs
                .vp_entry_flags
                .invd_translations()
                == 0;
1554 let x2apic_enabled = self.backing.cvm.lapics[next_vtl].lapic.x2apic_enabled();
1555
1556 let offload_flags = hcl_intr_offload_flags::new()
1557 .with_offload_intr_inject(offload_enabled)
1558 .with_offload_x2apic(offload_enabled && x2apic_enabled)
1559 .with_halted_other(halted_other)
1560 .with_halted_hlt(activity == MpState::Halted)
1561 .with_halted_idle(activity == MpState::Idle);
1562
1563 *self.runner.offload_flags_mut() = offload_flags;
1564
1565 self.runner
1566 .write_private_regs(&self.backing.vtls[next_vtl].private_regs);
1567
1568 let has_intercept = self
1569 .runner
1570 .run()
1571 .map_err(|e| VpHaltReason::Hypervisor(UhRunVpError::Run(e)))?;
1572
1573 self.shared.active_vtl[self.vp_index().index() as usize].store(2, Ordering::Relaxed);
1577
1578 let entered_from_vtl = next_vtl;
1579 self.runner
1580 .read_private_regs(&mut self.backing.vtls[entered_from_vtl].private_regs);
1581
1582 if offload_enabled && kernel_known_state {
1584 let offload_flags = self.runner.offload_flags_mut();
1585
1586 self.backing.cvm.lapics[entered_from_vtl].activity =
1587 match (offload_flags.halted_hlt(), offload_flags.halted_idle()) {
1588 (false, false) => MpState::Running,
1589 (true, false) => MpState::Halted,
1590 (false, true) => MpState::Idle,
1591 (true, true) => {
1592 tracelimit::warn_ratelimited!(
1593 CVM_ALLOWED,
1594 "Kernel indicates VP is both halted and idle!"
1595 );
1596 activity
1597 }
1598 };
1599 }
1600
1601 if !has_intercept {
1602 return Ok(());
1603 }
1604
1605 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
1606
1607 if exit_info.code().tdx_exit() >= TdCallResultCode::PENDING_INTERRUPT {
1609 self.backing.vtls[entered_from_vtl]
1610 .enter_stats
1611 .pending_intr
1612 .increment();
1613 return Ok(());
1614 }
1615
1616 self.backing.vtls[entered_from_vtl]
1618 .private_regs
1619 .vp_entry_flags
1620 .set_invd_translations(0);
1621
1622 let stat = match exit_info.code().tdx_exit() {
1624 TdCallResultCode::SUCCESS => {
1625 &mut self.backing.vtls[entered_from_vtl].enter_stats.success
1626 }
1627 TdCallResultCode::L2_EXIT_HOST_ROUTED_ASYNC => {
1628 &mut self.backing.vtls[entered_from_vtl]
1629 .enter_stats
1630 .host_routed_async
1631 }
1632 TdCallResultCode::L2_EXIT_PENDING_INTERRUPT => {
1633 &mut self.backing.vtls[entered_from_vtl]
1634 .enter_stats
1635 .l2_exit_pending_intr
1636 }
1637 TdCallResultCode::L2_EXIT_HOST_ROUTED_TDVMCALL => {
1638 assert_eq!(
1644 exit_info.code().vmx_exit(),
1645 VmxExit::new().with_basic_reason(VmxExitBasic::TDCALL)
1646 );
1647 &mut self.backing.vtls[entered_from_vtl]
1648 .enter_stats
1649 .host_routed_td_vmcall
1650 }
1651 _ => panic!("unexpected tdx exit code {:?}", exit_info.code()),
1652 };
1653
1654 stat.increment();
1655 self.handle_vmx_exit(dev, entered_from_vtl).await?;
1656 Ok(())
1657 }
1658
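    /// Handles a VMX exit reflected by the TDX module for the intercepted VTL:
    /// first reinjects or acknowledges any event whose delivery was interrupted
    /// by the exit, then dispatches on the basic exit reason.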
1659 async fn handle_vmx_exit(
1660 &mut self,
1661 dev: &impl CpuIo,
1662 intercepted_vtl: GuestVtl,
1663 ) -> Result<(), VpHaltReason<UhRunVpError>> {
1664 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
1665
1666 let vmx_exit = exit_info.code().vmx_exit();
1668 if vmx_exit.vm_enter_failed() {
1669 return Err(self.handle_vm_enter_failed(intercepted_vtl, vmx_exit));
1670 }
1671
1672 let next_interruption = exit_info.idt_vectoring_info();
1673
1674 if self.backing.vtls[intercepted_vtl]
1676 .interruption_information
1677 .valid()
1678 && (!next_interruption.valid()
1679 || self.backing.vtls[intercepted_vtl]
1680 .interruption_information
1681 .interruption_type()
1682 != next_interruption.interruption_type())
1683 {
1684 match self.backing.vtls[intercepted_vtl]
1685 .interruption_information
1686 .interruption_type()
1687 {
1688 INTERRUPT_TYPE_EXTERNAL
1689 if !self.backing.cvm.lapics[intercepted_vtl]
1690 .lapic
1691 .is_offloaded() =>
1692 {
1693 tracing::trace!(
1695 vector = self.backing.vtls[intercepted_vtl]
1696 .interruption_information
1697 .vector(),
1698 "acknowledging interrupt"
1699 );
1700 self.backing.cvm.lapics[intercepted_vtl]
1701 .lapic
1702 .acknowledge_interrupt(
1703 self.backing.vtls[intercepted_vtl]
1704 .interruption_information
1705 .vector(),
1706 );
1707 }
1708 INTERRUPT_TYPE_NMI => {
1709 tracing::debug!("acknowledging NMI");
1711 self.backing.cvm.lapics[intercepted_vtl].nmi_pending = false;
1712 }
1713 _ => {}
1714 }
1715 }
1716
1717 if self.backing.cvm.lapics[intercepted_vtl]
1718 .lapic
1719 .is_offloaded()
1720 {
1721 if next_interruption.valid() {
1734 tracing::debug!(
1735 ?next_interruption,
1736 vp_index = self.vp_index().index(),
1737 "exit requires reinjecting interrupt"
1738 );
1739 self.backing.vtls[intercepted_vtl].interruption_information = next_interruption;
1740 self.backing.vtls[intercepted_vtl].exception_error_code =
1741 exit_info.idt_vectoring_error_code();
1742 self.backing.vtls[intercepted_vtl]
1743 .exit_stats
1744 .needs_interrupt_reinject
1745 .increment();
1746 } else {
1747 self.backing.vtls[intercepted_vtl].interruption_information = Default::default();
1748 }
1749 } else {
1750 if next_interruption.valid()
1754 && !matches!(
1755 next_interruption.interruption_type(),
1756 INTERRUPT_TYPE_EXTERNAL | INTERRUPT_TYPE_NMI
1757 )
1758 {
1759 self.backing.vtls[intercepted_vtl].interruption_information = next_interruption;
1760 self.backing.vtls[intercepted_vtl].exception_error_code =
1761 exit_info.idt_vectoring_error_code();
1762 } else {
1763 self.backing.vtls[intercepted_vtl].interruption_information = Default::default();
1764 }
1765 }
1766
1767 let mut breakpoint_debug_exception = false;
1768 let stat = match vmx_exit.basic_reason() {
1769 VmxExitBasic::IO_INSTRUCTION => {
1770 let io_qual = ExitQualificationIo::from(exit_info.qualification() as u32);
1771
1772 let len = match io_qual.access_size() {
1773 IO_SIZE_8_BIT => 1,
1774 IO_SIZE_16_BIT => 2,
1775 IO_SIZE_32_BIT => 4,
1776 _ => panic!(
1777 "tdx module returned invalid io instr size {}",
1778 io_qual.access_size()
1779 ),
1780 };
1781
1782 let port_access_protected = self.cvm_try_protect_io_port_access(
1783 intercepted_vtl,
1784 io_qual.port(),
1785 io_qual.is_in(),
1786 len,
1787 io_qual.is_string(),
1788 io_qual.rep_prefix(),
1789 );
1790
1791 if !port_access_protected {
1792 if io_qual.is_string() || io_qual.rep_prefix() {
1793 self.emulate(
1798 dev,
1799 self.backing.vtls[intercepted_vtl]
1800 .interruption_information
1801 .valid(),
1802 intercepted_vtl,
1803 TdxEmulationCache::default(),
1804 )
1805 .await?;
1806 } else {
1807 let mut rax = self.runner.tdx_enter_guest_gps()[TdxGp::RAX];
1808 emulate_io(
1809 self.inner.vp_info.base.vp_index,
1810 !io_qual.is_in(),
1811 io_qual.port(),
1812 &mut rax,
1813 len,
1814 dev,
1815 )
1816 .await;
1817 self.runner.tdx_enter_guest_gps_mut()[TdxGp::RAX] = rax;
1818
1819 self.advance_to_next_instruction(intercepted_vtl);
1820 }
1821 }
1822
1823 &mut self.backing.vtls[intercepted_vtl].exit_stats.io
1824 }
1825 VmxExitBasic::MSR_READ => {
1826 let msr = self.runner.tdx_enter_guest_gps()[TdxGp::RCX] as u32;
1827
1828 let result = self.backing.cvm.lapics[intercepted_vtl]
1829 .lapic
1830 .access(&mut TdxApicClient {
1831 partition: self.partition,
1832 vmtime: &self.vmtime,
1833 apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
1834 dev,
1835 vtl: intercepted_vtl,
1836 })
1837 .msr_read(msr)
1838 .or_else_if_unknown(|| self.read_msr_cvm(msr, intercepted_vtl))
1839 .or_else_if_unknown(|| self.read_msr_tdx(msr, intercepted_vtl));
1840
1841 let value = match result {
1842 Ok(v) => Some(v),
1843 Err(MsrError::Unknown) => {
1844 tracelimit::warn_ratelimited!(CVM_ALLOWED, msr, "unknown tdx vm msr read");
1845 Some(0)
1846 }
1847 Err(MsrError::InvalidAccess) => None,
1848 };
1849
1850 let inject_gp = if let Some(value) = value {
1851 let gps = self.runner.tdx_enter_guest_gps_mut();
1852 gps[TdxGp::RAX] = (value as u32).into();
1853 gps[TdxGp::RDX] = ((value >> 32) as u32).into();
1854 false
1855 } else {
1856 true
1857 };
1858
1859 if inject_gp {
1860 self.inject_gpf(intercepted_vtl);
1861 } else {
1862 self.advance_to_next_instruction(intercepted_vtl);
1863 }
1864 &mut self.backing.vtls[intercepted_vtl].exit_stats.msr_read
1865 }
1866 VmxExitBasic::MSR_WRITE => {
1867 let gps = self.runner.tdx_enter_guest_gps();
1868 let msr = gps[TdxGp::RCX] as u32;
1869 let value =
1870 (gps[TdxGp::RAX] as u32 as u64) | ((gps[TdxGp::RDX] as u32 as u64) << 32);
1871
1872 if !self.cvm_try_protect_msr_write(intercepted_vtl, msr) {
1873 let result = self.backing.cvm.lapics[intercepted_vtl]
1874 .lapic
1875 .access(&mut TdxApicClient {
1876 partition: self.partition,
1877 vmtime: &self.vmtime,
1878 apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
1879 dev,
1880 vtl: intercepted_vtl,
1881 })
1882 .msr_write(msr, value)
1883 .or_else_if_unknown(|| self.write_msr_cvm(msr, value, intercepted_vtl))
1884 .or_else_if_unknown(|| self.write_msr_tdx(msr, value, intercepted_vtl))
1885 .or_else_if_unknown(|| {
1886 if MSR_ALLOWED_READ_WRITE.contains(&msr) {
1888 unreachable!("intercepted a write to MSR {msr}, configured for passthrough by default, that wasn't registered for intercepts by a higher VTL");
1889 }
1890 Err(MsrError::Unknown)
1891 });
1892
1893 let inject_gp = match result {
1894 Ok(()) => false,
1895 Err(MsrError::Unknown) => {
1896 tracelimit::warn_ratelimited!(
1897 CVM_ALLOWED,
1898 msr,
1899 "unknown tdx vm msr write"
1900 );
1901 tracelimit::warn_ratelimited!(
1902 CVM_CONFIDENTIAL,
1903 value,
1904 "unknown tdx vm msr write"
1905 );
1906 false
1907 }
1908 Err(MsrError::InvalidAccess) => true,
1909 };
1910
1911 if inject_gp {
1912 self.inject_gpf(intercepted_vtl);
1913 } else {
1914 self.advance_to_next_instruction(intercepted_vtl);
1915 }
1916 }
1917 &mut self.backing.vtls[intercepted_vtl].exit_stats.msr_write
1918 }
1919 VmxExitBasic::CPUID => {
1920 let gps = self.runner.tdx_enter_guest_gps();
1921 let leaf = gps[TdxGp::RAX] as u32;
1922 let subleaf = gps[TdxGp::RCX] as u32;
1923 let [eax, ebx, ecx, edx] = self.cvm_cpuid_result(intercepted_vtl, leaf, subleaf);
1924 let gps = self.runner.tdx_enter_guest_gps_mut();
1925 gps[TdxGp::RAX] = eax.into();
1926 gps[TdxGp::RBX] = ebx.into();
1927 gps[TdxGp::RCX] = ecx.into();
1928 gps[TdxGp::RDX] = edx.into();
1929 self.advance_to_next_instruction(intercepted_vtl);
1930 &mut self.backing.vtls[intercepted_vtl].exit_stats.cpuid
1931 }
1932 VmxExitBasic::VMCALL_INSTRUCTION => {
1933 if exit_info.cpl() != 0 {
1934 self.inject_gpf(intercepted_vtl);
1935 } else {
1936 let is_64bit = self.long_mode(intercepted_vtl);
1937 let guest_memory = &self.partition.gm[intercepted_vtl];
1938 let handler = UhHypercallHandler {
1939 trusted: !self.cvm_partition().hide_isolation,
1940 vp: &mut *self,
1941 bus: dev,
1942 intercepted_vtl,
1943 };
1944
1945 UhHypercallHandler::TDX_DISPATCHER.dispatch(
1946 guest_memory,
1947 hv1_hypercall::X64RegisterIo::new(handler, is_64bit),
1948 );
1949 }
1950 &mut self.backing.vtls[intercepted_vtl].exit_stats.vmcall
1951 }
1952 VmxExitBasic::HLT_INSTRUCTION => {
1953 self.backing.cvm.lapics[intercepted_vtl].activity = MpState::Halted;
1954 self.clear_interrupt_shadow(intercepted_vtl);
1955 self.advance_to_next_instruction(intercepted_vtl);
1956 &mut self.backing.vtls[intercepted_vtl].exit_stats.hlt
1957 }
1958 VmxExitBasic::CR_ACCESS => {
1959 let qual = CrAccessQualification::from(exit_info.qualification());
1960 let cr;
1961 let value;
1962 match qual.access_type() {
1963 CR_ACCESS_TYPE_MOV_TO_CR => {
1964 cr = qual.cr();
1965 value = self.runner.tdx_enter_guest_gps()[qual.gp_register() as usize];
1966 }
1967 CR_ACCESS_TYPE_LMSW => {
1968 cr = 0;
1969 let cr0 = self.backing.vtls[intercepted_vtl].cr0.read(&self.runner);
1970 value = (qual.lmsw_source_data() as u64 & 0xf) | (cr0 & !0xf);
1972 }
1973 access_type => unreachable!("not registered for cr access type {access_type}"),
1974 }
1975
1976 let cr = match cr {
1977 0 => HvX64RegisterName::Cr0,
1978 4 => HvX64RegisterName::Cr4,
1979 _ => unreachable!("not registered for cr{cr} accesses"),
1980 };
1981
1982 if !self.cvm_try_protect_secure_register_write(intercepted_vtl, cr, value) {
1983 let r = match cr {
1984 HvX64RegisterName::Cr0 => self.backing.vtls[intercepted_vtl]
1985 .cr0
1986 .write(value, &mut self.runner),
1987 HvX64RegisterName::Cr4 => self.backing.vtls[intercepted_vtl]
1988 .cr4
1989 .write(value, &mut self.runner),
1990 _ => unreachable!(),
1991 };
1992 if r.is_ok() {
1993 self.update_execution_mode(intercepted_vtl);
1994 self.advance_to_next_instruction(intercepted_vtl);
1995 } else {
1996 tracelimit::warn_ratelimited!(
1997 CVM_ALLOWED,
1998 ?cr,
1999 value,
2000 "failed to write cr"
2001 );
2002 self.inject_gpf(intercepted_vtl);
2003 }
2004 }
2005 &mut self.backing.vtls[intercepted_vtl].exit_stats.cr_access
2006 }
2007 VmxExitBasic::XSETBV => {
2008 let gps = self.runner.tdx_enter_guest_gps();
2009 if let Some(value) =
2010 hardware_cvm::validate_xsetbv_exit(hardware_cvm::XsetbvExitInput {
2011 rax: gps[TdxGp::RAX],
2012 rcx: gps[TdxGp::RCX],
2013 rdx: gps[TdxGp::RDX],
2014 cr4: self.backing.vtls[intercepted_vtl].cr4.read(&self.runner),
2015 cpl: exit_info.cpl(),
2016 })
2017 {
2018 if !self.cvm_try_protect_secure_register_write(
2019 intercepted_vtl,
2020 HvX64RegisterName::Xfem,
2021 value,
2022 ) {
2023 self.runner
2024 .set_vp_register(intercepted_vtl, HvX64RegisterName::Xfem, value.into())
2025 .map_err(|err| {
2026 VpHaltReason::Hypervisor(UhRunVpError::EmulationState(err))
2027 })?;
2028 self.advance_to_next_instruction(intercepted_vtl);
2029 }
2030 } else {
2031 self.inject_gpf(intercepted_vtl);
2032 }
2033 &mut self.backing.vtls[intercepted_vtl].exit_stats.xsetbv
2034 }
2035 VmxExitBasic::WBINVD_INSTRUCTION => {
2036 let no_invalidate = exit_info.qualification() != 0;
2038 if no_invalidate {
2039 self.runner.tdx_vp_state_flags_mut().set_wbnoinvd(true);
2040 } else {
2041 self.runner.tdx_vp_state_flags_mut().set_wbinvd(true);
2042 }
2043
2044 self.advance_to_next_instruction(intercepted_vtl);
2045 &mut self.backing.vtls[intercepted_vtl].exit_stats.wbinvd
2046 }
2047 VmxExitBasic::EPT_VIOLATION => {
2048 let gpa = exit_info.gpa().expect("is EPT exit");
2049 let ept_info = VmxEptExitQualification::from(exit_info.qualification());
2050 if !next_interruption.valid() && ept_info.nmi_unmasking_due_to_iret() {
2053 let mask = Interruptibility::new().with_blocked_by_nmi(true);
2054 let value = Interruptibility::new().with_blocked_by_nmi(true);
2055 let old_interruptibility: Interruptibility = self
2056 .runner
2057 .write_vmcs32(
2058 intercepted_vtl,
2059 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
2060 mask.into(),
2061 value.into(),
2062 )
2063 .into();
2064 assert!(!old_interruptibility.blocked_by_nmi());
2065 } else {
2066 self.handle_ept(intercepted_vtl, dev, gpa, ept_info).await?;
2067 }
2068
2069 &mut self.backing.vtls[intercepted_vtl].exit_stats.ept_violation
2070 }
2071 VmxExitBasic::TPR_BELOW_THRESHOLD => {
2072 &mut self.backing.vtls[intercepted_vtl]
2074 .exit_stats
2075 .tpr_below_threshold
2076 }
2077 VmxExitBasic::INTERRUPT_WINDOW => {
2078 &mut self.backing.vtls[intercepted_vtl]
2080 .exit_stats
2081 .interrupt_window
2082 }
2083 VmxExitBasic::NMI_WINDOW => {
2084 &mut self.backing.vtls[intercepted_vtl].exit_stats.nmi_window
2086 }
2087 VmxExitBasic::HW_INTERRUPT => {
2088 if cfg!(feature = "gdb") {
2089 let debug_regs = self
2091 .access_state(intercepted_vtl.into())
2092 .debug_regs()
2093 .expect("register query should not fail");
2094 breakpoint_debug_exception = debug_regs.dr6.trailing_zeros() < 4;
2097 }
2098 &mut self.backing.vtls[intercepted_vtl].exit_stats.hw_interrupt
2099 }
2100 VmxExitBasic::SMI_INTR => &mut self.backing.vtls[intercepted_vtl].exit_stats.smi_intr,
2101 VmxExitBasic::PAUSE_INSTRUCTION => {
2102 &mut self.backing.vtls[intercepted_vtl].exit_stats.pause
2103 }
2104 VmxExitBasic::TDCALL => {
2105 if self.backing.untrusted_synic.is_some() {
2108 assert_eq!(intercepted_vtl, GuestVtl::Vtl0);
2109 self.handle_tdvmcall(dev, intercepted_vtl);
2110 } else if self.cvm_partition().hide_isolation {
2111 self.backing.vtls[intercepted_vtl].interruption_information =
2113 InterruptionInformation::new()
2114 .with_valid(true)
2115 .with_vector(x86defs::Exception::INVALID_OPCODE.0)
2116 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION);
2117 }
2118 &mut self.backing.vtls[intercepted_vtl].exit_stats.tdcall
2119 }
2120 VmxExitBasic::EXCEPTION => {
2121 tracing::trace!(
2122 "Caught Exception: {:?}",
2123 exit_info._exit_interruption_info()
2124 );
2125 if cfg!(feature = "gdb") {
2126 breakpoint_debug_exception = true;
2127 }
2128 &mut self.backing.vtls[intercepted_vtl].exit_stats.exception
2129 }
2130 VmxExitBasic::TRIPLE_FAULT => {
2131 return Err(VpHaltReason::TripleFault {
2132 vtl: intercepted_vtl.into(),
2133 });
2134 }
2135 VmxExitBasic::GDTR_OR_IDTR => {
2136 let info = GdtrOrIdtrInstructionInfo::from(exit_info.instr_info().info());
2137 tracing::trace!("Intercepted GDT or IDT instruction: {:?}", info);
2138 let reg = match info.instruction() {
2139 GdtrOrIdtrInstruction::Sidt | GdtrOrIdtrInstruction::Lidt => {
2140 HvX64RegisterName::Idtr
2141 }
2142 GdtrOrIdtrInstruction::Sgdt | GdtrOrIdtrInstruction::Lgdt => {
2143 HvX64RegisterName::Gdtr
2144 }
2145 };
2146 if (info.instruction().is_load()
2148 && !self.cvm_try_protect_secure_register_write(intercepted_vtl, reg, 0))
2149 || !info.instruction().is_load()
2150 {
2151 self.emulate_gdtr_or_idtr(intercepted_vtl, dev).await?;
2152 }
2153 &mut self.backing.vtls[intercepted_vtl]
2154 .exit_stats
2155 .descriptor_table
2156 }
2157 VmxExitBasic::LDTR_OR_TR => {
2158 let info = LdtrOrTrInstructionInfo::from(exit_info.instr_info().info());
2159 tracing::trace!("Intercepted LDT or TR instruction: {:?}", info);
2160 let reg = match info.instruction() {
2161 LdtrOrTrInstruction::Sldt | LdtrOrTrInstruction::Lldt => {
2162 HvX64RegisterName::Ldtr
2163 }
2164 LdtrOrTrInstruction::Str | LdtrOrTrInstruction::Ltr => HvX64RegisterName::Tr,
2165 };
2166 if (info.instruction().is_load()
2168 && !self.cvm_try_protect_secure_register_write(intercepted_vtl, reg, 0))
2169 || !info.instruction().is_load()
2170 {
2171 self.emulate_ldtr_or_tr(intercepted_vtl, dev).await?;
2172 }
2173 &mut self.backing.vtls[intercepted_vtl]
2174 .exit_stats
2175 .descriptor_table
2176 }
2177 _ => {
2178 return Err(VpHaltReason::Hypervisor(UhRunVpError::UnknownVmxExit(
2179 exit_info.code().vmx_exit(),
2180 )));
2181 }
2182 };
2183 stat.increment();
2184
2185 if cfg!(feature = "gdb") && breakpoint_debug_exception {
2188 self.handle_debug_exception(intercepted_vtl)?;
2189 }
2190
2191 Ok(())
2192 }
2193
2194 fn trace_processor_state(&self, vtl: GuestVtl) {
2196 let raw_exit = self.runner.tdx_vp_enter_exit_info();
2197 tracing::error!(CVM_CONFIDENTIAL, ?raw_exit, "raw tdx vp enter exit info");
2198
2199 let gprs = self.runner.tdx_enter_guest_gps();
2200 tracing::error!(CVM_CONFIDENTIAL, ?gprs, "guest gpr list");
2201
2202 let TdxPrivateRegs {
2203 rflags,
2204 rip,
2205 rsp,
2206 ssp,
2207 rvi,
2208 svi,
2209 msr_kernel_gs_base,
2210 msr_star,
2211 msr_lstar,
2212 msr_sfmask,
2213 msr_xss,
2214 msr_tsc_aux,
2215 vp_entry_flags,
2216 } = self.backing.vtls[vtl].private_regs;
2217 tracing::error!(
2218 CVM_CONFIDENTIAL,
2219 rflags,
2220 rip,
2221 rsp,
2222 ssp,
2223 rvi,
2224 svi,
2225 msr_kernel_gs_base,
2226 msr_star,
2227 msr_lstar,
2228 msr_sfmask,
2229 msr_xss,
2230 msr_tsc_aux,
2231 ?vp_entry_flags,
2232 "private registers"
2233 );
2234
2235 let physical_cr0 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR0);
2236 let shadow_cr0 = self
2237 .runner
2238 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR0_READ_SHADOW);
2239 let cr0_guest_host_mask: u64 = self
2240 .runner
2241 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR0_GUEST_HOST_MASK);
2242 tracing::error!(
2243 CVM_CONFIDENTIAL,
2244 physical_cr0,
2245 shadow_cr0,
2246 cr0_guest_host_mask,
2247 "cr0 values"
2248 );
2249
2250 let physical_cr4 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR4);
2251 let shadow_cr4 = self
2252 .runner
2253 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_READ_SHADOW);
2254 let cr4_guest_host_mask = self
2255 .runner
2256 .read_vmcs64(vtl, VmcsField::VMX_VMCS_CR4_GUEST_HOST_MASK);
2257 tracing::error!(
2258 CVM_CONFIDENTIAL,
2259 physical_cr4,
2260 shadow_cr4,
2261 cr4_guest_host_mask,
2262 "cr4 values"
2263 );
2264
2265 let cr3 = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_CR3);
2266 tracing::error!(CVM_CONFIDENTIAL, cr3, "cr3");
2267
2268 let cached_efer = self.backing.vtls[vtl].efer;
2269 let vmcs_efer = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER);
2270 let entry_controls = self
2271 .runner
2272 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_CONTROLS);
2273 tracing::error!(CVM_CONFIDENTIAL, cached_efer, vmcs_efer, "efer");
2274 tracing::error!(CVM_CONFIDENTIAL, entry_controls, "entry controls");
2275
2276 let cs = self.read_segment(vtl, TdxSegmentReg::Cs);
2277 let ds = self.read_segment(vtl, TdxSegmentReg::Ds);
2278 let es = self.read_segment(vtl, TdxSegmentReg::Es);
2279 let fs = self.read_segment(vtl, TdxSegmentReg::Fs);
2280 let gs = self.read_segment(vtl, TdxSegmentReg::Gs);
2281 let ss = self.read_segment(vtl, TdxSegmentReg::Ss);
2282 let tr = self.read_segment(vtl, TdxSegmentReg::Tr);
2283 let ldtr = self.read_segment(vtl, TdxSegmentReg::Ldtr);
2284
2285 tracing::error!(
2286 CVM_CONFIDENTIAL,
2287 ?cs,
2288 ?ds,
2289 ?es,
2290 ?fs,
2291 ?gs,
2292 ?ss,
2293 ?tr,
2294 ?ldtr,
2295 "segment values"
2296 );
2297
2298 let exception_bitmap = self
2299 .runner
2300 .read_vmcs32(vtl, VmcsField::VMX_VMCS_EXCEPTION_BITMAP);
2301 tracing::error!(CVM_CONFIDENTIAL, exception_bitmap, "exception bitmap");
2302
2303 let cached_processor_controls = self.backing.vtls[vtl].processor_controls;
2304 let vmcs_processor_controls = ProcessorControls::from(
2305 self.runner
2306 .read_vmcs32(vtl, VmcsField::VMX_VMCS_PROCESSOR_CONTROLS),
2307 );
2308 let vmcs_secondary_processor_controls = SecondaryProcessorControls::from(
2309 self.runner
2310 .read_vmcs32(vtl, VmcsField::VMX_VMCS_SECONDARY_PROCESSOR_CONTROLS),
2311 );
2312 tracing::error!(
2313 CVM_CONFIDENTIAL,
2314 ?cached_processor_controls,
2315 ?vmcs_processor_controls,
2316 ?vmcs_secondary_processor_controls,
2317 "processor controls"
2318 );
2319
2320 if cached_processor_controls != vmcs_processor_controls {
2321 tracing::error!(CVM_ALLOWED, "BUGBUG: processor controls mismatch");
2322 }
2323
2324 let cached_tpr_threshold = self.backing.vtls[vtl].tpr_threshold;
2325 let vmcs_tpr_threshold = self
2326 .runner
2327 .read_vmcs32(vtl, VmcsField::VMX_VMCS_TPR_THRESHOLD);
2328 tracing::error!(
2329 CVM_CONFIDENTIAL,
2330 cached_tpr_threshold,
2331 vmcs_tpr_threshold,
2332 "tpr threshold"
2333 );
2334
2335 let cached_eoi_exit_bitmap = self.backing.eoi_exit_bitmap;
2336 let vmcs_eoi_exit_bitmap = {
2337 let fields = [
2338 VmcsField::VMX_VMCS_EOI_EXIT_0,
2339 VmcsField::VMX_VMCS_EOI_EXIT_1,
2340 VmcsField::VMX_VMCS_EOI_EXIT_2,
2341 VmcsField::VMX_VMCS_EOI_EXIT_3,
2342 ];
2343 fields
2344 .iter()
2345 .map(|field| self.runner.read_vmcs64(vtl, *field))
2346 .collect::<Vec<_>>()
2347 };
2348 tracing::error!(
2349 CVM_CONFIDENTIAL,
2350 ?cached_eoi_exit_bitmap,
2351 ?vmcs_eoi_exit_bitmap,
2352 "eoi exit bitmap"
2353 );
2354
2355 let cached_interrupt_information = self.backing.vtls[vtl].interruption_information;
2356 let cached_interruption_set = self.backing.vtls[vtl].interruption_set;
2357 let vmcs_interrupt_information = self
2358 .runner
2359 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_INTERRUPT_INFO);
2360 let vmcs_entry_exception_code = self
2361 .runner
2362 .read_vmcs32(vtl, VmcsField::VMX_VMCS_ENTRY_EXCEPTION_ERROR_CODE);
2363 tracing::error!(
2364 CVM_CONFIDENTIAL,
2365 ?cached_interrupt_information,
2366 cached_interruption_set,
2367 vmcs_interrupt_information,
2368 vmcs_entry_exception_code,
2369 "interrupt information"
2370 );
2371
2372 let guest_interruptibility = self
2373 .runner
2374 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY);
2375 tracing::error!(
2376 CVM_CONFIDENTIAL,
2377 guest_interruptibility,
2378 "guest interruptibility"
2379 );
2380
2381 let vmcs_sysenter_cs = self
2382 .runner
2383 .read_vmcs32(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR);
2384 let vmcs_sysenter_esp = self
2385 .runner
2386 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR);
2387 let vmcs_sysenter_eip = self
2388 .runner
2389 .read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR);
2390 tracing::error!(
2391 CVM_CONFIDENTIAL,
2392 vmcs_sysenter_cs,
2393 vmcs_sysenter_esp,
2394 vmcs_sysenter_eip,
2395 "sysenter values"
2396 );
2397
2398 let vmcs_pat = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT);
2399 tracing::error!(CVM_CONFIDENTIAL, vmcs_pat, "guest PAT");
2400 }
2401
2402 fn handle_vm_enter_failed(
2403 &self,
2404 vtl: GuestVtl,
2405 vmx_exit: VmxExit,
2406 ) -> VpHaltReason<UhRunVpError> {
2407 assert!(vmx_exit.vm_enter_failed());
2408 match vmx_exit.basic_reason() {
2409 VmxExitBasic::BAD_GUEST_STATE => {
2410 tracing::error!(CVM_ALLOWED, "VP.ENTER failed with bad guest state");
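                // Dump as much guest state as possible to aid diagnosis before failing the VP.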
2413 self.trace_processor_state(vtl);
2414
2415 VpHaltReason::Hypervisor(UhRunVpError::VmxBadGuestState)
2417 }
2418 _ => VpHaltReason::Hypervisor(UhRunVpError::UnknownVmxExit(vmx_exit)),
2419 }
2420 }
2421
2422 fn advance_to_next_instruction(&mut self, vtl: GuestVtl) {
2423 let instr_info = TdxExit(self.runner.tdx_vp_enter_exit_info()).instr_info();
2424 let rip = &mut self.backing.vtls[vtl].private_regs.rip;
2425 *rip = rip.wrapping_add(instr_info.length().into());
2426 }
2427
2428 fn clear_interrupt_shadow(&mut self, vtl: GuestVtl) {
2429 let mask = Interruptibility::new().with_blocked_by_sti(true);
2430 let value = Interruptibility::new().with_blocked_by_sti(false);
2431 self.runner.write_vmcs32(
2432 vtl,
2433 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
2434 mask.into(),
2435 value.into(),
2436 );
2437 }
2438
2439 fn inject_gpf(&mut self, vtl: GuestVtl) {
2440 self.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
2441 .with_valid(true)
2442 .with_vector(x86defs::Exception::GENERAL_PROTECTION_FAULT.0)
2443 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION)
2444 .with_deliver_error_code(true);
2445 self.backing.vtls[vtl].exception_error_code = 0;
2446 }
2447
2448 fn inject_mc(&mut self, vtl: GuestVtl) {
2449 self.backing.vtls[vtl].interruption_information = InterruptionInformation::new()
2450 .with_valid(true)
2451 .with_vector(x86defs::Exception::MACHINE_CHECK.0)
2452 .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION);
2453 }
2454
2455 async fn handle_ept(
2456 &mut self,
2457 intercepted_vtl: GuestVtl,
2458 dev: &impl CpuIo,
2459 gpa: u64,
2460 ept_info: VmxEptExitQualification,
2461 ) -> Result<(), VpHaltReason<UhRunVpError>> {
2462 let vtom = self.partition.caps.vtom.unwrap_or(0);
2463 let is_shared = (gpa & vtom) == vtom && vtom != 0;
2464 let canonical_gpa = gpa & !vtom;
2465
2466 let address_type = self
2468 .partition
2469 .lower_vtl_memory_layout
2470 .probe_address(canonical_gpa);
2471
2472 match address_type {
2473 Some(AddressType::Mmio) => {
2474 self.emulate(
2476 dev,
2477 self.backing.vtls[intercepted_vtl]
2478 .interruption_information
2479 .valid(),
2480 intercepted_vtl,
2481 TdxEmulationCache::default(),
2482 )
2483 .await?;
2484 }
2485 Some(AddressType::Ram) => {
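                // A RAM-backed GPA can fault spuriously, e.g. when racing with another VP
                // changing page acceptance or visibility. If the page is readable now, drop the
                // exit and let the guest retry the access.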
2486 if self.partition.gm[intercepted_vtl]
2494 .probe_gpa_readable(gpa)
2495 .is_ok()
2496 {
2497 tracelimit::warn_ratelimited!(
2498 CVM_ALLOWED,
2499 gpa,
2500 "possible spurious EPT violation, ignoring"
2501 );
2502 } else {
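                    // The GPA maps RAM but is not currently accessible (for example, not accepted
                    // or the wrong visibility); surface the failed access to the guest as a
                    // machine check.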
2503 tracelimit::warn_ratelimited!(
2515 CVM_ALLOWED,
2516 gpa,
2517 is_shared,
2518 ?ept_info,
2519 "guest accessed inaccessible gpa, injecting MC"
2520 );
2521
2522 self.inject_mc(intercepted_vtl);
2524 }
2525 }
2526 None => {
2527 if !self.cvm_partition().hide_isolation {
2528 tracelimit::warn_ratelimited!(
2538 CVM_ALLOWED,
2539 gpa,
2540 is_shared,
2541 ?ept_info,
2542                         "guest accessed gpa not described in memory layout, emulating anyway"
2543 );
2544 }
2545
2546 self.emulate(
2548 dev,
2549 self.backing.vtls[intercepted_vtl]
2550 .interruption_information
2551 .valid(),
2552 intercepted_vtl,
2553 TdxEmulationCache::default(),
2554 )
2555 .await?;
2556 }
2557 }
2558
2559 Ok(())
2560 }
2561
2562 fn handle_tdvmcall(&mut self, dev: &impl CpuIo, intercepted_vtl: GuestVtl) {
2563 let regs = self.runner.tdx_enter_guest_gps();
2564 if regs[TdxGp::R10] == 0 {
2565 let result = match VmxExitBasic(regs[TdxGp::R11] as u16) {
2567 VmxExitBasic::MSR_WRITE => {
2568 let msr = regs[TdxGp::R12] as u32;
2569 let value = regs[TdxGp::R13];
2570 match self.write_tdvmcall_msr(msr, value, intercepted_vtl) {
2571 Ok(()) => {
2572 tracing::debug!(msr, value, "tdvmcall msr write");
2573 TdVmCallR10Result::SUCCESS
2574 }
2575 Err(err) => {
2576 tracelimit::warn_ratelimited!(
2577 CVM_ALLOWED,
2578 msr,
2579 ?err,
2580 "failed tdvmcall msr write"
2581 );
2582 tracelimit::warn_ratelimited!(
2583 CVM_CONFIDENTIAL,
2584 value,
2585 "failed tdvmcall msr write"
2586 );
2587 TdVmCallR10Result::OPERAND_INVALID
2588 }
2589 }
2590 }
2591 VmxExitBasic::MSR_READ => {
2592 let msr = regs[TdxGp::R12] as u32;
2593 match self.read_tdvmcall_msr(msr, intercepted_vtl) {
2594 Ok(value) => {
2595 tracing::debug!(msr, value, "tdvmcall msr read");
2596 self.runner.tdx_enter_guest_gps_mut()[TdxGp::R11] = value;
2597 TdVmCallR10Result::SUCCESS
2598 }
2599 Err(err) => {
2600 tracelimit::warn_ratelimited!(
2601 CVM_ALLOWED,
2602 msr,
2603 ?err,
2604 "failed tdvmcall msr read"
2605 );
2606 TdVmCallR10Result::OPERAND_INVALID
2607 }
2608 }
2609 }
2610 subfunction => {
2611 tracelimit::warn_ratelimited!(
2612 CVM_ALLOWED,
2613 ?subfunction,
2614 "architectural vmcall not supported"
2615 );
2616 TdVmCallR10Result::OPERAND_INVALID
2617 }
2618 };
2619 self.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = result.0;
2620 self.backing.vtls[intercepted_vtl].private_regs.rip = self.backing.vtls
2621 [intercepted_vtl]
2622 .private_regs
2623 .rip
2624 .wrapping_add(4);
2625 } else {
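            // A nonzero R10 is a Hyper-V hypercall tunneled through TDVMCALL. It is untrusted,
            // so its input and output must come from shared (host-visible) memory.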
2626 let guest_memory = &self.shared.cvm.shared_memory;
2633 let handler = UhHypercallHandler {
2634 vp: &mut *self,
2635 bus: dev,
2636 trusted: false,
2637 intercepted_vtl,
2638 };
2639
2640 UhHypercallHandler::TDCALL_DISPATCHER.dispatch(guest_memory, TdHypercall(handler));
2641 }
2642 }
2643
2644 fn read_tdvmcall_msr(&mut self, msr: u32, intercepted_vtl: GuestVtl) -> Result<u64, MsrError> {
2645 match msr {
2646 msr @ (hvdef::HV_X64_MSR_GUEST_OS_ID | hvdef::HV_X64_MSR_VP_INDEX) => {
2647 self.backing.cvm.hv[intercepted_vtl].msr_read(msr)
2648 }
2649 _ => self
2650 .backing
2651 .untrusted_synic
2652 .as_mut()
2653 .unwrap()
2654 .read_nontimer_msr(msr),
2655 }
2656 }
2657
2658 fn write_tdvmcall_msr(
2659 &mut self,
2660 msr: u32,
2661 value: u64,
2662 intercepted_vtl: GuestVtl,
2663 ) -> Result<(), MsrError> {
2664 match msr {
2665 hvdef::HV_X64_MSR_GUEST_OS_ID => {
2666 self.backing.cvm.hv[intercepted_vtl].msr_write_guest_os_id(value)
2667 }
2668 _ => {
2669 self.backing
2673 .untrusted_synic
2674 .as_mut()
2675 .unwrap()
2676 .write_nontimer_msr(
2677 msr,
2678 value,
2679 &mut UntrustedSynicVtlProts(&self.partition.gm[GuestVtl::Vtl0]),
2680 )?;
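                // Also reflect SINT writes into the hypervisor's VP registers so the real synic
                // stays in sync for proxied interrupts.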
2681 if matches!(msr, hvdef::HV_X64_MSR_SINT0..=hvdef::HV_X64_MSR_SINT15) {
2684 if let Err(err) = self.runner.set_vp_register(
2685 intercepted_vtl,
2686 HvX64RegisterName(
2687 HvX64RegisterName::Sint0.0 + (msr - hvdef::HV_X64_MSR_SINT0),
2688 ),
2689 value.into(),
2690 ) {
2691 tracelimit::warn_ratelimited!(
2692 CVM_ALLOWED,
2693 error = &err as &dyn std::error::Error,
2694 "failed to set sint register"
2695 );
2696 }
2697 }
2698 }
2699 }
2700
2701 Ok(())
2702 }
2703
2704 fn read_msr_tdx(&mut self, msr: u32, vtl: GuestVtl) -> Result<u64, MsrError> {
2705 match msr {
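            // MSRs that the TDX module leaves to the VMM are emulated here, mostly with benign
            // default values.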
2712 x86defs::X86X_IA32_MSR_PLATFORM_ID => {
2714 Ok(0)
2722 }
2723
2724 x86defs::X86X_MSR_MTRR_CAP => {
2725 Ok(0x400)
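                // 0x400: bit 10 (WC supported) set, no fixed-range MTRRs, zero variable ranges.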
2728 }
2729 x86defs::X86X_MSR_MTRR_DEF_TYPE => {
2730 Ok(0)
2734 }
2735 x86defs::X86X_MSR_CSTAR => Ok(self.backing.vtls[vtl].msr_cstar),
2736 x86defs::X86X_MSR_MCG_CAP => Ok(0),
2737 x86defs::X86X_MSR_MCG_STATUS => Ok(0),
2738 x86defs::X86X_MSR_MC_UPDATE_PATCH_LEVEL => Ok(0xFFFFFFFF),
2739 x86defs::X86X_MSR_XSS => Ok(self.backing.vtls[vtl].private_regs.msr_xss),
2740 x86defs::X86X_IA32_MSR_MISC_ENABLE => Ok(hv1_emulator::x86::MISC_ENABLE.into()),
2741 x86defs::X86X_IA32_MSR_FEATURE_CONTROL => Ok(VMX_FEATURE_CONTROL_LOCKED),
2742 x86defs::X86X_MSR_CR_PAT => {
2743 let pat = self.runner.read_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT);
2744 Ok(pat)
2745 }
2746
2747 x86defs::X86X_MSR_MISC_FEATURE_ENABLES
2750 | x86defs::X86X_MSR_PLATFORM_INFO
2751 | x86defs::X86X_MSR_PPIN_CTL
2752 | x86defs::X86X_IA32_MSR_SMI_COUNT
2753 | x86defs::X86X_MSR_UMWAIT_CONTROL
2754 | x86defs::X86X_AMD_MSR_DE_CFG
2755 | x86defs::X86X_IA32_MSR_RAPL_POWER_UNIT
2756 | x86defs::X86X_IA32_MSR_PKG_ENERGY_STATUS
2757 | x86defs::X86X_IA32_MSR_DRAM_ENERGY_STATUS
2758 | x86defs::X86X_IA32_MSR_PP0_ENERGY_STATUS => Ok(0),
2759
2760 hvdef::HV_X64_MSR_GUEST_IDLE => {
2761 self.backing.cvm.lapics[vtl].activity = MpState::Idle;
2762 self.clear_interrupt_shadow(vtl);
2763 Ok(0)
2764 }
2765 X86X_MSR_EFER => Ok(self.backing.vtls[vtl].efer),
2766
2767 _ => Err(MsrError::Unknown),
2768 }
2769 }
2770
2771 fn write_msr_tdx(&mut self, msr: u32, value: u64, vtl: GuestVtl) -> Result<(), MsrError> {
2772 let state = &mut self.backing.vtls[vtl].private_regs;
2773
2774 match msr {
2775 X86X_MSR_EFER => {
2776 self.write_efer(vtl, value)
2777 .map_err(|_| MsrError::InvalidAccess)?;
2778 self.update_execution_mode(vtl);
2779 }
2780 x86defs::X86X_MSR_STAR => state.msr_star = value,
2781 x86defs::X86X_MSR_CSTAR => self.backing.vtls[vtl].msr_cstar = value,
2782 x86defs::X86X_MSR_LSTAR => state.msr_lstar = value,
2783 x86defs::X86X_MSR_SFMASK => state.msr_sfmask = value,
2784 x86defs::X86X_MSR_TSC_AUX => state.msr_tsc_aux = value,
2785 x86defs::X86X_MSR_SYSENTER_CS => {
2786 self.runner.write_vmcs32(
2787 vtl,
2788 VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR,
2789 !0,
2790 value as u32,
2791 );
2792 }
2793 x86defs::X86X_MSR_SYSENTER_EIP => {
2794 self.runner.write_vmcs64(
2795 vtl,
2796 VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR,
2797 !0,
2798 value,
2799 );
2800 }
2801 x86defs::X86X_MSR_SYSENTER_ESP => {
2802 self.runner.write_vmcs64(
2803 vtl,
2804 VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR,
2805 !0,
2806 value,
2807 );
2808 }
2809 x86defs::X86X_MSR_XSS => state.msr_xss = value,
2810 x86defs::X86X_MSR_MC_UPDATE_PATCH_LEVEL => {
2811 if value != 0 {
2813 return Err(MsrError::InvalidAccess);
2814 }
2815 }
2816 x86defs::X86X_IA32_MSR_MISC_ENABLE => {}
2817 x86defs::X86X_MSR_CR_PAT => {
2818 self.runner
2819 .write_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_PAT, !0, value);
2820 }
2821
2822 x86defs::X86X_MSR_MCG_STATUS => {
2823 if x86defs::X86xMcgStatusRegister::from(value).reserved0() != 0 {
2825 return Err(MsrError::InvalidAccess);
2826 }
2827 }
2828
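            // MTRR writes are accepted and ignored; memory typing is effectively driven by PAT.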
2829 x86defs::X86X_MSR_MTRR_DEF_TYPE => {}
2831
2832 x86defs::X86X_MSR_BIOS_UPDT_TRIG => {}
2835
2836 x86defs::X86X_MSR_MISC_FEATURE_ENABLES
2839 | x86defs::X86X_MSR_PLATFORM_INFO
2840 | x86defs::X86X_MSR_PPIN_CTL
2841 | x86defs::X86X_IA32_MSR_SMI_COUNT
2842 | x86defs::X86X_MSR_UMWAIT_CONTROL
2843 | x86defs::X86X_AMD_MSR_DE_CFG
2844 | x86defs::X86X_IA32_MSR_RAPL_POWER_UNIT
2845 | x86defs::X86X_IA32_MSR_PKG_ENERGY_STATUS
2846 | x86defs::X86X_IA32_MSR_DRAM_ENERGY_STATUS
2847 | x86defs::X86X_IA32_MSR_PP0_ENERGY_STATUS => {}
2848
2849 _ => return Err(MsrError::Unknown),
2850 }
2851
2852 Ok(())
2853 }
2854
2855 fn write_segment(
2856 &mut self,
2857 vtl: GuestVtl,
2858 seg: TdxSegmentReg,
2859 reg: SegmentRegister,
2860 ) -> Result<(), vp_state::Error> {
2861 self.runner
2863 .write_vmcs16(vtl, seg.selector(), !0, reg.selector);
2864 self.runner.write_vmcs64(vtl, seg.base(), !0, reg.base);
2865 self.runner.write_vmcs32(vtl, seg.limit(), !0, reg.limit);
2866
2867 let mut attributes = x86defs::vmx::VmxSegmentAttributes::from(reg.attributes as u32);
2869 attributes.set_null(!attributes.present());
2870
2871 self.runner
2872 .write_vmcs32(vtl, seg.attributes(), !0, attributes.into());
2873
2874 Ok(())
2875 }
2876
2877 fn read_segment(&self, vtl: GuestVtl, seg: TdxSegmentReg) -> SegmentRegister {
2878 let selector = self.runner.read_vmcs16(vtl, seg.selector());
2879 let base = self.runner.read_vmcs64(vtl, seg.base());
2880 let limit = self.runner.read_vmcs32(vtl, seg.limit());
2881 let attributes = self.runner.read_vmcs32(vtl, seg.attributes());
2882
2883 SegmentRegister {
2884 selector,
2885 base,
2886 limit,
2887 attributes: attributes as u16,
2888 }
2889 }
2890
2891 fn long_mode(&self, vtl: GuestVtl) -> bool {
2892 let backing = &self.backing.vtls[vtl];
2893 backing.cr0.read(&self.runner) & X64_CR0_PE != 0 && backing.efer & X64_EFER_LMA != 0
2894 }
2895}
2896
2897impl<T: CpuIo> X86EmulatorSupport for UhEmulationState<'_, '_, T, TdxBacked> {
2898 type Error = UhRunVpError;
2899
2900 fn vp_index(&self) -> VpIndex {
2901 self.vp.vp_index()
2902 }
2903
2904 fn flush(&mut self) -> Result<(), Self::Error> {
2905 Ok(())
2907 }
2908
2909 fn vendor(&self) -> x86defs::cpuid::Vendor {
2910 self.vp.partition.caps.vendor
2911 }
2912
2913 fn gp(&mut self, reg: Gp) -> u64 {
2914 self.vp.runner.tdx_enter_guest_gps()[reg as usize]
2915 }
2916
2917 fn set_gp(&mut self, reg: Gp, v: u64) {
2918 self.vp.runner.tdx_enter_guest_gps_mut()[reg as usize] = v;
2919 }
2920
2921 fn xmm(&mut self, index: usize) -> u128 {
2922 u128::from_ne_bytes(self.vp.runner.fx_state().xmm[index])
2923 }
2924
2925 fn set_xmm(&mut self, index: usize, v: u128) -> Result<(), Self::Error> {
2926 self.vp.runner.fx_state_mut().xmm[index] = v.to_ne_bytes();
2927 Ok(())
2928 }
2929
2930 fn rip(&mut self) -> u64 {
2931 self.vp.backing.vtls[self.vtl].private_regs.rip
2932 }
2933
2934 fn set_rip(&mut self, v: u64) {
2935 self.vp.backing.vtls[self.vtl].private_regs.rip = v;
2936 }
2937
2938 fn segment(&mut self, index: Segment) -> x86defs::SegmentRegister {
2939 let tdx_segment_index = match index {
2940 Segment::CS => TdxSegmentReg::Cs,
2941 Segment::ES => TdxSegmentReg::Es,
2942 Segment::SS => TdxSegmentReg::Ss,
2943 Segment::DS => TdxSegmentReg::Ds,
2944 Segment::FS => TdxSegmentReg::Fs,
2945 Segment::GS => TdxSegmentReg::Gs,
2946 };
2947 let reg = match tdx_segment_index {
2948 TdxSegmentReg::Cs => self.cache.segs[index as usize]
2949 .get_or_insert_with(|| TdxExit(self.vp.runner.tdx_vp_enter_exit_info()).cs()),
2950 _ => self.cache.segs[index as usize]
2951 .get_or_insert_with(|| self.vp.read_segment(self.vtl, tdx_segment_index)),
2952 };
2953 (*reg).into()
2954 }
2955
2956 fn efer(&mut self) -> u64 {
2957 self.vp.backing.vtls[self.vtl].efer
2958 }
2959
2960 fn cr0(&mut self) -> u64 {
2961 let reg = self
2962 .cache
2963 .cr0
2964 .get_or_insert_with(|| self.vp.backing.vtls[self.vtl].cr0.read(&self.vp.runner));
2965 *reg
2966 }
2967
2968 fn rflags(&mut self) -> RFlags {
2969 self.vp.backing.vtls[self.vtl].private_regs.rflags.into()
2970 }
2971
2972 fn set_rflags(&mut self, v: RFlags) {
2973 self.vp.backing.vtls[self.vtl].private_regs.rflags = v.into();
2974 }
2975
2976 fn instruction_bytes(&self) -> &[u8] {
2977 &[]
2978 }
2979
2980 fn physical_address(&self) -> Option<u64> {
2981 TdxExit(self.vp.runner.tdx_vp_enter_exit_info()).gpa()
2982 }
2983
2984 fn initial_gva_translation(
2985 &mut self,
2986 ) -> Option<virt_support_x86emu::emulate::InitialTranslation> {
2987 let exit_info = TdxExit(self.vp.runner.tdx_vp_enter_exit_info());
2988 let ept_info = VmxEptExitQualification::from(exit_info.qualification());
2989
2990 if exit_info.code().vmx_exit().basic_reason() == VmxExitBasic::EPT_VIOLATION
2991 && ept_info.gva_valid()
2992 {
2993 Some(virt_support_x86emu::emulate::InitialTranslation {
2994 gva: exit_info.gla().expect("already validated EPT exit"),
2995 gpa: exit_info.gpa().expect("already validated EPT exit"),
2996 translate_mode: match ept_info.access_mask() {
2997 0x1 => TranslateMode::Read,
2998 0x2 | 0x3 => TranslateMode::Write,
3011 0x4 => TranslateMode::Execute,
3012 _ => panic!("unexpected ept access mask 0x{:x}", ept_info.access_mask()),
3013 },
3014 })
3015 } else {
3016 None
3017 }
3018 }
3019
3020 fn interruption_pending(&self) -> bool {
3021 self.interruption_pending
3022 }
3023
3024 fn check_vtl_access(
3025 &mut self,
3026 _gpa: u64,
3027 _mode: TranslateMode,
3028 ) -> Result<(), virt_support_x86emu::emulate::EmuCheckVtlAccessError<Self::Error>> {
3029 Ok(())
3031 }
3032
3033 fn translate_gva(
3034 &mut self,
3035 gva: u64,
3036 mode: TranslateMode,
3037 ) -> Result<
3038 Result<
3039 virt_support_x86emu::emulate::EmuTranslateResult,
3040 virt_support_x86emu::emulate::EmuTranslateError,
3041 >,
3042 Self::Error,
3043 > {
3044 emulate_translate_gva(self, gva, mode)
3045 }
3046
3047 fn inject_pending_event(&mut self, event_info: hvdef::HvX64PendingEvent) {
3048 assert!(event_info.reg_0.event_pending());
3049 assert_eq!(
3050 event_info.reg_0.event_type(),
3051 hvdef::HV_X64_PENDING_EVENT_EXCEPTION
3052 );
3053 assert!(!self.interruption_pending);
3054
3055 TdxBacked::set_pending_exception(
3058 self.vp,
3059 self.vtl,
3060 HvX64PendingExceptionEvent::from(event_info.reg_0.into_bits()),
3061 );
3062 }
3063
3064 fn is_gpa_mapped(&self, gpa: u64, write: bool) -> bool {
3065 let vtom = self.vp.partition.caps.vtom.unwrap_or(0);
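        // Mask off the vTOM bit so shared and private aliases of a page resolve to the same GPA.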
3068 debug_assert!(vtom == 0 || vtom.is_power_of_two());
3069 self.vp.partition.is_gpa_mapped(gpa & !vtom, write)
3070 }
3071
3072 fn lapic_base_address(&self) -> Option<u64> {
3073 self.vp.backing.cvm.lapics[self.vtl].lapic.base_address()
3074 }
3075
3076 fn lapic_read(&mut self, address: u64, data: &mut [u8]) {
3077 self.vp.backing.cvm.lapics[self.vtl]
3078 .lapic
3079 .access(&mut TdxApicClient {
3080 partition: self.vp.partition,
3081 dev: self.devices,
3082 vmtime: &self.vp.vmtime,
3083 apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
3084 vtl: self.vtl,
3085 })
3086 .mmio_read(address, data);
3087 }
3088
3089 fn lapic_write(&mut self, address: u64, data: &[u8]) {
3090 self.vp.backing.cvm.lapics[self.vtl]
3091 .lapic
3092 .access(&mut TdxApicClient {
3093 partition: self.vp.partition,
3094 dev: self.devices,
3095 vmtime: &self.vp.vmtime,
3096 apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
3097 vtl: self.vtl,
3098 })
3099 .mmio_write(address, data);
3100 }
3101}
3102
3103#[derive(Debug)]
3104enum TdxSegmentReg {
3105 Es,
3106 Cs,
3107 Ss,
3108 Ds,
3109 Fs,
3110 Gs,
3111 Ldtr,
3112 Tr,
3113}
3114
3115impl TdxSegmentReg {
3116 fn selector(&self) -> VmcsField {
3118 match self {
3119 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_SELECTOR,
3120 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_SELECTOR,
3121 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_SELECTOR,
3122 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_SELECTOR,
3123 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_SELECTOR,
3124 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_SELECTOR,
3125 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_SELECTOR,
3126 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_SELECTOR,
3127 }
3128 }
3129
3130 fn base(&self) -> VmcsField {
3132 match self {
3133 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_BASE,
3134 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_BASE,
3135 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_BASE,
3136 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_BASE,
3137 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_BASE,
3138 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_BASE,
3139 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_BASE,
3140 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_BASE,
3141 }
3142 }
3143
3144 fn limit(&self) -> VmcsField {
3146 match self {
3147 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_LIMIT,
3148 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_LIMIT,
3149 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_LIMIT,
3150 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_LIMIT,
3151 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_LIMIT,
3152 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_LIMIT,
3153 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_LIMIT,
3154 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_LIMIT,
3155 }
3156 }
3157
3158 fn attributes(&self) -> VmcsField {
3160 match self {
3161 Self::Es => VmcsField::VMX_VMCS_GUEST_ES_AR,
3162 Self::Cs => VmcsField::VMX_VMCS_GUEST_CS_AR,
3163 Self::Ss => VmcsField::VMX_VMCS_GUEST_SS_AR,
3164 Self::Ds => VmcsField::VMX_VMCS_GUEST_DS_AR,
3165 Self::Fs => VmcsField::VMX_VMCS_GUEST_FS_AR,
3166 Self::Gs => VmcsField::VMX_VMCS_GUEST_GS_AR,
3167 Self::Ldtr => VmcsField::VMX_VMCS_GUEST_LDTR_AR,
3168 Self::Tr => VmcsField::VMX_VMCS_GUEST_TR_AR,
3169 }
3170 }
3171}
3172
3173#[derive(Debug)]
3174enum TdxTableReg {
3175 Idtr,
3176 Gdtr,
3177}
3178
3179impl TdxTableReg {
3180 fn base_code(&self) -> VmcsField {
3181 match self {
3182 Self::Idtr => VmcsField::VMX_VMCS_GUEST_IDTR_BASE,
3183 Self::Gdtr => VmcsField::VMX_VMCS_GUEST_GDTR_BASE,
3184 }
3185 }
3186
3187 fn limit_code(&self) -> VmcsField {
3188 match self {
3189 Self::Idtr => VmcsField::VMX_VMCS_GUEST_IDTR_LIMIT,
3190 Self::Gdtr => VmcsField::VMX_VMCS_GUEST_GDTR_LIMIT,
3191 }
3192 }
3193}
3194
3195impl UhProcessor<'_, TdxBacked> {
3196 fn write_efer(&mut self, vtl: GuestVtl, efer: u64) -> Result<(), vp_state::Error> {
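        // Reject EFER values the backing cannot support: SVME and FFXSR are never allowed, and
        // NXE must remain set (checked below).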
3202 if efer & (X64_EFER_SVME | X64_EFER_FFXSR) != 0 {
3203 return Err(vp_state::Error::InvalidValue(
3204 efer,
3205 "EFER",
3206 "SVME or FFXSR set",
3207 ));
3208 }
3209
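        // EFER.NXE is required to stay set for TDX guests.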
3210 if efer & X64_EFER_NXE == 0 {
3212 return Err(vp_state::Error::InvalidValue(efer, "EFER", "NXE not set"));
3213 }
3214
3215 if self.backing.vtls[vtl].efer != efer {
3217 self.backing.vtls[vtl].efer = efer;
3218 self.runner
3219 .write_vmcs64(vtl, VmcsField::VMX_VMCS_GUEST_EFER, !0, efer);
3220 }
3221
3222 Ok(())
3223 }
3224
3225 fn read_cr0(&self, vtl: GuestVtl) -> u64 {
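        // CR0 is shadowed; return the guest-visible value rather than the raw VMCS value.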
3228 self.backing.vtls[vtl].cr0.read(&self.runner)
3229 }
3230
3231 fn write_cr0(&mut self, vtl: GuestVtl, value: u64) -> Result<(), vp_state::Error> {
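        // CR0.ET is architecturally fixed to 1, so force it on every write.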
3233 self.backing.vtls[vtl]
3234 .cr0
3235 .write(value | X64_CR0_ET, &mut self.runner)
3236 }
3237
3238 fn read_cr4(&self, vtl: GuestVtl) -> u64 {
3239 self.backing.vtls[vtl].cr4.read(&self.runner)
3240 }
3241
3242 fn write_cr4(&mut self, vtl: GuestVtl, value: u64) -> Result<(), vp_state::Error> {
3243 self.backing.vtls[vtl].cr4.write(value, &mut self.runner)
3244 }
3245
3246 fn write_table_register(&mut self, vtl: GuestVtl, table: TdxTableReg, reg: TableRegister) {
3247 self.runner
3248 .write_vmcs64(vtl, table.base_code(), !0, reg.base);
3249 self.runner
3250 .write_vmcs32(vtl, table.limit_code(), !0, reg.limit.into());
3251 }
3252
3253 fn read_table_register(&self, vtl: GuestVtl, table: TdxTableReg) -> TableRegister {
3254 let base = self.runner.read_vmcs64(vtl, table.base_code());
3255 let limit = self.runner.read_vmcs32(vtl, table.limit_code());
3256
3257 TableRegister {
3258 base,
3259 limit: limit as u16,
3260 }
3261 }
3262
3263 fn update_execution_mode(&mut self, vtl: GuestVtl) {
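        // Keep EFER.LMA and the VM-entry long-mode control consistent with EFER.LME and CR0.PG.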
3265 let lme = self.backing.vtls[vtl].efer & X64_EFER_LME == X64_EFER_LME;
3266 let pg = self.read_cr0(vtl) & X64_CR0_PG == X64_CR0_PG;
3267 let efer_lma = self.backing.vtls[vtl].efer & X64_EFER_LMA == X64_EFER_LMA;
3268 let lma = lme && pg;
3269
3270 if lma != efer_lma {
3271 let new_efer = self.backing.vtls[vtl].efer ^ X64_EFER_LMA;
3273 self.write_efer(vtl, new_efer)
3274 .expect("EFER was valid before, it should still be valid");
3275 }
3276
3277 self.runner.write_vmcs32(
3278 vtl,
3279 VmcsField::VMX_VMCS_ENTRY_CONTROLS,
3280 VMX_ENTRY_CONTROL_LONG_MODE_GUEST,
3281 if lma {
3282 VMX_ENTRY_CONTROL_LONG_MODE_GUEST
3283 } else {
3284 0
3285 },
3286 );
3287 }
3288
3289 async fn emulate_gdtr_or_idtr(
3290 &mut self,
3291 vtl: GuestVtl,
3292 dev: &impl CpuIo,
3293 ) -> Result<(), VpHaltReason<UhRunVpError>> {
3294 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
3295 assert_eq!(
3296 exit_info.code().vmx_exit().basic_reason(),
3297 VmxExitBasic::GDTR_OR_IDTR
3298 );
3299 let instr_info = GdtrOrIdtrInstructionInfo::from(exit_info.instr_info().info());
3300
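        // Loads of the descriptor tables require CPL 0; stores also fault when CR4.UMIP is set
        // and CPL > 0.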
3301 if (instr_info.instruction().is_load() && exit_info.cpl() != 0)
3304 || (!instr_info.instruction().is_load()
3305 && exit_info.cpl() > 0
3306 && self.read_cr4(vtl) & X64_CR4_UMIP != 0)
3307 {
3308 self.inject_gpf(vtl);
3309 return Ok(());
3310 }
3311
3312 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3313 exit_info.qualification(),
3314 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3315 (!instr_info.index_register_invalid()).then_some(instr_info.index_register()),
3316 instr_info.scaling(),
3317 instr_info.address_size(),
3318 instr_info.segment_register(),
3319 );
3320
3321 let gm = &self.partition.gm[vtl];
3322 let interruption_pending = self.backing.vtls[vtl].interruption_information.valid();
3323 let len = 2 + if self.long_mode(vtl) { 8 } else { 4 };
3324 let mut buf = [0u8; 10];
3325
3326 match instr_info.instruction() {
3327 GdtrOrIdtrInstruction::Sidt | GdtrOrIdtrInstruction::Sgdt => {
3328 let table = self.read_table_register(
3329 vtl,
3330 if matches!(instr_info.instruction(), GdtrOrIdtrInstruction::Sidt) {
3331 TdxTableReg::Idtr
3332 } else {
3333 TdxTableReg::Gdtr
3334 },
3335 );
3336 buf[..2].copy_from_slice(&table.limit.to_le_bytes());
3337 buf[2..].copy_from_slice(&table.base.to_le_bytes());
3338 let mut emulation_state = UhEmulationState {
3339 vp: &mut *self,
3340 interruption_pending,
3341 devices: dev,
3342 vtl,
3343 cache: TdxEmulationCache::default(),
3344 };
3345 emulate_insn_memory_op(
3346 &mut emulation_state,
3347 gm,
3348 dev,
3349 gva,
3350 segment,
3351 x86emu::AlignmentMode::Unaligned,
3352 EmulatedMemoryOperation::Write(&buf[..len]),
3353 )
3354 .await?;
3355 }
3356
3357 GdtrOrIdtrInstruction::Lgdt | GdtrOrIdtrInstruction::Lidt => {
3358 let mut emulation_state = UhEmulationState {
3359 vp: &mut *self,
3360 interruption_pending,
3361 devices: dev,
3362 vtl,
3363 cache: TdxEmulationCache::default(),
3364 };
3365 emulate_insn_memory_op(
3366 &mut emulation_state,
3367 gm,
3368 dev,
3369 gva,
3370 segment,
3371 x86emu::AlignmentMode::Unaligned,
3372 EmulatedMemoryOperation::Read(&mut buf[..len]),
3373 )
3374 .await?;
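                // Only `len` bytes were read; the rest of `buf` stays zero, so the base below is
                // zero-extended outside long mode.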
3375 let table = TableRegister {
3376 limit: u16::from_le_bytes(buf[..2].try_into().unwrap()),
3377                     base: u64::from_le_bytes(buf[2..].try_into().unwrap()),
3378 };
3379 self.write_table_register(
3380 vtl,
3381 if matches!(instr_info.instruction(), GdtrOrIdtrInstruction::Lidt) {
3382 TdxTableReg::Idtr
3383 } else {
3384 TdxTableReg::Gdtr
3385 },
3386 table,
3387 );
3388 }
3389 }
3390
3391 self.advance_to_next_instruction(vtl);
3392 Ok(())
3393 }
3394
3395 async fn emulate_ldtr_or_tr(
3396 &mut self,
3397 vtl: GuestVtl,
3398 dev: &impl CpuIo,
3399 ) -> Result<(), VpHaltReason<UhRunVpError>> {
3400 let exit_info = TdxExit(self.runner.tdx_vp_enter_exit_info());
3401 assert_eq!(
3402 exit_info.code().vmx_exit().basic_reason(),
3403 VmxExitBasic::LDTR_OR_TR
3404 );
3405 let instr_info = LdtrOrTrInstructionInfo::from(exit_info.instr_info().info());
3406
3407 if (instr_info.instruction().is_load() && exit_info.cpl() != 0)
3410 || (!instr_info.instruction().is_load()
3411 && exit_info.cpl() > 0
3412 && self.read_cr4(vtl) & X64_CR4_UMIP != 0)
3413 {
3414 self.inject_gpf(vtl);
3415 return Ok(());
3416 }
3417
3418 let gm = &self.partition.gm[vtl];
3419 let interruption_pending = self.backing.vtls[vtl].interruption_information.valid();
3420
3421 match instr_info.instruction() {
3422 LdtrOrTrInstruction::Sldt | LdtrOrTrInstruction::Str => {
3423 let value = self.runner.read_vmcs16(
3424 vtl,
3425 if matches!(instr_info.instruction(), LdtrOrTrInstruction::Sldt) {
3426 TdxSegmentReg::Ldtr
3427 } else {
3428 TdxSegmentReg::Tr
3429 }
3430 .selector(),
3431 );
3432
3433 if instr_info.memory_or_register() {
3434 let gps = self.runner.tdx_enter_guest_gps_mut();
3435 gps[instr_info.register_1() as usize] = value.into();
3436 } else {
3437 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3438 exit_info.qualification(),
3439 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3440 (!instr_info.index_register_invalid())
3441 .then_some(instr_info.index_register()),
3442 instr_info.scaling(),
3443 instr_info.address_size(),
3444 instr_info.segment_register(),
3445 );
3446 let mut emulation_state = UhEmulationState {
3447 vp: &mut *self,
3448 interruption_pending,
3449 devices: dev,
3450 vtl,
3451 cache: TdxEmulationCache::default(),
3452 };
3453 emulate_insn_memory_op(
3454 &mut emulation_state,
3455 gm,
3456 dev,
3457 gva,
3458 segment,
3459 x86emu::AlignmentMode::Standard,
3460 EmulatedMemoryOperation::Write(&value.to_le_bytes()),
3461 )
3462 .await?;
3463 }
3464 }
3465
3466 LdtrOrTrInstruction::Lldt | LdtrOrTrInstruction::Ltr => {
3467 let value = if instr_info.memory_or_register() {
3468 let gps = self.runner.tdx_enter_guest_gps();
3469 gps[instr_info.register_1() as usize] as u16
3470 } else {
3471 let (gva, segment) = self.compute_gva_for_table_access_emulation(
3472 exit_info.qualification(),
3473 (!instr_info.base_register_invalid()).then_some(instr_info.base_register()),
3474 (!instr_info.index_register_invalid())
3475 .then_some(instr_info.index_register()),
3476 instr_info.scaling(),
3477 instr_info.address_size(),
3478 instr_info.segment_register(),
3479 );
3480 let mut emulation_state = UhEmulationState {
3481 vp: &mut *self,
3482 interruption_pending,
3483 devices: dev,
3484 vtl,
3485 cache: TdxEmulationCache::default(),
3486 };
3487 let mut buf = [0u8; 2];
3488 emulate_insn_memory_op(
3489 &mut emulation_state,
3490 gm,
3491 dev,
3492 gva,
3493 segment,
3494 x86emu::AlignmentMode::Standard,
3495 EmulatedMemoryOperation::Read(&mut buf),
3496 )
3497 .await?;
3498 u16::from_le_bytes(buf)
3499 };
3500 self.runner.write_vmcs16(
3501 vtl,
3502 if matches!(instr_info.instruction(), LdtrOrTrInstruction::Lldt) {
3503 TdxSegmentReg::Ldtr
3504 } else {
3505 TdxSegmentReg::Tr
3506 }
3507 .selector(),
3508 !0,
3509 value,
3510 );
3511 }
3512 }
3513
3514 self.advance_to_next_instruction(vtl);
3515 Ok(())
3516 }
3517
3518 fn compute_gva_for_table_access_emulation(
3519 &self,
3520 qualification: u64,
3521 base_reg: Option<u8>,
3522 index_reg: Option<u8>,
3523 scaling: u8,
3524 address_size: u8,
3525 segment_register: u8,
3526 ) -> (u64, Segment) {
3527 let gps = self.runner.tdx_enter_guest_gps();
3528
3529 let mut gva = qualification;
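        // For descriptor-table exits the qualification holds the displacement; add the base and
        // scaled index registers from the instruction encoding.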
3531 if let Some(base_register) = base_reg {
3532 gva += gps[base_register as usize];
3533 }
3534 if let Some(index_register) = index_reg {
3535 gva += gps[index_register as usize] << scaling;
3536 }
3537 match address_size {
3538 0 => gva &= 0xFFFF,
3540 1 => gva &= 0xFFFFFFFF,
3542 2 => {}
3544 _ => unreachable!(),
3545 }
3546
3547 let segment = match segment_register {
3548 0 => Segment::ES,
3549 1 => Segment::CS,
3550 2 => Segment::SS,
3551 3 => Segment::DS,
3552 4 => Segment::FS,
3553 5 => Segment::GS,
3554 _ => unreachable!(),
3555 };
3556
3557 (gva, segment)
3558 }
3559}
3560
3561struct TdxApicClient<'a, T> {
3562 partition: &'a UhPartitionInner,
3563 apic_page: &'a mut ApicPage,
3564 dev: &'a T,
3565 vmtime: &'a VmTimeAccess,
3566 vtl: GuestVtl,
3567}
3568
3569impl<T: CpuIo> ApicClient for TdxApicClient<'_, T> {
3570 fn cr8(&mut self) -> u32 {
3571 self.apic_page.tpr.value >> 4
3572 }
3573
3574 fn set_cr8(&mut self, value: u32) {
3575 self.apic_page.tpr.value = value << 4;
3576 }
3577
3578 fn set_apic_base(&mut self, _value: u64) {
3579 }
3581
3582 fn wake(&mut self, vp_index: VpIndex) {
3583 self.partition.vps[vp_index.index() as usize].wake(self.vtl, WakeReason::INTCON);
3584 }
3585
3586 fn eoi(&mut self, vector: u8) {
3587 self.dev.handle_eoi(vector.into())
3588 }
3589
3590 fn now(&mut self) -> vmcore::vmtime::VmTime {
3591 self.vmtime.now()
3592 }
3593
3594 fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]) {
3595 pull_apic_offload(self.apic_page)
3596 }
3597}
3598
3599fn pull_apic_offload(page: &mut ApicPage) -> ([u32; 8], [u32; 8]) {
3600 let mut irr = [0; 8];
3601 let mut isr = [0; 8];
3602 for (((irr, page_irr), isr), page_isr) in irr
3603 .iter_mut()
3604 .zip(page.irr.iter_mut())
3605 .zip(isr.iter_mut())
3606 .zip(page.isr.iter_mut())
3607 {
3608 *irr = std::mem::take(&mut page_irr.value);
3609 *isr = std::mem::take(&mut page_isr.value);
3610 }
3611 (irr, isr)
3612}
3613
3614impl<T> hv1_hypercall::X64RegisterState for UhHypercallHandler<'_, '_, T, TdxBacked> {
3615 fn rip(&mut self) -> u64 {
3616 self.vp.backing.vtls[self.intercepted_vtl].private_regs.rip
3617 }
3618
3619 fn set_rip(&mut self, rip: u64) {
3620 self.vp.backing.vtls[self.intercepted_vtl].private_regs.rip = rip;
3621 }
3622
3623 fn gp(&mut self, n: hv1_hypercall::X64HypercallRegister) -> u64 {
3624 let gps = self.vp.runner.tdx_enter_guest_gps();
3625 match n {
3626 hv1_hypercall::X64HypercallRegister::Rax => gps[TdxGp::RAX],
3627 hv1_hypercall::X64HypercallRegister::Rcx => gps[TdxGp::RCX],
3628 hv1_hypercall::X64HypercallRegister::Rdx => gps[TdxGp::RDX],
3629 hv1_hypercall::X64HypercallRegister::Rbx => gps[TdxGp::RBX],
3630 hv1_hypercall::X64HypercallRegister::Rsi => gps[TdxGp::RSI],
3631 hv1_hypercall::X64HypercallRegister::Rdi => gps[TdxGp::RDI],
3632 hv1_hypercall::X64HypercallRegister::R8 => gps[TdxGp::R8],
3633 }
3634 }
3635
3636 fn set_gp(&mut self, n: hv1_hypercall::X64HypercallRegister, value: u64) {
3637 let gps = self.vp.runner.tdx_enter_guest_gps_mut();
3638 match n {
3639 hv1_hypercall::X64HypercallRegister::Rax => gps[TdxGp::RAX] = value,
3640 hv1_hypercall::X64HypercallRegister::Rcx => gps[TdxGp::RCX] = value,
3641 hv1_hypercall::X64HypercallRegister::Rdx => gps[TdxGp::RDX] = value,
3642 hv1_hypercall::X64HypercallRegister::Rbx => gps[TdxGp::RBX] = value,
3643 hv1_hypercall::X64HypercallRegister::Rsi => gps[TdxGp::RSI] = value,
3644 hv1_hypercall::X64HypercallRegister::Rdi => gps[TdxGp::RDI] = value,
3645 hv1_hypercall::X64HypercallRegister::R8 => gps[TdxGp::R8] = value,
3646 }
3647 }
3648
3649 fn xmm(&mut self, n: usize) -> u128 {
3651 u128::from_ne_bytes(self.vp.runner.fx_state().xmm[n])
3652 }
3653
3654 fn set_xmm(&mut self, n: usize, value: u128) {
3655 self.vp.runner.fx_state_mut().xmm[n] = value.to_ne_bytes();
3656 }
3657}
3658
3659impl<T: CpuIo> UhHypercallHandler<'_, '_, T, TdxBacked> {
3660 const TDX_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
3661 Self,
3662 [
3663 hv1_hypercall::HvModifySparseGpaPageHostVisibility,
3664 hv1_hypercall::HvQuerySparseGpaPageHostVisibility,
3665 hv1_hypercall::HvX64StartVirtualProcessor,
3666 hv1_hypercall::HvGetVpIndexFromApicId,
3667 hv1_hypercall::HvRetargetDeviceInterrupt,
3668 hv1_hypercall::HvFlushVirtualAddressList,
3669 hv1_hypercall::HvFlushVirtualAddressListEx,
3670 hv1_hypercall::HvFlushVirtualAddressSpace,
3671 hv1_hypercall::HvFlushVirtualAddressSpaceEx,
3672 hv1_hypercall::HvPostMessage,
3673 hv1_hypercall::HvSignalEvent,
3674 hv1_hypercall::HvExtQueryCapabilities,
3675 hv1_hypercall::HvGetVpRegisters,
3676 hv1_hypercall::HvSetVpRegisters,
3677 hv1_hypercall::HvEnablePartitionVtl,
3678 hv1_hypercall::HvX64EnableVpVtl,
3679 hv1_hypercall::HvVtlCall,
3680 hv1_hypercall::HvVtlReturn,
3681 hv1_hypercall::HvModifyVtlProtectionMask,
3682 hv1_hypercall::HvX64TranslateVirtualAddress,
3683 hv1_hypercall::HvSendSyntheticClusterIpi,
3684 hv1_hypercall::HvSendSyntheticClusterIpiEx,
3685 hv1_hypercall::HvInstallIntercept,
3686 hv1_hypercall::HvAssertVirtualInterrupt,
3687 ]
3688 );
3689
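    // Dispatcher for hypercalls tunneled over TDVMCALL. These are untrusted, so only the synic
    // message and event hypercalls are allowed.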
3690 const TDCALL_DISPATCHER: hv1_hypercall::Dispatcher<Self> = hv1_hypercall::dispatcher!(
3694 Self,
3695 [hv1_hypercall::HvPostMessage, hv1_hypercall::HvSignalEvent],
3696 );
3697}
3698
3699impl AccessVpState for UhVpStateAccess<'_, '_, TdxBacked> {
3700 type Error = vp_state::Error;
3701
3702 fn caps(&self) -> &virt::x86::X86PartitionCapabilities {
3703 &self.vp.partition.caps
3704 }
3705
3706 fn commit(&mut self) -> Result<(), Self::Error> {
3707 Ok(())
3708 }
3709
3710 fn registers(&mut self) -> Result<Registers, Self::Error> {
3711 let gps = self.vp.runner.tdx_enter_guest_gps();
3712
3713 let cs = self.vp.read_segment(self.vtl, TdxSegmentReg::Cs);
3714 let ds = self.vp.read_segment(self.vtl, TdxSegmentReg::Ds);
3715 let es = self.vp.read_segment(self.vtl, TdxSegmentReg::Es);
3716 let fs = self.vp.read_segment(self.vtl, TdxSegmentReg::Fs);
3717 let gs = self.vp.read_segment(self.vtl, TdxSegmentReg::Gs);
3718 let ss = self.vp.read_segment(self.vtl, TdxSegmentReg::Ss);
3719 let tr = self.vp.read_segment(self.vtl, TdxSegmentReg::Tr);
3720 let ldtr = self.vp.read_segment(self.vtl, TdxSegmentReg::Ldtr);
3721
3722 let gdtr = self.vp.read_table_register(self.vtl, TdxTableReg::Gdtr);
3723 let idtr = self.vp.read_table_register(self.vtl, TdxTableReg::Idtr);
3724
3725 let cr0 = self.vp.read_cr0(self.vtl);
3726 let cr2 = self.vp.runner.cr2();
3727 let cr3 = self
3728 .vp
3729 .runner
3730 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_CR3);
3731 let cr4 = self.vp.read_cr4(self.vtl);
3732
3733 let cr8 = self.vp.runner.tdx_apic_page(self.vtl).tpr.value >> 4;
3734
3735 let efer = self.vp.backing.vtls[self.vtl].efer;
3736
3737 Ok(Registers {
3738 rax: gps[TdxGp::RAX],
3739 rcx: gps[TdxGp::RCX],
3740 rdx: gps[TdxGp::RDX],
3741 rbx: gps[TdxGp::RBX],
3742 rsp: self.vp.backing.vtls[self.vtl].private_regs.rsp,
3743 rbp: gps[TdxGp::RBP],
3744 rsi: gps[TdxGp::RSI],
3745 rdi: gps[TdxGp::RDI],
3746 r8: gps[TdxGp::R8],
3747 r9: gps[TdxGp::R9],
3748 r10: gps[TdxGp::R10],
3749 r11: gps[TdxGp::R11],
3750 r12: gps[TdxGp::R12],
3751 r13: gps[TdxGp::R13],
3752 r14: gps[TdxGp::R14],
3753 r15: gps[TdxGp::R15],
3754 rip: self.vp.backing.vtls[self.vtl].private_regs.rip,
3755 rflags: self.vp.backing.vtls[self.vtl].private_regs.rflags,
3756 cs,
3757 ds,
3758 es,
3759 fs,
3760 gs,
3761 ss,
3762 tr,
3763 ldtr,
3764 gdtr,
3765 idtr,
3766 cr0,
3767 cr2,
3768 cr3,
3769 cr4,
3770 cr8: cr8.into(),
3771 efer,
3772 })
3773 }
3774
3775 fn set_registers(&mut self, value: &Registers) -> Result<(), Self::Error> {
3776 let Registers {
3777 rax,
3778 rcx,
3779 rdx,
3780 rbx,
3781 rsp,
3782 rbp,
3783 rsi,
3784 rdi,
3785 r8,
3786 r9,
3787 r10,
3788 r11,
3789 r12,
3790 r13,
3791 r14,
3792 r15,
3793 rip,
3794 rflags,
3795 cs,
3796 ds,
3797 es,
3798 fs,
3799 gs,
3800 ss,
3801 tr,
3802 ldtr,
3803 gdtr,
3804 idtr,
3805 cr0,
3806 cr2,
3807 cr3,
3808 cr4,
3809 cr8,
3810 efer,
3811 } = value;
3812
3813 let gps = self.vp.runner.tdx_enter_guest_gps_mut();
3814 gps[TdxGp::RAX] = *rax;
3815 gps[TdxGp::RCX] = *rcx;
3816 gps[TdxGp::RDX] = *rdx;
3817 gps[TdxGp::RBX] = *rbx;
3818 self.vp.backing.vtls[self.vtl].private_regs.rsp = *rsp;
3819 gps[TdxGp::RBP] = *rbp;
3820 gps[TdxGp::RSI] = *rsi;
3821 gps[TdxGp::RDI] = *rdi;
3822 gps[TdxGp::R8] = *r8;
3823 gps[TdxGp::R9] = *r9;
3824 gps[TdxGp::R10] = *r10;
3825 gps[TdxGp::R11] = *r11;
3826 gps[TdxGp::R12] = *r12;
3827 gps[TdxGp::R13] = *r13;
3828 gps[TdxGp::R14] = *r14;
3829 gps[TdxGp::R15] = *r15;
3830 self.vp.backing.vtls[self.vtl].private_regs.rip = *rip;
3831 self.vp.backing.vtls[self.vtl].private_regs.rflags = *rflags;
3833
3834 self.vp.write_segment(self.vtl, TdxSegmentReg::Cs, *cs)?;
3836 self.vp.write_segment(self.vtl, TdxSegmentReg::Ds, *ds)?;
3837 self.vp.write_segment(self.vtl, TdxSegmentReg::Es, *es)?;
3838 self.vp.write_segment(self.vtl, TdxSegmentReg::Fs, *fs)?;
3839 self.vp.write_segment(self.vtl, TdxSegmentReg::Gs, *gs)?;
3840 self.vp.write_segment(self.vtl, TdxSegmentReg::Ss, *ss)?;
3841 self.vp.write_segment(self.vtl, TdxSegmentReg::Tr, *tr)?;
3842 self.vp
3843 .write_segment(self.vtl, TdxSegmentReg::Ldtr, *ldtr)?;
3844
3845 self.vp
3847 .write_table_register(self.vtl, TdxTableReg::Gdtr, *gdtr);
3848 self.vp
3849 .write_table_register(self.vtl, TdxTableReg::Idtr, *idtr);
3850
3851 self.vp.write_cr0(self.vtl, *cr0)?;
3852
3853 self.vp.runner.set_cr2(*cr2);
3856
3857 self.vp
3858 .runner
3859 .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_CR3, !0, *cr3);
3860
3861 self.vp.write_cr4(self.vtl, *cr4)?;
3862
3863 self.vp.runner.tdx_apic_page_mut(self.vtl).tpr.value = (*cr8 << 4) as u32;
3864
3865 self.vp.write_efer(self.vtl, *efer)?;
3866
3867 self.vp.update_execution_mode(self.vtl);
3869
3870 Ok(())
3871 }
3872
3873 fn activity(&mut self) -> Result<vp::Activity, Self::Error> {
3874 let lapic = &self.vp.backing.cvm.lapics[self.vtl];
3875 let interruptibility: Interruptibility = self
3876 .vp
3877 .runner
3878 .read_vmcs32(self.vtl, VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY)
3879 .into();
3880 Ok(vp::Activity {
3881 mp_state: lapic.activity,
3882 nmi_pending: lapic.nmi_pending,
3883 nmi_masked: interruptibility.blocked_by_nmi(),
3884 interrupt_shadow: interruptibility.blocked_by_sti()
3885 || interruptibility.blocked_by_movss(),
3886             pending_event: None,
3887             pending_interruption: None,
3888         })
3889 }
3890
3891 fn set_activity(&mut self, value: &vp::Activity) -> Result<(), Self::Error> {
3892 let &vp::Activity {
3893 mp_state,
3894 nmi_pending,
3895 nmi_masked,
3896 interrupt_shadow,
3897             pending_event: _,
3898             pending_interruption: _,
3899         } = value;
3900 self.vp.backing.cvm.lapics[self.vtl].activity = mp_state;
3901 self.vp.backing.cvm.lapics[self.vtl].nmi_pending = nmi_pending;
3902 let interruptibility = Interruptibility::new()
3903 .with_blocked_by_movss(interrupt_shadow)
3904 .with_blocked_by_nmi(nmi_masked);
3905 self.vp.runner.write_vmcs32(
3906 self.vtl,
3907 VmcsField::VMX_VMCS_GUEST_INTERRUPTIBILITY,
3908 !0,
3909 interruptibility.into(),
3910 );
3911 Ok(())
3912 }
3913
3914 fn xsave(&mut self) -> Result<vp::Xsave, Self::Error> {
3915 Err(vp_state::Error::Unimplemented("xsave"))
3917 }
3918
3919 fn set_xsave(&mut self, _value: &vp::Xsave) -> Result<(), Self::Error> {
3920 Err(vp_state::Error::Unimplemented("xsave"))
3922 }
3923
3924 fn apic(&mut self) -> Result<vp::Apic, Self::Error> {
3925 self.vp.access_apic_without_offload(self.vtl, |vp| {
3926 Ok(vp.backing.cvm.lapics[self.vtl].lapic.save())
3927 })
3928 }
3929
3930 fn set_apic(&mut self, value: &vp::Apic) -> Result<(), Self::Error> {
3931 self.vp.access_apic_without_offload(self.vtl, |vp| {
3932 vp.backing.cvm.lapics[self.vtl]
3933 .lapic
3934 .restore(value)
3935 .map_err(vp_state::Error::InvalidApicBase)?;
3936
3937 Ok(())
3938 })
3939 }
3940
3941 fn xcr(&mut self) -> Result<vp::Xcr0, Self::Error> {
3942 Ok(vp::Xcr0 {
3943 value: self
3944 .vp
3945 .runner
3946 .get_vp_register(self.vtl, HvX64RegisterName::Xfem)
3947 .unwrap()
3948 .as_u64(),
3949 })
3950 }
3951
3952 fn set_xcr(&mut self, _value: &vp::Xcr0) -> Result<(), Self::Error> {
3953 Err(vp_state::Error::Unimplemented("xcr"))
3954 }
3955
3956 fn xss(&mut self) -> Result<vp::Xss, Self::Error> {
3957 Ok(vp::Xss {
3958 value: self.vp.backing.vtls[self.vtl].private_regs.msr_xss,
3959 })
3960 }
3961
3962 fn set_xss(&mut self, value: &vp::Xss) -> Result<(), Self::Error> {
3963 self.vp.backing.vtls[self.vtl].private_regs.msr_xss = value.value;
3964 Ok(())
3965 }
3966
3967 fn mtrrs(&mut self) -> Result<vp::Mtrrs, Self::Error> {
3968 Ok(vp::Mtrrs {
3969             msr_mtrr_def_type: 0,
3970             fixed: [0; 11],
3971             variable: [0; 16],
3972         })
3973 }
3974
3975 fn set_mtrrs(&mut self, _value: &vp::Mtrrs) -> Result<(), Self::Error> {
3976 Ok(())
3978 }
3979
3980 fn pat(&mut self) -> Result<vp::Pat, Self::Error> {
3981 let msr_cr_pat = self
3982 .vp
3983 .runner
3984 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_PAT);
3985 Ok(vp::Pat { value: msr_cr_pat })
3986 }
3987
3988 fn set_pat(&mut self, value: &vp::Pat) -> Result<(), Self::Error> {
3989 self.vp
3990 .runner
3991 .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_PAT, !0, value.value);
3992 Ok(())
3993 }
3994
3995 fn virtual_msrs(&mut self) -> Result<vp::VirtualMsrs, Self::Error> {
3996 let state = &self.vp.backing.vtls[self.vtl].private_regs;
3997
3998 let sysenter_cs = self
3999 .vp
4000 .runner
4001 .read_vmcs32(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR)
4002 .into();
4003 let sysenter_eip = self
4004 .vp
4005 .runner
4006 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR);
4007 let sysenter_esp = self
4008 .vp
4009 .runner
4010 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR);
4011
4012 Ok(vp::VirtualMsrs {
4013 kernel_gs_base: state.msr_kernel_gs_base,
4014 sysenter_cs,
4015 sysenter_eip,
4016 sysenter_esp,
4017 star: state.msr_star,
4018 lstar: state.msr_lstar,
4019 cstar: self.vp.backing.vtls[self.vtl].msr_cstar,
4020 sfmask: state.msr_sfmask,
4021 })
4022 }
4023
4024 fn set_virtual_msrs(&mut self, value: &vp::VirtualMsrs) -> Result<(), Self::Error> {
4025 let &vp::VirtualMsrs {
4026 kernel_gs_base,
4027 sysenter_cs,
4028 sysenter_eip,
4029 sysenter_esp,
4030 star,
4031 lstar,
4032 cstar,
4033 sfmask,
4034 } = value;
4035
4036 let state = &mut self.vp.backing.vtls[self.vtl].private_regs;
4037 state.msr_kernel_gs_base = kernel_gs_base;
4038 state.msr_star = star;
4039 state.msr_lstar = lstar;
4040 state.msr_sfmask = sfmask;
4041
4042 self.vp.runner.write_vmcs32(
4043 self.vtl,
4044 VmcsField::VMX_VMCS_GUEST_SYSENTER_CS_MSR,
4045 !0,
4046 sysenter_cs as u32,
4047 );
4048 self.vp.runner.write_vmcs64(
4049 self.vtl,
4050 VmcsField::VMX_VMCS_GUEST_SYSENTER_EIP_MSR,
4051 !0,
4052 sysenter_eip,
4053 );
4054 self.vp.runner.write_vmcs64(
4055 self.vtl,
4056 VmcsField::VMX_VMCS_GUEST_SYSENTER_ESP_MSR,
4057 !0,
4058 sysenter_esp,
4059 );
4060
4061 self.vp.backing.vtls[self.vtl].msr_cstar = cstar;
4062
4063 Ok(())
4064 }
4065
4066 fn debug_regs(&mut self) -> Result<vp::DebugRegisters, Self::Error> {
4067 let mut values = [0u64.into(); 5];
4068 self.vp
4069 .runner
4070 .get_vp_registers(
4071 self.vtl,
4072 &[
4073 HvX64RegisterName::Dr0,
4074 HvX64RegisterName::Dr1,
4075 HvX64RegisterName::Dr2,
4076 HvX64RegisterName::Dr3,
4077 HvX64RegisterName::Dr6,
4078 ],
4079 &mut values,
4080 )
4081 .map_err(vp_state::Error::GetRegisters)?;
4082
4083 let dr7 = self
4084 .vp
4085 .runner
4086 .read_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_DR7);
4087
4088 Ok(vp::DebugRegisters {
4089 dr0: values[0].as_u64(),
4090 dr1: values[1].as_u64(),
4091 dr2: values[2].as_u64(),
4092 dr3: values[3].as_u64(),
4093 dr6: values[4].as_u64(),
4094 dr7,
4095 })
4096 }
4097
4098 fn set_debug_regs(&mut self, value: &vp::DebugRegisters) -> Result<(), Self::Error> {
4099 let &vp::DebugRegisters {
4100 dr0,
4101 dr1,
4102 dr2,
4103 dr3,
4104 dr6,
4105 dr7,
4106 } = value;
4107 self.vp
4108 .runner
4109 .set_vp_registers(
4110 self.vtl,
4111 [
4112 (HvX64RegisterName::Dr0, dr0),
4113 (HvX64RegisterName::Dr1, dr1),
4114 (HvX64RegisterName::Dr2, dr2),
4115 (HvX64RegisterName::Dr3, dr3),
4116 (HvX64RegisterName::Dr6, dr6),
4117 ],
4118 )
4119 .map_err(vp_state::Error::SetRegisters)?;
4120
4121 self.vp
4122 .runner
4123 .write_vmcs64(self.vtl, VmcsField::VMX_VMCS_GUEST_DR7, !0, dr7);
4124
4125 Ok(())
4126 }
4127
4128 fn tsc(&mut self) -> Result<vp::Tsc, Self::Error> {
4129 Err(vp_state::Error::Unimplemented("tsc"))
4130 }
4131
4132 fn set_tsc(&mut self, _value: &vp::Tsc) -> Result<(), Self::Error> {
4133 Err(vp_state::Error::Unimplemented("tsc"))
4134 }
4135
4136 fn tsc_aux(&mut self) -> Result<vp::TscAux, Self::Error> {
4137 Ok(vp::TscAux {
4138 value: self.vp.backing.vtls[self.vtl].private_regs.msr_tsc_aux,
4139 })
4140 }
4141
4142 fn set_tsc_aux(&mut self, value: &vp::TscAux) -> Result<(), Self::Error> {
4143 self.vp.backing.vtls[self.vtl].private_regs.msr_tsc_aux = value.value;
4144 Ok(())
4145 }
4146
4147 fn cet(&mut self) -> Result<vp::Cet, Self::Error> {
4148 Err(vp_state::Error::Unimplemented("cet"))
4149 }
4150
4151 fn set_cet(&mut self, _value: &vp::Cet) -> Result<(), Self::Error> {
4152 Err(vp_state::Error::Unimplemented("cet"))
4153 }
4154
4155 fn cet_ss(&mut self) -> Result<vp::CetSs, Self::Error> {
4156 Err(vp_state::Error::Unimplemented("cet_ss"))
4157 }
4158
4159 fn set_cet_ss(&mut self, _value: &vp::CetSs) -> Result<(), Self::Error> {
4160 Err(vp_state::Error::Unimplemented("cet_ss"))
4161 }
4162
4163 fn synic_msrs(&mut self) -> Result<vp::SyntheticMsrs, Self::Error> {
4164 Err(vp_state::Error::Unimplemented("synic_msrs"))
4165 }
4166
4167 fn set_synic_msrs(&mut self, _value: &vp::SyntheticMsrs) -> Result<(), Self::Error> {
4168 Err(vp_state::Error::Unimplemented("synic_msrs"))
4169 }
4170
4171 fn synic_message_page(&mut self) -> Result<vp::SynicMessagePage, Self::Error> {
4172 Err(vp_state::Error::Unimplemented("synic_message_page"))
4173 }
4174
4175 fn set_synic_message_page(&mut self, _value: &vp::SynicMessagePage) -> Result<(), Self::Error> {
4176 Err(vp_state::Error::Unimplemented("synic_message_page"))
4177 }
4178
4179 fn synic_event_flags_page(&mut self) -> Result<vp::SynicEventFlagsPage, Self::Error> {
4180 Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
4181 }
4182
4183 fn set_synic_event_flags_page(
4184 &mut self,
4185 _value: &vp::SynicEventFlagsPage,
4186 ) -> Result<(), Self::Error> {
4187 Err(vp_state::Error::Unimplemented("synic_event_flags_page"))
4188 }
4189
4190 fn synic_message_queues(&mut self) -> Result<vp::SynicMessageQueues, Self::Error> {
4191 Err(vp_state::Error::Unimplemented("synic_message_queues"))
4192 }
4193
4194 fn set_synic_message_queues(
4195 &mut self,
4196 _value: &vp::SynicMessageQueues,
4197 ) -> Result<(), Self::Error> {
4198 Err(vp_state::Error::Unimplemented("synic_message_queues"))
4199 }
4200
4201 fn synic_timers(&mut self) -> Result<vp::SynicTimers, Self::Error> {
4202 Err(vp_state::Error::Unimplemented("synic_timers"))
4203 }
4204
4205 fn set_synic_timers(&mut self, _value: &vp::SynicTimers) -> Result<(), Self::Error> {
4206 Err(vp_state::Error::Unimplemented("synic_timers"))
4207 }
4208}
4209
4210fn top_vector(reg: &[ApicRegister; 8]) -> u8 {
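    // Returns the highest vector whose bit is set across the eight 32-bit IRR/ISR registers,
    // or 0 if none are set.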
4214 reg.iter()
4215 .enumerate()
4216 .rev()
4217 .find_map(|(i, r)| {
4218 (r.value != 0).then(|| (i as u32 * 32 + (31 - r.value.leading_zeros())) as u8)
4219 })
4220 .unwrap_or(0)
4221}
4222
4223struct TdHypercall<'a, 'b, T>(UhHypercallHandler<'a, 'b, T, TdxBacked>);
4224
4225impl<'a, 'b, T> AsHandler<UhHypercallHandler<'a, 'b, T, TdxBacked>> for TdHypercall<'a, 'b, T> {
4226 fn as_handler(&mut self) -> &mut UhHypercallHandler<'a, 'b, T, TdxBacked> {
4227 &mut self.0
4228 }
4229}
4230
4231impl<T> HypercallIo for TdHypercall<'_, '_, T> {
4232 fn advance_ip(&mut self) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = 0;
        self.0.vp.backing.vtls[self.0.intercepted_vtl]
            .private_regs
            .rip = self.0.vp.backing.vtls[self.0.intercepted_vtl]
            .private_regs
            .rip
            .wrapping_add(4);
    }

    fn retry(&mut self, control: u64) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R10] = control;
        self.set_result(hvdef::hypercall::HypercallOutput::from(HvError::Timeout).into());
    }

    fn control(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::R10]
    }

    fn input_gpa(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::RDX]
    }

    fn output_gpa(&mut self) -> u64 {
        self.0.vp.runner.tdx_enter_guest_gps()[TdxGp::R8]
    }

    fn fast_register_pair_count(&mut self) -> usize {
        7
    }

    fn extended_fast_hypercalls_ok(&mut self) -> bool {
        false
    }

    fn fast_input(&mut self, buf: &mut [[u64; 2]], _output_register_pairs: usize) -> usize {
        self.fast_regs(0, buf);
        buf.len()
    }

    fn fast_output(&mut self, _starting_pair_index: usize, buf: &[[u64; 2]]) {
        assert!(buf.is_empty());
    }

    fn vtl_input(&mut self) -> u64 {
        unreachable!()
    }

    fn set_result(&mut self, n: u64) {
        self.0.vp.runner.tdx_enter_guest_gps_mut()[TdxGp::R11] = n;
    }

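    // Register pair 0 is carried in RDX/R8; the remaining fast-input pairs
    // come from the guest's XMM registers (XMM0 upward).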
    fn fast_regs(&mut self, starting_pair_index: usize, buf: &mut [[u64; 2]]) {
        let regs = self.0.vp.runner.tdx_enter_guest_gps();
        let fx_state = self.0.vp.runner.fx_state();
        for (i, [low, high]) in buf.iter_mut().enumerate() {
            let index = i + starting_pair_index;
            if index == 0 {
                *low = regs[TdxGp::RDX];
                *high = regs[TdxGp::R8];
            } else {
                let value = u128::from_ne_bytes(fx_state.xmm[index - 1]);
                *low = value as u64;
                *high = (value >> 64) as u64;
            }
        }
    }
}

impl<T> hv1_hypercall::VtlSwitchOps for UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn advance_ip(&mut self) {
        let long_mode = self.vp.long_mode(self.intercepted_vtl);
        let mut io = hv1_hypercall::X64RegisterIo::new(self, long_mode);
        io.advance_ip();
    }

    fn inject_invalid_opcode_fault(&mut self) {
        self.vp.backing.vtls[self.intercepted_vtl].interruption_information =
            InterruptionInformation::new()
                .with_valid(true)
                .with_interruption_type(INTERRUPT_TYPE_HARDWARE_EXCEPTION)
                .with_vector(x86defs::Exception::INVALID_OPCODE.0);
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressList for UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn flush_virtual_address_list(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
        gva_ranges: &[HvGvaRange],
    ) -> HvRepResult {
        hv1_hypercall::FlushVirtualAddressListEx::flush_virtual_address_list_ex(
            self,
            processor_set,
            flags,
            gva_ranges,
        )
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressListEx
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_list_ex(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
        gva_ranges: &[HvGvaRange],
    ) -> HvRepResult {
        self.hcvm_validate_flush_inputs(processor_set, flags, true)
            .map_err(|e| (e, 0))?;

        let vtl = self.intercepted_vtl;
        let flush_state = &self.vp.shared.flush_state[vtl];

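        // Queue the requested ranges. If they cannot all be buffered (too many
        // ranges, or a format the flush list cannot represent), fall back to
        // flushing the entire address space for this VTL.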
        if let Err(()) = Self::add_ranges_to_tlb_flush_list(
            flush_state,
            gva_ranges,
            flags.use_extended_range_format(),
        ) {
            if flags.non_global_mappings_only() {
                flush_state
                    .flush_entire_non_global_counter
                    .fetch_add(1, Ordering::Relaxed);
            } else {
                flush_state
                    .flush_entire_counter
                    .fetch_add(1, Ordering::Relaxed);
            }
        }

        TdxTlbLockFlushAccess {
            vp_index: Some(self.vp.vp_index()),
            partition: self.vp.partition,
            shared: self.vp.shared,
        }
        .wake_processors_for_tlb_flush(vtl, (!flags.all_processors()).then_some(processor_set));

        self.vp.set_wait_for_tlb_locks(vtl);

        Ok(())
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressSpace
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_space(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
    ) -> hvdef::HvResult<()> {
        hv1_hypercall::FlushVirtualAddressSpaceEx::flush_virtual_address_space_ex(
            self,
            processor_set,
            flags,
        )
    }
}

impl<T: CpuIo> hv1_hypercall::FlushVirtualAddressSpaceEx
    for UhHypercallHandler<'_, '_, T, TdxBacked>
{
    fn flush_virtual_address_space_ex(
        &mut self,
        processor_set: ProcessorSet<'_>,
        flags: HvFlushFlags,
    ) -> hvdef::HvResult<()> {
        self.hcvm_validate_flush_inputs(processor_set, flags, false)?;
        let vtl = self.intercepted_vtl;

        let flush_state = &self.vp.shared.flush_state[vtl];

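        // An address-space flush invalidates everything for the target VTL;
        // record whether only non-global mappings need to be invalidated.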
        if flags.non_global_mappings_only() {
            flush_state
                .flush_entire_non_global_counter
                .fetch_add(1, Ordering::Relaxed);
        } else {
            flush_state
                .flush_entire_counter
                .fetch_add(1, Ordering::Relaxed);
        }

        TdxTlbLockFlushAccess {
            vp_index: Some(self.vp.vp_index()),
            partition: self.vp.partition,
            shared: self.vp.shared,
        }
        .wake_processors_for_tlb_flush(vtl, (!flags.all_processors()).then_some(processor_set));

        self.vp.set_wait_for_tlb_locks(vtl);

        Ok(())
    }
}

impl<T: CpuIo> UhHypercallHandler<'_, '_, T, TdxBacked> {
    fn add_ranges_to_tlb_flush_list(
        flush_state: &TdxPartitionFlushState,
        gva_ranges: &[HvGvaRange],
        use_extended_range_format: bool,
    ) -> Result<(), ()> {
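        // Only a bounded number of ranges can be buffered per flush; tell the
        // caller to fall back to a full flush if the request is larger.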
        if gva_ranges.len() > FLUSH_GVA_LIST_SIZE {
            return Err(());
        }

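        // Extended-format entries that describe large pages are not handled by
        // the per-range flush path, so they also force a full flush.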
        if use_extended_range_format
            && gva_ranges
                .iter()
                .any(|range| range.as_extended().large_page())
        {
            return Err(());
        }

        flush_state
            .gva_list
            .write()
            .extend(gva_ranges.iter().copied());

        Ok(())
    }
}

impl TdxTlbLockFlushAccess<'_> {
    fn wake_processors_for_tlb_flush(
        &mut self,
        target_vtl: GuestVtl,
        processor_set: Option<ProcessorSet<'_>>,
    ) {
        match processor_set {
            Some(processors) => {
                self.wake_processors_for_tlb_flush_inner(target_vtl, processors);
            }
            None => self.wake_processors_for_tlb_flush_inner(
                target_vtl,
                0..(self.partition.vps.len() as u32),
            ),
        }
    }

    fn wake_processors_for_tlb_flush_inner(
        &mut self,
        target_vtl: GuestVtl,
        processors: impl IntoIterator<Item = u32>,
    ) {
        std::sync::atomic::fence(Ordering::SeqCst);
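        // The fence above makes the flush-state updates visible before each
        // VP's currently active VTL is sampled. Kick only the processors that
        // are running the target VTL so they exit and service the pending
        // flush.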
        self.partition.hcl.kick_cpus(
            processors.into_iter().filter(|&vp| {
                self.shared.active_vtl[vp as usize].load(Ordering::Relaxed) == target_vtl as u8
            }),
            true,
            true,
        );
    }
}

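/// Access to the TDX partition's TLB flush and TLB lock state, optionally on
/// behalf of a specific VP.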
struct TdxTlbLockFlushAccess<'a> {
    vp_index: Option<VpIndex>,
    partition: &'a UhPartitionInner,
    shared: &'a TdxBackedShared,
}

impl TlbFlushLockAccess for TdxTlbLockFlushAccess<'_> {
    fn flush(&mut self, vtl: GuestVtl) {
        self.shared.flush_state[vtl]
            .flush_entire_counter
            .fetch_add(1, Ordering::Relaxed);

        self.wake_processors_for_tlb_flush(vtl, None);
        self.set_wait_for_tlb_locks(vtl);
    }

    fn flush_entire(&mut self) {
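        // Bump the flush-entire counter for both VTLs first, then wake the
        // processors and wait for the TLB locks.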
        for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
            self.shared.flush_state[vtl]
                .flush_entire_counter
                .fetch_add(1, Ordering::Relaxed);
        }
        for vtl in [GuestVtl::Vtl0, GuestVtl::Vtl1] {
            self.wake_processors_for_tlb_flush(vtl, None);
            self.set_wait_for_tlb_locks(vtl);
        }
    }

    fn set_wait_for_tlb_locks(&mut self, vtl: GuestVtl) {
        if let Some(vp_index) = self.vp_index {
            hardware_cvm::tlb_lock::TlbLockAccess {
                vp_index,
                cvm_partition: &self.shared.cvm,
            }
            .set_wait_for_tlb_locks(vtl);
        }
    }
}

mod save_restore {
    use super::TdxBacked;
    use super::UhProcessor;
    use vmcore::save_restore::RestoreError;
    use vmcore::save_restore::SaveError;
    use vmcore::save_restore::SaveRestore;
    use vmcore::save_restore::SavedStateNotSupported;

    impl SaveRestore for UhProcessor<'_, TdxBacked> {
        type SavedState = SavedStateNotSupported;

        fn save(&mut self) -> Result<Self::SavedState, SaveError> {
            Err(SaveError::NotSupported)
        }

        fn restore(&mut self, state: Self::SavedState) -> Result<(), RestoreError> {
            match state {}
        }
    }
}