Support TDX SMP

Hsy-Intel
2025-03-12 21:54:17 +00:00
committed by Tate, Hongliang Tian
parent af4cf19eb4
commit 148695194f
5 changed files with 140 additions and 38 deletions

@@ -135,6 +135,7 @@ fn efi_phase_runtime(memory_map: MemoryMapOwned, boot_params: &mut BootParams) -
#[cfg(feature = "cvm_guest")]
uefi::table::boot::MemoryType::UNACCEPTED => {
unsafe {
crate::console::print_str("[EFI stub] Accepting pending pages...\n");
for page_idx in 0..md.page_count {
tdx_guest::tdcall::accept_page(0, md.phys_start + page_idx * PAGE_SIZE)
.unwrap();
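
For reference, the acceptance loop above can be read as a standalone helper. This is only a sketch mirroring the call shown in the diff: the `accept_page(0, gpa)` signature and the meaning of its first argument (level 0, i.e. a 4 KiB page) are assumptions taken from the call above, and `PAGE_SIZE` is assumed to be 4096 here.

// Sketch only: accept an unaccepted physical range page by page, mirroring
// the loop in the EFI stub above. The `accept_page(0, gpa)` call and its
// error handling are assumptions based on the diff, not a verified API.
const PAGE_SIZE: u64 = 4096; // assumed page size for this sketch

unsafe fn accept_unaccepted_range(phys_start: u64, page_count: u64) {
    for page_idx in 0..page_count {
        // Each pending page must be accepted before the kernel uses it.
        tdx_guest::tdcall::accept_page(0, phys_start + page_idx * PAGE_SIZE)
            .expect("failed to accept a pending page");
    }
}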

@@ -14,13 +14,77 @@ IA32_APIC_BASE = 0x1B
IA32_X2APIC_APICID = 0x802
MMIO_XAPIC_APICID = 0xFEE00020
ap_real_mode_boot:
start:
cli // disable interrupts
cld
xor ax, ax // clear ax
mov ds, ax // clear ds
// In an Intel Trust Domain (TD), the APs woken up by the operating system start in long mode.
// We can detect this case from the value of the CS register.
// FIXME: This check does not affect booting via linux-efi-handover64,
// multiboot, or multiboot2 in non-TDX environments. However, it is not
// guaranteed to hold for other boot methods added in the future.
mov ax, cs
cmp ax, 0x38
jne ap_real_mode_boot
.code64
ap_long_mode_tdx:
// The local APIC ID is stored in r8d by the Intel TDX Virtual Firmware (TDVF).
mov edi, r8d
lgdt [boot_gdtr]
// Enable PAE and PGE.
mov rax, cr4
or rax, 0xa0
mov cr4, rax
// Load the page table. The application processors reuse the
// bootstrap processor's boot-phase page table.
mov rax, 0
mov rax, __boot_page_table_pointer
mov cr3, rax
push 0x8
mov rax, offset ap_long_mode_in_low_address
push rax
retfq
ap_long_mode_in_low_address:
mov ax, 0
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
// Update RIP to use the virtual address.
mov rax, offset ap_long_mode
jmp rax
ap_long_mode:
// The local APIC ID is in the RDI.
mov rax, rdi
shl rax, 3
// Setup the stack.
mov rbx, [__ap_boot_stack_array_pointer]
mov rsp, [rbx + rax]
xor rbp, rbp
// Go to Rust code.
mov rax, offset ap_early_entry
call rax
.extern halt # bsp_boot.S
jmp halt
.code16
ap_real_mode_boot:
lgdt [ap_gdtr] // load gdt
mov eax, cr0
@@ -86,9 +150,9 @@ x2apic_mode:
// This is a pointer to the page table used by the APs.
// The BSP will fill this pointer before kicking the APs.
.global __boot_page_table_pointer
.align 4
.align 8
__boot_page_table_pointer:
.skip 4
.skip 8
ap_protect:
// Save the local APIC ID in an unused register.
@@ -125,19 +189,6 @@ ap_protect:
ljmp 0x8, offset ap_long_mode_in_low_address
.code64
ap_long_mode_in_low_address:
mov ax, 0
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
// Update RIP to use the virtual address.
mov rax, offset ap_long_mode
jmp rax
.data
// This is a pointer to be filled by the BSP when boot stacks
// of all APs are allocated and initialized.
@@ -145,22 +196,3 @@ ap_long_mode_in_low_address:
.align 8
__ap_boot_stack_array_pointer:
.skip 8
.text
.code64
ap_long_mode:
// The local APIC ID is in the RDI.
mov rax, rdi
shl rax, 3
// Setup the stack.
mov rbx, [__ap_boot_stack_array_pointer]
mov rsp, [rbx + rax]
xor rbp, rbp
// Go to Rust code.
mov rax, offset ap_early_entry
call rax
.extern halt # bsp_boot.S
jmp halt
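
For clarity, the stack lookup in `ap_long_mode` above (`shl rax, 3` followed by the indexed load through `__ap_boot_stack_array_pointer`) is equivalent to indexing an array of 8-byte stack-top values by the local APIC ID. A minimal Rust sketch of that lookup, with a hypothetical helper name:

// Sketch only: the stack lookup performed by `ap_long_mode`, expressed in
// Rust. `stack_array` stands for the buffer whose address the BSP writes
// into `__ap_boot_stack_array_pointer`; each entry is an 8-byte stack top.
unsafe fn ap_boot_stack_top(stack_array: *const u64, local_apic_id: u64) -> u64 {
    // `shl rax, 3` multiplies the APIC ID by 8 (the entry size), and
    // `mov rsp, [rbx + rax]` loads the entry at that byte offset.
    unsafe { *stack_array.add(local_apic_id as usize) }
}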

@@ -27,6 +27,8 @@
//! This sequence does not need to be strictly followed, and there may be
//! different considerations in different systems.
use cfg_if::cfg_if;
use crate::{
arch::x86::kernel::{
acpi::get_acpi_tables,
@@ -38,6 +40,14 @@ use crate::{
mm::{paddr_to_vaddr, PAGE_SIZE},
};
cfg_if! {
if #[cfg(feature = "cvm_guest")] {
use tdx_guest::tdx_is_enabled;
use crate::arch::x86::kernel::acpi::AcpiMemoryHandler;
use acpi::platform::wakeup_aps;
}
}
/// Get the number of processors
///
/// This function needs to be called after the OS initializes the ACPI table.
@@ -59,11 +69,30 @@ pub(crate) fn get_num_processors() -> Option<u32> {
}
/// Brings up all application processors.
pub(crate) fn bringup_all_aps() {
pub(crate) fn bringup_all_aps(num_cpus: u32) {
copy_ap_boot_code();
fill_boot_stack_array_ptr();
fill_boot_pt_ptr();
cfg_if! {
if #[cfg(feature = "cvm_guest")] {
if tdx_is_enabled() {
for ap_num in 1..num_cpus {
wakeup_aps(
&ACPI_TABLES.get().unwrap().lock(),
AcpiMemoryHandler {},
ap_num,
AP_BOOT_START_PA as u64,
1000,
)
.unwrap();
}
} else {
send_boot_ipis();
}
} else {
send_boot_ipis();
}
}
}
/// This is where the linker loads the symbols in the `.ap_boot` section.
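
As a usage note, the new `num_cpus` argument is expected to come from the ACPI-backed `get_num_processors()` declared above; the real call site is in the `boot_all_aps` change further below. A hypothetical, minimal caller for illustration only:

// Sketch only (hypothetical caller): wire the ACPI processor count into the
// new `bringup_all_aps(num_cpus)` signature. `unwrap_or(1)` treats missing
// ACPI information as a single-processor system; this fallback is an
// assumption, not taken from the commit.
fn bring_up_aps_sketch() {
    let num_cpus = get_num_processors().unwrap_or(1);
    if num_cpus > 1 {
        bringup_all_aps(num_cpus);
    }
}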

@@ -7,6 +7,8 @@ use core::sync::atomic::{AtomicBool, Ordering};
use spin::Once;
#[cfg(feature = "cvm_guest")]
use crate::mm::frame::allocator;
use crate::{
arch::boot::smp::{bringup_all_aps, get_num_processors},
cpu,
@@ -98,9 +100,12 @@ pub fn boot_all_aps() {
log::info!("Booting all application processors...");
bringup_all_aps();
bringup_all_aps(num_cpus);
wait_for_all_aps_started();
#[cfg(feature = "cvm_guest")]
allocator::reclaim_tdx_ap_boot_memory();
log::info!("All application processors started. The BSP continues to run.");
}

@@ -356,3 +356,38 @@ pub(crate) unsafe fn init_early_allocator() {
let mut early_allocator = EARLY_ALLOCATOR.lock();
*early_allocator = Some(EarlyFrameAllocator::new());
}
#[cfg(feature = "cvm_guest")]
pub(crate) fn reclaim_tdx_ap_boot_memory() {
let regions = &crate::boot::EARLY_INFO.get().unwrap().memory_regions;
for region in regions.iter() {
if region.typ() == MemoryRegionType::Usable {
// Make the memory region page-aligned, and skip if it is too small.
let start = region.base().align_up(PAGE_SIZE) / PAGE_SIZE;
let region_end = region.base().checked_add(region.len()).unwrap();
let end = region_end.align_down(PAGE_SIZE) / PAGE_SIZE;
if end <= start {
continue;
}
// The address 0x800000 is temporarily used for AP boot in the Intel TDX environment.
// This frame should be added back to the page allocator after AP initialization.
if (start..end).contains(&(0x800000 / PAGE_SIZE)) {
info!(
"Found usable region, start:{:x}, end:{:x}",
region.base(),
region.base() + region.len()
);
FRAME_ALLOCATOR
.get()
.unwrap()
.disable_irq()
.lock()
.allocator
.add_frame(start, end);
FRAME_ALLOCATOR.get().unwrap().disable_irq().lock().total +=
(end - start) * PAGE_SIZE;
}
}
}
}
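
For clarity, the `start`/`end` computation above converts a byte range into page frame indices, shrinking the range inward so only whole pages are counted, and the region is re-added to the allocator only if it contains the AP boot frame at physical address 0x800000. A self-contained sketch of that arithmetic, with free-standing stand-ins for the kernel's `align_up`/`align_down`:

// Sketch only: the frame-index arithmetic used by `reclaim_tdx_ap_boot_memory`.
// `align_up`/`align_down` here are stand-ins for the kernel's helpers.
const PAGE_SIZE: usize = 4096;

fn align_up(addr: usize, align: usize) -> usize {
    (addr + align - 1) & !(align - 1)
}

fn align_down(addr: usize, align: usize) -> usize {
    addr & !(align - 1)
}

/// Converts a byte range into a half-open range of page frame indices,
/// returning `None` if the range does not cover a whole page.
fn frame_range(base: usize, len: usize) -> Option<(usize, usize)> {
    let start = align_up(base, PAGE_SIZE) / PAGE_SIZE;
    let end = align_down(base.checked_add(len)?, PAGE_SIZE) / PAGE_SIZE;
    (end > start).then_some((start, end))
}

// E.g., a usable region containing physical address 0x800000 yields a frame
// range that includes frame index 0x800000 / PAGE_SIZE == 0x800.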