Don't treat APIC IDs as CPU IDs

Authored by Ruihan Li on 2025-05-19 15:45:21 +08:00; committed by Junyang Zhang
parent d7cd0244ff
commit 0a27a1f37b
5 changed files with 136 additions and 119 deletions

View File

@ -140,12 +140,26 @@ impl Drop for IrqCallbackHandle {
 }
 }

+// ####### Inter-Processor Interrupts (IPIs) #######
+
+/// Hardware-specific, architecture-dependent CPU ID.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct HwCpuId(u32);
+
+impl HwCpuId {
+    pub(crate) fn read_current() -> Self {
+        // TODO: Support SMP in RISC-V.
+        Self(0)
+    }
+}
+
 /// Sends a general inter-processor interrupt (IPI) to the specified CPU.
 ///
 /// # Safety
 ///
-/// The caller must ensure that the CPU ID and the interrupt number corresponds
-/// to a safe function to call.
-pub(crate) unsafe fn send_ipi(cpu_id: CpuId, irq_num: u8) {
+/// The caller must ensure that the interrupt number is valid and that
+/// the corresponding handler is configured correctly on the remote CPU.
+/// Furthermore, invoking the interrupt handler must also be safe.
+pub(crate) unsafe fn send_ipi(hw_cpu_id: HwCpuId, irq_num: u8) {
     unimplemented!()
 }
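
The new `send_ipi` contract takes a `HwCpuId` rather than a logical `CpuId`. As a rough sketch (not part of this commit), a platform-neutral caller would first translate the logical ID through a boot-time table; the `hw_cpu_ids` slice and the helper below are hypothetical names chosen to match the mapping introduced later in this commit:

    // Hypothetical helper: translate a logical CPU ID into the
    // hardware CPU ID before calling the arch-specific `send_ipi`.
    fn send_ipi_to(hw_cpu_ids: &[HwCpuId], cpu_id: CpuId, irq_num: u8) {
        // The table is built once during boot, indexed by logical CPU ID.
        let hw_cpu_id = hw_cpu_ids[cpu_id.as_usize()];
        // SAFETY: `irq_num` is assumed to be a valid IRQ line whose
        // handler is correctly configured and safe to invoke remotely.
        unsafe { send_ipi(hw_cpu_id, irq_num) };
    }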

View File

@ -8,11 +8,15 @@
 .section ".ap_boot", "awx"
 .align 4096

-IA32_APIC_BASE_MSR = 0x1B
-IA32_X2APIC_APICID_MSR = 0x802
-IA32_EFER_MSR = 0xC0000080
+IA32_EFER_MSR = 0xC0000080
+IA32_EFER_BIT_LME = 1 << 8
+IA32_EFER_BIT_NXE = 1 << 11

-XAPIC_APICID_MMIO_ADDR = 0xFEE00020
+CR0_BIT_PE = 1 << 0
+CR0_BIT_PG = 1 << 31
+CR4_BIT_PAE = 1 << 5
+CR4_BIT_PGE = 1 << 7

 .macro setup_64bit_gdt_and_page_table eax
     // Use the 64-bit GDT.
@ -22,12 +26,12 @@ XAPIC_APICID_MMIO_ADDR = 0xFEE00020
     // Set the NX bit support in the EFER MSR.
     mov ecx, IA32_EFER_MSR
     rdmsr
-    or eax, 1 << 11 // support no-execute PTE flag
+    or eax, IA32_EFER_BIT_NXE
     wrmsr

     // Enable PAE and PGE.
     mov \eax, cr4
-    or \eax, 0xa0
+    or \eax, CR4_BIT_PAE | CR4_BIT_PGE
     mov cr4, \eax

     // Set the page table. The application processors use
@ -50,15 +54,6 @@ ap_boot_from_long_mode:
     cli // disable interrupts
     cld

-    // The firmware stores the local APIC ID in R8D, see:
-    // <https://github.com/tianocore/edk2/blob/14b730cde8bfd56bba10cf78b24338b6a59b989f/OvmfPkg/TdxDxe/X64/ApRunLoop.nasm#L67-L73>.
-    // FIXME: This is an implementation detail of the specific firmware. We
-    // should NOT rely on it. We should NOT even try to rely on the local APIC
-    // ID, because the APIC IDs on real hardware may NOT be contiguous (i.e.,
-    // there may be holes where the holes do not represent logical processors).
-    // We should compute the CPU ID ourselves using atomic operations.
-    mov edi, r8d

     setup_64bit_gdt_and_page_table rax

     // Some firmware seems to provide per-AP stacks that we can use. However,
@ -80,10 +75,10 @@ ap_real_mode:
     lgdt [ap_gdtr] // load gdt

     mov eax, cr0
-    or eax, 1
+    or eax, CR0_BIT_PE
     mov cr0, eax // enable protected mode

-    ljmp 0x8, offset ap_protect_entry
+    ljmp 0x8, offset ap_protect_mode

 // 32-bit AP GDT.
 .align 16
@ -102,42 +97,25 @@ ap_gdtr:
 .align 4
 .code32
-ap_protect_entry:
+ap_protect_mode:
     mov ax, 0x10
     mov ds, ax
     mov ss, ax

-    // Get the local APIC ID from xAPIC or x2APIC.
-    // It is better to get this information in protected mode.
-    // After entering long mode, we need to set additional page
-    // table mapping for xAPIC mode mmio region.
+    setup_64bit_gdt_and_page_table eax

-    // Tell if it is xAPIC or x2APIC.
-    // IA32_APIC_BASE register:
-    // - bit 8:     BSP (processor is BSP)
-    // - bit 10:    EXTD (enable x2APIC mode)
-    // - bit 11:    EN (xAPIC global enable/disable)
-    // - bit 12-35: APIC Base (base physical address)
-    mov ecx, IA32_APIC_BASE_MSR
+    // Enable long mode.
+    mov ecx, IA32_EFER_MSR
     rdmsr
-    and eax, 0x400 // check EXTD bit
-    cmp eax, 0x400
-    je x2apic_mode
+    or eax, IA32_EFER_BIT_LME
+    wrmsr

-xapic_mode:
-    // In xAPIC mode, the local APIC ID is stored in
-    // the MMIO region.
-    mov eax, [XAPIC_APICID_MMIO_ADDR]
-    shr eax, 24
-    jmp ap_protect
+    // Enable paging.
+    mov eax, cr0
+    or eax, CR0_BIT_PG
+    mov cr0, eax

-x2apic_mode:
-    // In x2APIC mode, the local APIC ID is stored in
-    // IA32_X2APIC_APICID MSR.
-    mov ecx, IA32_X2APIC_APICID_MSR
-    rdmsr
-    jmp ap_protect
+    ljmp 0x8, offset ap_long_mode_in_low_address

 // This is a pointer to the page table used by the APs.
 // The BSP will fill this pointer before kicking the APs.
@ -146,29 +124,6 @@ x2apic_mode:
 __boot_page_table_pointer:
     .skip 4

-ap_protect:
-    // Save the local APIC ID in an unused register.
-    // We will calculate the stack pointer of this core
-    // by taking the local apic id as the offset.
-    mov edi, eax
-
-    // Now we try getting into long mode.
-    setup_64bit_gdt_and_page_table eax
-
-    // Enable long mode.
-    mov ecx, IA32_EFER_MSR
-    rdmsr
-    or eax, 1 << 8
-    wrmsr
-
-    // Enable paging.
-    mov eax, cr0
-    or eax, 1 << 31
-    mov cr0, eax
-
-    ljmp 0x8, offset ap_long_mode_in_low_address
.code64
ap_long_mode_in_low_address:
mov ax, 0
@ -188,12 +143,17 @@ ap_long_mode_in_low_address:
 .global __ap_boot_info_array_pointer
 .align 8
 __ap_boot_info_array_pointer:
-    .skip 8
+    .quad 0
+
+__ap_boot_cpu_id_tail:
+    .quad 1

 .text
 .code64
 ap_long_mode:
-    // The local APIC ID is in the RDI.
+    mov rdi, 1
+    lock xadd [__ap_boot_cpu_id_tail], rdi
+
+    // The CPU ID is in the RDI.
     mov rax, rdi
     shl rax, 4 // 16-byte `PerApRawInfo`
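
The `lock xadd` above is the atomic CPU ID allocation that the removed FIXME called for: `__ap_boot_cpu_id_tail` starts at 1 (the BSP is CPU 0), and each AP atomically fetches and increments it, so logical CPU IDs stay contiguous even when APIC IDs have holes. A minimal Rust analogue of the same idea, for illustration only:

    use core::sync::atomic::{AtomicU64, Ordering};

    // Each AP claims the next logical CPU ID; the counter starts at 1
    // because the BSP already owns CPU ID 0.
    static AP_BOOT_CPU_ID_TAIL: AtomicU64 = AtomicU64::new(1);

    fn allocate_ap_cpu_id() -> u64 {
        // Like `xadd`, `fetch_add` returns the value before the increment:
        // the first AP gets 1, the next gets 2, and so on, independent of
        // (possibly non-contiguous) APIC IDs.
        AP_BOOT_CPU_ID_TAIL.fetch_add(1, Ordering::Relaxed)
    }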
@ -213,5 +173,5 @@ ap_long_mode:
     mov rax, offset ap_early_entry
     call rax

-.extern halt # bsp_boot.S
+.extern halt // bsp_boot.S
     jmp halt

View File

@ -12,7 +12,6 @@ use x86_64::registers::rflags::{self, RFlags};
 use super::iommu::{alloc_irt_entry, has_interrupt_remapping, IrtEntryHandle};
 use crate::{
-    cpu::CpuId,
     sync::{LocalIrqDisabled, Mutex, PreemptDisabled, RwLock, RwLockReadGuard, SpinLock},
     trap::TrapFrame,
 };
@ -171,17 +170,35 @@ impl Drop for IrqCallbackHandle {
 }
 }

+// ####### Inter-Processor Interrupts (IPIs) #######
+
+/// Hardware-specific, architecture-dependent CPU ID.
+///
+/// This is the Local APIC ID in the x86_64 architecture.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct HwCpuId(u32);
+
+impl HwCpuId {
+    pub(crate) fn read_current() -> Self {
+        use crate::arch::kernel::apic;
+
+        let id = apic::with_borrow(|apic| apic.id());
+        Self(id)
+    }
+}
+
 /// Sends a general inter-processor interrupt (IPI) to the specified CPU.
 ///
 /// # Safety
 ///
-/// The caller must ensure that the CPU ID and the interrupt number corresponds
-/// to a safe function to call.
-pub(crate) unsafe fn send_ipi(cpu_id: CpuId, irq_num: u8) {
+/// The caller must ensure that the interrupt number is valid and that
+/// the corresponding handler is configured correctly on the remote CPU.
+/// Furthermore, invoking the interrupt handler must also be safe.
+pub(crate) unsafe fn send_ipi(hw_cpu_id: HwCpuId, irq_num: u8) {
     use crate::arch::kernel::apic::{self, Icr};

     let icr = Icr::new(
-        apic::ApicId::from(cpu_id.as_usize() as u32),
+        apic::ApicId::from(hw_cpu_id.0),
         apic::DestinationShorthand::NoShorthand,
         apic::TriggerMode::Edge,
         apic::Level::Assert,
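
On x86_64, `HwCpuId` is the Local APIC ID, which firmware may assign non-contiguously (for example, odd APIC IDs can be absent when hyper-threading is disabled). A small illustrative check, with made-up APIC IDs, of why the old `cpu_id.as_usize() as u32` conversion was wrong:

    // Hypothetical four-CPU machine whose firmware assigns APIC IDs
    // 0, 2, 4, 6 (e.g., SMT disabled).
    fn apic_id_example() {
        let hw_cpu_ids = [HwCpuId(0), HwCpuId(2), HwCpuId(4), HwCpuId(6)];
        // Logical CPU 3 must be addressed via APIC ID 6; reusing the
        // logical ID (3) would target a nonexistent processor.
        assert_eq!(hw_cpu_ids[3].0, 6);
    }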

View File

@ -2,18 +2,17 @@
 //! Symmetric multiprocessing (SMP) boot support.

-use alloc::{boxed::Box, vec::Vec};
-use core::sync::atomic::{AtomicBool, Ordering};
+use alloc::{boxed::Box, collections::btree_map::BTreeMap, vec::Vec};

 use spin::Once;

 use crate::{
-    arch::boot::smp::bringup_all_aps,
-    cpu,
+    arch::{boot::smp::bringup_all_aps, irq::HwCpuId},
     mm::{
         frame::{meta::KernelMeta, Segment},
         paddr_to_vaddr, FrameAllocOptions, PAGE_SIZE,
     },
+    sync::SpinLock,
     task::Task,
 };
@ -25,11 +24,11 @@ struct ApBootInfo {
     /// Raw boot information for each AP.
     per_ap_raw_info: Box<[PerApRawInfo]>,
     /// Boot information for each AP.
+    #[expect(dead_code)]
     per_ap_info: Box<[PerApInfo]>,
 }

 struct PerApInfo {
-    is_started: AtomicBool,
     // TODO: When the AP starts up and begins executing tasks, the boot stack will
     // no longer be used, and the `Segment` can be deallocated (this problem also
     // exists in the boot processor, but the memory it occupies should be returned
@ -56,7 +55,7 @@ pub(crate) struct PerApRawInfo {
 unsafe impl Send for PerApRawInfo {}
 unsafe impl Sync for PerApRawInfo {}

-static AP_LATE_ENTRY: Once<fn()> = Once::new();
+static HW_CPU_ID_MAP: SpinLock<BTreeMap<u32, HwCpuId>> = SpinLock::new(BTreeMap::new());
/// Boots all application processors.
///
@ -69,6 +68,9 @@ static AP_LATE_ENTRY: Once<fn()> = Once::new();
 /// This function can only be called in the boot context of the BSP where APs have
 /// not yet been booted.
 pub(crate) unsafe fn boot_all_aps() {
+    // Mark the BSP as started.
+    report_online_and_hw_cpu_id(crate::cpu::CpuId::bsp().as_usize().try_into().unwrap());
+
     let num_cpus = crate::cpu::num_cpus();
     if num_cpus == 1 {
@ -76,10 +78,6 @@ pub(crate) unsafe fn boot_all_aps() {
     }

     log::info!("Booting {} processors", num_cpus - 1);

-    // We currently assume that
-    // 1. the bootstrap processor (BSP) has the processor ID 0;
-    // 2. the processor ID starts from `0` to `num_cpus - 1`.

     let mut per_ap_raw_info = Vec::with_capacity(num_cpus);
     let mut per_ap_info = Vec::with_capacity(num_cpus);
@ -93,10 +91,7 @@ pub(crate) unsafe fn boot_all_aps() {
             stack_top: paddr_to_vaddr(boot_stack_pages.end_paddr()) as *mut u8,
             cpu_local: paddr_to_vaddr(crate::cpu::local::get_ap(ap.try_into().unwrap())) as *mut u8,
         });
-        per_ap_info.push(PerApInfo {
-            is_started: AtomicBool::new(false),
-            boot_stack_pages,
-        });
+        per_ap_info.push(PerApInfo { boot_stack_pages });
     }
assert!(!AP_BOOT_INFO.is_completed());
@ -113,12 +108,14 @@ pub(crate) unsafe fn boot_all_aps() {
     // the arguments are valid to boot APs (generated above).
     unsafe { bringup_all_aps(info_ptr, pt_ptr, num_cpus as u32) };

-    wait_for_all_aps_started();
+    wait_for_all_aps_started(num_cpus);

     log::info!("All application processors started. The BSP continues to run.");
 }

-/// Register the entry function for the application processor.
+static AP_LATE_ENTRY: Once<fn()> = Once::new();
+
+/// Registers the entry function for the application processor.
 ///
 /// Once the entry function is registered, all the application processors
 /// will jump to the entry function immediately.
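
A minimal usage sketch of `register_ap_entry`; the entry body and the call site below are hypothetical:

    // The kernel registers its AP main loop once; every AP spinning in
    // the boot context then jumps to it immediately.
    fn ap_main() {
        loop {
            // Run scheduled tasks on this application processor...
        }
    }

    fn finish_smp_init() {
        register_ap_entry(ap_main);
    }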
@ -129,7 +126,7 @@ pub fn register_ap_entry(entry: fn()) {
 #[no_mangle]
 fn ap_early_entry(cpu_id: u32) -> ! {
     // SAFETY: `cpu_id` is the correct value of the CPU ID.
-    unsafe { cpu::init_on_ap(cpu_id) };
+    unsafe { crate::cpu::init_on_ap(cpu_id) };

     crate::arch::enable_cpu_features();
@ -146,10 +143,7 @@ fn ap_early_entry(cpu_id: u32) -> ! {
     unsafe { crate::mm::kspace::activate_kernel_page_table() };

     // Mark the AP as started.
-    let ap_boot_info = AP_BOOT_INFO.get().unwrap();
-    ap_boot_info.per_ap_info[cpu_id as usize - 1]
-        .is_started
-        .store(true, Ordering::Release);
+    report_online_and_hw_cpu_id(cpu_id);

     log::info!("Processor {} started. Spinning for tasks.", cpu_id);
@ -160,16 +154,36 @@ fn ap_early_entry(cpu_id: u32) -> ! {
     unreachable!("`yield_now` in the boot context should not return");
 }

-fn wait_for_all_aps_started() {
-    fn is_all_aps_started() -> bool {
-        let ap_boot_info = AP_BOOT_INFO.get().unwrap();
-        ap_boot_info
-            .per_ap_info
-            .iter()
-            .all(|info| info.is_started.load(Ordering::Acquire))
+fn report_online_and_hw_cpu_id(cpu_id: u32) {
+    let old_val = HW_CPU_ID_MAP.lock().insert(cpu_id, HwCpuId::read_current());
+    assert!(old_val.is_none());
+}
+
+fn wait_for_all_aps_started(num_cpus: usize) {
+    fn is_all_aps_started(num_cpus: usize) -> bool {
+        HW_CPU_ID_MAP.lock().len() == num_cpus
     }

-    while !is_all_aps_started() {
+    while !is_all_aps_started(num_cpus) {
         core::hint::spin_loop();
     }
 }
+
+/// Constructs a boxed slice that maps [`CpuId`] to [`HwCpuId`].
+///
+/// # Panics
+///
+/// This method will panic if it is called either before all APs have booted or more than once.
+pub(crate) fn construct_hw_cpu_id_mapping() -> Box<[HwCpuId]> {
+    let mut hw_cpu_id_map = HW_CPU_ID_MAP.lock();
+    assert_eq!(hw_cpu_id_map.len(), crate::cpu::num_cpus());
+
+    let result = hw_cpu_id_map
+        .values()
+        .cloned()
+        .collect::<Vec<_>>()
+        .into_boxed_slice();
+    hw_cpu_id_map.clear();
+
+    result
+}
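
`construct_hw_cpu_id_mapping` relies on two properties: the map holds exactly `num_cpus` entries keyed by logical CPU ID (`0..num_cpus`), and `BTreeMap` iterates in ascending key order, so `values()` lands each `HwCpuId` at the slice index equal to its logical CPU ID. A small self-contained illustration of that ordering property (the string values stand in for `HwCpuId`):

    use alloc::{boxed::Box, collections::BTreeMap, vec::Vec};

    fn btreemap_values_are_key_ordered() {
        let mut map = BTreeMap::new();
        map.insert(2u32, "hw-of-cpu-2"); // insertion order does not matter
        map.insert(0u32, "hw-of-cpu-0");
        map.insert(1u32, "hw-of-cpu-1");

        // `values()` yields entries sorted by key, so collecting them
        // produces a slice indexable by the logical CPU ID.
        let slice: Box<[&str]> = map.values().cloned().collect::<Vec<_>>().into_boxed_slice();
        assert_eq!(slice[2], "hw-of-cpu-2");
    }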

View File

@ -5,18 +5,19 @@
 //! This module provides a way to execute code on other processors via inter-
 //! processor interrupts.

-use alloc::collections::VecDeque;
+use alloc::{boxed::Box, collections::VecDeque};

 use spin::Once;

 use crate::{
+    arch::irq::{send_ipi, HwCpuId},
     cpu::{CpuSet, PinCurrentCpu},
     cpu_local,
     sync::SpinLock,
     trap::{self, IrqLine, TrapFrame},
 };

-/// Execute a function on other processors.
+/// Executes a function on other processors.
 ///
 /// The provided function `f` will be executed on all target processors
 /// specified by `targets`. It can also be executed on the current processor.
@ -33,7 +34,9 @@ use crate::{
 pub fn inter_processor_call(targets: &CpuSet, f: fn()) {
     let irq_guard = trap::disable_local();
     let this_cpu_id = irq_guard.current_cpu();
-    let irq_num = INTER_PROCESSOR_CALL_IRQ.get().unwrap().num();
+
+    let ipi_data = IPI_GLOBAL_DATA.get().unwrap();
+    let irq_num = ipi_data.irq.num();

     let mut call_on_self = false;
     for cpu_id in targets.iter() {
@ -47,10 +50,9 @@ pub fn inter_processor_call(targets: &CpuSet, f: fn()) {
         if cpu_id == this_cpu_id {
             continue;
         }
-        // SAFETY: It is safe to send inter processor call IPI to other CPUs.
-        unsafe {
-            crate::arch::irq::send_ipi(cpu_id, irq_num);
-        }
+        // SAFETY: The value of `irq_num` corresponds to a valid IRQ line and
+        // triggering it will not cause any safety issues.
+        unsafe { send_ipi(ipi_data.hw_cpu_ids[cpu_id.as_usize()], irq_num) };
     }

     if call_on_self {
         // Execute the function synchronously.
@ -58,7 +60,12 @@ pub fn inter_processor_call(targets: &CpuSet, f: fn()) {
     }
 }

-static INTER_PROCESSOR_CALL_IRQ: Once<IrqLine> = Once::new();
+struct IpiGlobalData {
+    irq: IrqLine,
+    hw_cpu_ids: Box<[HwCpuId]>,
+}
+
+static IPI_GLOBAL_DATA: Once<IpiGlobalData> = Once::new();

 cpu_local! {
     static CALL_QUEUES: SpinLock<VecDeque<fn()>> = SpinLock::new(VecDeque::new());
@ -81,7 +88,12 @@ fn do_inter_processor_call(_trapframe: &TrapFrame) {
 }

 pub(super) fn init() {
-    let mut irq = IrqLine::alloc().unwrap();
-    irq.on_active(do_inter_processor_call);
-    INTER_PROCESSOR_CALL_IRQ.call_once(|| irq);
+    IPI_GLOBAL_DATA.call_once(|| {
+        let mut irq = IrqLine::alloc().unwrap();
+        irq.on_active(do_inter_processor_call);
+
+        let hw_cpu_ids = crate::boot::smp::construct_hw_cpu_id_mapping();
+
+        IpiGlobalData { irq, hw_cpu_ids }
+    });
 }
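
Finally, a usage sketch for the public API; the `CpuSet::new_full` constructor for a set of all CPUs is assumed here:

    // Run a short function on every processor, e.g. to invalidate some
    // per-CPU cached state. Note that `f` is a plain `fn()`: it is queued
    // and then executed from IRQ context on each target CPU.
    fn invalidate_everywhere() {
        let all_cpus = CpuSet::new_full();
        inter_processor_call(&all_cpus, || {
            // Keep this short: it runs in interrupt context.
        });
    }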