Boot application processors into spin loops

Co-authored-by: Chuandong Li <lichuand@pku.edu.cn>
This commit is contained in:
Zhang Junyang
2024-07-06 04:43:33 +00:00
committed by Tate, Hongliang Tian
parent 870d542f60
commit 393c9019c0
21 changed files with 828 additions and 123 deletions

View File

@ -2,27 +2,93 @@ ENTRY(__multiboot_boot)
OUTPUT_ARCH(i386:x86-64)
OUTPUT_FORMAT(elf64-x86-64)
# The physical address where the kernel will start to be loaded.
KERNEL_LMA = 0x8000000;
LINUX_32_ENTRY = 0x8001000;
# The physical address of the boot section for the bootstrap processor.
BSP_BOOT_LMA = 0x8001000;
# The application processors need to start at low physical addresses.
# We link the symbols at the low address plus the virtual offset, but load the
# segment at a higher physical address. The BSP will copy the segment to the
# low physical addresses before bringing up the APs.
AP_EXEC_MA = 0x8000;
# The virtual memory offset of the kernel mapping.
KERNEL_VMA = 0xffffffff80000000;
SECTIONS
{
# --------------------------------------------------------------------------- #
# The multiboot headers are placed at the beginning of the ELF file. #
# --------------------------------------------------------------------------- #
. = KERNEL_LMA + KERNEL_VMA;
__kernel_start = .;
.multiboot_header : AT(ADDR(.multiboot_header) - KERNEL_VMA) { KEEP(*(.multiboot_header)) }
.multiboot2_header : AT(ADDR(.multiboot2_header) - KERNEL_VMA) { KEEP(*(.multiboot2_header)) }
.multiboot_header : AT(ADDR(.multiboot_header) - KERNEL_VMA) {
KEEP(*(.multiboot_header))
}
.multiboot2_header : AT(ADDR(.multiboot2_header) - KERNEL_VMA) {
KEEP(*(.multiboot2_header))
}
. = LINUX_32_ENTRY + KERNEL_VMA;
# --------------------------------------------------------------------------- #
# These are two boot sections that need specific physical addresses, but their #
# symbols should be linked at virtual addresses.                               #
# --------------------------------------------------------------------------- #
. = BSP_BOOT_LMA + KERNEL_VMA;
.boot : AT(ADDR(.boot) - KERNEL_VMA) { KEEP(*(.boot)) }
.bsp_boot : AT(BSP_BOOT_LMA) {
KEEP(*(.bsp_boot .bsp_boot.*))
}
. = AP_EXEC_MA + KERNEL_VMA;
PROVIDE(__ap_boot_start = BSP_BOOT_LMA + SIZEOF(.bsp_boot) + KERNEL_VMA);
.ap_boot : AT(BSP_BOOT_LMA + SIZEOF(.bsp_boot)) {
KEEP(*(.ap_boot .ap_boot.*))
}
PROVIDE(__ap_boot_end = __ap_boot_start + SIZEOF(.ap_boot));
. = BSP_BOOT_LMA + KERNEL_VMA + SIZEOF(.bsp_boot) + SIZEOF(.ap_boot);
. = ALIGN(4096);
# --------------------------------------------------------------------------- #
# Here are the rest of the virtual memory sections which can be relocated. #
# --------------------------------------------------------------------------- #
.text : AT(ADDR(.text) - KERNEL_VMA) {
*(.text .text.*)
PROVIDE(__etext = .);
}
# The section to store the exception table (ExTable).
# This table is used for recovering from specific exception handling faults
# occurring at known points in the code.
# Ref: /aster-frame/src/arch/x86/ex_table.rs
.ex_table : AT(ADDR(.ex_table) - KERNEL_VMA) {
__ex_table = .;
KEEP(*(SORT(.ex_table)))
__ex_table_end = .;
}
# The list of unit test function symbols that should be executed while
# doing `cargo osdk test`.
.ktest_array : AT(ADDR(.ktest_array) - KERNEL_VMA) {
__ktest_array = .;
KEEP(*(SORT(.ktest_array)))
__ktest_array_end = .;
}
# A list of initialization function symbols. They will be called on OSTD
# initialization.
.init_array : AT(ADDR(.init_array) - KERNEL_VMA) {
__sinit_array = .;
KEEP(*(SORT(.init_array .init_array.*)))
__einit_array = .;
}
.rodata : AT(ADDR(.rodata) - KERNEL_VMA) { *(.rodata .rodata.*) }
.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - KERNEL_VMA) {
@ -40,12 +106,6 @@ SECTIONS
.data.rel.ro : AT(ADDR(.data.rel.ro) - KERNEL_VMA) { *(.data.rel.ro .data.rel.ro.*) }
.dynamic : AT(ADDR(.dynamic) - KERNEL_VMA) { *(.dynamic) }
.init_array : AT(ADDR(.init_array) - KERNEL_VMA) {
__sinit_array = .;
KEEP(*(SORT(.init_array .init_array.*)))
__einit_array = .;
}
.got : AT(ADDR(.got) - KERNEL_VMA) { *(.got .got.*) }
.got.plt : AT(ADDR(.got.plt) - KERNEL_VMA) { *(.got.plt .got.plt.*) }
@ -56,6 +116,7 @@ SECTIONS
# The CPU local data storage. It is readable and writable for the bootstrap
# processor, while it will be copied to dynamically allocated memory
# areas for the application processors.
. = ALIGN(4096);
.cpu_local : AT(ADDR(.cpu_local) - KERNEL_VMA) {
__cpu_local_start = .;
@ -78,22 +139,6 @@ SECTIONS
__bss_end = .;
}
# The section to store exception table (ExTable).
# This table is used for recovering from specific exception handling faults
# occurring at known points in the code.
# Ref: /aster-frame/src/arch/x86/ex_table.rs
.ex_table : AT(ADDR(.ex_table) - KERNEL_VMA) {
__ex_table = .;
KEEP(*(SORT(.ex_table)))
__ex_table_end = .;
}
.ktest_array : AT(ADDR(.ktest_array) - KERNEL_VMA) {
__ktest_array = .;
KEEP(*(SORT(.ktest_array)))
__ktest_array_end = .;
}
.tdata : AT(ADDR(.tdata) - KERNEL_VMA) { *(.tdata .tdata.*) }
.tbss : AT(ADDR(.tbss) - KERNEL_VMA) { *(.tbss .tbss.*) }
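To make the `.ap_boot` placement above concrete, here is the address arithmetic the script implies, written out as Rust constants. This is a sketch only: `BSP_BOOT_SIZE` stands in for `SIZEOF(.bsp_boot)`, which is known only at link time.

```rust
// Sketch of the address arithmetic implied by the linker script above.
const KERNEL_VMA: usize = 0xffff_ffff_8000_0000;
const BSP_BOOT_LMA: usize = 0x0800_1000;
const AP_EXEC_MA: usize = 0x8000;

// Placeholder for SIZEOF(.bsp_boot), which is only known at link time.
const BSP_BOOT_SIZE: usize = 0x1000;

// Symbols inside `.ap_boot` (e.g. `ap_real_mode_boot`) are linked at the low
// address plus the kernel's virtual offset ...
const AP_BOOT_LINK_VA: usize = AP_EXEC_MA + KERNEL_VMA;
// ... but the section itself is stored in the kernel image right after
// `.bsp_boot`, which is where `__ap_boot_start` points (that LMA plus KERNEL_VMA).
const AP_BOOT_LOAD_LMA: usize = BSP_BOOT_LMA + BSP_BOOT_SIZE;
// So the BSP must copy `__ap_boot_start..__ap_boot_end` down to AP_EXEC_MA
// (0x8000) before the APs can run the code at its linked addresses.
```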

View File

@ -0,0 +1,161 @@
/* SPDX-License-Identifier: MPL-2.0 */
// The boot routine executed by the application processors.
.extern boot_gdtr
.extern boot_page_table_start
.extern ap_early_entry
KERNEL_VMA = 0xffffffff80000000
.section ".ap_boot", "awx"
.align 4096
.code16
IA32_APIC_BASE = 0x1B
IA32_X2APIC_APICID = 0x802
MMIO_XAPIC_APICID = 0xFEE00020
ap_real_mode_boot:
cli // disable interrupts
cld
xor ax, ax // clear ax
mov ds, ax // clear ds
lgdt [ap_gdtr - KERNEL_VMA] // load gdt
mov eax, cr0
or eax, 1
mov cr0, eax // enable protected mode
ljmp 0x8, offset ap_protect_entry - KERNEL_VMA
// 32-bit AP GDT.
.align 16
ap_gdt:
.quad 0x0000000000000000
ap_gdt_code:
.quad 0x00cf9a000000ffff
ap_gdt_data:
.quad 0x00cf92000000ffff
ap_gdt_end:
.align 16
ap_gdtr:
.word ap_gdt_end - ap_gdt - 1
.quad ap_gdt - KERNEL_VMA
.align 4
.code32
ap_protect_entry:
mov ax, 0x10
mov ds, ax
mov ss, ax
// Get the local APIC ID from xAPIC or x2APIC.
// It is better to get this information in protected mode.
// After entering long mode, we would need to set up an additional
// page table mapping for the xAPIC-mode MMIO region.
// Tell whether it is xAPIC or x2APIC.
// IA32_APIC_BASE register:
//   bit 8  (BSP):  the processor is the BSP
//   bit 10 (EXTD): enable x2APIC mode
//   bit 11 (EN):   xAPIC global enable/disable
//   bits 12-35 (APIC Base): base physical address of the APIC registers
mov ecx, IA32_APIC_BASE
rdmsr
and eax, 0x400 // check EXTD bit
cmp eax, 0x400
je x2apic_mode
xapic_mode:
// In xAPIC mode, the local APIC ID is stored in
// the MMIO region.
mov eax, [MMIO_XAPIC_APICID]
shr eax, 24
jmp ap_protect
x2apic_mode:
// In x2APIC mode, the local APIC ID is stored in
// IA32_X2APIC_APICID MSR.
mov ecx, IA32_X2APIC_APICID
rdmsr
jmp ap_protect
.code32
ap_protect:
// Save the local APIC ID in an unused register.
// We will later calculate this core's stack pointer
// using the local APIC ID as the offset.
mov edi, eax
// Now we try getting into long mode.
// Use the 64-bit GDT.
lgdt [boot_gdtr - KERNEL_VMA]
// Enable PAE and PGE.
mov eax, cr4
or eax, 0xa0
mov cr4, eax
// Set the page table. The application processors use
// the same page table as the bootstrap processor's
// boot phase page table.
lea eax, [boot_page_table_start - KERNEL_VMA]
mov cr3, eax
// Enable long mode.
mov ecx, 0xc0000080
rdmsr // load EFER MSR
or eax, 1 << 8
wrmsr // set long bit
// Enable paging.
mov eax, cr0
or eax, 1 << 31
mov cr0, eax
ljmp 0x8, offset ap_long_mode_in_low_address - KERNEL_VMA
.code64
ap_long_mode_in_low_address:
mov ax, 0
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
// Update RIP to use the virtual address.
mov rbx, KERNEL_VMA
lea rax, [ap_long_mode - KERNEL_VMA]
or rax, rbx
jmp rax
// This is a pointer to be filled by the BSP when boot stacks
// of all APs are allocated and initialized.
.global __ap_boot_stack_array_pointer
.align 8
__ap_boot_stack_array_pointer:
.skip 8
ap_long_mode:
// The local APIC ID is in the RDI.
mov rax, rdi
shl rax, 3
// Setup the stack.
mov rbx, [__ap_boot_stack_array_pointer]
mov rsp, [rbx + rax]
xor rbp, rbp
// Go to Rust code.
mov rax, offset ap_early_entry
call rax
hlt
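A note on why `.ap_boot` must land at physical address 0x8000: the SIPI vector encodes the real-mode start page. As a hedged sketch of the arithmetic (the constants mirror `AP_BOOT_START_PA` and the SIPI vector computed in the Rust code later in this commit):

```rust
// Sketch: how the SIPI vector maps to the AP's real-mode start address.
const AP_BOOT_START_PA: usize = 0x8000;
const PAGE_SIZE: usize = 4096;

// The startup IPI carries the page number of the boot code as its vector.
const SIPI_VECTOR: u8 = (AP_BOOT_START_PA / PAGE_SIZE) as u8; // = 8

// On receiving the SIPI, an AP starts in real mode with CS = vector << 8 and
// IP = 0, i.e., at physical address vector * 4096 = 0x8000, which is exactly
// where the BSP copied `ap_real_mode_boot`.
const AP_START_PA: usize = (SIPI_VECTOR as usize) << 12; // = 0x8000
```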

View File

@ -1,9 +1,11 @@
/* SPDX-License-Identifier: MPL-2.0 */
// The boot routine executed by the bootstrap processor.
// The boot header, initial boot setup code, temporary GDT and page tables are
// in the boot section. The boot section is mapped writable since the kernel
// may modify the initial page table.
.section ".boot", "awx"
.section ".bsp_boot", "awx"
.code32
// With every entry types we could go through common paging or machine

View File

@ -143,6 +143,13 @@ fn init_memory_regions(memory_regions: &'static Once<Vec<MemoryRegion>>) {
MemoryRegionType::Module,
));
// Add the AP boot code region that will be copied into by the BSP.
regions.push(MemoryRegion::new(
super::smp::AP_BOOT_START_PA,
super::smp::ap_boot_code_size(),
MemoryRegionType::Reclaimable,
));
memory_regions.call_once(|| non_overlapping_regions_from(regions.as_ref()));
}

View File

@ -26,4 +26,5 @@ pub mod smp;
use core::arch::global_asm;
global_asm!(include_str!("boot.S"));
global_asm!(include_str!("bsp_boot.S"));
global_asm!(include_str!("ap_boot.S"));

View File

@ -152,6 +152,13 @@ fn init_memory_regions(memory_regions: &'static Once<Vec<MemoryRegion>>) {
));
}
// Add the AP boot code region that will be copied into by the BSP.
regions.push(MemoryRegion::new(
super::smp::AP_BOOT_START_PA,
super::smp::ap_boot_code_size(),
MemoryRegionType::Reclaimable,
));
// Initialize with non-overlapping regions.
memory_regions.call_once(move || non_overlapping_regions_from(regions.as_ref()));
}

View File

@ -148,6 +148,13 @@ fn init_memory_regions(memory_regions: &'static Once<Vec<MemoryRegion>>) {
));
}
// Add the AP boot code region that will be copied into by the BSP.
regions.push(MemoryRegion::new(
super::smp::AP_BOOT_START_PA,
super::smp::ap_boot_code_size(),
MemoryRegionType::Reclaimable,
));
// Initialize with non-overlapping regions.
memory_regions.call_once(move || non_overlapping_regions_from(regions.as_ref()));
}

View File

@ -11,22 +11,196 @@
//! The BSP executes the BIOS's boot-strap code to configure the APIC environment
//! and set up system-wide data structures. By this point, the BSP has completed most
//! of the initialization of the OS, but the APs have not yet been awakened.
//!
//! Following a power-up or reset, the APs complete a minimal self-configuration,
//! then wait for a startup signal (a SIPI message) from the BSP processor.
//!
//! The wake-up of the APs follows the INIT-SIPI-SIPI IPI sequence:
//! - Broadcast INIT IPI (Initialize the APs to the wait-for-SIPI state)
//! - Wait
//! - Broadcast De-assert INIT IPI (Only older processors need this step)
//! - Wait
//! - Broadcast SIPI IPI (APs exit the wait-for-SIPI state and start executing code)
//! - Wait
//! - Broadcast SIPI IPI (In case an AP failed to start)
//!
//! This sequence does not need to be strictly followed, and there may be
//! different considerations in different systems.
use acpi::platform::{PlatformInfo, ProcessorInfo};
use acpi::platform::PlatformInfo;
use crate::arch::x86::kernel::acpi::ACPI_TABLES;
use crate::{
arch::x86::kernel::{
acpi::ACPI_TABLES,
apic::{
self, ApicId, DeliveryMode, DeliveryStatus, DestinationMode, DestinationShorthand, Icr,
Level, TriggerMode,
},
},
mm::{paddr_to_vaddr, PAGE_SIZE},
};
/// Get processor information
/// Get the number of processors
///
/// This function needs to be called after the OS initializes the ACPI table.
pub(crate) fn get_processor_info() -> Option<ProcessorInfo> {
pub(crate) fn get_num_processors() -> Option<u32> {
if !ACPI_TABLES.is_completed() {
return None;
}
Some(
PlatformInfo::new(&*ACPI_TABLES.get().unwrap().lock())
.unwrap()
.processor_info
.unwrap(),
)
let processor_info = PlatformInfo::new(&*ACPI_TABLES.get().unwrap().lock())
.unwrap()
.processor_info
.unwrap();
Some(processor_info.application_processors.len() as u32 + 1)
}
/// Brings up all application processors.
pub(crate) fn bringup_all_aps() {
copy_ap_boot_code();
init_boot_stack_array();
send_boot_ipis();
}
/// This is the address at which the linker links the symbols of the `.ap_boot` section.
/// The BSP copies the AP boot code to this address before booting the APs.
pub(super) const AP_BOOT_START_PA: usize = 0x8000;
/// The size of the AP boot code (the `.ap_boot` section).
pub(super) fn ap_boot_code_size() -> usize {
__ap_boot_end as usize - __ap_boot_start as usize
}
fn copy_ap_boot_code() {
let ap_boot_start = __ap_boot_start as usize as *const u8;
let len = __ap_boot_end as usize - __ap_boot_start as usize;
// SAFETY: we are copying the AP boot code to the AP boot address.
unsafe {
core::ptr::copy_nonoverlapping(
ap_boot_start,
crate::mm::paddr_to_vaddr(AP_BOOT_START_PA) as *mut u8,
len,
);
}
}
/// Initializes the boot stack array in the AP boot code with the given pages.
fn init_boot_stack_array() {
let pages = &crate::boot::smp::AP_BOOT_INFO
.get()
.unwrap()
.boot_stack_array;
// This is defined in the boot assembly code.
extern "C" {
fn __ap_boot_stack_array_pointer();
}
let ap_boot_stack_arr_ptr: *mut u64 = __ap_boot_stack_array_pointer as usize as *mut u64;
log::debug!(
"__ap_boot_stack_array_pointer: {:#x?}",
ap_boot_stack_arr_ptr
);
// SAFETY: this pointer points to a static variable defined in the `ap_boot.S`.
unsafe {
ap_boot_stack_arr_ptr.write_volatile(paddr_to_vaddr(pages.start_paddr()) as u64);
}
}
// The symbols are defined in the linker script.
extern "C" {
fn __ap_boot_start();
fn __ap_boot_end();
}
/// Sends IPIs to notify all application processors to boot.
///
/// It follows the INIT-SIPI-SIPI IPI sequence.
/// Here, we don't check whether any AP failed to start; we simply send
/// the second SIPI unconditionally (checking each core one by one would
/// bring extra overhead). For APs that have already started, the extra
/// SIPI has no effect.
fn send_boot_ipis() {
send_init_to_all_aps();
spin_wait_cycles(100_000_000);
send_init_deassert();
spin_wait_cycles(20_000_000);
send_startup_to_all_aps();
spin_wait_cycles(20_000_000);
send_startup_to_all_aps();
spin_wait_cycles(20_000_000);
}
fn send_startup_to_all_aps() {
let icr = Icr::new(
ApicId::from(0),
DestinationShorthand::AllExcludingSelf,
TriggerMode::Egde,
Level::Assert,
DeliveryStatus::Idle,
DestinationMode::Physical,
DeliveryMode::StrartUp,
(AP_BOOT_START_PA / PAGE_SIZE) as u8,
);
// SAFETY: we are sending startup IPI to all APs.
apic::borrow(|apic| unsafe { apic.send_ipi(icr) });
}
fn send_init_to_all_aps() {
let icr = Icr::new(
ApicId::from(0),
DestinationShorthand::AllExcludingSelf,
TriggerMode::Level,
Level::Assert,
DeliveryStatus::Idle,
DestinationMode::Physical,
DeliveryMode::Init,
0,
);
// SAFETY: we are sending init IPI to all APs.
apic::borrow(|apic| unsafe { apic.send_ipi(icr) });
}
fn send_init_deassert() {
let icr = Icr::new(
ApicId::from(0),
DestinationShorthand::AllIncludingSelf,
TriggerMode::Level,
Level::Deassert,
DeliveryStatus::Idle,
DestinationMode::Physical,
DeliveryMode::Init,
0,
);
// SAFETY: we are sending deassert IPI to all APs.
apic::borrow(|apic| unsafe { apic.send_ipi(icr) });
}
/// Spin-waits for approximately `c` TSC cycles.
///
/// Since the timer requires CPU-local storage to be initialized, we
/// can only wait by spinning on the TSC.
fn spin_wait_cycles(c: u64) {
fn duration(from: u64, to: u64) -> u64 {
if to >= from {
to - from
} else {
u64::MAX - from + to
}
}
use core::arch::x86_64::_rdtsc;
let start = unsafe { _rdtsc() };
while duration(start, unsafe { _rdtsc() }) < c {
core::hint::spin_loop();
}
}

View File

@ -1,22 +1,80 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use alloc::sync::Arc;
use alloc::boxed::Box;
use core::cell::RefCell;
use bit_field::BitField;
use log::info;
use spin::Once;
use crate::sync::SpinLock;
use crate::cpu_local;
pub mod ioapic;
pub mod x2apic;
pub mod xapic;
pub static APIC_INSTANCE: Once<Arc<SpinLock<dyn Apic + 'static>>> = Once::new();
cpu_local! {
static APIC_INSTANCE: Once<RefCell<Box<dyn Apic + 'static>>> = Once::new();
}
static APIC_TYPE: Once<ApicType> = Once::new();
/// Does something with the APIC instance of the current CPU.
///
/// You should provide a closure operating on the given mutable borrow of the
/// local APIC instance. During the execution of the closure, interrupts are
/// guaranteed to be disabled.
///
/// Example:
/// ```rust
/// use ostd::arch::x86::kernel::apic;
///
/// let ticks = apic::borrow(|apic| {
/// let ticks = apic.timer_current_count();
/// apic.set_timer_init_count(0);
/// ticks
/// });
/// ```
pub fn borrow<R>(f: impl FnOnce(&mut (dyn Apic + 'static)) -> R) -> R {
let apic_guard = APIC_INSTANCE.borrow_irq_disabled();
// If it is not initialized, lazily initialize it.
if !apic_guard.is_completed() {
apic_guard.call_once(|| match APIC_TYPE.get().unwrap() {
ApicType::XApic => {
let mut xapic = xapic::XApic::new().unwrap();
xapic.enable();
let version = xapic.version();
log::info!(
"xAPIC ID:{:x}, Version:{:x}, Max LVT:{:x}",
xapic.id(),
version & 0xff,
(version >> 16) & 0xff
);
RefCell::new(Box::new(xapic))
}
ApicType::X2Apic => {
let mut x2apic = x2apic::X2Apic::new().unwrap();
x2apic.enable();
let version = x2apic.version();
log::info!(
"x2APIC ID:{:x}, Version:{:x}, Max LVT:{:x}",
x2apic.id(),
version & 0xff,
(version >> 16) & 0xff
);
RefCell::new(Box::new(x2apic))
}
});
}
let apic_cell = apic_guard.get().unwrap();
let mut apic_ref = apic_cell.borrow_mut();
let ret = f.call_once((apic_ref.as_mut(),));
ret
}
pub trait Apic: ApicTimer + Sync + Send {
fn id(&self) -> u32;
@ -179,7 +237,9 @@ impl From<u32> for ApicId {
/// in the system excluding the sender.
#[repr(u64)]
pub enum DestinationShorthand {
#[allow(dead_code)]
NoShorthand = 0b00,
#[allow(dead_code)]
MySelf = 0b01,
AllIncludingSelf = 0b10,
AllExcludingSelf = 0b11,
@ -203,28 +263,34 @@ pub enum Level {
#[repr(u64)]
pub enum DeliveryStatus {
Idle = 0,
#[allow(dead_code)]
SendPending = 1,
}
#[repr(u64)]
pub enum DestinationMode {
Physical = 0,
#[allow(dead_code)]
Logical = 1,
}
#[repr(u64)]
pub enum DeliveryMode {
/// Delivers the interrupt specified in the vector field to the target processor or processors.
#[allow(dead_code)]
Fixed = 0b000,
/// Same as fixed mode, except that the interrupt is delivered to the processor executing at
/// the lowest priority among the set of processors specified in the destination field. The
/// ability for a processor to send a lowest priority IPI is model specific and should be
/// avoided by BIOS and operating system software.
#[allow(dead_code)]
LowestPriority = 0b001,
/// System Management Interrupt
#[allow(dead_code)]
Smi = 0b010,
_Reserved = 0b011,
/// Non-Maskable Interrupt
#[allow(dead_code)]
Nmi = 0b100,
/// Delivers an INIT request to the target processor or processors, which causes them to
/// perform an initialization.
@ -241,6 +307,7 @@ pub enum ApicInitError {
#[derive(Debug)]
#[repr(u32)]
#[allow(dead_code)]
pub enum DivideConfig {
Divide1 = 0b1011,
Divide2 = 0b0000,
@ -254,28 +321,12 @@ pub enum DivideConfig {
pub fn init() -> Result<(), ApicInitError> {
crate::arch::x86::kernel::pic::disable_temp();
if let Some(mut x2apic) = x2apic::X2Apic::new() {
x2apic.enable();
let version = x2apic.version();
info!(
"x2APIC ID:{:x}, Version:{:x}, Max LVT:{:x}",
x2apic.id(),
version & 0xff,
(version >> 16) & 0xff
);
APIC_INSTANCE.call_once(|| Arc::new(SpinLock::new(x2apic)));
if x2apic::X2Apic::has_x2apic() {
log::info!("x2APIC found!");
APIC_TYPE.call_once(|| ApicType::X2Apic);
Ok(())
} else if let Some(mut xapic) = xapic::XApic::new() {
xapic.enable();
let version = xapic.version();
info!(
"xAPIC ID:{:x}, Version:{:x}, Max LVT:{:x}",
xapic.id(),
version & 0xff,
(version >> 16) & 0xff
);
APIC_INSTANCE.call_once(|| Arc::new(SpinLock::new(xapic)));
} else if xapic::XApic::has_xapic() {
log::info!("xAPIC found!");
APIC_TYPE.call_once(|| ApicType::XApic);
Ok(())
} else {
@ -283,3 +334,7 @@ pub fn init() -> Result<(), ApicInitError> {
Err(ApicInitError::NoApic)
}
}
pub fn exists() -> bool {
APIC_TYPE.is_completed()
}

View File

@ -18,7 +18,7 @@ impl X2Apic {
Some(Self {})
}
fn has_x2apic() -> bool {
pub(super) fn has_x2apic() -> bool {
// x2apic::X2APIC::new()
let value = unsafe { core::arch::x86_64::__cpuid(1) };
value.ecx & 0x20_0000 != 0

View File

@ -2,11 +2,10 @@
#![allow(dead_code)]
use spin::Once;
use x86::apic::xapic;
use super::ApicTimer;
use crate::{mm, sync::Mutex};
use crate::mm;
const IA32_APIC_BASE_MSR: u32 = 0x1B;
const IA32_APIC_BASE_MSR_BSP: u32 = 0x100; // Processor is a BSP
@ -14,8 +13,6 @@ const IA32_APIC_BASE_MSR_ENABLE: u64 = 0x800;
const APIC_LVT_MASK_BITS: u32 = 1 << 16;
pub static XAPIC_INSTANCE: Once<Mutex<XApic>> = Once::new();
#[derive(Debug)]
pub struct XApic {
mmio_region: &'static mut [u32],
@ -56,7 +53,7 @@ impl XApic {
self.write(xapic::XAPIC_SVR, svr);
}
pub fn has_xapic() -> bool {
pub(super) fn has_xapic() -> bool {
let value = unsafe { core::arch::x86_64::__cpuid(1) };
value.edx & 0x100 != 0
}

View File

@ -53,9 +53,13 @@ pub(crate) fn check_tdx_init() {
}
}
pub(crate) fn after_all_init() {
pub(crate) fn init_on_bsp() {
irq::init();
kernel::acpi::init();
// SAFETY: it is only called once and ACPI has been initialized.
unsafe { crate::cpu::init() };
match kernel::apic::init() {
Ok(_) => {
ioapic::init();
@ -66,7 +70,15 @@ pub(crate) fn after_all_init() {
}
}
serial::callback_init();
// SAFETY: no CPU-local objects have been accessed so far. And
// we are on the BSP.
unsafe { crate::cpu::cpu_local::init_on_bsp() };
crate::boot::smp::boot_all_aps();
timer::init();
#[cfg(feature = "intel_tdx")]
if !tdx_is_enabled() {
match iommu::init() {
@ -86,9 +98,9 @@ pub(crate) fn after_all_init() {
pub(crate) fn interrupts_ack(irq_number: usize) {
if !cpu::CpuException::is_cpu_exception(irq_number as u16) {
kernel::pic::ack();
if let Some(apic) = kernel::apic::APIC_INSTANCE.get() {
apic.lock_irq_disabled().eoi();
}
kernel::apic::borrow(|apic| {
apic.eoi();
});
}
}

View File

@ -22,7 +22,7 @@ use crate::{
kernel::tsc::init_tsc_freq,
timer::pit::OperatingMode,
x86::kernel::{
apic::{DivideConfig, APIC_INSTANCE},
apic::{self, DivideConfig},
tsc::TSC_FREQ,
},
},
@ -53,10 +53,10 @@ fn is_tsc_deadline_mode_supported() -> bool {
fn init_tsc_mode() -> IrqLine {
let timer_irq = IrqLine::alloc().unwrap();
let mut apic_lock = APIC_INSTANCE.get().unwrap().lock_irq_disabled();
// Enable tsc deadline mode
apic_lock.set_lvt_timer(timer_irq.num() as u64 | (1 << 18));
drop(apic_lock);
apic::borrow(|apic| {
apic.set_lvt_timer(timer_irq.num() as u64 | (1 << 18));
});
let tsc_step = TSC_FREQ.load(Ordering::Relaxed) / TIMER_FREQ;
let callback = move || unsafe {
@ -81,10 +81,10 @@ fn init_periodic_mode() -> IrqLine {
super::pit::enable_ioapic_line(irq.clone());
// Set APIC timer count
let mut apic_lock = APIC_INSTANCE.get().unwrap().lock_irq_disabled();
apic_lock.set_timer_div_config(DivideConfig::Divide64);
apic_lock.set_timer_init_count(0xFFFF_FFFF);
drop(apic_lock);
apic::borrow(|apic| {
apic.set_timer_div_config(DivideConfig::Divide64);
apic.set_timer_init_count(0xFFFF_FFFF);
});
static IS_FINISH: AtomicBool = AtomicBool::new(false);
static INIT_COUNT: AtomicU64 = AtomicU64::new(0);
@ -99,10 +99,11 @@ fn init_periodic_mode() -> IrqLine {
// Init APIC Timer
let timer_irq = IrqLine::alloc().unwrap();
let mut apic_lock = APIC_INSTANCE.get().unwrap().lock_irq_disabled();
apic_lock.set_timer_init_count(INIT_COUNT.load(Ordering::Relaxed));
apic_lock.set_lvt_timer(timer_irq.num() as u64 | (1 << 17));
apic_lock.set_timer_div_config(DivideConfig::Divide64);
apic::borrow(|apic| {
apic.set_timer_init_count(INIT_COUNT.load(Ordering::Relaxed));
apic.set_lvt_timer(timer_irq.num() as u64 | (1 << 17));
apic.set_timer_div_config(DivideConfig::Divide64);
});
return timer_irq;
@ -114,8 +115,7 @@ fn init_periodic_mode() -> IrqLine {
if IN_TIME.load(Ordering::Relaxed) < CALLBACK_TIMES || IS_FINISH.load(Ordering::Acquire) {
if IN_TIME.load(Ordering::Relaxed) == 0 {
let apic_lock = APIC_INSTANCE.get().unwrap().lock_irq_disabled();
let remain_ticks = apic_lock.timer_current_count();
let remain_ticks = apic::borrow(|apic| apic.timer_current_count());
APIC_FIRST_COUNT.store(0xFFFF_FFFF - remain_ticks, Ordering::Relaxed);
}
IN_TIME.fetch_add(1, Ordering::Relaxed);
@ -124,9 +124,11 @@ fn init_periodic_mode() -> IrqLine {
// Stop PIT and APIC Timer
super::pit::disable_ioapic_line();
let mut apic_lock = APIC_INSTANCE.get().unwrap().lock_irq_disabled();
let remain_ticks = apic_lock.timer_current_count();
apic_lock.set_timer_init_count(0);
let remain_ticks = apic::borrow(|apic| {
let remain_ticks = apic.timer_current_count();
apic.set_timer_init_count(0);
remain_ticks
});
let ticks = (0xFFFF_FFFF - remain_ticks - APIC_FIRST_COUNT.load(Ordering::Relaxed))
/ CALLBACK_TIMES;
info!(

View File

@ -37,7 +37,7 @@ pub(super) fn init() {
/// Ref: https://wiki.osdev.org/Programmable_Interval_Timer#Outputs.
const PIT_MODE_TIMER_IRQ_NUM: u8 = 32;
let mut timer_irq = if kernel::apic::APIC_INSTANCE.is_completed() {
let mut timer_irq = if kernel::apic::exists() {
apic::init()
} else {
pit::init(pit::OperatingMode::SquareWaveGenerator);

View File

@ -2,12 +2,15 @@
#![allow(dead_code)]
//! The architecture-independent boot module, which provides a universal interface
//! from the bootloader to the rest of OSTD.
//!
//! The architecture-independent boot module, which provides
//! 1. a universal information getter interface from the bootloader to the
//! rest of OSTD;
//! 2. the routine for booting into the actual kernel;
//! 3. the routine for booting the other processors in the SMP context.
pub mod kcmdline;
pub mod memory_region;
pub mod smp;
use alloc::{string::String, vec::Vec};

ostd/src/boot/smp.rs (new file, 150 lines)
View File

@ -0,0 +1,150 @@
// SPDX-License-Identifier: MPL-2.0
//! Symmetric multiprocessing (SMP) boot support.
use alloc::collections::BTreeMap;
use core::sync::atomic::{AtomicBool, Ordering};
use spin::Once;
use crate::{
arch::boot::smp::{bringup_all_aps, get_num_processors},
cpu,
mm::{
paddr_to_vaddr,
page::{self, meta::KernelMeta, ContPages},
PAGE_SIZE,
},
trap,
};
pub(crate) static AP_BOOT_INFO: Once<ApBootInfo> = Once::new();
const AP_BOOT_STACK_SIZE: usize = PAGE_SIZE * 64;
pub(crate) struct ApBootInfo {
/// It holds the boot stack top pointers used by all APs.
pub(crate) boot_stack_array: ContPages<KernelMeta>,
/// `per_ap_info` maps each AP's ID to its associated boot information.
per_ap_info: BTreeMap<u32, PerApInfo>,
}
struct PerApInfo {
is_started: AtomicBool,
// TODO: When the AP starts up and begins executing tasks, the boot stack will
// no longer be used, and the `ContPages` can be deallocated (this problem also
// exists in the boot processor, but the memory it occupies should be returned
// to the frame allocator).
boot_stack_pages: ContPages<KernelMeta>,
}
static AP_LATE_ENTRY: Once<fn() -> !> = Once::new();
/// Boots all application processors.
///
/// This function should be called late in the system startup. The system must at
/// least ensure that the scheduler, ACPI table, memory allocation, and IPI module
/// have been initialized.
///
/// However, the function needs to be called before any `cpu_local!` variables are
/// accessed, including the APIC instance.
pub fn boot_all_aps() {
// TODO: support boot protocols without ACPI tables, e.g., Multiboot
let Some(num_cpus) = get_num_processors() else {
log::warn!("No processor information found. The kernel operates with a single processor.");
return;
};
log::info!("Found {} processors.", num_cpus);
// We currently assume that the bootstrap processor (BSP) always has
// processor ID 0, and that processor IDs range from 0 to `num_cpus - 1`.
AP_BOOT_INFO.call_once(|| {
let mut per_ap_info = BTreeMap::new();
// Use two pages to hold the stack pointers of all APs, thus supporting up to 1024 APs.
let boot_stack_array =
page::allocator::alloc_contiguous(2 * PAGE_SIZE, |_| KernelMeta::default()).unwrap();
assert!(num_cpus < 1024);
for ap in 1..num_cpus {
let boot_stack_pages =
page::allocator::alloc_contiguous(AP_BOOT_STACK_SIZE, |_| KernelMeta::default())
.unwrap();
let boot_stack_ptr = paddr_to_vaddr(boot_stack_pages.end_paddr());
let stack_array_ptr = paddr_to_vaddr(boot_stack_array.start_paddr()) as *mut u64;
// SAFETY: The `stack_array_ptr` is valid and aligned.
unsafe {
stack_array_ptr
.add(ap as usize)
.write_volatile(boot_stack_ptr as u64);
}
per_ap_info.insert(
ap,
PerApInfo {
is_started: AtomicBool::new(false),
boot_stack_pages,
},
);
}
ApBootInfo {
boot_stack_array,
per_ap_info,
}
});
log::info!("Booting all application processors...");
bringup_all_aps();
wait_for_all_aps_started();
log::info!("All application processors started. The BSP continues to run.");
}
/// Registers the entry function for the application processors.
///
/// Once the entry function is registered, all the application processors
/// will jump to the entry function immediately.
pub fn register_ap_entry(entry: fn() -> !) {
AP_LATE_ENTRY.call_once(|| entry);
}
#[no_mangle]
fn ap_early_entry(local_apic_id: u32) -> ! {
crate::arch::enable_cpu_features();
// SAFETY: we are on the AP.
unsafe {
cpu::cpu_local::init_on_ap(local_apic_id);
}
trap::init();
// Mark the AP as started.
let ap_boot_info = AP_BOOT_INFO.get().unwrap();
ap_boot_info
.per_ap_info
.get(&local_apic_id)
.unwrap()
.is_started
.store(true, Ordering::Release);
log::info!("Processor {} started. Spinning for tasks.", local_apic_id);
let ap_late_entry = AP_LATE_ENTRY.wait();
ap_late_entry();
}
fn wait_for_all_aps_started() {
fn is_all_aps_started() -> bool {
let ap_boot_info = AP_BOOT_INFO.get().unwrap();
ap_boot_info
.per_ap_info
.values()
.all(|info| info.is_started.load(Ordering::Acquire))
}
while !is_all_aps_started() {
core::hint::spin_loop();
}
}
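With this module in place, the APs spin in `ap_early_entry` until an entry is registered. A minimal sketch of how a kernel built on OSTD might use the new API (the names `ap_idle` and `kernel_main` are hypothetical):

```rust
// Hypothetical kernel-side usage of the SMP boot API added in this commit.
fn ap_idle() -> ! {
    // Keep the AP in a spin loop until the kernel assigns it real work.
    loop {
        core::hint::spin_loop();
    }
}

fn kernel_main() {
    // After registration, every AP that has reached (or later reaches)
    // `ap_early_entry` jumps to the registered entry.
    ostd::boot::smp::register_ap_entry(ap_idle);
}
```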

View File

@ -18,10 +18,18 @@
//! be directly used as a CPU-local object. Wrapping it in a type that has a
//! constant constructor, like [`Option<T>`], can make it CPU-local.
use alloc::vec::Vec;
use core::ops::Deref;
use align_ext::AlignExt;
use crate::{
arch,
arch, cpu,
mm::{
paddr_to_vaddr,
page::{self, meta::KernelMeta, ContPages},
PAGE_SIZE,
},
trap::{disable_local, DisabledLocalIrqGuard},
};
@ -200,6 +208,12 @@ pub(crate) unsafe fn early_init_bsp_local_base() {
}
}
/// The BSP initializes the CPU-local areas for the APs. Here we use a lock
/// that does not disable preemption, because the [`crate::sync`] version
/// needs `cpu_local` to work. Preemption and interrupts are disabled in
/// this phase, so it is safe to use this lock.
static CPU_LOCAL_STORAGES: spin::RwLock<Vec<ContPages<KernelMeta>>> = spin::RwLock::new(Vec::new());
/// Initializes the CPU local data for the bootstrap processor (BSP).
///
/// # Safety
@ -209,13 +223,77 @@ pub(crate) unsafe fn early_init_bsp_local_base() {
/// It must be guaranteed that the BSP will not access local data before
/// this function is called; otherwise, copying non-constant values
/// will result in pretty bad undefined behavior.
pub(crate) unsafe fn init_on_bsp() {
// TODO: allocate the pages for application processors and copy the
// CPU-local objects to the allocated pages.
pub unsafe fn init_on_bsp() {
let bsp_base_va = __cpu_local_start as usize;
let bsp_end_va = __cpu_local_end as usize;
let num_cpus = super::num_cpus();
let mut cpu_local_storages = CPU_LOCAL_STORAGES.write();
for cpu_i in 1..num_cpus {
let ap_pages = {
let nbytes = (bsp_end_va - bsp_base_va).align_up(PAGE_SIZE);
page::allocator::alloc_contiguous(nbytes, |_| KernelMeta::default()).unwrap()
};
let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u8;
// SAFETY: The BSP has not initialized the CPU-local area, so the objects
// in the `.cpu_local` section can be bitwise bulk copied to the AP's local
// storage. The destination memory is allocated so it is valid to write to.
unsafe {
core::ptr::copy_nonoverlapping(
bsp_base_va as *const u8,
ap_pages_ptr,
bsp_end_va - bsp_base_va,
);
}
// SAFETY: the first 4 bytes are reserved for storing the CPU ID.
unsafe {
(ap_pages_ptr as *mut u32).write(cpu_i);
}
// SAFETY: the second 4 bytes are reserved for storing the preempt count.
unsafe {
(ap_pages_ptr as *mut u32).add(1).write(0);
}
cpu_local_storages.push(ap_pages);
}
// Write the CPU ID of the BSP to the first 4 bytes of the CPU-local area.
let bsp_cpu_id_ptr = bsp_base_va as *mut u32;
// SAFETY: the first 4 bytes are reserved for storing the CPU ID.
unsafe {
bsp_cpu_id_ptr.write(0);
}
cpu::local::set_base(bsp_base_va as u64);
#[cfg(debug_assertions)]
{
IS_INITIALIZED.store(true, Ordering::Relaxed);
}
}
/// Initializes the CPU local data for the application processor (AP).
///
/// # Safety
///
/// This function can only be called on an AP.
pub unsafe fn init_on_ap(cpu_id: u32) {
let rlock = CPU_LOCAL_STORAGES.read();
let ap_pages = rlock.get(cpu_id as usize - 1).unwrap();
let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u32;
debug_assert_eq!(
cpu_id,
// SAFETY: the CPU ID is stored at the beginning of the CPU local area.
unsafe { ap_pages_ptr.read() }
);
// SAFETY: the memory will be dedicated to the AP. And we are on the AP.
unsafe {
cpu::local::set_base(ap_pages_ptr as u64);
}
}
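The per-CPU area layout that `init_on_bsp` and `init_on_ap` agree on can be summarized roughly as follows. This is a sketch only: the field names are illustrative, since the real code accesses these slots through raw pointer offsets.

```rust
// Sketch of the per-CPU area layout implied by the code above.
#[repr(C)]
struct CpuLocalArea {
    // First 4 bytes: the CPU ID, written by the BSP (0 for the BSP itself).
    cpu_id: u32,
    // Next 4 bytes: the preempt count, zeroed for each AP.
    preempt_count: u32,
    // ... followed by the rest of the `.cpu_local` objects (for APs, a bitwise
    // copy of the BSP's section), addressed relative to the base set via
    // `cpu::local::set_base`.
}
```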

View File

@ -11,31 +11,34 @@ cfg_if::cfg_if! {
}
use alloc::vec::Vec;
use core::sync::atomic::{AtomicU32, Ordering};
use bitvec::{
prelude::{BitVec, Lsb0},
slice::IterOnes,
};
use spin::Once;
use crate::{arch::boot::smp::get_processor_info, cpu};
use crate::{arch::boot::smp::get_num_processors, cpu};
/// The number of CPUs.
pub static NUM_CPUS: Once<u32> = Once::new();
/// The number of CPUs. Zero means uninitialized.
static NUM_CPUS: AtomicU32 = AtomicU32::new(0);
/// Initializes the number of CPUs.
pub fn init() {
let processor_info = get_processor_info();
let num_processors = match processor_info {
Some(info) => info.application_processors.len() + 1,
None => 1,
};
NUM_CPUS.call_once(|| num_processors as u32);
///
/// # Safety
///
/// The caller must ensure that this function is called only once at the
/// correct time when the number of CPUs is available from the platform.
pub unsafe fn init() {
let num_processors = get_num_processors().unwrap_or(1);
NUM_CPUS.store(num_processors, Ordering::Release)
}
/// Returns the number of CPUs.
pub fn num_cpus() -> u32 {
*NUM_CPUS.get().unwrap()
let num = NUM_CPUS.load(Ordering::Acquire);
debug_assert_ne!(num, 0, "The number of CPUs is not initialized");
num
}
/// Returns the ID of this CPU.

View File

@ -74,15 +74,10 @@ pub fn init() {
mm::page::allocator::init();
mm::kspace::init_boot_page_table();
mm::kspace::init_kernel_page_table(mm::init_page_meta());
// SAFETY: no CPU local objects have been accessed by this far. And
// we are on the BSP.
unsafe { cpu::cpu_local::init_on_bsp() };
mm::misc_init();
trap::init();
arch::after_all_init();
cpu::init();
arch::init_on_bsp();
bus::init();

View File

@ -63,6 +63,11 @@ impl<M: PageMeta> ContPages<M> {
self.range.start
}
/// Get the end physical address of the contiguous pages.
pub fn end_paddr(&self) -> Paddr {
self.range.end
}
/// Get the length in bytes of the contiguous pages.
pub fn len(&self) -> usize {
self.range.end - self.range.start

View File

@ -14,9 +14,9 @@
//! The reference count and usage of a page are stored in the metadata as well, leaving
//! the handle only a pointer to the metadata.
pub(crate) mod allocator;
pub(in crate::mm) mod cont_pages;
pub(in crate::mm) mod meta;
pub mod allocator;
pub mod cont_pages;
pub mod meta;
use core::{
marker::PhantomData,
@ -25,6 +25,7 @@ use core::{
sync::atomic::{AtomicU32, AtomicUsize, Ordering},
};
pub use cont_pages::ContPages;
use meta::{mapping, FrameMeta, MetaSlot, PageMeta, PageUsage};
use super::{Frame, PagingLevel, PAGE_SIZE};