diff --git a/ostd/src/arch/riscv/mod.rs b/ostd/src/arch/riscv/mod.rs index b325d5db..29a75805 100644 --- a/ostd/src/arch/riscv/mod.rs +++ b/ostd/src/arch/riscv/mod.rs @@ -29,7 +29,8 @@ pub(crate) unsafe fn late_init_on_bsp() { } irq::init(); - crate::boot::smp::boot_all_aps(); + // SAFETY: we're on the BSP and we're ready to boot all APs. + unsafe { crate::boot::smp::boot_all_aps() }; timer::init(); let _ = pci::init(); diff --git a/ostd/src/arch/x86/boot/smp.rs b/ostd/src/arch/x86/boot/smp.rs index 7829e19c..5b5d6919 100644 --- a/ostd/src/arch/x86/boot/smp.rs +++ b/ostd/src/arch/x86/boot/smp.rs @@ -101,15 +101,26 @@ pub(crate) fn count_processors() -> Option { } /// Brings up all application processors. -pub(crate) fn bringup_all_aps(num_cpus: u32) { - copy_ap_boot_code(); - fill_boot_stack_array_ptr(); - fill_boot_pt_ptr(); +/// +/// # Safety +/// +/// The caller must ensure that +/// 1. we're in the boot context of the BSP, and +/// 2. all APs have not yet been booted. +pub(crate) unsafe fn bringup_all_aps(num_cpus: u32) { + // SAFETY: The code and data to boot AP is valid to write because + // there are no readers and we are the only writer at this point. + unsafe { + copy_ap_boot_code(); + fill_boot_stack_array_ptr(); + fill_boot_pt_ptr(); + } + // SAFETY: We've properly prepared all the resources to boot APs. if_tdx_enabled!({ - wake_up_aps_via_mailbox(num_cpus); + unsafe { wake_up_aps_via_mailbox(num_cpus) }; } else { - send_boot_ipis(); + unsafe { send_boot_ipis() }; }); } @@ -130,11 +141,19 @@ pub(super) fn reclaimable_memory_region() -> MemoryRegion { ) } -fn copy_ap_boot_code() { +/// # Safety +/// +/// The caller must ensure the memory region to be filled with AP boot code is valid to write. +unsafe fn copy_ap_boot_code() { let ap_boot_start = __ap_boot_start as usize as *const u8; let len = __ap_boot_end as usize - __ap_boot_start as usize; - // SAFETY: we are copying the AP boot code to the AP boot address. + // SAFETY: + // 1. The source memory region is valid for reading because it's inside the kernel text. + // 2. The destination memory region is valid for writing because the caller upholds this. + // 3. The memory is aligned because the alignment of `u8` is 1. + // 4. The two memory regions do not overlap because the kernel text is isolated with the AP + // boot region. unsafe { core::ptr::copy_nonoverlapping( ap_boot_start, @@ -144,36 +163,42 @@ fn copy_ap_boot_code() { } } -/// Initializes the boot stack array in the AP boot code with the given pages. -fn fill_boot_stack_array_ptr() { +/// # Safety +/// +/// The caller must ensure the pointer to be filled is valid to write. +unsafe fn fill_boot_stack_array_ptr() { let pages = &crate::boot::smp::AP_BOOT_INFO .get() .unwrap() .boot_stack_array; + let vaddr = paddr_to_vaddr(pages.start_paddr()); extern "C" { - static __ap_boot_stack_array_pointer: u64; + static mut __ap_boot_stack_array_pointer: usize; } - // SAFETY: This pointer points to a static variable defined in the `ap_boot.S`. - let ptr = unsafe { &__ap_boot_stack_array_pointer as *const u64 as *mut u64 }; - // SAFETY: We only write to it once. + // SAFETY: The safety is upheld by the caller. unsafe { - ptr.write_volatile(paddr_to_vaddr(pages.start_paddr()) as u64); + __ap_boot_stack_array_pointer = vaddr; } } -fn fill_boot_pt_ptr() { +/// # Safety +/// +/// The caller must ensure the pointer to be filled is valid to write. +unsafe fn fill_boot_pt_ptr() { extern "C" { - static __boot_page_table_pointer: u32; + static mut __boot_page_table_pointer: u32; } - let boot_pt = crate::mm::page_table::boot_pt::with_borrow(|pt| pt.root_address()).unwrap(); - // SAFETY: this pointer points to a static variable defined in the `ap_boot.S`. - let ptr = unsafe { &__boot_page_table_pointer as *const u32 as *mut u32 }; - // SAFETY: We only write to it once. + let boot_pt = crate::mm::page_table::boot_pt::with_borrow(|pt| pt.root_address()) + .unwrap() + .try_into() + .unwrap(); + + // SAFETY: The safety is upheld by the caller. unsafe { - ptr.write_volatile(boot_pt as u32); + __boot_page_table_pointer = boot_pt; } } @@ -183,8 +208,13 @@ extern "C" { fn __ap_boot_end(); } +/// Wakes up all application processors via the ACPI multiprocessor mailbox structure. +/// +/// # Safety +/// +/// The safety preconditions are the same as [`send_boot_ipis`]. #[cfg(feature = "cvm_guest")] -fn wake_up_aps_via_mailbox(num_cpus: u32) { +unsafe fn wake_up_aps_via_mailbox(num_cpus: u32) { use acpi::platform::wakeup_aps; use crate::arch::x86::kernel::acpi::AcpiMemoryHandler; @@ -217,21 +247,38 @@ fn wake_up_aps_via_mailbox(num_cpus: u32) { /// but send the second SIPI directly (checking whether each core is /// started successfully one by one will bring extra overhead). For /// APs that have been started, this signal will not bring any cost. -fn send_boot_ipis() { - send_init_to_all_aps(); - spin_wait_cycles(100_000_000); +/// +/// # Safety +/// +/// The caller must ensure that all application processors can be +/// safely booted by ensuring that: +/// 1. We're in the boot context of the BSP and all APs have not yet +/// been booted. +/// 2. We've properly prepared all the resources for the application +/// processors to boot successfully (e.g., each AP's page table +/// and stack). +unsafe fn send_boot_ipis() { + // SAFETY: We're sending IPIs to boot all application processors. + // The safety is upheld by the caller. + unsafe { + send_init_to_all_aps(); + spin_wait_cycles(100_000_000); - send_init_deassert(); - spin_wait_cycles(20_000_000); + send_init_deassert(); + spin_wait_cycles(20_000_000); - send_startup_to_all_aps(); - spin_wait_cycles(20_000_000); + send_startup_to_all_aps(); + spin_wait_cycles(20_000_000); - send_startup_to_all_aps(); - spin_wait_cycles(20_000_000); + send_startup_to_all_aps(); + spin_wait_cycles(20_000_000); + } } -fn send_startup_to_all_aps() { +/// # Safety +/// +/// The caller should ensure it's valid to send STARTUP IPIs to all CPUs excluding self. +unsafe fn send_startup_to_all_aps() { let icr = Icr::new( ApicId::from(0), DestinationShorthand::AllExcludingSelf, @@ -242,11 +289,14 @@ fn send_startup_to_all_aps() { DeliveryMode::StartUp, (AP_BOOT_START_PA / PAGE_SIZE) as u8, ); - // SAFETY: we are sending startup IPI to all APs. + // SAFETY: The safety is upheld by the caller. apic::with_borrow(|apic| unsafe { apic.send_ipi(icr) }); } -fn send_init_to_all_aps() { +/// # Safety +/// +/// The caller should ensure it's valid to send INIT IPIs to all CPUs excluding self. +unsafe fn send_init_to_all_aps() { let icr = Icr::new( ApicId::from(0), DestinationShorthand::AllExcludingSelf, @@ -257,11 +307,14 @@ fn send_init_to_all_aps() { DeliveryMode::Init, 0, ); - // SAFETY: we are sending init IPI to all APs. + // SAFETY: The safety is upheld by the caller. apic::with_borrow(|apic| unsafe { apic.send_ipi(icr) }); } -fn send_init_deassert() { +/// # Safety +/// +/// The caller should ensure it's valid to deassert INIT IPIs for all CPUs excluding self. +unsafe fn send_init_deassert() { let icr = Icr::new( ApicId::from(0), DestinationShorthand::AllIncludingSelf, @@ -272,7 +325,7 @@ fn send_init_deassert() { DeliveryMode::Init, 0, ); - // SAFETY: we are sending deassert IPI to all APs. + // SAFETY: The safety is upheld by the caller. apic::with_borrow(|apic| unsafe { apic.send_ipi(icr) }); } @@ -291,8 +344,10 @@ fn spin_wait_cycles(c: u64) { use core::arch::x86_64::_rdtsc; + // SAFETY: Reading CPU cycles is always safe. let start = unsafe { _rdtsc() }; + // SAFETY: Reading CPU cycles is always safe. while duration(start, unsafe { _rdtsc() }) < c { core::hint::spin_loop(); } diff --git a/ostd/src/arch/x86/mod.rs b/ostd/src/arch/x86/mod.rs index 4a2706f0..f6086114 100644 --- a/ostd/src/arch/x86/mod.rs +++ b/ostd/src/arch/x86/mod.rs @@ -95,7 +95,8 @@ pub(crate) unsafe fn late_init_on_bsp() { kernel::tsc::init_tsc_freq(); timer::init_bsp(); - crate::boot::smp::boot_all_aps(); + // SAFETY: we're on the BSP and we're ready to boot all APs. + unsafe { crate::boot::smp::boot_all_aps() }; if_tdx_enabled!({ } else { diff --git a/ostd/src/boot/smp.rs b/ostd/src/boot/smp.rs index 2f16a392..84ebd517 100644 --- a/ostd/src/boot/smp.rs +++ b/ostd/src/boot/smp.rs @@ -40,14 +40,16 @@ struct PerApInfo { static AP_LATE_ENTRY: Once = Once::new(); -/// Boot all application processors. +/// Boots all application processors. /// /// This function should be called late in the system startup. The system must at /// least ensure that the scheduler, ACPI table, memory allocation, and IPI module /// have been initialized. /// -/// However, the function need to be called before any `cpu_local!` variables are -/// accessed, including the APIC instance. +/// # Safety +/// +/// This function can only be called in the boot context of the BSP where APs have +/// not yet been booted. pub fn boot_all_aps() { let num_cpus = num_cpus() as u32; if num_cpus == 1 { @@ -97,7 +99,8 @@ pub fn boot_all_aps() { log::info!("Booting all application processors..."); - bringup_all_aps(num_cpus); + // SAFETY: The safety is upheld by the caller. + unsafe { bringup_all_aps(num_cpus) }; wait_for_all_aps_started(); log::info!("All application processors started. The BSP continues to run.");