From 3ae884081f0ae997714e554ac6177510f815cc93 Mon Sep 17 00:00:00 2001
From: Zhang Junyang
Date: Fri, 19 Jul 2024 13:23:28 +0000
Subject: [PATCH] Fix both the preempt count atomicity and the CPU-local init
 problem

---
 osdk/src/base_crate/x86_64.ld.template   | 10 +++
 ostd/src/arch/x86/cpu/local.rs           | 87 ++++++++++++++++++++++++
 ostd/src/arch/x86/{cpu.rs => cpu/mod.rs} | 26 +------
 ostd/src/cpu/cpu_local.rs                | 45 ++++++++++--
 ostd/src/lib.rs                          |  3 +
 ostd/src/task/processor.rs               | 51 ++++++++------
 6 files changed, 173 insertions(+), 49 deletions(-)
 create mode 100644 ostd/src/arch/x86/cpu/local.rs
 rename ostd/src/arch/x86/{cpu.rs => cpu/mod.rs} (95%)

diff --git a/osdk/src/base_crate/x86_64.ld.template b/osdk/src/base_crate/x86_64.ld.template
index 18539eb1f..0682ed20d 100644
--- a/osdk/src/base_crate/x86_64.ld.template
+++ b/osdk/src/base_crate/x86_64.ld.template
@@ -58,6 +58,16 @@ SECTIONS
     # areas for the application processors.
     .cpu_local : AT(ADDR(.cpu_local) - KERNEL_VMA) {
         __cpu_local_start = .;
+
+        # These 4 bytes are used to store the CPU ID.
+        . += 4;
+
+        # These 4 bytes are used to store the number of preemption locks held.
+        # The reason is stated in the Rust documentation of
+        # [`ostd::task::processor::PreemptInfo`].
+        __cpu_local_preempt_lock_count = . - __cpu_local_start;
+        . += 4;
+
         KEEP(*(SORT(.cpu_local)))
         __cpu_local_end = .;
     }
diff --git a/ostd/src/arch/x86/cpu/local.rs b/ostd/src/arch/x86/cpu/local.rs
new file mode 100644
index 000000000..325d692d2
--- /dev/null
+++ b/ostd/src/arch/x86/cpu/local.rs
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Architecture dependent CPU-local information utilities.
+
+use x86_64::registers::segmentation::{Segment64, FS};
+
+/// Sets the base address for the CPU local storage by writing to the FS base model-specific register.
+/// This operation is marked as `unsafe` because it directly interfaces with low-level CPU registers.
+///
+/// # Safety
+///
+/// - This function is safe to call provided that the FS register is dedicated entirely for CPU local storage
+///   and is not concurrently accessed for other purposes.
+/// - The caller must ensure that `addr` is a valid address and properly aligned, as required by the CPU.
+/// - This function should only be called in contexts where the CPU is in a state to accept such changes,
+///   such as during processor initialization.
+pub(crate) unsafe fn set_base(addr: u64) {
+    FS::write_base(x86_64::addr::VirtAddr::new(addr));
+}
+
+/// Gets the base address for the CPU local storage by reading the FS base model-specific register.
+pub(crate) fn get_base() -> u64 {
+    FS::read_base().as_u64()
+}
+
+pub mod preempt_lock_count {
+    //! We need to increment/decrement the per-CPU preemption lock count using
+    //! a single instruction. This requirement is stated by
+    //! [`crate::task::processor::PreemptInfo`].
+
+    /// The GDT ensures that the FS segment is initialized to zero on boot.
+    /// This assertion checks that the base address has been set.
+    macro_rules! debug_assert_initialized {
+        () => {
+            // The compiler may think that [`super::get_base`] has side effects
+            // so it may not be optimized out. We make sure that it will be
+            // conditionally compiled only in debug builds.
+            #[cfg(debug_assertions)]
+            debug_assert_ne!(super::get_base(), 0);
+        };
+    }
+
+    /// Increments the per-CPU preemption lock count using one instruction.
+    pub(crate) fn inc() {
+        debug_assert_initialized!();
+
+        // SAFETY: The inline assembly increments the lock count in one
+        // instruction without side effects.
+        unsafe {
+            core::arch::asm!(
+                "add dword ptr fs:[__cpu_local_preempt_lock_count], 1",
+                options(nostack),
+            );
+        }
+    }
+
+    /// Decrements the per-CPU preemption lock count using one instruction.
+    pub(crate) fn dec() {
+        debug_assert_initialized!();
+
+        // SAFETY: The inline assembly decrements the lock count in one
+        // instruction without side effects.
+        unsafe {
+            core::arch::asm!(
+                "sub dword ptr fs:[__cpu_local_preempt_lock_count], 1",
+                options(nostack),
+            );
+        }
+    }
+
+    /// Gets the per-CPU preemption lock count using one instruction.
+    pub(crate) fn get() -> u32 {
+        debug_assert_initialized!();
+
+        let count: u32;
+        // SAFETY: The inline assembly reads the lock count in one instruction
+        // without side effects.
+        unsafe {
+            core::arch::asm!(
+                "mov {0:e}, fs:[__cpu_local_preempt_lock_count]",
+                out(reg) count,
+                options(nostack, readonly),
+            );
+        }
+        count
+    }
+}
diff --git a/ostd/src/arch/x86/cpu.rs b/ostd/src/arch/x86/cpu/mod.rs
similarity index 95%
rename from ostd/src/arch/x86/cpu.rs
rename to ostd/src/arch/x86/cpu/mod.rs
index 7692d472b..636fc4161 100644
--- a/ostd/src/arch/x86/cpu.rs
+++ b/ostd/src/arch/x86/cpu/mod.rs
@@ -2,6 +2,8 @@
 
 //! CPU.
 
+pub mod local;
+
 use alloc::vec::Vec;
 use core::{
     arch::x86_64::{_fxrstor, _fxsave},
@@ -18,10 +20,7 @@ use log::debug;
 use tdx_guest::tdcall;
 pub use trapframe::GeneralRegs as RawGeneralRegs;
 use trapframe::UserContext as RawUserContext;
-use x86_64::registers::{
-    rflags::RFlags,
-    segmentation::{Segment64, FS},
-};
+use x86_64::registers::rflags::RFlags;
 
 #[cfg(feature = "intel_tdx")]
 use crate::arch::tdx_guest::{handle_virtual_exception, TdxTrapFrame};
@@ -673,22 +672,3 @@ impl Default for FpRegs {
 struct FxsaveArea {
     data: [u8; 512], // 512 bytes
 }
-
-/// Sets the base address for the CPU local storage by writing to the FS base model-specific register.
-/// This operation is marked as `unsafe` because it directly interfaces with low-level CPU registers.
-///
-/// # Safety
-///
-/// - This function is safe to call provided that the FS register is dedicated entirely for CPU local storage
-///   and is not concurrently accessed for other purposes.
-/// - The caller must ensure that `addr` is a valid address and properly aligned, as required by the CPU.
-/// - This function should only be called in contexts where the CPU is in a state to accept such changes,
-///   such as during processor initialization.
-pub(crate) unsafe fn set_cpu_local_base(addr: u64) {
-    FS::write_base(x86_64::addr::VirtAddr::new(addr));
-}
-
-/// Gets the base address for the CPU local storage by reading the FS base model-specific register.
-pub(crate) fn get_cpu_local_base() -> u64 {
-    FS::read_base().as_u64()
-}
diff --git a/ostd/src/cpu/cpu_local.rs b/ostd/src/cpu/cpu_local.rs
index ef3f96c50..71beac6a5 100644
--- a/ostd/src/cpu/cpu_local.rs
+++ b/ostd/src/cpu/cpu_local.rs
@@ -21,7 +21,7 @@
 use core::ops::Deref;
 
 use crate::{
-    cpu::{get_cpu_local_base, set_cpu_local_base},
+    arch,
     trap::{disable_local, DisabledLocalIrqGuard},
 };
 
@@ -82,6 +82,13 @@ impl<T> !Clone for CpuLocal<T> {}
 // other tasks as they should live on other CPUs to make sending useful.
 impl<T> !Send for CpuLocal<T> {}
 
+// A check to ensure that the CPU-local object is never accessed before the
+// initialization for all CPUs.
+#[cfg(debug_assertions)]
+use core::sync::atomic::{AtomicBool, Ordering};
+#[cfg(debug_assertions)]
+static IS_INITIALIZED: AtomicBool = AtomicBool::new(false);
+
 impl<T> CpuLocal<T> {
     /// Initialize a CPU-local object.
     ///
@@ -115,6 +122,11 @@
     /// This function calculates the virtual address of the CPU-local object based on the per-
     /// cpu base address and the offset in the BSP.
     fn get(&self) -> *const T {
+        // CPU-local objects should be initialized before being accessed. It should be ensured
+        // by the implementation of OSTD initialization.
+        #[cfg(debug_assertions)]
+        debug_assert!(IS_INITIALIZED.load(Ordering::Relaxed));
+
         let offset = {
             let bsp_va = self as *const _ as usize;
             let bsp_base = __cpu_local_start as usize;
@@ -124,7 +136,7 @@ impl<T> CpuLocal<T> {
             bsp_va - bsp_base as usize
         };
 
-        let local_base = get_cpu_local_base() as usize;
+        let local_base = arch::cpu::local::get_base() as usize;
         let local_va = local_base + offset;
 
         // A sanity check about the alignment.
@@ -170,6 +182,24 @@ impl Deref for CpuLocalDerefGuard<'_, T> {
     }
 }
+/// Sets the base address of the CPU-local storage for the bootstrap processor.
+///
+/// It should be called early to let [`crate::task::disable_preempt`] work,
+/// which needs to update a CPU-local preempt lock count. Otherwise it may
+/// panic when calling [`crate::task::disable_preempt`].
+///
+/// # Safety
+///
+/// It should be called only once and only on the BSP.
+pub(crate) unsafe fn early_init_bsp_local_base() {
+    let start_base_va = __cpu_local_start as usize as u64;
+    // SAFETY: The base to be set is the start of the `.cpu_local` section,
+    // where accessing the CPU-local objects has defined behavior.
+    unsafe {
+        arch::cpu::local::set_base(start_base_va);
+    }
+}
+
 /// Initializes the CPU local data for the bootstrap processor (BSP).
 ///
 /// # Safety
 ///
@@ -179,9 +209,14 @@ impl Deref for CpuLocalDerefGuard<'_, T> {
 /// It must be guaranteed that the BSP will not access local data before
 /// this function being called, otherwise copying non-constant values
 /// will result in pretty bad undefined behavior.
-pub unsafe fn init_on_bsp() {
-    let start_base_va = __cpu_local_start as usize as u64;
-    set_cpu_local_base(start_base_va);
+pub(crate) unsafe fn init_on_bsp() {
+    // TODO: allocate the pages for application processors and copy the
+    // CPU-local objects to the allocated pages.
+
+    #[cfg(debug_assertions)]
+    {
+        IS_INITIALIZED.store(true, Ordering::Relaxed);
+    }
 }
 
 // These symbols are provided by the linker script.
diff --git a/ostd/src/lib.rs b/ostd/src/lib.rs
index abc7e5773..83a9e42d6 100644
--- a/ostd/src/lib.rs
+++ b/ostd/src/lib.rs
@@ -59,6 +59,9 @@ pub use self::{cpu::cpu_local::CpuLocal, error::Error, prelude::Result};
 
 pub fn init() {
     arch::before_all_init();
+    // SAFETY: This function is called only once and only on the BSP.
+    unsafe { cpu::cpu_local::early_init_bsp_local_base() };
+
     mm::heap_allocator::init();
 
     boot::init();
diff --git a/ostd/src/task/processor.rs b/ostd/src/task/processor.rs
index c6dfb9932..71acd5ffc 100644
--- a/ostd/src/task/processor.rs
+++ b/ostd/src/task/processor.rs
@@ -1,17 +1,14 @@
 // SPDX-License-Identifier: MPL-2.0
 
 use alloc::sync::Arc;
-use core::{
-    cell::RefCell,
-    sync::atomic::{AtomicUsize, Ordering::Relaxed},
-};
+use core::cell::RefCell;
 
 use super::{
     scheduler::{fetch_task, GLOBAL_SCHEDULER},
     task::{context_switch, TaskContext},
     Task, TaskStatus,
 };
-use crate::cpu_local;
+use crate::{arch, cpu_local};
 
 pub struct Processor {
     current: Option<Arc<Task>>,
@@ -154,38 +151,50 @@ fn switch_to_task(next_task: Arc<Task>) {
     // to the next task switching.
 }
 
-cpu_local! {
-    static PREEMPT_COUNT: PreemptInfo = PreemptInfo::new();
-}
+static PREEMPT_COUNT: PreemptInfo = PreemptInfo::new();
 
-/// Currently, ``PreemptInfo`` only holds the number of spin
-/// locks held by the current CPU. When it has a non-zero value,
-/// the CPU cannot call ``schedule()``.
-struct PreemptInfo {
-    num_locks: AtomicUsize,
-}
+/// Currently, it only holds the number of preemption locks held by the
+/// current CPU. When it has a non-zero value, the CPU cannot call
+/// [`schedule()`].
+///
+/// For the per-CPU preemption lock count, we cannot afford two non-atomic
+/// operations to increment and decrement the count. The [`crate::cpu_local`]
+/// implementation is free to read the base register and then calculate the
+/// address of the per-CPU variable using an additional instruction. Interrupts
+/// can happen between the address calculation and modification to that
+/// address. If the task is preempted to another CPU by this interrupt, the
+/// count of the original CPU will be mistakenly modified. To avoid this, we
+/// introduce [`crate::arch::cpu::local::preempt_lock_count`]. For x86_64 we
+/// can implement this using one instruction. In other less expressive
+/// architectures, we may need to disable interrupts.
+///
+/// Also, the preemption count is reserved in the `.cpu_local` section
+/// specified in the linker script. The reason is that we need to access the
+/// preemption count before we can copy the section for application processors.
+/// So, the preemption count is not copied from the bootstrap processor's
+/// section during initialization. Instead, it is initialized to zero for
+/// application processors.
+struct PreemptInfo {}
 
 impl PreemptInfo {
     const fn new() -> Self {
-        Self {
-            num_locks: AtomicUsize::new(0),
-        }
+        Self {}
     }
 
     fn increase_num_locks(&self) {
-        self.num_locks.fetch_add(1, Relaxed);
+        arch::cpu::local::preempt_lock_count::inc();
    }
 
     fn decrease_num_locks(&self) {
-        self.num_locks.fetch_sub(1, Relaxed);
+        arch::cpu::local::preempt_lock_count::dec();
    }
 
     fn is_preemptive(&self) -> bool {
-        self.num_locks.load(Relaxed) == 0
+        arch::cpu::local::preempt_lock_count::get() == 0
    }
 
     fn num_locks(&self) -> usize {
-        self.num_locks.load(Relaxed)
+        arch::cpu::local::preempt_lock_count::get() as usize
    }
 }
 
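For context, below is a minimal sketch (not part of the patch) of how a guard-based preemption API could drive the new one-instruction counter. Only the `preempt_lock_count::{inc, dec}` functions and the `PREEMPT_COUNT` methods above come from the diff; the guard type, its name, and its exact wiring are assumptions for illustration, although `crate::task::disable_preempt` is referenced by the doc comments in the patch.

// A hypothetical guard-based wrapper around the per-CPU counter introduced
// above; the type and function names here are illustrative assumptions.
use core::marker::PhantomData;

pub struct DisablePreemptGuard {
    // Raw pointers are neither `Send` nor `Sync`, so the guard cannot leave
    // the CPU whose counter it incremented.
    _not_send_sync: PhantomData<*mut ()>,
}

pub fn disable_preempt() -> DisablePreemptGuard {
    // A single fs-relative `add` instruction, so an interrupt arriving here
    // cannot observe a half-finished read-modify-write on another CPU's count.
    crate::arch::cpu::local::preempt_lock_count::inc();
    DisablePreemptGuard {
        _not_send_sync: PhantomData,
    }
}

impl Drop for DisablePreemptGuard {
    fn drop(&mut self) {
        // The matching single-instruction decrement; once the count returns
        // to zero, the scheduler is free to preempt the current task again.
        crate::arch::cpu::local::preempt_lock_count::dec();
    }
}

Because both the increment and the decrement compile to one `fs`-relative instruction, a task migrated by an interrupt can never corrupt the count of the CPU it left, which is exactly the property the `PreemptInfo` documentation above argues for.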