diff --git a/kernel/aster-nix/src/taskless.rs b/kernel/aster-nix/src/taskless.rs index 14bd44fa..64ac8f82 100644 --- a/kernel/aster-nix/src/taskless.rs +++ b/kernel/aster-nix/src/taskless.rs @@ -1,7 +1,5 @@ // SPDX-License-Identifier: MPL-2.0 -#![allow(dead_code)] - use alloc::{boxed::Box, sync::Arc}; use core::{ cell::RefCell, @@ -10,7 +8,7 @@ use core::{ }; use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink}; -use ostd::{cpu_local, trap::SoftIrqLine, CpuLocal}; +use ostd::{cpu::local::CpuLocal, cpu_local, trap::SoftIrqLine}; use crate::softirq_id::{TASKLESS_SOFTIRQ_ID, TASKLESS_URGENT_SOFTIRQ_ID}; diff --git a/osdk/src/base_crate/x86_64.ld.template b/osdk/src/base_crate/x86_64.ld.template index 57087f86..2803d195 100644 --- a/osdk/src/base_crate/x86_64.ld.template +++ b/osdk/src/base_crate/x86_64.ld.template @@ -122,13 +122,7 @@ SECTIONS # These 4 bytes are used to store the CPU ID. . += 4; - - # These 4 bytes are used to store the number of preemption locks held. - # The reason is stated in the Rust documentation of - # [`ostd::task::processor::PreemptInfo`]. - __cpu_local_preempt_lock_count = . - __cpu_local_start; - . += 4; - + KEEP(*(SORT(.cpu_local))) __cpu_local_end = .; } diff --git a/ostd/src/arch/x86/cpu/local.rs b/ostd/src/arch/x86/cpu/local.rs index 325d692d..b5a64732 100644 --- a/ostd/src/arch/x86/cpu/local.rs +++ b/ostd/src/arch/x86/cpu/local.rs @@ -23,65 +23,205 @@ pub(crate) fn get_base() -> u64 { FS::read_base().as_u64() } -pub mod preempt_lock_count { - //! We need to increment/decrement the per-CPU preemption lock count using - //! a single instruction. This requirement is stated by - //! [`crate::task::processor::PreemptInfo`]. +use crate::cpu::local::single_instr::{ + SingleInstructionAddAssign, SingleInstructionBitAndAssign, SingleInstructionBitOrAssign, + SingleInstructionBitXorAssign, SingleInstructionLoad, SingleInstructionStore, + SingleInstructionSubAssign, +}; - /// The GDT ensures that the FS segment is initialized to zero on boot. - /// This assertion checks that the base address has been set. - macro_rules! debug_assert_initialized { - () => { - // The compiler may think that [`super::get_base`] has side effects - // so it may not be optimized out. We make sure that it will be - // conditionally compiled only in debug builds. - #[cfg(debug_assertions)] - debug_assert_ne!(super::get_base(), 0); - }; - } +/// The GDT ensures that the FS segment is initialized to zero on boot. +/// This assertion checks that the base address has been set. +macro_rules! debug_assert_initialized { + () => { + // The compiler may think that [`super::get_base`] has side effects + // so it may not be optimized out. We make sure that it will be + // conditionally compiled only in debug builds. + #[cfg(debug_assertions)] + debug_assert_ne!(get_base(), 0); + }; +} - /// Increments the per-CPU preemption lock count using one instruction. - pub(crate) fn inc() { +macro_rules! 
impl_numeric_single_instruction_for { + ($([$typ: ty, $inout_type: ident, $register_format: expr])*) => {$( + + impl SingleInstructionAddAssign<$typ> for $typ { + unsafe fn add_assign(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("add fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + impl SingleInstructionSubAssign<$typ> for $typ { + unsafe fn sub_assign(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("sub fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + impl SingleInstructionBitAndAssign<$typ> for $typ { + unsafe fn bitand_assign(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("and fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + impl SingleInstructionBitOrAssign<$typ> for $typ { + unsafe fn bitor_assign(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("or fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + impl SingleInstructionBitXorAssign<$typ> for $typ { + unsafe fn bitxor_assign(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("xor fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + impl SingleInstructionLoad for $typ { + unsafe fn load(offset: *const Self) -> Self { + debug_assert_initialized!(); + + let val: Self; + core::arch::asm!( + concat!("mov {0", $register_format, "}, fs:[{1}]"), + out($inout_type) val, + in(reg) offset, + options(nostack, readonly), + ); + val + } + } + + impl SingleInstructionStore for $typ { + unsafe fn store(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("mov fs:[{0}], {1", $register_format, "}"), + in(reg) offset, + in($inout_type) val, + options(nostack), + ); + } + } + + )*}; +} + +impl_numeric_single_instruction_for!( + [u64, reg, ":r"] + [usize, reg, ":r"] + [u32, reg, ":e"] + [u16, reg, ":x"] + [u8, reg_byte, ""] + [i64, reg, ":r"] + [isize, reg, ":r"] + [i32, reg, ":e"] + [i16, reg, ":x"] + [i8, reg_byte, ""] +); + +macro_rules! impl_generic_single_instruction_for { + ($([<$gen_type:ident $(, $more_gen_type:ident)*>, $typ:ty])*) => {$( + + impl<$gen_type $(, $more_gen_type)*> SingleInstructionLoad for $typ { + unsafe fn load(offset: *const Self) -> Self { + debug_assert_initialized!(); + + let val: Self; + core::arch::asm!( + concat!("mov {0}, fs:[{1}]"), + out(reg) val, + in(reg) offset, + options(nostack, readonly), + ); + val + } + } + + impl<$gen_type $(, $more_gen_type)*> SingleInstructionStore for $typ { + unsafe fn store(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + core::arch::asm!( + concat!("mov fs:[{0}], {1}"), + in(reg) offset, + in(reg) val, + options(nostack), + ); + } + } + )*} +} + +impl_generic_single_instruction_for!( + [, *const T] + [, *mut T] + [, fn(T) -> R] +); + +// In this module, booleans are represented by the least significant bit of a +// `u8` type. Other bits must be zero. This definition is compatible with the +// Rust reference: . 
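+// A `bool` therefore loads and stores through the same byte-sized,
+// `fs`-relative `mov` as the `u8` case above; the 0/1 `debug_assert` in
+// `load` is compiled only into debug builds.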
+ +impl SingleInstructionLoad for bool { + unsafe fn load(offset: *const Self) -> Self { debug_assert_initialized!(); - // SAFETY: The inline assembly increments the lock count in one - // instruction without side effects. - unsafe { - core::arch::asm!( - "add dword ptr fs:[__cpu_local_preempt_lock_count], 1", - options(nostack), - ); - } - } - - /// Decrements the per-CPU preemption lock count using one instruction. - pub(crate) fn dec() { - debug_assert_initialized!(); - - // SAFETY: The inline assembly decrements the lock count in one - // instruction without side effects. - unsafe { - core::arch::asm!( - "sub dword ptr fs:[__cpu_local_preempt_lock_count], 1", - options(nostack), - ); - } - } - - /// Gets the per-CPU preemption lock count using one instruction. - pub(crate) fn get() -> u32 { - debug_assert_initialized!(); - - let count: u32; - // SAFETY: The inline assembly reads the lock count in one instruction - // without side effects. - unsafe { - core::arch::asm!( - "mov {0:e}, fs:[__cpu_local_preempt_lock_count]", - out(reg) count, - options(nostack, readonly), - ); - } - count + let val: u8; + core::arch::asm!( + "mov {0}, fs:[{1}]", + out(reg_byte) val, + in(reg) offset, + options(nostack, readonly), + ); + debug_assert!(val == 1 || val == 0); + val == 1 + } +} + +impl SingleInstructionStore for bool { + unsafe fn store(offset: *mut Self, val: Self) { + debug_assert_initialized!(); + + let val: u8 = if val { 1 } else { 0 }; + core::arch::asm!( + "mov fs:[{0}], {1}", + in(reg) offset, + in(reg_byte) val, + options(nostack), + ); } } diff --git a/ostd/src/arch/x86/mod.rs b/ostd/src/arch/x86/mod.rs index 1693e0a8..3e9b3c36 100644 --- a/ostd/src/arch/x86/mod.rs +++ b/ostd/src/arch/x86/mod.rs @@ -73,7 +73,7 @@ pub(crate) fn init_on_bsp() { // SAFETY: no CPU local objects have been accessed by this far. And // we are on the BSP. - unsafe { crate::cpu::cpu_local::init_on_bsp() }; + unsafe { crate::cpu::local::init_on_bsp() }; crate::boot::smp::boot_all_aps(); diff --git a/ostd/src/arch/x86/trap.rs b/ostd/src/arch/x86/trap.rs index dc0ce895..a38b498e 100644 --- a/ostd/src/arch/x86/trap.rs +++ b/ostd/src/arch/x86/trap.rs @@ -2,8 +2,6 @@ //! Handles trap. -use core::sync::atomic::{AtomicBool, Ordering}; - use align_ext::AlignExt; use log::debug; #[cfg(feature = "intel_tdx")] @@ -15,7 +13,7 @@ use super::ex_table::ExTable; use crate::arch::{cpu::VIRTUALIZATION_EXCEPTION, tdx_guest::handle_virtual_exception}; use crate::{ cpu::{CpuException, CpuExceptionInfo, PageFaultErrorCode, PAGE_FAULT}, - cpu_local, + cpu_local_cell, mm::{ kspace::{KERNEL_PAGE_TABLE, LINEAR_MAPPING_BASE_VADDR, LINEAR_MAPPING_VADDR_RANGE}, page_prop::{CachePolicy, PageProperty}, @@ -25,15 +23,15 @@ use crate::{ trap::call_irq_callback_functions, }; -cpu_local! { - static IS_KERNEL_INTERRUPTED: AtomicBool = AtomicBool::new(false); +cpu_local_cell! { + static IS_KERNEL_INTERRUPTED: bool = false; } /// Returns true if this function is called within the context of an IRQ handler /// and the IRQ occurs while the CPU is executing in the kernel mode. /// Otherwise, it returns false. 
pub fn is_kernel_interrupted() -> bool { - IS_KERNEL_INTERRUPTED.load(Ordering::Acquire) + IS_KERNEL_INTERRUPTED.load() } /// Only from kernel @@ -64,9 +62,9 @@ extern "sysv64" fn trap_handler(f: &mut TrapFrame) { } } } else { - IS_KERNEL_INTERRUPTED.store(true, Ordering::Release); + IS_KERNEL_INTERRUPTED.store(true); call_irq_callback_functions(f, f.trap_num); - IS_KERNEL_INTERRUPTED.store(false, Ordering::Release); + IS_KERNEL_INTERRUPTED.store(false); } } diff --git a/ostd/src/boot/smp.rs b/ostd/src/boot/smp.rs index a40b417e..05b93845 100644 --- a/ostd/src/boot/smp.rs +++ b/ostd/src/boot/smp.rs @@ -115,7 +115,7 @@ fn ap_early_entry(local_apic_id: u32) -> ! { // SAFETY: we are on the AP. unsafe { - cpu::cpu_local::init_on_ap(local_apic_id); + cpu::local::init_on_ap(local_apic_id); } trap::init(); diff --git a/ostd/src/cpu/cpu_local.rs b/ostd/src/cpu/cpu_local.rs deleted file mode 100644 index e72ed1ff..00000000 --- a/ostd/src/cpu/cpu_local.rs +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -//! CPU local storage. -//! -//! This module provides a mechanism to define CPU-local objects. -//! -//! This is acheived by placing the CPU-local objects in a special section -//! `.cpu_local`. The bootstrap processor (BSP) uses the objects linked in this -//! section, and these objects are copied to dynamically allocated local -//! storage of each application processors (AP) during the initialization -//! process. -//! -//! Such a mechanism exploits the fact that constant values of non-[`Copy`] -//! types can be bitwise copied. For example, a [`Option`] object, though -//! being not [`Copy`], have a constant constructor [`Option::None`] that -//! produces a value that can be bitwise copied to create a new instance. -//! [`alloc::sync::Arc`] however, don't have such a constructor, and thus cannot -//! be directly used as a CPU-local object. Wrapping it in a type that has a -//! constant constructor, like [`Option`], can make it CPU-local. - -use alloc::vec::Vec; -use core::ops::Deref; - -use align_ext::AlignExt; - -use crate::{ - arch, cpu, - mm::{ - paddr_to_vaddr, - page::{self, meta::KernelMeta, ContPages}, - PAGE_SIZE, - }, - trap::{disable_local, DisabledLocalIrqGuard}, -}; - -/// Defines a CPU-local variable. -/// -/// # Example -/// -/// ```rust -/// use crate::cpu_local; -/// use core::cell::RefCell; -/// -/// cpu_local! { -/// static FOO: RefCell = RefCell::new(1); -/// -/// #[allow(unused)] -/// pub static BAR: RefCell = RefCell::new(1.0); -/// } -/// -/// println!("FOO VAL: {:?}", *FOO.borrow()); -/// ``` -#[macro_export] -macro_rules! cpu_local { - ($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => { - $( - #[link_section = ".cpu_local"] - $(#[$attr])* $vis static $name: $crate::CpuLocal<$t> = { - let val = $init; - // SAFETY: The CPU local variable instantiated is statically - // stored in the special `.cpu_local` section. - unsafe { - $crate::CpuLocal::__new(val) - } - }; - )* - }; -} - -/// CPU-local objects. -/// -/// A CPU-local object only gives you immutable references to the underlying value. -/// To mutate the value, one can use atomic values (e.g., [`AtomicU32`]) or internally mutable -/// objects (e.g., [`RefCell`]). -/// -/// [`AtomicU32`]: core::sync::atomic::AtomicU32 -/// [`RefCell`]: core::cell::RefCell -pub struct CpuLocal(T); - -// SAFETY: At any given time, only one task can access the inner value T -// of a cpu-local variable even if `T` is not `Sync`. 
-unsafe impl Sync for CpuLocal {} - -// Prevent valid instances of CpuLocal from being copied to any memory -// area outside the .cpu_local section. -impl !Copy for CpuLocal {} -impl !Clone for CpuLocal {} - -// In general, it does not make any sense to send instances of CpuLocal to -// other tasks as they should live on other CPUs to make sending useful. -impl !Send for CpuLocal {} - -// A check to ensure that the CPU-local object is never accessed before the -// initialization for all CPUs. -#[cfg(debug_assertions)] -use core::sync::atomic::{AtomicBool, Ordering}; -#[cfg(debug_assertions)] -static IS_INITIALIZED: AtomicBool = AtomicBool::new(false); - -impl CpuLocal { - /// Initialize a CPU-local object. - /// - /// Please do not call this function directly. Instead, use the - /// `cpu_local!` macro. - /// - /// # Safety - /// - /// The caller should ensure that the object initialized by this - /// function resides in the `.cpu_local` section. Otherwise the - /// behavior is undefined. - #[doc(hidden)] - pub const unsafe fn __new(val: T) -> Self { - Self(val) - } - - /// Get access to the underlying value with IRQs disabled. - /// - /// By this method, you can borrow a reference to the underlying value - /// even if `T` is not `Sync`. Because that it is per-CPU and IRQs are - /// disabled, no other running task can access it. - pub fn borrow_irq_disabled(&self) -> CpuLocalDerefGuard<'_, T> { - CpuLocalDerefGuard { - cpu_local: self, - _guard: disable_local(), - } - } - - /// Get access to the underlying value through a raw pointer. - /// - /// This function calculates the virtual address of the CPU-local object based on the per- - /// cpu base address and the offset in the BSP. - fn get(&self) -> *const T { - // CPU-local objects should be initialized before being accessed. It should be ensured - // by the implementation of OSTD initialization. - #[cfg(debug_assertions)] - debug_assert!(IS_INITIALIZED.load(Ordering::Relaxed)); - - let offset = { - let bsp_va = self as *const _ as usize; - let bsp_base = __cpu_local_start as usize; - // The implementation should ensure that the CPU-local object resides in the `.cpu_local`. - debug_assert!(bsp_va + core::mem::size_of::() <= __cpu_local_end as usize); - - bsp_va - bsp_base as usize - }; - - let local_base = arch::cpu::local::get_base() as usize; - let local_va = local_base + offset; - - // A sanity check about the alignment. - debug_assert_eq!(local_va % core::mem::align_of::(), 0); - - local_va as *mut T - } -} - -// Considering a preemptive kernel, a CPU-local object may be dereferenced -// when another task tries to access it. So, we need to ensure that `T` is -// `Sync` before allowing it to be dereferenced. -impl Deref for CpuLocal { - type Target = T; - - fn deref(&self) -> &Self::Target { - // SAFETY: it should be properly initialized before accesses. - // And we do not create a mutable reference over it. It is - // `Sync` so it can be referenced from this task. - unsafe { &*self.get() } - } -} - -/// A guard for accessing the CPU-local object. -/// -/// It ensures that the CPU-local object is accessed with IRQs -/// disabled. It is created by [`CpuLocal::borrow_irq_disabled`]. -/// Do not hold this guard for a long time. -#[must_use] -pub struct CpuLocalDerefGuard<'a, T> { - cpu_local: &'a CpuLocal, - _guard: DisabledLocalIrqGuard, -} - -impl Deref for CpuLocalDerefGuard<'_, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - // SAFETY: it should be properly initialized before accesses. 
- // And we do not create a mutable reference over it. The IRQs - // are disabled so it can be referenced from this task. - unsafe { &*self.cpu_local.get() } - } -} - -/// Sets the base address of the CPU-local storage for the bootstrap processor. -/// -/// It should be called early to let [`crate::task::disable_preempt`] work, -/// which needs to update a CPU-local preempt lock count. Otherwise it may -/// panic when calling [`crate::task::disable_preempt`]. -/// -/// # Safety -/// -/// It should be called only once and only on the BSP. -pub(crate) unsafe fn early_init_bsp_local_base() { - let start_base_va = __cpu_local_start as usize as u64; - // SAFETY: The base to be set is the start of the `.cpu_local` section, - // where accessing the CPU-local objects have defined behaviors. - unsafe { - arch::cpu::local::set_base(start_base_va); - } -} - -/// The BSP initializes the CPU-local areas for APs. Here we use a -/// non-disabling preempt version of lock because the [`crate::sync`] -/// version needs `cpu_local` to work. Preemption and interrupts are -/// disabled in this phase so it is safe to use this lock. -static CPU_LOCAL_STORAGES: spin::RwLock>> = spin::RwLock::new(Vec::new()); - -/// Initializes the CPU local data for the bootstrap processor (BSP). -/// -/// # Safety -/// -/// This function can only called on the BSP, for once. -/// -/// It must be guaranteed that the BSP will not access local data before -/// this function being called, otherwise copying non-constant values -/// will result in pretty bad undefined behavior. -pub unsafe fn init_on_bsp() { - let bsp_base_va = __cpu_local_start as usize; - let bsp_end_va = __cpu_local_end as usize; - - let num_cpus = super::num_cpus(); - - let mut cpu_local_storages = CPU_LOCAL_STORAGES.write(); - for cpu_i in 1..num_cpus { - let ap_pages = { - let nbytes = (bsp_end_va - bsp_base_va).align_up(PAGE_SIZE); - page::allocator::alloc_contiguous(nbytes, |_| KernelMeta::default()).unwrap() - }; - let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u8; - - // SAFETY: The BSP has not initialized the CPU-local area, so the objects in - // in the `.cpu_local` section can be bitwise bulk copied to the AP's local - // storage. The destination memory is allocated so it is valid to write to. - unsafe { - core::ptr::copy_nonoverlapping( - bsp_base_va as *const u8, - ap_pages_ptr, - bsp_end_va - bsp_base_va, - ); - } - - // SAFETY: the first 4 bytes is reserved for storing CPU ID. - unsafe { - (ap_pages_ptr as *mut u32).write(cpu_i); - } - - // SAFETY: the second 4 bytes is reserved for storing the preemt count. - unsafe { - (ap_pages_ptr as *mut u32).add(1).write(0); - } - - cpu_local_storages.push(ap_pages); - } - - // Write the CPU ID of BSP to the first 4 bytes of the CPU-local area. - let bsp_cpu_id_ptr = bsp_base_va as *mut u32; - // SAFETY: the first 4 bytes is reserved for storing CPU ID. - unsafe { - bsp_cpu_id_ptr.write(0); - } - - cpu::local::set_base(bsp_base_va as u64); - - #[cfg(debug_assertions)] - IS_INITIALIZED.store(true, Ordering::Relaxed); -} - -/// Initializes the CPU local data for the application processor (AP). -/// -/// # Safety -/// -/// This function can only called on the AP. -pub unsafe fn init_on_ap(cpu_id: u32) { - let rlock = CPU_LOCAL_STORAGES.read(); - let ap_pages = rlock.get(cpu_id as usize - 1).unwrap(); - - let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u32; - - debug_assert_eq!( - cpu_id, - // SAFETY: the CPU ID is stored at the beginning of the CPU local area. 
- unsafe { ap_pages_ptr.read() } - ); - - // SAFETY: the memory will be dedicated to the AP. And we are on the AP. - unsafe { - cpu::local::set_base(ap_pages_ptr as u64); - } -} - -// These symbols are provided by the linker script. -extern "C" { - fn __cpu_local_start(); - fn __cpu_local_end(); -} - -#[cfg(ktest)] -mod test { - use core::{ - cell::RefCell, - sync::atomic::{AtomicU8, Ordering}, - }; - - use ostd_macros::ktest; - - use super::*; - - #[ktest] - fn test_cpu_local() { - cpu_local! { - static FOO: RefCell = RefCell::new(1); - static BAR: AtomicU8 = AtomicU8::new(3); - } - for _ in 0..10 { - let foo_guard = FOO.borrow_irq_disabled(); - assert_eq!(*foo_guard.borrow(), 1); - *foo_guard.borrow_mut() = 2; - drop(foo_guard); - for _ in 0..10 { - assert_eq!(BAR.load(Ordering::Relaxed), 3); - BAR.store(4, Ordering::Relaxed); - assert_eq!(BAR.load(Ordering::Relaxed), 4); - BAR.store(3, Ordering::Relaxed); - } - let foo_guard = FOO.borrow_irq_disabled(); - assert_eq!(*foo_guard.borrow(), 2); - *foo_guard.borrow_mut() = 1; - drop(foo_guard); - } - } -} diff --git a/ostd/src/cpu/local/cell.rs b/ostd/src/cpu/local/cell.rs new file mode 100644 index 00000000..97c6ceca --- /dev/null +++ b/ostd/src/cpu/local/cell.rs @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! The implementaion of CPU-local variables that have inner mutability. + +use core::cell::UnsafeCell; + +use super::{__cpu_local_end, __cpu_local_start, single_instr::*}; +use crate::arch; + +/// Defines an inner-mutable CPU-local variable. +/// +/// The accessors of the CPU-local variables are defined with [`CpuLocalCell`]. +/// +/// It should be noted that if the interrupts or preemption is enabled, two +/// operations on the same CPU-local cell variable may access different objects +/// since the task may live on different CPUs. +/// +/// # Example +/// +/// ```rust +/// use ostd::cpu_local_cell; +/// +/// cpu_local_cell! { +/// static FOO: u32 = 1; +/// pub static BAR: *const usize = core::ptr::null(); +/// } +/// +/// fn not_an_atomic_function() { +/// let bar_var: usize = 1; +/// BAR.store(&bar_var as *const _); +/// // Note that the value of `BAR` here doesn't nessarily equal to the address +/// // of `bar_var`, since the task may be preempted and moved to another CPU. +/// // You can avoid this by disabling interrupts (and preemption, if needed). +/// println!("BAR VAL: {:?}", BAR.load()); +/// +/// let _irq_guard = ostd::trap::disable_local_irq(); +/// println!("1st FOO VAL: {:?}", FOO.load()); +/// // No suprises here, the two accesses must result in the same value. +/// println!("2nd FOO VAL: {:?}", FOO.load()); +/// } +/// ``` +macro_rules! cpu_local_cell { + ($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => { + $( + #[link_section = ".cpu_local"] + $(#[$attr])* $vis static $name: $crate::cpu::local::CpuLocalCell<$t> = { + let val = $init; + // SAFETY: The CPU local variable instantiated is statically + // stored in the special `.cpu_local` section. + unsafe { + $crate::cpu::local::CpuLocalCell::__new(val) + } + }; + )* + }; +} + +pub(crate) use cpu_local_cell; + +/// Inner mutable CPU-local objects. +/// +/// CPU-local cell objects are only accessible from the current CPU. When +/// accessing an underlying object using the same `CpuLocalCell` instance, the +/// actually accessed object is always on the current CPU. So in a preemptive +/// kernel task, the operated object may change if interrupts are enabled. 
+
+/// The inner mutability is provided by single instruction operations, and the
+/// CPU-local cell objects will not ever be shared between CPUs. So it is safe
+/// to modify the inner value without any locks.
+///
+/// You should only create the CPU-local cell object using the macro
+/// [`cpu_local_cell!`].
+///
+/// For the difference between [`super::CpuLocal`] and [`CpuLocalCell`], see
+/// [`super`].
+pub struct CpuLocalCell<T: 'static>(UnsafeCell<T>);
+
+impl<T: 'static> CpuLocalCell<T> {
+    /// Initialize a CPU-local object.
+    ///
+    /// Please do not call this function directly. Instead, use the
+    /// `cpu_local_cell!` macro.
+    ///
+    /// # Safety
+    ///
+    /// The caller should ensure that the object initialized by this
+    /// function resides in the `.cpu_local` section. Otherwise the
+    /// behavior is undefined.
+    #[doc(hidden)]
+    pub const unsafe fn __new(val: T) -> Self {
+        Self(UnsafeCell::new(val))
+    }
+
+    /// Get access to the underlying value through a raw pointer.
+    ///
+    /// This function calculates the virtual address of the CPU-local object
+    /// based on the CPU-local base address and the offset in the BSP.
+    ///
+    /// # Safety
+    ///
+    /// The caller should ensure that within the entire execution of this
+    /// function, no interrupt or preemption can occur. Otherwise, the
+    /// returned pointer may point to the variable on another CPU.
+    pub unsafe fn as_ptr_mut(&'static self) -> *mut T {
+        super::has_init::assert_true();
+
+        let offset = {
+            let bsp_va = self as *const _ as usize;
+            let bsp_base = __cpu_local_start as usize;
+            // The implementation should ensure that the CPU-local object resides in the `.cpu_local`.
+            debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
+
+            bsp_va - bsp_base as usize
+        };
+
+        let local_base = arch::cpu::local::get_base() as usize;
+        let local_va = local_base + offset;
+
+        // A sanity check about the alignment.
+        debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
+
+        local_va as *mut T
+    }
+}
+
+// SAFETY: At any given time, only one task can access the inner value T
+// of a cpu-local variable even if `T` is not `Sync`.
+unsafe impl<T: 'static> Sync for CpuLocalCell<T> {}
+
+// Prevent valid instances of CpuLocalCell from being copied to any memory
+// area outside the `.cpu_local` section.
+impl<T: 'static> !Copy for CpuLocalCell<T> {}
+impl<T: 'static> !Clone for CpuLocalCell<T> {}
+
+// In general, it does not make any sense to send instances of CpuLocalCell to
+// other tasks as they should live on other CPUs to make sending useful.
+impl<T: 'static> !Send for CpuLocalCell<T> {}
+
+// Accessors for the per-CPU objects whose type implements the single-
+// instruction operations.
+
+impl<T: 'static + SingleInstructionAddAssign<T>> CpuLocalCell<T> {
+    /// Adds a value to the per-CPU object in a single instruction.
+    ///
+    /// This operation wraps on overflow/underflow.
+    ///
+    /// Note that this memory operation will not be elided or reordered by the
+    /// compiler since it is a black-box.
+    pub fn add_assign(&'static self, rhs: T) {
+        let offset = self as *const _ as usize - __cpu_local_start as usize;
+        // SAFETY: The CPU-local object is defined in the `.cpu_local` section,
+        // so the pointer to the object is valid. And the reference is never shared.
+        unsafe {
+            T::add_assign(offset as *mut T, rhs);
+        }
+    }
+}
+
+impl<T: 'static + SingleInstructionSubAssign<T>> CpuLocalCell<T> {
+    /// Subtracts a value from the per-CPU object in a single instruction.
+    ///
+    /// This operation wraps on overflow/underflow.
+    ///
+    /// Note that this memory operation will not be elided or reordered by the
+    /// compiler since it is a black-box.
+ pub fn sub_assign(&'static self, rhs: T) { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. And the reference is never shared. + unsafe { + T::sub_assign(offset as *mut T, rhs); + } + } +} + +impl> CpuLocalCell { + /// Bitwise ANDs a value to the per-CPU object in a single instruction. + /// + /// Note that this memory operation will not be elided or reordered by the + /// compiler since it is a black-box. + pub fn bitand_assign(&'static self, rhs: T) { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. And the reference is never shared. + unsafe { + T::bitand_assign(offset as *mut T, rhs); + } + } +} + +impl> CpuLocalCell { + /// Bitwise ORs a value to the per-CPU object in a single instruction. + /// + /// Note that this memory operation will not be elided or reordered by the + /// compiler since it is a black-box. + pub fn bitor_assign(&'static self, rhs: T) { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. And the reference is never shared. + unsafe { + T::bitor_assign(offset as *mut T, rhs); + } + } +} + +impl> CpuLocalCell { + /// Bitwise XORs a value to the per-CPU object in a single instruction. + /// + /// Note that this memory operation will not be elided or reordered by the + /// compiler since it is a black-box. + pub fn bitxor_assign(&'static self, rhs: T) { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. And the reference is never shared. + unsafe { + T::bitxor_assign(offset as *mut T, rhs); + } + } +} + +impl CpuLocalCell { + /// Gets the value of the per-CPU object in a single instruction. + /// + /// Note that this memory operation will not be elided or reordered by the + /// compiler since it is a black-box. + pub fn load(&'static self) -> T { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. + unsafe { T::load(offset as *const T) } + } +} + +impl CpuLocalCell { + /// Writes a value to the per-CPU object in a single instruction. + /// + /// Note that this memory operation will not be elided or reordered by the + /// compiler since it is a black-box. + pub fn store(&'static self, val: T) { + let offset = self as *const _ as usize - __cpu_local_start as usize; + // SAFETY: The CPU-local object is defined in the `.cpu_local` section, + // so the pointer to the object is valid. And the reference is never shared. + unsafe { + T::store(offset as *mut T, val); + } + } +} diff --git a/ostd/src/cpu/local/cpu_local.rs b/ostd/src/cpu/local/cpu_local.rs new file mode 100644 index 00000000..37724d27 --- /dev/null +++ b/ostd/src/cpu/local/cpu_local.rs @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! The CPU-local variable implementation. + +use core::{marker::Sync, ops::Deref}; + +use super::{__cpu_local_end, __cpu_local_start}; +use crate::{ + arch, + trap::{self, DisabledLocalIrqGuard}, +}; + +/// Defines a CPU-local variable. 
+///
+/// The accessors of the CPU-local variables are defined with [`CpuLocal`].
+///
+/// You can get the reference to the inner object by calling [`deref`]. But
+/// it is worth noting that the object is always the one in the original core
+/// when the reference is created. Use [`CpuLocal::borrow_irq_disabled`] if
+/// this is not expected, or if the inner type can't be shared across CPUs.
+///
+/// # Example
+///
+/// ```rust
+/// use ostd::{cpu_local, sync::SpinLock};
+/// use core::ops::Deref;
+/// use core::sync::atomic::{AtomicU32, Ordering};
+///
+/// cpu_local! {
+///     static FOO: AtomicU32 = AtomicU32::new(1);
+///     pub static BAR: SpinLock<usize> = SpinLock::new(2);
+/// }
+///
+/// fn not_an_atomic_function() {
+///     let ref_of_foo = FOO.deref();
+///     // Note that the value of `FOO` here doesn't necessarily equal the value
+///     // of `FOO` on exactly the __current__ CPU, because the task may have been
+///     // preempted and moved to another CPU after `ref_of_foo` was created.
+///     let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
+///     println!("FOO VAL: {}", val_of_foo);
+///
+///     let bar_guard = BAR.lock_irq_disabled();
+///     // Here the value of `BAR` is always the one in the __current__ CPU since
+///     // interrupts are disabled and we do not explicitly yield execution here.
+///     let val_of_bar = *bar_guard;
+///     println!("BAR VAL: {}", val_of_bar);
+/// }
+/// ```
+#[macro_export]
+macro_rules! cpu_local {
+    ($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => {
+        $(
+            #[link_section = ".cpu_local"]
+            $(#[$attr])* $vis static $name: $crate::cpu::local::CpuLocal<$t> = {
+                let val = $init;
+                // SAFETY: The per-CPU variable instantiated is statically
+                // stored in the special `.cpu_local` section.
+                unsafe {
+                    $crate::cpu::local::CpuLocal::__new(val)
+                }
+            };
+        )*
+    };
+}
+
+/// CPU-local objects.
+///
+/// CPU-local objects are instantiated once per CPU core. They can be shared
+/// with other cores. In the context of a preemptible kernel task, when holding
+/// the reference to the inner object, the object is always the one in the
+/// original core (when the reference is created), no matter which core the
+/// code is currently running on.
+///
+/// For the difference between [`CpuLocal`] and [`super::CpuLocalCell`], see
+/// [`super`].
+pub struct CpuLocal<T: 'static>(T);
+
+impl<T: 'static> CpuLocal<T> {
+    /// Creates a new CPU-local object.
+    ///
+    /// Please do not call this function directly. Instead, use the
+    /// `cpu_local!` macro.
+    ///
+    /// # Safety
+    ///
+    /// The caller should ensure that the object initialized by this
+    /// function resides in the `.cpu_local` section. Otherwise the
+    /// behavior is undefined.
+    #[doc(hidden)]
+    pub const unsafe fn __new(val: T) -> Self {
+        Self(val)
+    }
+
+    /// Get access to the underlying value with IRQs disabled.
+    ///
+    /// By this method, you can borrow a reference to the underlying value
+    /// even if `T` is not `Sync`. Because it is per-CPU and IRQs are
+    /// disabled, no other running tasks can access it.
+    pub fn borrow_irq_disabled(&'static self) -> CpuLocalDerefGuard<'_, T> {
+        CpuLocalDerefGuard {
+            cpu_local: self,
+            _guard: InnerGuard::Created(trap::disable_local()),
+        }
+    }
+
+    /// Get access to the underlying value with a provided guard.
+    ///
+    /// Similar to [`CpuLocal::borrow_irq_disabled`], but you can provide
+    /// a guard to disable IRQs if you already have one.
+    pub fn borrow_with<'a>(
+        &'static self,
+        guard: &'a DisabledLocalIrqGuard,
+    ) -> CpuLocalDerefGuard<'a, T> {
+        CpuLocalDerefGuard {
+            cpu_local: self,
+            _guard: InnerGuard::Provided(guard),
+        }
+    }
+
+    /// Get access to the underlying value through a raw pointer.
+    ///
+    /// This function calculates the virtual address of the CPU-local object
+    /// based on the CPU-local base address and the offset in the BSP.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the reference to `self` is static.
+    unsafe fn as_ptr(&self) -> *const T {
+        super::has_init::assert_true();
+
+        let offset = {
+            let bsp_va = self as *const _ as usize;
+            let bsp_base = __cpu_local_start as usize;
+            // The implementation should ensure that the CPU-local object resides in the `.cpu_local`.
+            debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
+
+            bsp_va - bsp_base as usize
+        };
+
+        let local_base = arch::cpu::local::get_base() as usize;
+        let local_va = local_base + offset;
+
+        // A sanity check about the alignment.
+        debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
+
+        local_va as *mut T
+    }
+}
+
+// SAFETY: At any given time, only one task can access the inner value `T` of a
+// CPU-local variable if `T` is not `Sync`. We guarantee it by disabling the
+// reference to the inner value, or turning off preemption when creating
+// the reference.
+unsafe impl<T: 'static> Sync for CpuLocal<T> {}
+
+// Prevent valid instances of `CpuLocal` from being copied to any memory areas
+// outside the `.cpu_local` section.
+impl<T: 'static> !Copy for CpuLocal<T> {}
+impl<T: 'static> !Clone for CpuLocal<T> {}
+
+// In general, it does not make any sense to send instances of `CpuLocal` to
+// other tasks as they should live on other CPUs to make sending useful.
+impl<T: 'static> !Send for CpuLocal<T> {}
+
+// For `Sync` types, we can create a reference over the inner type and allow
+// it to be shared across CPUs. So it is sound to provide a `Deref`
+// implementation. However, it is up to the caller if sharing is desired.
+impl<T: 'static + Sync> Deref for CpuLocal<T> {
+    type Target = T;
+
+    /// Note that the reference to the inner object always refers to the object
+    /// on the original CPU where the reference was created. If this is not
+    /// expected, turn off preemption.
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: it should be properly initialized before accesses.
+        // And we do not create a mutable reference over it. It is
+        // `Sync` so it can be referenced from this task. Here dereferencing
+        // from non-static instances is not feasible since no one can create
+        // a non-static instance of `CpuLocal`.
+        unsafe { &*self.as_ptr() }
+    }
+}
+
+/// A guard for accessing the CPU-local object.
+///
+/// It ensures that the CPU-local object is accessed with IRQs disabled.
+/// It is created by [`CpuLocal::borrow_irq_disabled`] or
+/// [`CpuLocal::borrow_with`]. Do not hold this guard for a long time.
+#[must_use]
+pub struct CpuLocalDerefGuard<'a, T: 'static> {
+    cpu_local: &'static CpuLocal<T>,
+    _guard: InnerGuard<'a>,
+}
+
+enum InnerGuard<'a> {
+    #[allow(dead_code)]
+    Created(DisabledLocalIrqGuard),
+    #[allow(dead_code)]
+    Provided(&'a DisabledLocalIrqGuard),
+}
+
+impl<T: 'static> Deref for CpuLocalDerefGuard<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: it should be properly initialized before accesses.
+        // And we do not create a mutable reference over it. The IRQs
+        // are disabled so it can only be referenced from this task.
+        unsafe { &*self.cpu_local.as_ptr() }
+    }
+}
diff --git a/ostd/src/cpu/local/mod.rs b/ostd/src/cpu/local/mod.rs
new file mode 100644
index 00000000..01467c9b
--- /dev/null
+++ b/ostd/src/cpu/local/mod.rs
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! CPU local storage.
+//!
+//! This module provides a mechanism to define CPU-local objects, by the macro
+//! [`crate::cpu_local!`].
+//!
+//! Such a mechanism exploits the fact that constant values of non-[`Copy`]
+//! types can be bitwise copied. For example, a [`Option`] object, though
+//! being not [`Copy`], has a constant constructor [`Option::None`] that
+//! produces a value that can be bitwise copied to create a new instance.
+//! [`alloc::sync::Arc`], however, doesn't have such a constructor, and thus
+//! cannot be directly used as a CPU-local object. Wrapping it in a type that
+//! has a constant constructor, like [`Option`], can make it CPU-local.
+//!
+//! # Implementation
+//!
+//! These APIs are implemented by placing the CPU-local objects in a special
+//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects linked
+//! in this section, and these objects are copied to dynamically allocated
+//! local storage of each application processor (AP) during the initialization
+//! process.

// This module also provides CPU-local cell objects that have inner mutability.
//
// The difference between CPU-local objects (defined by [`crate::cpu_local!`])
// and CPU-local cell objects (defined by [`crate::cpu_local_cell!`]) is that
// the CPU-local objects can be shared across CPUs, while through a CPU-local
// cell object you can only access the value on the current CPU, which enables
// inner mutability without locks.
//
// The cell variant is currently not a public API because it is rather hard
// to use without introducing races. But it is useful for OSTD's internal
// implementation.

+mod cell;
+mod cpu_local;
+
+pub(crate) mod single_instr;
+
+use alloc::vec::Vec;
+
+use align_ext::AlignExt;
+pub(crate) use cell::{cpu_local_cell, CpuLocalCell};
+pub use cpu_local::{CpuLocal, CpuLocalDerefGuard};
+
+use crate::{
+    arch,
+    mm::{
+        paddr_to_vaddr,
+        page::{self, meta::KernelMeta, ContPages},
+        PAGE_SIZE,
+    },
+};
+
+// These symbols are provided by the linker script.
+extern "C" {
+    fn __cpu_local_start();
+    fn __cpu_local_end();
+}
+
+cpu_local_cell! {
+    /// The count of the preempt lock.
+    ///
+    /// We need to access the preemption count before we can copy the section
+    /// for application processors. So, the preemption count is not copied from
+    /// the bootstrap processor's section during initialization. Instead, it is
+    /// initialized to zero for application processors.
+    pub(crate) static PREEMPT_LOCK_COUNT: u32 = 0;
+}
+
+/// Sets the base address of the CPU-local storage for the bootstrap processor.
+///
+/// It should be called early to let [`crate::task::disable_preempt`] work,
+/// which needs to update a CPU-local preempt lock count. Otherwise it may
+/// panic when calling [`crate::task::disable_preempt`].
+///
+/// # Safety
+///
+/// It should be called only once and only on the BSP.
+pub(crate) unsafe fn early_init_bsp_local_base() {
+    let start_base_va = __cpu_local_start as usize as u64;
+    // SAFETY: The base to be set is the start of the `.cpu_local` section,
+    // where accessing the CPU-local objects has defined behavior.
+    unsafe {
+        arch::cpu::local::set_base(start_base_va);
+    }
+}
+
+/// The BSP initializes the CPU-local areas for APs. Here we use a lock that
+/// does not disable preemption, because the [`crate::sync`] version needs
+/// `cpu_local` to work. Preemption and interrupts are disabled in this phase
+/// so it is safe to use this lock.
+static CPU_LOCAL_STORAGES: spin::RwLock<Vec<ContPages<KernelMeta>>> = spin::RwLock::new(Vec::new());
+
+/// Initializes the CPU local data for the bootstrap processor (BSP).
+///
+/// # Safety
+///
+/// This function can only be called on the BSP, and only once.
+///
+/// It must be guaranteed that the BSP will not access local data before
+/// this function is called, otherwise copying non-constant values
+/// will result in pretty bad undefined behavior.
+pub unsafe fn init_on_bsp() {
+    let bsp_base_va = __cpu_local_start as usize;
+    let bsp_end_va = __cpu_local_end as usize;
+
+    let num_cpus = super::num_cpus();
+
+    let mut cpu_local_storages = CPU_LOCAL_STORAGES.write();
+    for cpu_i in 1..num_cpus {
+        let ap_pages = {
+            let nbytes = (bsp_end_va - bsp_base_va).align_up(PAGE_SIZE);
+            page::allocator::alloc_contiguous(nbytes, |_| KernelMeta::default()).unwrap()
+        };
+        let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u8;
+
+        // SAFETY: The BSP has not initialized the CPU-local area, so the objects
+        // in the `.cpu_local` section can be bitwise bulk copied to the AP's local
+        // storage. The destination memory is allocated so it is valid to write to.
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                bsp_base_va as *const u8,
+                ap_pages_ptr,
+                bsp_end_va - bsp_base_va,
+            );
+        }
+
+        // SAFETY: the first 4 bytes are reserved for storing the CPU ID.
+        unsafe {
+            (ap_pages_ptr as *mut u32).write(cpu_i);
+        }
+
+        // SAFETY: the `PREEMPT_LOCK_COUNT` may be dirty on the BSP, so we need
+        // to ensure that it is initialized to zero for APs. The safety
+        // requirements are met since the static is defined in the `.cpu_local`
+        // section and the pointer to that static is the offset in the CPU-
+        // local area. It is a `usize` so it is safe to be overwritten.
+        unsafe {
+            let preempt_count_offset = &PREEMPT_LOCK_COUNT as *const _ as usize;
+            let ap_preempt_count_ptr = ap_pages_ptr.add(preempt_count_offset) as *mut usize;
+            ap_preempt_count_ptr.write(0);
+        }
+
+        cpu_local_storages.push(ap_pages);
+    }
+
+    // Write the CPU ID of the BSP to the first 4 bytes of the CPU-local area.
+    let bsp_cpu_id_ptr = bsp_base_va as *mut u32;
+    // SAFETY: the first 4 bytes are reserved for storing the CPU ID.
+    unsafe {
+        bsp_cpu_id_ptr.write(0);
+    }
+
+    arch::cpu::local::set_base(bsp_base_va as u64);
+
+    has_init::set_true();
+}
+
+/// Initializes the CPU local data for the application processor (AP).
+///
+/// # Safety
+///
+/// This function can only be called on the AP.
+pub unsafe fn init_on_ap(cpu_id: u32) {
+    let rlock = CPU_LOCAL_STORAGES.read();
+    let ap_pages = rlock.get(cpu_id as usize - 1).unwrap();
+
+    let ap_pages_ptr = paddr_to_vaddr(ap_pages.start_paddr()) as *mut u32;
+
+    debug_assert_eq!(
+        cpu_id,
+        // SAFETY: the CPU ID is stored at the beginning of the CPU local area.
+        unsafe { ap_pages_ptr.read() }
+    );
+
+    // SAFETY: the memory will be dedicated to the AP. And we are on the AP.
+    unsafe {
+        arch::cpu::local::set_base(ap_pages_ptr as u64);
+    }
+}
+
+mod has_init {
+    //! This module is used to detect the programming error of using the CPU-local
+    //! mechanism before it is initialized. Such bugs have been found before and we
+    //! do not want to repeat this error again. This module only incurs runtime
+    //! overhead if debug assertions are enabled.
+    cfg_if::cfg_if!
{ + if #[cfg(debug_assertions)] { + use core::sync::atomic::{AtomicBool, Ordering}; + + static IS_INITIALIZED: AtomicBool = AtomicBool::new(false); + + pub fn assert_true() { + debug_assert!(IS_INITIALIZED.load(Ordering::Relaxed)); + } + + pub fn set_true() { + IS_INITIALIZED.store(true, Ordering::Relaxed); + } + } else { + pub fn assert_true() {} + + pub fn set_true() {} + } + } +} + +#[cfg(ktest)] +mod test { + use core::cell::RefCell; + + use ostd_macros::ktest; + + #[ktest] + fn test_cpu_local() { + crate::cpu_local! { + static FOO: RefCell = RefCell::new(1); + } + let foo_guard = FOO.borrow_irq_disabled(); + assert_eq!(*foo_guard.borrow(), 1); + *foo_guard.borrow_mut() = 2; + assert_eq!(*foo_guard.borrow(), 2); + drop(foo_guard); + } + + #[ktest] + fn test_cpu_local_cell() { + crate::cpu_local_cell! { + static BAR: usize = 3; + } + let _guard = crate::trap::disable_local(); + assert_eq!(BAR.load(), 3); + BAR.store(4); + assert_eq!(BAR.load(), 4); + } +} diff --git a/ostd/src/cpu/local/single_instr.rs b/ostd/src/cpu/local/single_instr.rs new file mode 100644 index 00000000..1ac436c0 --- /dev/null +++ b/ostd/src/cpu/local/single_instr.rs @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Extensions for CPU-local types that allows single-instruction operations. +//! +//! For some per-CPU objects, fetching or modifying the values of them can be +//! done in a single instruction. Then we would avoid turning off interrupts +//! when accessing them, which incurs non-trivial overhead. +//! +//! These traits are the architecture-specific interface for single-instruction +//! operations. The architecture-specific module can implement these traits for +//! common integer types. For architectures that don't support such single- +//! instruction operations, we emulate a single-instruction implementation by +//! disabling interruptions and preemptions. +//! +//! Currently we implement some of the [`core::ops`] operations. Bitwise shift +//! implementations are missing. Also for less-fundamental types such as +//! enumerations or boolean types, the caller can cast it themselves to the +//! integer types, for which the operations are implemented. +//! +//! # Safety +//! +//! All operations in the provided traits are unsafe, and the caller should +//! ensure that the offset is a valid pointer to a static [`CpuLocalCell`] +//! object. The offset of the object is relative to the base address of the +//! CPU-local storage. These operations are not atomic. Accessing the same +//! address from multiple CPUs produces undefined behavior. +//! +//! [`CpuLocalCell`]: crate::cpu::local::CpuLocalCell + +/// An interface for architecture-specific single-instruction add operation. +pub trait SingleInstructionAddAssign { + /// Adds a value to the per-CPU object. + /// + /// This operation wraps on overflow. + /// + /// # Safety + /// + /// + unsafe fn add_assign(offset: *mut Self, rhs: Rhs); +} + +impl SingleInstructionAddAssign for T { + default unsafe fn add_assign(offset: *mut Self, rhs: T) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let addr = (base + offset as usize) as *mut Self; + addr.write(addr.read().wrapping_add(&rhs)); + } +} + +/// An interface for architecture-specific single-instruction subtract operation. +pub trait SingleInstructionSubAssign { + /// Subtracts a value to the per-CPU object. + /// + /// This operation wraps on overflow. 
+ /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. + unsafe fn sub_assign(offset: *mut Self, rhs: Rhs); +} + +impl SingleInstructionSubAssign for T { + default unsafe fn sub_assign(offset: *mut Self, rhs: T) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let addr = (base + offset as usize) as *mut Self; + addr.write(addr.read().wrapping_sub(&rhs)); + } +} + +/// An interface for architecture-specific single-instruction bitwise OR. +pub trait SingleInstructionBitOrAssign { + /// Bitwise ORs a value to the per-CPU object. + /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. + unsafe fn bitor_assign(offset: *mut Self, rhs: Rhs); +} + +impl + Copy> SingleInstructionBitOrAssign for T { + default unsafe fn bitor_assign(offset: *mut Self, rhs: T) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let addr = (base + offset as usize) as *mut Self; + addr.write(addr.read() | rhs); + } +} + +/// An interface for architecture-specific single-instruction bitwise AND. +pub trait SingleInstructionBitAndAssign { + /// Bitwise ANDs a value to the per-CPU object. + /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. + unsafe fn bitand_assign(offset: *mut Self, rhs: Rhs); +} + +impl + Copy> SingleInstructionBitAndAssign for T { + default unsafe fn bitand_assign(offset: *mut Self, rhs: T) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let addr = (base + offset as usize) as *mut Self; + addr.write(addr.read() & rhs); + } +} + +/// An interface for architecture-specific single-instruction bitwise XOR. +pub trait SingleInstructionBitXorAssign { + /// Bitwise XORs a value to the per-CPU object. + /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. + unsafe fn bitxor_assign(offset: *mut Self, rhs: Rhs); +} + +impl + Copy> SingleInstructionBitXorAssign for T { + default unsafe fn bitxor_assign(offset: *mut Self, rhs: T) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let addr = (base + offset as usize) as *mut Self; + addr.write(addr.read() ^ rhs); + } +} + +/// An interface for architecture-specific single-instruction get operation. +pub trait SingleInstructionLoad { + /// Gets the value of the per-CPU object. + /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. + unsafe fn load(offset: *const Self) -> Self; +} + +impl SingleInstructionLoad for T { + default unsafe fn load(offset: *const Self) -> Self { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let ptr = (base + offset as usize) as *const Self; + ptr.read() + } +} + +/// An interface for architecture-specific single-instruction set operation. +pub trait SingleInstructionStore { + /// Writes a value to the per-CPU object. + /// + /// # Safety + /// + /// Please refer to the module-level documentation of [`self`]. 
+ unsafe fn store(offset: *mut Self, val: Self); +} + +impl SingleInstructionStore for T { + default unsafe fn store(offset: *mut Self, val: Self) { + let _guard = crate::trap::disable_local(); + let base = crate::arch::cpu::local::get_base() as usize; + let ptr = (base + offset as usize) as *mut Self; + ptr.write(val); + } +} diff --git a/ostd/src/cpu/mod.rs b/ostd/src/cpu/mod.rs index fa73c96c..17289230 100644 --- a/ostd/src/cpu/mod.rs +++ b/ostd/src/cpu/mod.rs @@ -2,7 +2,7 @@ //! CPU-related definitions. -pub mod cpu_local; +pub mod local; cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")]{ @@ -18,7 +18,7 @@ use bitvec::{ slice::IterOnes, }; -use crate::{arch::boot::smp::get_num_processors, cpu}; +use crate::arch::{self, boot::smp::get_num_processors}; /// The number of CPUs. Zero means uninitialized. static NUM_CPUS: AtomicU32 = AtomicU32::new(0); @@ -47,7 +47,7 @@ pub fn num_cpus() -> u32 { pub fn this_cpu() -> u32 { // SAFETY: the cpu ID is stored at the beginning of the cpu local area, provided // by the linker script. - unsafe { (cpu::local::get_base() as usize as *mut u32).read() } + unsafe { (arch::cpu::local::get_base() as usize as *mut u32).read() } } /// A subset of all CPUs in the system. diff --git a/ostd/src/lib.rs b/ostd/src/lib.rs index eee80a8a..f99bb07e 100644 --- a/ostd/src/lib.rs +++ b/ostd/src/lib.rs @@ -11,6 +11,7 @@ #![feature(generic_const_exprs)] #![feature(iter_from_coroutine)] #![feature(let_chains)] +#![feature(min_specialization)] #![feature(negative_impls)] #![feature(new_uninit)] #![feature(panic_info_message)] @@ -46,7 +47,9 @@ pub mod user; pub use ostd_macros::main; pub use ostd_pod::Pod; -pub use self::{cpu::cpu_local::CpuLocal, error::Error, prelude::Result}; +pub use self::{error::Error, prelude::Result}; +// [`CpuLocalCell`] is easy to be mis-used, so we don't expose it to the users. +pub(crate) use crate::cpu::local::cpu_local_cell; /// Initializes OSTD. /// @@ -64,7 +67,7 @@ pub fn init() { arch::check_tdx_init(); // SAFETY: This function is called only once and only on the BSP. - unsafe { cpu::cpu_local::early_init_bsp_local_base() }; + unsafe { cpu::local::early_init_bsp_local_base() }; mm::heap_allocator::init(); diff --git a/ostd/src/task/processor.rs b/ostd/src/task/processor.rs index 71acd5ff..9dc72c2e 100644 --- a/ostd/src/task/processor.rs +++ b/ostd/src/task/processor.rs @@ -8,7 +8,7 @@ use super::{ task::{context_switch, TaskContext}, Task, TaskStatus, }; -use crate::{arch, cpu_local}; +use crate::{cpu::local::PREEMPT_LOCK_COUNT, cpu_local}; pub struct Processor { current: Option>, @@ -91,10 +91,11 @@ pub fn preempt(task: &Arc) { /// /// before context switch, current task will switch to the next task fn switch_to_task(next_task: Arc) { - if !PREEMPT_COUNT.is_preemptive() { + let preemt_lock_count = PREEMPT_LOCK_COUNT.load(); + if preemt_lock_count != 0 { panic!( "Calling schedule() while holding {} locks", - PREEMPT_COUNT.num_locks() + preemt_lock_count ); } @@ -151,53 +152,6 @@ fn switch_to_task(next_task: Arc) { // to the next task switching. } -static PREEMPT_COUNT: PreemptInfo = PreemptInfo::new(); - -/// Currently, it only holds the number of preemption locks held by the -/// current CPU. When it has a non-zero value, the CPU cannot call -/// [`schedule()`]. -/// -/// For per-CPU preemption lock count, we cannot afford two non-atomic -/// operations to increment and decrement the count. 
The [`crate::cpu_local`] -/// implementation is free to read the base register and then calculate the -/// address of the per-CPU variable using an additional instruction. Interrupts -/// can happen between the address calculation and modification to that -/// address. If the task is preempted to another CPU by this interrupt, the -/// count of the original CPU will be mistakenly modified. To avoid this, we -/// introduce [`crate::arch::cpu::local::preempt_lock_count`]. For x86_64 we -/// can implement this using one instruction. In other less expressive -/// architectures, we may need to disable interrupts. -/// -/// Also, the preemption count is reserved in the `.cpu_local` section -/// specified in the linker script. The reason is that we need to access the -/// preemption count before we can copy the section for application processors. -/// So, the preemption count is not copied from bootstrap processor's section -/// as the initialization. Instead it is initialized to zero for application -/// processors. -struct PreemptInfo {} - -impl PreemptInfo { - const fn new() -> Self { - Self {} - } - - fn increase_num_locks(&self) { - arch::cpu::local::preempt_lock_count::inc(); - } - - fn decrease_num_locks(&self) { - arch::cpu::local::preempt_lock_count::dec(); - } - - fn is_preemptive(&self) -> bool { - arch::cpu::local::preempt_lock_count::get() == 0 - } - - fn num_locks(&self) -> usize { - arch::cpu::local::preempt_lock_count::get() as usize - } -} - /// A guard for disable preempt. #[clippy::has_significant_drop] #[must_use] @@ -210,7 +164,7 @@ impl !Send for DisablePreemptGuard {} impl DisablePreemptGuard { fn new() -> Self { - PREEMPT_COUNT.increase_num_locks(); + PREEMPT_LOCK_COUNT.add_assign(1); Self { _private: () } } @@ -223,7 +177,7 @@ impl DisablePreemptGuard { impl Drop for DisablePreemptGuard { fn drop(&mut self) { - PREEMPT_COUNT.decrease_num_locks(); + PREEMPT_LOCK_COUNT.sub_assign(1); } } diff --git a/ostd/src/trap/handler.rs b/ostd/src/trap/handler.rs index d61bd2b2..3f359f70 100644 --- a/ostd/src/trap/handler.rs +++ b/ostd/src/trap/handler.rs @@ -1,17 +1,15 @@ // SPDX-License-Identifier: MPL-2.0 -use core::sync::atomic::{AtomicBool, Ordering}; - use trapframe::TrapFrame; -use crate::{arch::irq::IRQ_LIST, cpu_local}; +use crate::{arch::irq::IRQ_LIST, cpu_local_cell}; pub(crate) fn call_irq_callback_functions(trap_frame: &TrapFrame, irq_number: usize) { // For x86 CPUs, interrupts are not re-entrant. Local interrupts will be disabled when // an interrupt handler is called (Unless interrupts are re-enabled in an interrupt handler). // // FIXME: For arch that supports re-entrant interrupts, we may need to record nested level here. - IN_INTERRUPT_CONTEXT.store(true, Ordering::Release); + IN_INTERRUPT_CONTEXT.store(true); let irq_line = IRQ_LIST.get().unwrap().get(irq_number).unwrap(); let callback_functions = irq_line.callback_list(); @@ -22,20 +20,17 @@ pub(crate) fn call_irq_callback_functions(trap_frame: &TrapFrame, irq_number: us crate::arch::interrupts_ack(irq_number); - IN_INTERRUPT_CONTEXT.store(false, Ordering::Release); - crate::arch::irq::enable_local(); crate::trap::softirq::process_pending(); + + IN_INTERRUPT_CONTEXT.store(false); } -cpu_local! { - static IN_INTERRUPT_CONTEXT: AtomicBool = AtomicBool::new(false); +cpu_local_cell! { + static IN_INTERRUPT_CONTEXT: bool = false; } /// Returns whether we are in the interrupt context. -/// -/// FIXME: Here only hardware irq is taken into account. 
According to linux implementation, if -/// we are in softirq context, or bottom half is disabled, this function also returns true. pub fn in_interrupt_context() -> bool { - IN_INTERRUPT_CONTEXT.load(Ordering::Acquire) + IN_INTERRUPT_CONTEXT.load() } diff --git a/ostd/src/trap/softirq.rs b/ostd/src/trap/softirq.rs index df08a08d..3d9a136c 100644 --- a/ostd/src/trap/softirq.rs +++ b/ostd/src/trap/softirq.rs @@ -2,14 +2,12 @@ //! Software interrupt. -#![allow(unused_variables)] - use alloc::boxed::Box; -use core::sync::atomic::{AtomicBool, AtomicU8, Ordering}; +use core::sync::atomic::{AtomicU8, Ordering}; use spin::Once; -use crate::{cpu_local, task::disable_preempt}; +use crate::{cpu_local_cell, task::disable_preempt}; /// A representation of a software interrupt (softirq) line. /// @@ -70,7 +68,7 @@ impl SoftIrqLine { /// /// If this line is not enabled yet, the method has no effect. pub fn raise(&self) { - PENDING_MASK.fetch_or(1 << self.id, Ordering::Release); + PENDING_MASK.bitor_assign(1 << self.id); } /// Enables a softirq line by registering its callback. @@ -105,24 +103,24 @@ pub(super) fn init() { static ENABLED_MASK: AtomicU8 = AtomicU8::new(0); -cpu_local! { - static PENDING_MASK: AtomicU8 = AtomicU8::new(0); - static IS_ENABLED: AtomicBool = AtomicBool::new(true); +cpu_local_cell! { + static PENDING_MASK: u8 = 0; + static IS_ENABLED: bool = true; } /// Enables softirq in current processor. fn enable_softirq_local() { - IS_ENABLED.store(true, Ordering::Release); + IS_ENABLED.store(true); } /// Disables softirq in current processor. fn disable_softirq_local() { - IS_ENABLED.store(false, Ordering::Release); + IS_ENABLED.store(false); } /// Checks whether the softirq is enabled in current processor. fn is_softirq_enabled() -> bool { - IS_ENABLED.load(Ordering::Acquire) + IS_ENABLED.load() } /// Processes pending softirqs. @@ -136,12 +134,13 @@ pub(crate) fn process_pending() { return; } - let preempt_guard = disable_preempt(); + let _preempt_guard = disable_preempt(); disable_softirq_local(); - for i in 0..SOFTIRQ_RUN_TIMES { + for _i in 0..SOFTIRQ_RUN_TIMES { let mut action_mask = { - let pending_mask = PENDING_MASK.fetch_and(0, Ordering::Acquire); + let pending_mask = PENDING_MASK.load(); + PENDING_MASK.store(0); pending_mask & ENABLED_MASK.load(Ordering::Acquire) };
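
As a usage illustration (not part of the patch): the sketch below shows how the `cpu_local_cell!` accessors introduced above are meant to be used from OSTD-internal code. `IRQ_COUNT`, `on_irq`, and `irq_count_snapshot` are hypothetical names chosen for this example only. On x86-64 the increment lowers to the single `add fs:[offset], 1` instruction defined in `arch/x86/cpu/local.rs`; accesses that must observe the same CPU consistently still disable local IRQs, as the new docs recommend.

```rust
// A minimal sketch, assuming code inside the OSTD crate (the macro is
// `pub(crate)` and not exported to users). All names here are hypothetical.
use crate::cpu_local_cell;

cpu_local_cell! {
    /// A hypothetical per-CPU interrupt counter.
    static IRQ_COUNT: u32 = 0;
}

fn on_irq() {
    // The read-modify-write is a single instruction on x86-64, so it cannot
    // be split by an interrupt; whichever CPU executes this line updates its
    // own counter without an IRQ or preemption guard.
    IRQ_COUNT.add_assign(1);
}

fn irq_count_snapshot() -> u32 {
    // Two consecutive accesses are only guaranteed to hit the same CPU's
    // counter while local IRQs (and hence timer-driven preemption on this
    // CPU) are disabled.
    let _guard = crate::trap::disable_local();
    IRQ_COUNT.load()
}
```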