diff --git a/Cargo.lock b/Cargo.lock index dfca4676..08d4db81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1316,6 +1316,7 @@ dependencies = [ "align_ext", "bit_field", "bitflags 1.3.2", + "bitvec", "buddy_system_allocator", "cfg-if", "fdt", diff --git a/kernel/comps/softirq/src/taskless.rs b/kernel/comps/softirq/src/taskless.rs index 15d27dd0..36e4b160 100644 --- a/kernel/comps/softirq/src/taskless.rs +++ b/kernel/comps/softirq/src/taskless.rs @@ -8,7 +8,7 @@ use core::{ }; use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink}; -use ostd::{cpu::local::CpuLocal, cpu_local, trap}; +use ostd::{cpu::local::StaticCpuLocal, cpu_local, trap}; use super::{ softirq_id::{TASKLESS_SOFTIRQ_ID, TASKLESS_URGENT_SOFTIRQ_ID}, @@ -123,7 +123,7 @@ impl Taskless { fn do_schedule( taskless: &Arc, - taskless_list: &'static CpuLocal>>, + taskless_list: &'static StaticCpuLocal>>, ) { if taskless.is_disabled.load(Ordering::Acquire) { return; @@ -158,7 +158,7 @@ pub(super) fn init() { /// If the `Taskless` is ready to be executed, it will be set to not scheduled /// and can be scheduled again. fn taskless_softirq_handler( - taskless_list: &'static CpuLocal>>, + taskless_list: &'static StaticCpuLocal>>, softirq_id: u8, ) { let mut processing_list = { diff --git a/osdk/deps/frame-allocator/src/smp_counter.rs b/osdk/deps/frame-allocator/src/smp_counter.rs index 142dfe74..ef63e2ba 100644 --- a/osdk/deps/frame-allocator/src/smp_counter.rs +++ b/osdk/deps/frame-allocator/src/smp_counter.rs @@ -2,7 +2,7 @@ //! A fast and scalable SMP counter. -use ostd::cpu::{all_cpus, local::CpuLocal, CpuId}; +use ostd::cpu::{all_cpus, local::StaticCpuLocal, CpuId}; use core::sync::atomic::{AtomicIsize, Ordering}; @@ -43,7 +43,7 @@ macro_rules! fast_smp_counter { /// Nevertheless, if the sum of added value exceeds [`usize::MAX`] the counter /// will wrap on overflow. 
pub struct FastSmpCounter { - per_cpu_counter: &'static CpuLocal, + per_cpu_counter: &'static StaticCpuLocal, } impl FastSmpCounter { @@ -51,7 +51,7 @@ impl FastSmpCounter { /// /// This function should only be used by the [`fast_smp_counter!`] macro. #[doc(hidden)] - pub const fn new(per_cpu_counter: &'static CpuLocal) -> Self { + pub const fn new(per_cpu_counter: &'static StaticCpuLocal) -> Self { Self { per_cpu_counter } } diff --git a/osdk/deps/heap-allocator/src/allocator.rs b/osdk/deps/heap-allocator/src/allocator.rs index 6ea04482..1db3392c 100644 --- a/osdk/deps/heap-allocator/src/allocator.rs +++ b/osdk/deps/heap-allocator/src/allocator.rs @@ -21,7 +21,7 @@ use crate::slab_cache::SlabCache; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[repr(usize)] -enum CommonSizeClass { +pub(crate) enum CommonSizeClass { Bytes8 = 8, Bytes16 = 16, Bytes32 = 32, @@ -34,7 +34,7 @@ enum CommonSizeClass { } impl CommonSizeClass { - const fn from_layout(layout: Layout) -> Option { + pub(crate) const fn from_layout(layout: Layout) -> Option { let size_class = match layout.size() { 0..=8 => CommonSizeClass::Bytes8, 9..=16 => CommonSizeClass::Bytes16, @@ -67,7 +67,7 @@ impl CommonSizeClass { }) } - fn from_size(size: usize) -> Option { + pub(crate) const fn from_size(size: usize) -> Option { match size { 8 => Some(CommonSizeClass::Bytes8), 16 => Some(CommonSizeClass::Bytes16), diff --git a/osdk/deps/heap-allocator/src/cpu_local_allocator.rs b/osdk/deps/heap-allocator/src/cpu_local_allocator.rs new file mode 100644 index 00000000..5befcdf5 --- /dev/null +++ b/osdk/deps/heap-allocator/src/cpu_local_allocator.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: MPL-2.0 + +use crate::allocator::CommonSizeClass; +use alloc::vec::Vec; +use core::ops::Deref; +use ostd::{ + cpu::{ + local::{DynCpuLocalChunk, DynamicCpuLocal}, + CpuId, + }, + prelude::*, + sync::SpinLock, + Error, +}; + +/// Allocator for dynamically-allocated CPU-local objects. 
+struct CpuLocalAllocator { + chunks: SpinLock>>, +} + +impl CpuLocalAllocator { + /// Creates a new allocator for dynamically-allocated CPU-local objects. + pub(self) const fn new() -> Self { + Self { + chunks: SpinLock::new(Vec::new()), + } + } + + /// Allocates a CPU-local object and initializes it with `init_values`. + pub(self) fn alloc( + &'static self, + init_values: &mut impl FnMut(CpuId) -> T, + ) -> Result> { + let mut chunks = self.chunks.lock(); + + for chunk in chunks.iter_mut() { + if !chunk.is_full() { + let cpu_local = chunk.alloc::(init_values).unwrap(); + return Ok(cpu_local); + } + } + + let mut new_chunk = DynCpuLocalChunk::::new()?; + let cpu_local = new_chunk.alloc::(init_values).unwrap(); + chunks.push(new_chunk); + + Ok(cpu_local) + } + + /// Deallocates a CPU-local object. + pub(self) fn dealloc(&self, cpu_local: DynamicCpuLocal) { + let mut cpu_local = cpu_local; + let mut chunks = self.chunks.lock(); + + let mut chunk_index = None; + for (i, chunk) in chunks.iter_mut().enumerate() { + match chunk.try_dealloc(cpu_local) { + Ok(()) => { + chunk_index = Some(i); + break; + } + Err(returned) => cpu_local = returned, + } + } + let chunk_index = chunk_index.unwrap(); + if chunks[chunk_index].is_empty() && chunks.iter().filter(|c| c.is_empty()).count() > 1 { + chunks.swap_remove(chunk_index); + } + } +} + +/// A wrapper over [`DynamicCpuLocal`] to deallocate CPU-local objects on +/// drop automatically. +pub struct CpuLocalBox(Option>); + +impl Deref for CpuLocalBox { + type Target = DynamicCpuLocal; + + fn deref(&self) -> &Self::Target { + self.0.as_ref().unwrap() + } +} + +impl Drop for CpuLocalBox { + fn drop(&mut self) { + let cpu_local = self.0.take().unwrap(); + dealloc_cpu_local(cpu_local); + } +} + +/// Global allocators for dynamically-allocated CPU-local objects. 
+static ALLOCATOR_8: CpuLocalAllocator<8> = CpuLocalAllocator::new(); +static ALLOCATOR_16: CpuLocalAllocator<16> = CpuLocalAllocator::new(); +static ALLOCATOR_32: CpuLocalAllocator<32> = CpuLocalAllocator::new(); + +/// Allocates a dynamically-allocated CPU-local object of type `T` and +/// initializes it with `init_values`. +/// +/// Currently, the size of `T` must be no larger than 32 bytes. +pub fn alloc_cpu_local(mut init_values: impl FnMut(CpuId) -> T) -> Result> { + let size = core::mem::size_of::(); + let class = CommonSizeClass::from_size(size).ok_or(Error::InvalidArgs)?; + let cpu_local = match class { + CommonSizeClass::Bytes8 => ALLOCATOR_8.alloc::(&mut init_values), + CommonSizeClass::Bytes16 => ALLOCATOR_16.alloc::(&mut init_values), + CommonSizeClass::Bytes32 => ALLOCATOR_32.alloc::(&mut init_values), + // TODO: Support contiguous allocations for larger sizes. + // Since cache lines are normally 64 bytes, when allocating CPU-local + // objects with larger sizes, we should allocate a `Vec` with size + // `num_cpus()` instead. + _ => Err(Error::InvalidArgs), + }?; + Ok(CpuLocalBox(Some(cpu_local))) +} + +/// Deallocates a dynamically-allocated CPU-local object of type `T`. 
+fn dealloc_cpu_local(cpu_local: DynamicCpuLocal) { + let size = core::mem::size_of::(); + let class = CommonSizeClass::from_size(size).unwrap(); + match class { + CommonSizeClass::Bytes8 => ALLOCATOR_8.dealloc(cpu_local), + CommonSizeClass::Bytes16 => ALLOCATOR_16.dealloc(cpu_local), + CommonSizeClass::Bytes32 => ALLOCATOR_32.dealloc(cpu_local), + _ => todo!(), + } +} diff --git a/osdk/deps/heap-allocator/src/lib.rs b/osdk/deps/heap-allocator/src/lib.rs index 8463f80d..27341281 100644 --- a/osdk/deps/heap-allocator/src/lib.rs +++ b/osdk/deps/heap-allocator/src/lib.rs @@ -4,7 +4,11 @@ #![no_std] #![deny(unsafe_code)] +extern crate alloc; + mod allocator; +mod cpu_local_allocator; mod slab_cache; pub use allocator::{type_from_layout, HeapAllocator}; +pub use cpu_local_allocator::{alloc_cpu_local, CpuLocalBox}; diff --git a/ostd/Cargo.toml b/ostd/Cargo.toml index ff49ca89..194459c6 100644 --- a/ostd/Cargo.toml +++ b/ostd/Cargo.toml @@ -37,6 +37,7 @@ spin = "0.9.4" smallvec = "1.13.2" unwinding = { version = "=0.2.5", default-features = false, features = ["fde-gnu-eh-frame-hdr", "hide-trace", "panic", "personality", "unwinder"] } volatile = "0.6.1" +bitvec = { version = "1.0", default-features = false, features = ["alloc"] } [target.x86_64-unknown-none.dependencies] x86_64 = "0.14.13" diff --git a/ostd/src/arch/x86/trap/gdt.rs b/ostd/src/arch/x86/trap/gdt.rs index 358aa445..62b4c2a6 100644 --- a/ostd/src/arch/x86/trap/gdt.rs +++ b/ostd/src/arch/x86/trap/gdt.rs @@ -18,7 +18,7 @@ use x86_64::{ PrivilegeLevel, VirtAddr, }; -use crate::cpu::local::CpuLocal; +use crate::cpu::local::{CpuLocal, StaticCpuLocal}; /// Initializes and loads the GDT and TSS. /// @@ -95,10 +95,10 @@ pub(super) unsafe fn init() { // No other special initialization is required because the kernel stack information is stored in // the TSS when we start the userspace program. See `syscall.S` for details. 
#[link_section = ".cpu_local_tss"] -static LOCAL_TSS: CpuLocal = { +static LOCAL_TSS: StaticCpuLocal = { let tss = TaskStateSegment::new(); // SAFETY: The `.cpu_local_tss` section is part of the CPU-local area. - unsafe { CpuLocal::__new(tss) } + unsafe { CpuLocal::__new_static(tss) } }; // Kernel code and data descriptors. diff --git a/ostd/src/cpu/local/cpu_local.rs b/ostd/src/cpu/local/cpu_local.rs deleted file mode 100644 index a81b131d..00000000 --- a/ostd/src/cpu/local/cpu_local.rs +++ /dev/null @@ -1,201 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 - -//! The CPU-local variable implementation. - -use core::{marker::Sync, ops::Deref}; - -use super::{__cpu_local_end, __cpu_local_start}; -use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard}; - -/// Defines a CPU-local variable. -/// -/// The accessors of the CPU-local variables are defined with [`CpuLocal`]. -/// -/// You can get the reference to the inner object on one CPU by calling -/// [`CpuLocal::get_on_cpu`]. Also if you intend to access the inner object -/// on the current CPU, you can use [`CpuLocal::get_with`]. The latter -/// accessors can be used even if the inner object is not `Sync`. -/// -/// # Example -/// -/// ```rust -/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap}; -/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell}; -/// -/// cpu_local! { -/// static FOO: AtomicU32 = AtomicU32::new(1); -/// pub static BAR: Cell = Cell::new(2); -/// } -/// -/// fn not_an_atomic_function() { -/// let preempt_guard = disable_preempt(); -/// let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu()); -/// let val_of_foo = ref_of_foo.load(Ordering::Relaxed); -/// println!("FOO VAL: {}", val_of_foo); -/// -/// let irq_guard = trap::disable_local(); -/// let bar_guard = BAR.get_with(&irq_guard); -/// let val_of_bar = bar_guard.get(); -/// println!("BAR VAL: {}", val_of_bar); -/// } -/// ``` -#[macro_export] -macro_rules! 
cpu_local { - ($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => { - $( - #[link_section = ".cpu_local"] - $(#[$attr])* $vis static $name: $crate::cpu::local::CpuLocal<$t> = { - let val = $init; - // SAFETY: The per-CPU variable instantiated is statically - // stored in the special `.cpu_local` section. - unsafe { - $crate::cpu::local::CpuLocal::__new(val) - } - }; - )* - }; -} - -/// CPU-local objects. -/// -/// CPU-local objects are instantiated once per CPU core. They can be shared to -/// other cores. In the context of a preemptible kernel task, when holding the -/// reference to the inner object, the object is always the one in the original -/// core (when the reference is created), no matter which core the code is -/// currently running on. -/// -/// For the difference between [`CpuLocal`] and [`super::CpuLocalCell`], see -/// [`super`]. -pub struct CpuLocal(T); - -impl CpuLocal { - /// Creates a new CPU-local object. - /// - /// Please do not call this function directly. Instead, use the - /// `cpu_local!` macro. - /// - /// # Safety - /// - /// The caller should ensure that the object initialized by this - /// function resides in the `.cpu_local` section. Otherwise the - /// behavior is undefined. - #[doc(hidden)] - pub const unsafe fn __new(val: T) -> Self { - Self(val) - } - - /// Gets access to the underlying value on the current CPU with a - /// provided IRQ guard. - /// - /// By this method, you can borrow a reference to the underlying value - /// even if `T` is not `Sync`. Because that it is per-CPU and IRQs are - /// disabled, no other running tasks can access it. - pub fn get_with<'a>( - &'static self, - guard: &'a DisabledLocalIrqGuard, - ) -> CpuLocalDerefGuard<'a, T> { - CpuLocalDerefGuard { - cpu_local: self, - guard, - } - } - - /// Gets access to the underlying value through a raw pointer. - /// - /// This method is safe, but using the returned pointer will be unsafe. 
- pub(crate) fn as_ptr(&'static self) -> *const T { - super::is_used::debug_set_true(); - - let offset = self.get_offset(); - - let local_base = arch::cpu::local::get_base() as usize; - let local_va = local_base + offset; - - // A sanity check about the alignment. - debug_assert_eq!(local_va % core::mem::align_of::(), 0); - - local_va as *mut T - } - - /// Gets the offset of the CPU-local object in the CPU-local area. - fn get_offset(&'static self) -> usize { - let bsp_va = self as *const _ as usize; - let bsp_base = __cpu_local_start as usize; - // The implementation should ensure that the CPU-local object resides in the `.cpu_local`. - debug_assert!(bsp_va + core::mem::size_of::() <= __cpu_local_end as usize); - - bsp_va - bsp_base - } -} - -impl CpuLocal { - /// Gets access to the CPU-local value on a specific CPU. - /// - /// This allows the caller to access CPU-local data from a remote CPU, - /// so the data type must be `Sync`. - pub fn get_on_cpu(&'static self, cpu_id: CpuId) -> &'static T { - super::is_used::debug_set_true(); - - let cpu_id = cpu_id.as_usize(); - - // If on the BSP, just use the statically linked storage. - if cpu_id == 0 { - return &self.0; - } - - // SAFETY: At this time we have a non-BSP `CpuId`, which means that - // `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must - // also have been called (see the implementation of `cpu::init_on_bsp`), - // so `CPU_LOCAL_STORAGES` must already be initialized. - let storages = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() }; - // SAFETY: `cpu_id` is guaranteed to be in range because the type - // invariant of `CpuId`. - let storage = unsafe { *storages.get_unchecked(cpu_id - 1) }; - let base = crate::mm::paddr_to_vaddr(storage); - - let offset = self.get_offset(); - let ptr = (base + offset) as *const T; - - // SAFETY: `ptr` represents CPU-local data on a remote CPU. 
It - // contains valid data, the type is `Sync`, and no one will mutably - // borrow it, so creating an immutable borrow here is valid. - unsafe { &*ptr } - } -} - -// SAFETY: At any given time, only one task can access the inner value `T` of a -// CPU-local variable if `T` is not `Sync`. We guarantee it by disabling the -// reference to the inner value, or turning off preemptions when creating -// the reference. -unsafe impl Sync for CpuLocal {} - -// Prevent valid instances of `CpuLocal` from being copied to any memory areas -// outside the `.cpu_local` section. -impl !Copy for CpuLocal {} -impl !Clone for CpuLocal {} - -// In general, it does not make any sense to send instances of `CpuLocal` to -// other tasks as they should live on other CPUs to make sending useful. -impl !Send for CpuLocal {} - -/// A guard for accessing the CPU-local object. -/// -/// It ensures that the CPU-local object is accessed with IRQs disabled. -/// It is created by [`CpuLocal::borrow_with`]. -#[must_use] -pub struct CpuLocalDerefGuard<'a, T: 'static> { - cpu_local: &'static CpuLocal, - #[expect(dead_code)] - guard: &'a DisabledLocalIrqGuard, -} - -impl Deref for CpuLocalDerefGuard<'_, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - // SAFETY: it should be properly initialized before accesses. - // And we do not create a mutable reference over it. The IRQs - // are disabled so it can only be referenced from this task. - unsafe { &*self.cpu_local.as_ptr() } - } -} diff --git a/ostd/src/cpu/local/dyn_cpu_local.rs b/ostd/src/cpu/local/dyn_cpu_local.rs new file mode 100644 index 00000000..aa2a495e --- /dev/null +++ b/ostd/src/cpu/local/dyn_cpu_local.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Dynamically-allocated CPU-local objects. 
+ +use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull}; + +use bitvec::prelude::{bitvec, BitVec}; + +use super::{AnyStorage, CpuLocal}; +use crate::{ + cpu::{all_cpus, num_cpus, CpuId, PinCurrentCpu}, + mm::{paddr_to_vaddr, FrameAllocOptions, Segment, Vaddr, PAGE_SIZE}, + trap::DisabledLocalIrqGuard, + Result, +}; + +/// A dynamically-allocated storage for a CPU-local variable of type `T`. +/// +/// Such a CPU-local storage should be allocated and deallocated by +/// [`DynCpuLocalChunk`], not directly. Dropping it without deallocation +/// will cause panic. +/// +/// When dropping a `CpuLocal>`, we have no way to know +/// which `DynCpuLocalChunk` the CPU-local object was originally allocated +/// from. Therefore, we rely on the user to correctly manage the corresponding +/// `DynCpuLocalChunk`, ensuring that both allocation and deallocation of +/// `CpuLocal>` occur within the same chunk. +/// +/// To properly deallocate the CPU-local object, the user must explicitly call +/// the appropriate `DynCpuLocalChunk`'s `try_dealloc()`. Otherwise, +/// dropping it directly will cause a panic. +pub struct DynamicStorage(NonNull); + +unsafe impl AnyStorage for DynamicStorage { + fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T { + self.get_ptr_on_target(guard.current_cpu()) + } + + fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T { + let bsp_va = self.0.as_ptr() as usize; + let va = bsp_va + cpu_id.as_usize() * CHUNK_SIZE; + va as *mut T + } + + fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T { + self.get_ptr_on_target(cpu).cast_mut() + } +} + +impl Drop for DynamicStorage { + fn drop(&mut self) { + panic!( + "Do not drop `DynamicStorage` directly. \ + Use `DynCpuLocalChunk::try_dealloc` instead." 
+ ); + } +} + +impl alloc::fmt::Debug for CpuLocal> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut list = f.debug_list(); + for cpu in all_cpus() { + let val = self.get_on_cpu(cpu); + list.entry(&(&cpu, val)); + } + list.finish() + } +} + +impl CpuLocal> { + /// Creates a new dynamically-allocated CPU-local object, and + /// initializes it with `init_values`. + /// + /// The given `ptr` points to the variable located on the BSP. + /// + /// Please do not call this function directly. Instead, use + /// `DynCpuLocalChunk::alloc`. + /// + /// # Safety + /// + /// The caller must ensure that the new per-CPU object belongs to an + /// existing [`DynCpuLocalChunk`], and does not overlap with any existing + /// CPU-local object. + unsafe fn __new_dynamic(ptr: *mut T, init_values: &mut impl FnMut(CpuId) -> T) -> Self { + let mut storage = DynamicStorage(NonNull::new(ptr).unwrap()); + for cpu in all_cpus() { + let ptr = storage.get_mut_ptr_on_target(cpu); + // SAFETY: `ptr` points to valid, uninitialized per-CPU memory + // reserved for CPU-local storage. This initialization occurs + // before any other code can access the memory. References to + // the data may only be created after `Self` is created, ensuring + // exclusive access by the current task. Each per-CPU memory + // region is written exactly once using `ptr::write`, which is + // safe for uninitialized memory. + unsafe { + core::ptr::write(ptr, init_values(cpu)); + } + } + + Self { + storage, + phantom: PhantomData, + } + } +} + +const CHUNK_SIZE: usize = PAGE_SIZE; + +/// Frame metadata for the frames that back a [`DynCpuLocalChunk`]. +#[derive(Debug, Clone, Copy)] +struct DynCpuLocalMeta; +crate::impl_frame_meta_for!(DynCpuLocalMeta); + +/// Manages dynamically-allocated CPU-local chunks. +/// +/// Each CPU owns a chunk of size `CHUNK_SIZE`, and the chunks are laid +/// out contiguously in the order of CPU IDs. Per-CPU variables lie within +/// the chunks.
+pub struct DynCpuLocalChunk { + segment: ManuallyDrop>, + bitmap: BitVec, +} + +impl DynCpuLocalChunk { + /// Creates a new dynamically-allocated CPU-local chunk. + pub fn new() -> Result { + let total_chunk_size = CHUNK_SIZE * num_cpus(); + let segment = FrameAllocOptions::new() + .zeroed(false) + .alloc_segment_with(total_chunk_size.div_ceil(PAGE_SIZE), |_| DynCpuLocalMeta)?; + + let num_items = CHUNK_SIZE / ITEM_SIZE; + const { assert!(CHUNK_SIZE % ITEM_SIZE == 0) }; + + Ok(Self { + segment: ManuallyDrop::new(segment), + bitmap: bitvec![0; num_items], + }) + } + + /// Returns the starting virtual address of the local chunk owned by the BSP. + fn start_vaddr(&self) -> Vaddr { + paddr_to_vaddr(self.segment.start_paddr()) + } + + /// Allocates a CPU-local object from the chunk, and + /// initializes it with `init_values`. + /// + /// Returns `None` if the chunk is full. + pub fn alloc( + &mut self, + init_values: &mut impl FnMut(CpuId) -> T, + ) -> Option>> { + const { + assert!(ITEM_SIZE.is_power_of_two()); + assert!(core::mem::size_of::() <= ITEM_SIZE); + assert!(core::mem::align_of::() <= ITEM_SIZE); + } + + let index = self.bitmap.first_zero()?; + self.bitmap.set(index, true); + // SAFETY: `index` refers to an available position in the chunk + // for allocating a new CPU-local object. + unsafe { + let vaddr = self.start_vaddr() + index * ITEM_SIZE; + Some(CpuLocal::__new_dynamic(vaddr as *mut T, init_values)) + } + } + + /// Gets the index of a dynamically-allocated CPU-local object + /// within the chunk. + /// + /// Returns `None` if the object's BSP address lies outside this chunk's `[start_vaddr, start_vaddr + CHUNK_SIZE)` range. + fn get_item_index(&mut self, cpu_local: &CpuLocal>) -> Option { + let vaddr = cpu_local.storage.0.as_ptr() as Vaddr; + let start_vaddr = self.start_vaddr(); + + let offset = vaddr.checked_sub(start_vaddr)?; + if offset >= CHUNK_SIZE { + return None; + } + + debug_assert_eq!(offset % ITEM_SIZE, 0); + + Some(offset / ITEM_SIZE) + } + + /// Attempts to deallocate a previously allocated CPU-local object.
+ /// + /// Returns `Err(cpu_local)` if the object does not belong to this chunk. + pub fn try_dealloc( + &mut self, + mut cpu_local: CpuLocal>, + ) -> core::result::Result<(), CpuLocal>> { + let Some(index) = self.get_item_index(&cpu_local) else { + return Err(cpu_local); + }; + self.bitmap.set(index, false); + for cpu in all_cpus() { + let ptr = cpu_local.storage.get_mut_ptr_on_target(cpu); + // SAFETY: `ptr` points to the valid CPU-local object. We can + // mutably borrow the CPU-local object on `cpu` because we have + // the exclusive access to `cpu_local`. Each CPU-local object + // is dropped exactly once. After the deallocation, no one will + // access the dropped CPU-local object, since we explicitly + // forget the `cpu_local`. + unsafe { + core::ptr::drop_in_place(ptr); + } + } + let _ = ManuallyDrop::new(cpu_local); + Ok(()) + } + + /// Checks whether the chunk is full. + pub fn is_full(&self) -> bool { + self.bitmap.all() + } + + /// Checks whether the chunk is empty. + pub fn is_empty(&self) -> bool { + self.bitmap.not_any() + } +} + +impl Drop for DynCpuLocalChunk { + fn drop(&mut self) { + if self.is_empty() { + // SAFETY: The `segment` does not contain any CPU-local objects. + // It is the last time the `segment` is accessed, and it will be + // dropped only once. + unsafe { ManuallyDrop::drop(&mut self.segment) } + } else { + // Leak the `segment` and panic. + panic!("Dropping `DynCpuLocalChunk` while some CPU-local objects are still alive"); + } + } +} diff --git a/ostd/src/cpu/local/mod.rs b/ostd/src/cpu/local/mod.rs index f7b8f290..260848fc 100644 --- a/ostd/src/cpu/local/mod.rs +++ b/ostd/src/cpu/local/mod.rs @@ -2,47 +2,68 @@ //! CPU local storage. //! -//! This module provides a mechanism to define CPU-local objects, by the macro -//! [`crate::cpu_local!`]. +//! This module provides a mechanism to define CPU-local objects. Users can +//! define a statically-allocated CPU-local object by the macro +//! 
[`crate::cpu_local!`], or allocate a dynamically-allocated CPU-local +//! object with the function [`osdk_heap_allocator::alloc_cpu_local`]. //! -//! Such a mechanism exploits the fact that constant values of non-[`Copy`] -//! types can be bitwise copied. For example, a [`Option`] object, though -//! being not [`Copy`], have a constant constructor [`Option::None`] that -//! produces a value that can be bitwise copied to create a new instance. -//! [`alloc::sync::Arc`] however, don't have such a constructor, and thus cannot -//! be directly used as a CPU-local object. Wrapping it in a type that has a -//! constant constructor, like [`Option`], can make it CPU-local. +//! The mechanism for statically-allocated CPU-local objects exploits the fact +//! that constant values of non-[`Copy`] types can be bitwise copied. For +//! example, an [`Option`] object, though not [`Copy`], has a constant +//! constructor [`Option::None`] that produces a value that can be bitwise +//! copied to create a new instance. [`alloc::sync::Arc`], however, doesn't have +//! such a constructor, and thus cannot be directly used as a statically- +//! allocated CPU-local object. Wrapping it in a type that has a constant +//! constructor, like [`Option`], can make it statically-allocated CPU-local. //! //! # Implementation //! -//! These APIs are implemented by placing the CPU-local objects in a special -//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects linked -//! in this section, and these objects are copied to dynamically allocated -//! local storage of each application processors (AP) during the initialization -//! process. +//! These APIs are implemented as follows: +//! 1. For statically-allocated CPU-local objects, we place them in a special +//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects +//! linked in this section, and these objects are copied to dynamically +//!
allocated local storage of each application processors (AP) during the +//! initialization process. +//! 2. For dynamically-allocated CPU-local objects, we prepare a fixed-size +//! chunk for each CPU. These per-CPU memory chunks are laid out contiguously +//! in memory in the order of the CPU IDs. A dynamically-allocated CPU-local +//! object can be allocated by occupying the same offset in each per-CPU +//! memory chunk. // This module also, provide CPU-local cell objects that have inner mutability. // -// The difference between CPU-local objects (defined by [`crate::cpu_local!`]) -// and CPU-local cell objects (defined by [`crate::cpu_local_cell!`]) is that -// the CPU-local objects can be shared across CPUs. While through a CPU-local -// cell object you can only access the value on the current CPU, therefore -// enabling inner mutability without locks. +// The difference between statically-allocated CPU-local objects (defined by +// [`crate::cpu_local!`]) and CPU-local cell objects (defined by +// [`crate::cpu_local_cell!`]) is that the CPU-local objects can be shared +// across CPUs. While through a CPU-local cell object you can only access the +// value on the current CPU, therefore enabling inner mutability without locks. mod cell; -mod cpu_local; +mod dyn_cpu_local; +mod static_cpu_local; pub(crate) mod single_instr; -use core::alloc::Layout; +use core::{alloc::Layout, marker::PhantomData, ops::Deref}; use align_ext::AlignExt; pub use cell::CpuLocalCell; -pub use cpu_local::{CpuLocal, CpuLocalDerefGuard}; +pub use dyn_cpu_local::DynCpuLocalChunk; +use dyn_cpu_local::DynamicStorage; use spin::Once; +use static_cpu_local::StaticStorage; use super::CpuId; -use crate::mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE}; +use crate::{ + mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE}, + trap::DisabledLocalIrqGuard, +}; + +/// Dynamically-allocated CPU-local objects. 
+pub type DynamicCpuLocal = CpuLocal>; + +/// Statically-allocated CPU-local objects. +pub type StaticCpuLocal = CpuLocal>; // These symbols are provided by the linker script. extern "C" { @@ -50,10 +71,120 @@ extern "C" { fn __cpu_local_end(); } -/// The CPU-local areas for APs. +/// A trait to abstract any type that can be used as a slot for a CPU-local +/// variable of type `T`. +/// +/// Each slot provides the memory space for storing `num_cpus` instances +/// of type `T`. +/// +/// # Safety +/// +/// The implementor must ensure that the returned pointer refers to the +/// variable on the correct CPU. +pub unsafe trait AnyStorage { + /// Gets the `const` pointer for the object on the current CPU. + fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T; + + /// Gets the `const` pointer for the object on a target CPU. + fn get_ptr_on_target(&self, cpu: CpuId) -> *const T; + + /// Gets the `mut` pointer for the object on a target CPU. + /// + /// This method is intended for use when initializing or dropping the storage. + fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T; +} + +/// A CPU-local variable for type `T`, backed by a storage of type `S`. +/// +/// CPU-local objects are instantiated once per CPU core. They can be shared to +/// other cores. In the context of a preemptible kernel task, when holding the +/// reference to the inner object, the object is always the one in the original +/// core (when the reference is created), no matter which core the code is +/// currently running on. +pub struct CpuLocal> { + storage: S, + phantom: PhantomData, +} + +impl> CpuLocal { + /// Gets access to the underlying value on the current CPU with a + /// provided IRQ guard. + /// + /// By this method, you can borrow a reference to the underlying value + /// on the current CPU even if `T` is not `Sync`. 
+ pub fn get_with<'a>( + &'a self, + guard: &'a DisabledLocalIrqGuard, + ) -> CpuLocalDerefGuard<'a, T, S> { + CpuLocalDerefGuard { + cpu_local: self, + guard, + } + } +} + +impl> CpuLocal { + /// Gets access to the CPU-local value on a specific CPU. + /// + /// This allows the caller to access CPU-local data from a remote CPU, + /// so the data type must be `Sync`. + pub fn get_on_cpu(&self, target_cpu_id: CpuId) -> &T { + let ptr = self.storage.get_ptr_on_target(target_cpu_id); + // SAFETY: `ptr` represents CPU-local data on a remote CPU. It + // contains valid data, the type is `Sync`, and no one will mutably + // borrow it, so creating an immutable borrow here is valid. + unsafe { &*ptr } + } +} + +/// A guard for accessing the CPU-local object. +/// +/// It ensures that the CPU-local object is accessed with IRQs disabled. +/// It is created by [`CpuLocal::get_with`]. +#[must_use] +pub struct CpuLocalDerefGuard<'a, T: 'static, S: AnyStorage> { + cpu_local: &'a CpuLocal, + guard: &'a DisabledLocalIrqGuard, +} + +impl<'a, T: 'static, S: AnyStorage> Deref for CpuLocalDerefGuard<'a, T, S> { + type Target = T; + + fn deref(&self) -> &'a Self::Target { + is_used::debug_set_true(); + + let ptr = self.cpu_local.storage.get_ptr_on_current(self.guard); + // SAFETY: `ptr` represents CPU-local data on the current CPU. It + // contains valid data, only the current task can reference the data + // (due to `self.guard`), and no one will mutably borrow it, so + // creating an immutable borrow here is valid. + unsafe { &*ptr } + } +} + +// SAFETY: At any given time, only one task can access the inner value `T` of a +// CPU-local variable if `T` is not `Sync`. We guarantee it by disabling the +// reference to the inner value, or turning off preemptions when creating +// the reference. 
+unsafe impl> Sync for CpuLocal {} +unsafe impl Send for CpuLocal> {} + +// Implement `!Copy` and `!Clone` for `CpuLocal` to ensure memory safety: +// - Prevent valid instances of `CpuLocal>` from being copied +// to any memory areas outside the `.cpu_local` section. +// - Prevent multiple valid instances of `CpuLocal>` from +// referring to the same CPU-local object, avoiding double deallocation. +impl> !Copy for CpuLocal {} +impl> !Clone for CpuLocal {} + +// In general, it does not make any sense to send instances of static `CpuLocal` +// to other tasks as they should live on other CPUs to make sending useful. +impl !Send for CpuLocal> {} + +/// The static CPU-local areas for APs. static CPU_LOCAL_STORAGES: Once<&'static [Paddr]> = Once::new(); -/// Copies the CPU-local data on the bootstrap processor (BSP) +/// Copies the static CPU-local data on the bootstrap processor (BSP) /// for application processors (APs). /// /// # Safety @@ -123,7 +254,7 @@ pub(crate) unsafe fn copy_bsp_for_ap(num_cpus: usize) { CPU_LOCAL_STORAGES.call_once(|| res); } -/// Gets the pointer to the CPU-local storage for the given AP. +/// Gets the pointer to the static CPU-local storage for the given AP. /// /// # Panics /// @@ -148,7 +279,8 @@ pub(crate) fn get_ap(cpu_id: CpuId) -> Paddr { } mod is_used { - //! This module tracks whether any CPU-local variables are used. + //! This module tracks whether any statically-allocated CPU-local + //! variables are used. //! //! [`copy_bsp_for_ap`] copies the CPU local data from the BSP //! to the APs, so it requires as a safety condition that the diff --git a/ostd/src/cpu/local/static_cpu_local.rs b/ostd/src/cpu/local/static_cpu_local.rs new file mode 100644 index 00000000..68f54bdd --- /dev/null +++ b/ostd/src/cpu/local/static_cpu_local.rs @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Statically-allocated CPU-local objects. 
+
+use core::marker::PhantomData;
+
+use super::{AnyStorage, CpuLocal, __cpu_local_end, __cpu_local_start};
+use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard};
+
+/// Defines a statically-allocated CPU-local variable.
+///
+/// The accessors of the CPU-local variables are defined with [`CpuLocal`].
+///
+/// You can get the reference to the inner object on one CPU by calling
+/// [`CpuLocal::get_on_cpu`]. Also if you intend to access the inner object
+/// on the current CPU, you can use [`CpuLocal::get_with`]. The latter
+/// accessor can be used even if the inner object is not `Sync`.
+///
+/// # Example
+///
+/// ```rust
+/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap};
+/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell};
+///
+/// cpu_local! {
+///     static FOO: AtomicU32 = AtomicU32::new(1);
+///     pub static BAR: Cell<usize> = Cell::new(2);
+/// }
+///
+/// fn not_an_atomic_function() {
+///     let preempt_guard = disable_preempt();
+///     let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu());
+///     let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
+///     println!("FOO VAL: {}", val_of_foo);
+///
+///     let irq_guard = trap::disable_local();
+///     let bar_guard = BAR.get_with(&irq_guard);
+///     let val_of_bar = bar_guard.get();
+///     println!("BAR VAL: {}", val_of_bar);
+/// }
+/// ```
+#[macro_export]
+macro_rules! cpu_local {
+    ($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => {
+        $(
+            #[link_section = ".cpu_local"]
+            $(#[$attr])* $vis static $name: $crate::cpu::local::StaticCpuLocal<$t> = {
+                let val = $init;
+                // SAFETY: The per-CPU variable instantiated is statically
+                // stored in the special `.cpu_local` section.
+                unsafe {
+                    $crate::cpu::local::CpuLocal::__new_static(val)
+                }
+            };
+        )*
+    };
+}
+
+/// A static storage for a CPU-local variable of type `T`.
+///
+/// Such a CPU-local storage is not intended to be allocated directly.
+/// Use the `cpu_local!` macro instead.
+pub struct StaticStorage<T: 'static>(T);
+
+impl<T: 'static> StaticStorage<T> {
+    /// Gets access to the underlying value through a raw pointer.
+    ///
+    /// This method is safe, but using the returned pointer will be unsafe.
+    fn as_ptr(&self) -> *const T {
+        super::is_used::debug_set_true();
+
+        let offset = self.get_offset();
+
+        let local_base = arch::cpu::local::get_base() as usize;
+        let local_va = local_base + offset;
+
+        // A sanity check about the alignment.
+        debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
+
+        local_va as *const T
+    }
+
+    /// Gets the offset of the CPU-local object in the CPU-local area.
+    fn get_offset(&self) -> usize {
+        let bsp_va = self as *const _ as usize;
+        let bsp_base = __cpu_local_start as usize;
+        // The implementation should ensure that the CPU-local object resides in the `.cpu_local` section.
+        debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
+
+        bsp_va - bsp_base
+    }
+}
+
+unsafe impl<T: 'static> AnyStorage<T> for StaticStorage<T> {
+    fn get_ptr_on_current(&self, _guard: &DisabledLocalIrqGuard) -> *const T {
+        self.as_ptr()
+    }
+
+    fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T {
+        super::is_used::debug_set_true();
+
+        let cpu_id = cpu_id.as_usize();
+
+        // If on the BSP, just use the statically linked storage.
+        if cpu_id == 0 {
+            return &self.0 as *const T;
+        }
+
+        let base = {
+            // SAFETY: At this time we have a non-BSP `CpuId`, which means that
+            // `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must
+            // also have been called (see the implementation of `cpu::init_on_bsp`),
+            // so `CPU_LOCAL_STORAGES` must already be initialized.
+            let storages = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() };
+            // SAFETY: `cpu_id` is guaranteed to be in range because of the
+            // type invariant of `CpuId`.
+            let storage = unsafe { *storages.get_unchecked(cpu_id - 1) };
+            crate::mm::paddr_to_vaddr(storage)
+        };
+
+        let offset = self.get_offset();
+        (base + offset) as *const T
+    }
+
+    fn get_mut_ptr_on_target(&mut self, _: CpuId) -> *mut T {
+        // `StaticStorage` does not support `get_mut_ptr_on_target`, because
+        // statically-allocated CPU-local objects do not require per-CPU initialization.
+        panic!("Can't get the mutable pointer of StaticStorage on a target CPU.");
+    }
+}
+
+impl<T: 'static> CpuLocal<T, StaticStorage<T>> {
+    /// Creates a new statically-allocated CPU-local object.
+    ///
+    /// Please do not call this function directly. Instead, use the
+    /// `cpu_local!` macro.
+    ///
+    /// # Safety
+    ///
+    /// The caller should ensure that the object initialized by this
+    /// function resides in the `.cpu_local` section. Otherwise the
+    /// behavior is undefined.
+    #[doc(hidden)]
+    pub const unsafe fn __new_static(val: T) -> Self {
+        Self {
+            storage: StaticStorage(val),
+            phantom: PhantomData,
+        }
+    }
+
+    /// Gets access to the underlying value through a raw pointer.
+    ///
+    /// This method is safe, but using the returned pointer will be unsafe.
+    pub(crate) fn as_ptr(&self) -> *const T {
+        self.storage.as_ptr()
+    }
+}