Add dynamically-allocated CPU-local objects

This commit is contained in:
Wang Siyuan 2025-06-03 14:00:47 +00:00 committed by Ruihan Li
parent f24bc718fa
commit dfd3042276
12 changed files with 698 additions and 240 deletions

1
Cargo.lock generated
View File

@ -1316,6 +1316,7 @@ dependencies = [
"align_ext",
"bit_field",
"bitflags 1.3.2",
"bitvec",
"buddy_system_allocator",
"cfg-if",
"fdt",

View File

@ -8,7 +8,7 @@ use core::{
};
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink};
use ostd::{cpu::local::CpuLocal, cpu_local, trap};
use ostd::{cpu::local::StaticCpuLocal, cpu_local, trap};
use super::{
softirq_id::{TASKLESS_SOFTIRQ_ID, TASKLESS_URGENT_SOFTIRQ_ID},
@ -123,7 +123,7 @@ impl Taskless {
fn do_schedule(
taskless: &Arc<Taskless>,
taskless_list: &'static CpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
taskless_list: &'static StaticCpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
) {
if taskless.is_disabled.load(Ordering::Acquire) {
return;
@ -158,7 +158,7 @@ pub(super) fn init() {
/// If the `Taskless` is ready to be executed, it will be set to not scheduled
/// and can be scheduled again.
fn taskless_softirq_handler(
taskless_list: &'static CpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
taskless_list: &'static StaticCpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
softirq_id: u8,
) {
let mut processing_list = {

View File

@ -2,7 +2,7 @@
//! A fast and scalable SMP counter.
use ostd::cpu::{all_cpus, local::CpuLocal, CpuId};
use ostd::cpu::{all_cpus, local::StaticCpuLocal, CpuId};
use core::sync::atomic::{AtomicIsize, Ordering};
@ -43,7 +43,7 @@ macro_rules! fast_smp_counter {
/// Nevertheless, if the sum of added value exceeds [`usize::MAX`] the counter
/// will wrap on overflow.
pub struct FastSmpCounter {
per_cpu_counter: &'static CpuLocal<AtomicIsize>,
per_cpu_counter: &'static StaticCpuLocal<AtomicIsize>,
}
impl FastSmpCounter {
@ -51,7 +51,7 @@ impl FastSmpCounter {
///
/// This function should only be used by the [`fast_smp_counter!`] macro.
#[doc(hidden)]
pub const fn new(per_cpu_counter: &'static CpuLocal<AtomicIsize>) -> Self {
pub const fn new(per_cpu_counter: &'static StaticCpuLocal<AtomicIsize>) -> Self {
Self { per_cpu_counter }
}

View File

@ -21,7 +21,7 @@ use crate::slab_cache::SlabCache;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(usize)]
enum CommonSizeClass {
pub(crate) enum CommonSizeClass {
Bytes8 = 8,
Bytes16 = 16,
Bytes32 = 32,
@ -34,7 +34,7 @@ enum CommonSizeClass {
}
impl CommonSizeClass {
const fn from_layout(layout: Layout) -> Option<Self> {
pub(crate) const fn from_layout(layout: Layout) -> Option<Self> {
let size_class = match layout.size() {
0..=8 => CommonSizeClass::Bytes8,
9..=16 => CommonSizeClass::Bytes16,
@ -67,7 +67,7 @@ impl CommonSizeClass {
})
}
fn from_size(size: usize) -> Option<Self> {
pub(crate) const fn from_size(size: usize) -> Option<Self> {
match size {
8 => Some(CommonSizeClass::Bytes8),
16 => Some(CommonSizeClass::Bytes16),

View File

@ -0,0 +1,126 @@
// SPDX-License-Identifier: MPL-2.0
use crate::allocator::CommonSizeClass;
use alloc::vec::Vec;
use core::ops::Deref;
use ostd::{
cpu::{
local::{DynCpuLocalChunk, DynamicCpuLocal},
CpuId,
},
prelude::*,
sync::SpinLock,
Error,
};
/// Allocator for dynamically-allocated CPU-local objects.
///
/// All chunks owned by one allocator share the same `ITEM_SIZE`, so one
/// allocator serves exactly one size class.
struct CpuLocalAllocator<const ITEM_SIZE: usize> {
// Every chunk created so far; the spin lock serializes allocation and
// deallocation.
chunks: SpinLock<Vec<DynCpuLocalChunk<ITEM_SIZE>>>,
}
impl<const ITEM_SIZE: usize> CpuLocalAllocator<ITEM_SIZE> {
    /// Creates a new allocator for dynamically-allocated CPU-local objects.
    ///
    /// The allocator starts without any chunk; chunks are created on demand
    /// by [`Self::alloc`].
    pub(self) const fn new() -> Self {
        Self {
            chunks: SpinLock::new(Vec::new()),
        }
    }

    /// Allocates a CPU-local object and initializes it with `init_values`.
    ///
    /// The object is placed in the first existing chunk that still has a
    /// free slot. If every chunk is full, a new chunk is created and
    /// appended to the chunk list.
    ///
    /// # Errors
    ///
    /// Returns an error if a new chunk is required but cannot be created.
    pub(self) fn alloc<T>(
        &'static self,
        init_values: &mut impl FnMut(CpuId) -> T,
    ) -> Result<DynamicCpuLocal<T>> {
        let mut chunks = self.chunks.lock();

        // `DynCpuLocalChunk::alloc` returns `None` for a full chunk, so a
        // single pass finds the first chunk with a free slot and allocates
        // from it, without a separate `is_full` scan.
        if let Some(cpu_local) = chunks
            .iter_mut()
            .find_map(|chunk| chunk.alloc::<T>(&mut *init_values))
        {
            return Ok(cpu_local);
        }

        let mut new_chunk = DynCpuLocalChunk::<ITEM_SIZE>::new()?;
        let cpu_local = new_chunk
            .alloc::<T>(init_values)
            .expect("a newly created chunk must have a free slot");
        chunks.push(new_chunk);

        Ok(cpu_local)
    }

    /// Deallocates a CPU-local object.
    ///
    /// Each chunk is asked in turn to take the object back. Once the owning
    /// chunk is found and becomes empty, it is released unless it is the
    /// only empty chunk left, so that one empty chunk stays cached.
    ///
    /// # Panics
    ///
    /// Panics if no chunk of this allocator owns `cpu_local`.
    pub(self) fn dealloc<T>(&self, cpu_local: DynamicCpuLocal<T>) {
        let mut cpu_local = cpu_local;
        let mut chunks = self.chunks.lock();

        let mut chunk_index = None;
        for (i, chunk) in chunks.iter_mut().enumerate() {
            match chunk.try_dealloc(cpu_local) {
                Ok(()) => {
                    chunk_index = Some(i);
                    break;
                }
                // The chunk does not own the object; take it back and try
                // the next chunk.
                Err(returned) => cpu_local = returned,
            }
        }
        let chunk_index =
            chunk_index.expect("the CPU-local object does not belong to any chunk");

        // Release the chunk only if another empty chunk remains.
        let has_other_empty_chunk = chunks
            .iter()
            .enumerate()
            .any(|(i, chunk)| i != chunk_index && chunk.is_empty());
        if chunks[chunk_index].is_empty() && has_other_empty_chunk {
            chunks.swap_remove(chunk_index);
        }
    }
}
/// A wrapper over [`DynamicCpuLocal<T>`] to deallocate CPU-local objects on
/// drop automatically.
///
/// The inner `Option` is `Some` from construction until the box is dropped;
/// it is only taken out by the `Drop` implementation so that the object can
/// be handed back to its allocator by value.
pub struct CpuLocalBox<T>(Option<DynamicCpuLocal<T>>);

impl<T> Deref for CpuLocalBox<T> {
    type Target = DynamicCpuLocal<T>;

    fn deref(&self) -> &Self::Target {
        self.0
            .as_ref()
            .expect("`CpuLocalBox` is only emptied while it is being dropped")
    }
}

impl<T> Drop for CpuLocalBox<T> {
    fn drop(&mut self) {
        // Avoid panicking inside `drop`: an already emptied box simply has
        // nothing left to deallocate.
        if let Some(cpu_local) = self.0.take() {
            dealloc_cpu_local(cpu_local);
        }
    }
}
/// Global allocators for dynamically-allocated CPU-local objects.
///
/// This one serves the 8-byte size class.
static ALLOCATOR_8: CpuLocalAllocator<8> = CpuLocalAllocator::new();
/// Global allocator serving the 16-byte size class.
static ALLOCATOR_16: CpuLocalAllocator<16> = CpuLocalAllocator::new();
/// Global allocator serving the 32-byte size class.
static ALLOCATOR_32: CpuLocalAllocator<32> = CpuLocalAllocator::new();
/// Allocates a dynamically-allocated CPU-local object of type `T` and
/// initializes it with `init_values`.
///
/// Currently, the size of `T` must be no larger than 32 bytes.
pub fn alloc_cpu_local<T>(mut init_values: impl FnMut(CpuId) -> T) -> Result<CpuLocalBox<T>> {
let size = core::mem::size_of::<T>();
let class = CommonSizeClass::from_size(size).ok_or(Error::InvalidArgs)?;
let cpu_local = match class {
CommonSizeClass::Bytes8 => ALLOCATOR_8.alloc::<T>(&mut init_values),
CommonSizeClass::Bytes16 => ALLOCATOR_16.alloc::<T>(&mut init_values),
CommonSizeClass::Bytes32 => ALLOCATOR_32.alloc::<T>(&mut init_values),
// TODO: Support contiguous allocations for larger sizes.
// Since cache lines are normally 64 bytes, when allocating CPU-local
// objects with larger sizes, we should allocate a `Vec` with size
// `num_cpus()` instead.
_ => Err(Error::InvalidArgs),
}?;
Ok(CpuLocalBox(Some(cpu_local)))
}
/// Deallocates a dynamically-allocated CPU-local object of type `T`.
///
/// The object is returned to the global allocator that serves the size
/// class of `T`.
///
/// # Panics
///
/// Panics if the size of `T` maps to no size class, or to a size class
/// without a global allocator.
fn dealloc_cpu_local<T>(cpu_local: DynamicCpuLocal<T>) {
    match CommonSizeClass::from_size(core::mem::size_of::<T>()).unwrap() {
        CommonSizeClass::Bytes8 => ALLOCATOR_8.dealloc(cpu_local),
        CommonSizeClass::Bytes16 => ALLOCATOR_16.dealloc(cpu_local),
        CommonSizeClass::Bytes32 => ALLOCATOR_32.dealloc(cpu_local),
        _ => todo!(),
    }
}

View File

@ -4,7 +4,11 @@
#![no_std]
#![deny(unsafe_code)]
extern crate alloc;
mod allocator;
mod cpu_local_allocator;
mod slab_cache;
pub use allocator::{type_from_layout, HeapAllocator};
pub use cpu_local_allocator::{alloc_cpu_local, CpuLocalBox};

View File

@ -37,6 +37,7 @@ spin = "0.9.4"
smallvec = "1.13.2"
unwinding = { version = "=0.2.5", default-features = false, features = ["fde-gnu-eh-frame-hdr", "hide-trace", "panic", "personality", "unwinder"] }
volatile = "0.6.1"
bitvec = { version = "1.0", default-features = false, features = ["alloc"] }
[target.x86_64-unknown-none.dependencies]
x86_64 = "0.14.13"

View File

@ -18,7 +18,7 @@ use x86_64::{
PrivilegeLevel, VirtAddr,
};
use crate::cpu::local::CpuLocal;
use crate::cpu::local::{CpuLocal, StaticCpuLocal};
/// Initializes and loads the GDT and TSS.
///
@ -95,10 +95,10 @@ pub(super) unsafe fn init() {
// No other special initialization is required because the kernel stack information is stored in
// the TSS when we start the userspace program. See `syscall.S` for details.
#[link_section = ".cpu_local_tss"]
static LOCAL_TSS: CpuLocal<TaskStateSegment> = {
static LOCAL_TSS: StaticCpuLocal<TaskStateSegment> = {
let tss = TaskStateSegment::new();
// SAFETY: The `.cpu_local_tss` section is part of the CPU-local area.
unsafe { CpuLocal::__new(tss) }
unsafe { CpuLocal::__new_static(tss) }
};
// Kernel code and data descriptors.

View File

@ -1,201 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
//! The CPU-local variable implementation.
use core::{marker::Sync, ops::Deref};
use super::{__cpu_local_end, __cpu_local_start};
use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard};
/// Defines a CPU-local variable.
///
/// The accessors of the CPU-local variables are defined with [`CpuLocal`].
///
/// You can get the reference to the inner object on one CPU by calling
/// [`CpuLocal::get_on_cpu`]. Also if you intend to access the inner object
/// on the current CPU, you can use [`CpuLocal::get_with`]. The latter
/// accessors can be used even if the inner object is not `Sync`.
///
/// # Example
///
/// ```rust
/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap};
/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell};
///
/// cpu_local! {
/// static FOO: AtomicU32 = AtomicU32::new(1);
/// pub static BAR: Cell<usize> = Cell::new(2);
/// }
///
/// fn not_an_atomic_function() {
/// let preempt_guard = disable_preempt();
/// let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu());
/// let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
/// println!("FOO VAL: {}", val_of_foo);
///
/// let irq_guard = trap::disable_local();
/// let bar_guard = BAR.get_with(&irq_guard);
/// let val_of_bar = bar_guard.get();
/// println!("BAR VAL: {}", val_of_bar);
/// }
/// ```
#[macro_export]
macro_rules! cpu_local {
($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => {
$(
#[link_section = ".cpu_local"]
$(#[$attr])* $vis static $name: $crate::cpu::local::CpuLocal<$t> = {
let val = $init;
// SAFETY: The per-CPU variable instantiated is statically
// stored in the special `.cpu_local` section.
unsafe {
$crate::cpu::local::CpuLocal::__new(val)
}
};
)*
};
}
/// CPU-local objects.
///
/// CPU-local objects are instantiated once per CPU core. They can be shared to
/// other cores. In the context of a preemptible kernel task, when holding the
/// reference to the inner object, the object is always the one in the original
/// core (when the reference is created), no matter which core the code is
/// currently running on.
///
/// For the difference between [`CpuLocal`] and [`super::CpuLocalCell`], see
/// [`super`].
pub struct CpuLocal<T: 'static>(T);
impl<T: 'static> CpuLocal<T> {
/// Creates a new CPU-local object.
///
/// Please do not call this function directly. Instead, use the
/// `cpu_local!` macro.
///
/// # Safety
///
/// The caller should ensure that the object initialized by this
/// function resides in the `.cpu_local` section. Otherwise the
/// behavior is undefined.
#[doc(hidden)]
pub const unsafe fn __new(val: T) -> Self {
Self(val)
}
/// Gets access to the underlying value on the current CPU with a
/// provided IRQ guard.
///
/// By this method, you can borrow a reference to the underlying value
/// even if `T` is not `Sync`. Because it is per-CPU and IRQs are
/// disabled, no other running task can access it.
pub fn get_with<'a>(
&'static self,
guard: &'a DisabledLocalIrqGuard,
) -> CpuLocalDerefGuard<'a, T> {
CpuLocalDerefGuard {
cpu_local: self,
guard,
}
}
/// Gets access to the underlying value through a raw pointer.
///
/// This method is safe, but using the returned pointer will be unsafe.
pub(crate) fn as_ptr(&'static self) -> *const T {
super::is_used::debug_set_true();
let offset = self.get_offset();
let local_base = arch::cpu::local::get_base() as usize;
let local_va = local_base + offset;
// A sanity check about the alignment.
debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
local_va as *mut T
}
/// Gets the offset of the CPU-local object in the CPU-local area.
fn get_offset(&'static self) -> usize {
let bsp_va = self as *const _ as usize;
let bsp_base = __cpu_local_start as usize;
// The implementation should ensure that the CPU-local object resides in the `.cpu_local`.
debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
bsp_va - bsp_base
}
}
impl<T: 'static + Sync> CpuLocal<T> {
/// Gets access to the CPU-local value on a specific CPU.
///
/// This allows the caller to access CPU-local data from a remote CPU,
/// so the data type must be `Sync`.
pub fn get_on_cpu(&'static self, cpu_id: CpuId) -> &'static T {
super::is_used::debug_set_true();
let cpu_id = cpu_id.as_usize();
// If on the BSP, just use the statically linked storage.
if cpu_id == 0 {
return &self.0;
}
// SAFETY: At this time we have a non-BSP `CpuId`, which means that
// `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must
// also have been called (see the implementation of `cpu::init_on_bsp`),
// so `CPU_LOCAL_STORAGES` must already be initialized.
let storages = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() };
// SAFETY: `cpu_id` is guaranteed to be in range because the type
// invariant of `CpuId`.
let storage = unsafe { *storages.get_unchecked(cpu_id - 1) };
let base = crate::mm::paddr_to_vaddr(storage);
let offset = self.get_offset();
let ptr = (base + offset) as *const T;
// SAFETY: `ptr` represents CPU-local data on a remote CPU. It
// contains valid data, the type is `Sync`, and no one will mutably
// borrow it, so creating an immutable borrow here is valid.
unsafe { &*ptr }
}
}
// SAFETY: At any given time, only one task can access the inner value `T` of a
// CPU-local variable if `T` is not `Sync`. We guarantee it by disabling the
// reference to the inner value, or turning off preemptions when creating
// the reference.
unsafe impl<T: 'static> Sync for CpuLocal<T> {}
// Prevent valid instances of `CpuLocal` from being copied to any memory areas
// outside the `.cpu_local` section.
impl<T: 'static> !Copy for CpuLocal<T> {}
impl<T: 'static> !Clone for CpuLocal<T> {}
// In general, it does not make any sense to send instances of `CpuLocal` to
// other tasks as they should live on other CPUs to make sending useful.
impl<T: 'static> !Send for CpuLocal<T> {}
/// A guard for accessing the CPU-local object.
///
/// It ensures that the CPU-local object is accessed with IRQs disabled.
/// It is created by [`CpuLocal::get_with`].
#[must_use]
pub struct CpuLocalDerefGuard<'a, T: 'static> {
cpu_local: &'static CpuLocal<T>,
#[expect(dead_code)]
guard: &'a DisabledLocalIrqGuard,
}
impl<T: 'static> Deref for CpuLocalDerefGuard<'_, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
// SAFETY: it should be properly initialized before accesses.
// And we do not create a mutable reference over it. The IRQs
// are disabled so it can only be referenced from this task.
unsafe { &*self.cpu_local.as_ptr() }
}
}

View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: MPL-2.0
//! Dynamically-allocated CPU-local objects.
use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull};
use bitvec::prelude::{bitvec, BitVec};
use super::{AnyStorage, CpuLocal};
use crate::{
cpu::{all_cpus, num_cpus, CpuId, PinCurrentCpu},
mm::{paddr_to_vaddr, FrameAllocOptions, Segment, Vaddr, PAGE_SIZE},
trap::DisabledLocalIrqGuard,
Result,
};
/// A dynamically-allocated storage for a CPU-local variable of type `T`.
///
/// Such a CPU-local storage should be allocated and deallocated by
/// [`DynCpuLocalChunk`], not directly. Dropping it without deallocation
/// will cause panic.
///
/// When dropping a `CpuLocal<T, DynamicStorage<T>>`, we have no way to know
/// which `DynCpuLocalChunk` the CPU-local object was originally allocated
/// from. Therefore, we rely on the user to correctly manage the corresponding
/// `DynCpuLocalChunk`, ensuring that both allocation and deallocation of
/// `CpuLocal<T, DynamicStorage<T>>` occur within the same chunk.
///
/// To properly deallocate the CPU-local object, the user must explicitly call
/// the appropriate `DynCpuLocalChunk`'s `try_dealloc<T>()`.
///
/// The wrapped pointer refers to the instance that belongs to the BSP; the
/// instances of the other CPUs are located relative to it.
pub struct DynamicStorage<T>(NonNull<T>);
// SAFETY: The per-CPU chunks are laid out contiguously in the order of CPU
// IDs and each chunk is `CHUNK_SIZE` bytes, so offsetting the BSP address by
// `cpu_id * CHUNK_SIZE` yields the address of the instance on that CPU.
unsafe impl<T> AnyStorage<T> for DynamicStorage<T> {
    /// Resolves the instance of the CPU that `guard` reports as current.
    fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T {
        let current_cpu = guard.current_cpu();
        self.get_ptr_on_target(current_cpu)
    }

    /// Resolves the instance of `cpu_id` by offsetting the BSP address.
    fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T {
        let chunk_offset = cpu_id.as_usize() * CHUNK_SIZE;
        let target_va = self.0.as_ptr() as usize + chunk_offset;
        target_va as *const T
    }

    /// Same address as [`Self::get_ptr_on_target`], as a `mut` pointer.
    fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T {
        self.get_ptr_on_target(cpu) as *mut T
    }
}
// Dropping a `DynamicStorage<T>` always panics: the storage cannot tell which
// chunk it came from, so it must be consumed by
// `DynCpuLocalChunk::try_dealloc`, which forgets the value instead of
// dropping it.
impl<T> Drop for DynamicStorage<T> {
fn drop(&mut self) {
panic!(
"Do not drop `DynamicStorage<T>` directly. \
Use `DynCpuLocalChunk::try_dealloc<T>` instead."
);
}
}
impl<T: Sync + alloc::fmt::Debug + 'static> alloc::fmt::Debug for CpuLocal<T, DynamicStorage<T>> {
    /// Formats the object as a list of `(cpu, value)` pairs, one per CPU.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_list()
            .entries(all_cpus().map(|cpu| (cpu, self.get_on_cpu(cpu))))
            .finish()
    }
}
impl<T> CpuLocal<T, DynamicStorage<T>> {
/// Creates a new dynamically-allocated CPU-local object, and
/// initializes it with `init_values`.
///
/// The given `ptr` points to the variable located on the BSP.
///
/// Please do not call this function directly. Instead, use
/// `DynCpuLocalChunk::alloc`.
///
/// # Safety
///
/// The caller must ensure that the new per-CPU object belongs to an
/// existing [`DynCpuLocalChunk`], and does not overlap with any existing
/// CPU-local object.
unsafe fn __new_dynamic(ptr: *mut T, init_values: &mut impl FnMut(CpuId) -> T) -> Self {
let mut storage = DynamicStorage(NonNull::new(ptr).unwrap());
for cpu in all_cpus() {
let ptr = storage.get_mut_ptr_on_target(cpu);
// SAFETY: `ptr` points to valid, uninitialized per-CPU memory
// reserved for CPU-local storage. This initialization occurs
// before any other code can access the memory. References to
// the data may only be created after `Self` is created, ensuring
// exclusive access by the current task. Each per-CPU memory
// region is written exactly once using `ptr::write`, which is
// safe for uninitialized memory.
unsafe {
core::ptr::write(ptr, init_values(cpu));
}
}
Self {
storage,
phantom: PhantomData,
}
}
}
/// The size in bytes of the chunk owned by each CPU.
const CHUNK_SIZE: usize = PAGE_SIZE;
/// Frame metadata for the frames that back dynamically-allocated CPU-local
/// chunks.
#[derive(Debug, Clone, Copy)]
struct DynCpuLocalMeta;
crate::impl_frame_meta_for!(DynCpuLocalMeta);
/// Manages dynamically-allocated CPU-local chunks.
///
/// Each CPU owns a chunk of size `CHUNK_SIZE`, and the chunks are laid
/// out contiguously in the order of CPU IDs. Per-CPU variables lie within
/// the chunks.
pub struct DynCpuLocalChunk<const ITEM_SIZE: usize> {
// The memory backing the chunks of all CPUs. It is wrapped in
// `ManuallyDrop` so that the `Drop` implementation decides whether it is
// released or leaked.
segment: ManuallyDrop<Segment<DynCpuLocalMeta>>,
// One bit per `ITEM_SIZE`-byte slot of a chunk; a set bit marks an
// allocated slot.
bitmap: BitVec,
}
impl<const ITEM_SIZE: usize> DynCpuLocalChunk<ITEM_SIZE> {
    /// Creates a new dynamically-allocated CPU-local chunk.
    ///
    /// The backing segment provides `CHUNK_SIZE` bytes for every CPU and is
    /// not zeroed. All slots start out free.
    ///
    /// # Errors
    ///
    /// Returns an error if the backing segment cannot be allocated.
    pub fn new() -> Result<Self> {
        const { assert!(CHUNK_SIZE % ITEM_SIZE == 0) };

        let total_chunk_size = CHUNK_SIZE * num_cpus();
        let segment = FrameAllocOptions::new()
            .zeroed(false)
            .alloc_segment_with(total_chunk_size.div_ceil(PAGE_SIZE), |_| DynCpuLocalMeta)?;

        let num_items = CHUNK_SIZE / ITEM_SIZE;
        Ok(Self {
            segment: ManuallyDrop::new(segment),
            bitmap: bitvec![0; num_items],
        })
    }

    /// Returns a pointer to the local chunk owned by the BSP.
    fn start_vaddr(&self) -> Vaddr {
        paddr_to_vaddr(self.segment.start_paddr())
    }

    /// Allocates a CPU-local object from the chunk, and
    /// initializes it with `init_values`.
    ///
    /// Returns `None` if the chunk is full. In that case `init_values` is
    /// not called.
    pub fn alloc<T>(
        &mut self,
        init_values: &mut impl FnMut(CpuId) -> T,
    ) -> Option<CpuLocal<T, DynamicStorage<T>>> {
        const {
            assert!(ITEM_SIZE.is_power_of_two());
            assert!(core::mem::size_of::<T>() <= ITEM_SIZE);
            assert!(core::mem::align_of::<T>() <= ITEM_SIZE);
        }

        let index = self.bitmap.first_zero()?;
        let vaddr = self.start_vaddr() + index * ITEM_SIZE;

        // SAFETY: `index` refers to an available position in the chunk
        // for allocating a new CPU-local object.
        let cpu_local = unsafe { CpuLocal::__new_dynamic(vaddr as *mut T, init_values) };

        // Mark the slot as used only after the initialization succeeded, so
        // that a panicking `init_values` does not leave a slot marked as
        // allocated without an owner. `&mut self` guarantees that nobody
        // else can take the slot in between.
        self.bitmap.set(index, true);

        Some(cpu_local)
    }

    /// Gets the index of a dynamically-allocated CPU-local object
    /// within the chunk.
    ///
    /// Returns `None` if the object does not belong to the chunk.
    fn get_item_index<T>(&mut self, cpu_local: &CpuLocal<T, DynamicStorage<T>>) -> Option<usize> {
        let vaddr = cpu_local.storage.0.as_ptr() as Vaddr;
        let offset = vaddr.checked_sub(self.start_vaddr())?;

        // The storage holds the BSP address, so a valid offset lies in
        // `[0, CHUNK_SIZE)`. `offset == CHUNK_SIZE` is already past the BSP's
        // chunk and would produce an index outside the bitmap.
        if offset >= CHUNK_SIZE {
            return None;
        }

        debug_assert_eq!(offset % ITEM_SIZE, 0);
        Some(offset / ITEM_SIZE)
    }

    /// Attempts to deallocate a previously allocated CPU-local object.
    ///
    /// On success, the slot is marked as free and the instance on every CPU
    /// is dropped in place.
    ///
    /// Returns `Err(cpu_local)` if the object does not belong to this chunk.
    pub fn try_dealloc<T>(
        &mut self,
        mut cpu_local: CpuLocal<T, DynamicStorage<T>>,
    ) -> core::result::Result<(), CpuLocal<T, DynamicStorage<T>>> {
        let Some(index) = self.get_item_index(&cpu_local) else {
            return Err(cpu_local);
        };

        self.bitmap.set(index, false);
        for cpu in all_cpus() {
            let ptr = cpu_local.storage.get_mut_ptr_on_target(cpu);
            // SAFETY: `ptr` points to the valid CPU-local object. We can
            // mutably borrow the CPU-local object on `cpu` because we have
            // the exclusive access to `cpu_local`. Each CPU-local object
            // is dropped exactly once. After the deallocation, no one will
            // access the dropped CPU-local object, since we explicitly
            // forget the `cpu_local`.
            unsafe {
                core::ptr::drop_in_place(ptr);
            }
        }

        // Forget `cpu_local` instead of dropping it: dropping its
        // `DynamicStorage` would panic.
        let _ = ManuallyDrop::new(cpu_local);

        Ok(())
    }

    /// Checks whether the chunk is full.
    pub fn is_full(&self) -> bool {
        self.bitmap.all()
    }

    /// Checks whether the chunk is empty.
    pub fn is_empty(&self) -> bool {
        self.bitmap.not_any()
    }
}
impl<const ITEM_SIZE: usize> Drop for DynCpuLocalChunk<ITEM_SIZE> {
    /// Releases the backing segment of an empty chunk.
    ///
    /// # Panics
    ///
    /// Panics if some CPU-local objects of the chunk are still allocated.
    fn drop(&mut self) {
        if !self.is_empty() {
            // The `segment` stays inside its `ManuallyDrop`, so it is leaked
            // rather than released.
            panic!("Dropping `DynCpuLocalChunk` while some CPU-local objects are still alive");
        }

        // SAFETY: The `segment` does not contain any CPU-local objects.
        // It is the last time the `segment` is accessed, and it will be
        // dropped only once.
        unsafe { ManuallyDrop::drop(&mut self.segment) }
    }
}

View File

@ -2,47 +2,68 @@
//! CPU local storage.
//!
//! This module provides a mechanism to define CPU-local objects, by the macro
//! [`crate::cpu_local!`].
//! This module provides a mechanism to define CPU-local objects. Users can
//! define a statically-allocated CPU-local object by the macro
//! [`crate::cpu_local!`], or allocate a dynamically-allocated CPU-local
//! object with the function [`osdk_heap_allocator::alloc_cpu_local`].
//!
//! Such a mechanism exploits the fact that constant values of non-[`Copy`]
//! types can be bitwise copied. For example, a [`Option<T>`] object, though
//! being not [`Copy`], have a constant constructor [`Option::None`] that
//! produces a value that can be bitwise copied to create a new instance.
//! [`alloc::sync::Arc`] however, don't have such a constructor, and thus cannot
//! be directly used as a CPU-local object. Wrapping it in a type that has a
//! constant constructor, like [`Option<T>`], can make it CPU-local.
//! The mechanism for statically-allocated CPU-local objects exploits the fact
//! that constant values of non-[`Copy`] types can be bitwise copied. For
//! example, an [`Option<T>`] object, though not [`Copy`], has a constant
//! constructor [`Option::None`] that produces a value that can be bitwise
//! copied to create a new instance. [`alloc::sync::Arc`], however, doesn't have
//! such a constructor, and thus cannot be directly used as a statically-
//! allocated CPU-local object. Wrapping it in a type that has a constant
//! constructor, like [`Option<T>`], can make it statically-allocated CPU-local.
//!
//! # Implementation
//!
//! These APIs are implemented by placing the CPU-local objects in a special
//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects linked
//! in this section, and these objects are copied to dynamically allocated
//! local storage of each application processors (AP) during the initialization
//! process.
//! These APIs are implemented by the methods as follows:
//! 1. For statically-allocated CPU-local objects, we place them in a special
//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects
//! linked in this section, and these objects are copied to dynamically
//! allocated local storage of each application processors (AP) during the
//! initialization process.
//! 2. For dynamically-allocated CPU-local objects, we prepare a fixed-size
//! chunk for each CPU. These per-CPU memory chunks are laid out contiguously
//! in memory in the order of the CPU IDs. A dynamically-allocated CPU-local
//! object can be allocated by occupying the same offset in each per-CPU
//! memory chunk.
// This module also provides CPU-local cell objects that have inner mutability.
//
// The difference between CPU-local objects (defined by [`crate::cpu_local!`])
// and CPU-local cell objects (defined by [`crate::cpu_local_cell!`]) is that
// the CPU-local objects can be shared across CPUs. While through a CPU-local
// cell object you can only access the value on the current CPU, therefore
// enabling inner mutability without locks.
// The difference between statically-allocated CPU-local objects (defined by
// [`crate::cpu_local!`]) and CPU-local cell objects (defined by
// [`crate::cpu_local_cell!`]) is that the CPU-local objects can be shared
// across CPUs. While through a CPU-local cell object you can only access the
// value on the current CPU, therefore enabling inner mutability without locks.
mod cell;
mod cpu_local;
mod dyn_cpu_local;
mod static_cpu_local;
pub(crate) mod single_instr;
use core::alloc::Layout;
use core::{alloc::Layout, marker::PhantomData, ops::Deref};
use align_ext::AlignExt;
pub use cell::CpuLocalCell;
pub use cpu_local::{CpuLocal, CpuLocalDerefGuard};
pub use dyn_cpu_local::DynCpuLocalChunk;
use dyn_cpu_local::DynamicStorage;
use spin::Once;
use static_cpu_local::StaticStorage;
use super::CpuId;
use crate::mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE};
use crate::{
mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE},
trap::DisabledLocalIrqGuard,
};
/// Dynamically-allocated CPU-local objects.
pub type DynamicCpuLocal<T> = CpuLocal<T, DynamicStorage<T>>;
/// Statically-allocated CPU-local objects.
pub type StaticCpuLocal<T> = CpuLocal<T, static_cpu_local::StaticStorage<T>>;
// These symbols are provided by the linker script.
extern "C" {
@ -50,10 +71,120 @@ extern "C" {
fn __cpu_local_end();
}
/// The CPU-local areas for APs.
/// A trait to abstract any type that can be used as a slot for a CPU-local
/// variable of type `T`.
///
/// Each slot provides the memory space for storing `num_cpus` instances
/// of type `T`.
///
/// # Safety
///
/// The implementor must ensure that the returned pointer refers to the
/// variable on the correct CPU.
pub unsafe trait AnyStorage<T> {
/// Gets the `const` pointer for the object on the current CPU.
///
/// The current CPU is identified through the given IRQ `guard`.
fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T;
/// Gets the `const` pointer for the object on a target CPU.
fn get_ptr_on_target(&self, cpu: CpuId) -> *const T;
/// Gets the `mut` pointer for the object on a target CPU.
///
/// This method is intended for use when initializing or dropping the storage.
fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T;
}
/// A CPU-local variable for type `T`, backed by a storage of type `S`.
///
/// CPU-local objects are instantiated once per CPU core. They can be shared to
/// other cores. In the context of a preemptible kernel task, when holding the
/// reference to the inner object, the object is always the one in the original
/// core (when the reference is created), no matter which core the code is
/// currently running on.
pub struct CpuLocal<T, S: AnyStorage<T>> {
// The backing storage, which resolves the per-CPU pointers to the value.
storage: S,
// Ties the type parameter `T` to the struct.
phantom: PhantomData<T>,
}
impl<T: 'static, S: AnyStorage<T>> CpuLocal<T, S> {
/// Gets access to the underlying value on the current CPU with a
/// provided IRQ guard.
///
/// By this method, you can borrow a reference to the underlying value
/// on the current CPU even if `T` is not `Sync`.
///
/// The returned guard borrows both `self` and `guard` for `'a` and
/// dereferences to the value on the current CPU.
pub fn get_with<'a>(
&'a self,
guard: &'a DisabledLocalIrqGuard,
) -> CpuLocalDerefGuard<'a, T, S> {
CpuLocalDerefGuard {
cpu_local: self,
guard,
}
}
}
impl<T: 'static + Sync, S: AnyStorage<T>> CpuLocal<T, S> {
    /// Gets access to the CPU-local value on a specific CPU.
    ///
    /// The target CPU may be a remote one, which is why this accessor is
    /// only available when the data type is `Sync`.
    pub fn get_on_cpu(&self, target_cpu_id: CpuId) -> &T {
        let target_ptr: *const T = self.storage.get_ptr_on_target(target_cpu_id);

        // SAFETY: `target_ptr` represents CPU-local data on a remote CPU. It
        // contains valid data, the type is `Sync`, and no one will mutably
        // borrow it, so creating an immutable borrow here is valid.
        unsafe { &*target_ptr }
    }
}
/// A guard for accessing the CPU-local object.
///
/// It ensures that the CPU-local object is accessed with IRQs disabled.
/// It is created by [`CpuLocal::get_with`].
#[must_use]
pub struct CpuLocalDerefGuard<'a, T: 'static, S: AnyStorage<T>> {
// The CPU-local object being accessed.
cpu_local: &'a CpuLocal<T, S>,
// The IRQ guard, handed to the storage to resolve the pointer on the
// current CPU.
guard: &'a DisabledLocalIrqGuard,
}
impl<'a, T: 'static, S: AnyStorage<T>> Deref for CpuLocalDerefGuard<'a, T, S> {
    type Target = T;

    /// Borrows the value that belongs to the current CPU.
    fn deref(&self) -> &'a Self::Target {
        is_used::debug_set_true();

        let storage = &self.cpu_local.storage;
        let current_ptr = storage.get_ptr_on_current(self.guard);

        // SAFETY: `current_ptr` represents CPU-local data on the current
        // CPU. It contains valid data, only the current task can reference
        // the data (due to `self.guard`), and no one will mutably borrow it,
        // so creating an immutable borrow here is valid.
        unsafe { &*current_ptr }
    }
}
// SAFETY: At any given time, only one task can access the inner value `T` of a
// CPU-local variable if `T` is not `Sync`. A non-`Sync` value can only be
// reached through `get_with`, which requires a `DisabledLocalIrqGuard` and
// yields the value on the current CPU, while `get_on_cpu` is only available
// when `T: Sync`.
unsafe impl<T: 'static, S: AnyStorage<T>> Sync for CpuLocal<T, S> {}
// SAFETY: NOTE(review): presumably sound because a dynamically-allocated
// `CpuLocal` only holds a pointer to per-CPU memory that is not bound to the
// task that allocated it. Confirm whether `T: Send` should be required, since
// deallocation drops the instance of every CPU on the deallocating task.
unsafe impl<T: 'static> Send for CpuLocal<T, DynamicStorage<T>> {}
// Implement `!Copy` and `!Clone` for `CpuLocal` to ensure memory safety:
// - Prevent valid instances of `CpuLocal<T, StaticStorage<T>>` from being copied
// to any memory areas outside the `.cpu_local` section.
// - Prevent multiple valid instances of `CpuLocal<T, DynamicStorage<T>>` from
// referring to the same CPU-local object, avoiding double deallocation.
impl<T: 'static, S: AnyStorage<T>> !Copy for CpuLocal<T, S> {}
impl<T: 'static, S: AnyStorage<T>> !Clone for CpuLocal<T, S> {}
// In general, it does not make any sense to send instances of static `CpuLocal`
// to other tasks as they should live on other CPUs to make sending useful.
impl<T: 'static> !Send for CpuLocal<T, StaticStorage<T>> {}
/// The static CPU-local areas for APs.
static CPU_LOCAL_STORAGES: Once<&'static [Paddr]> = Once::new();
/// Copies the CPU-local data on the bootstrap processor (BSP)
/// Copies the static CPU-local data on the bootstrap processor (BSP)
/// for application processors (APs).
///
/// # Safety
@ -123,7 +254,7 @@ pub(crate) unsafe fn copy_bsp_for_ap(num_cpus: usize) {
CPU_LOCAL_STORAGES.call_once(|| res);
}
/// Gets the pointer to the CPU-local storage for the given AP.
/// Gets the pointer to the static CPU-local storage for the given AP.
///
/// # Panics
///
@ -148,7 +279,8 @@ pub(crate) fn get_ap(cpu_id: CpuId) -> Paddr {
}
mod is_used {
//! This module tracks whether any CPU-local variables are used.
//! This module tracks whether any statically-allocated CPU-local
//! variables are used.
//!
//! [`copy_bsp_for_ap`] copies the CPU local data from the BSP
//! to the APs, so it requires as a safety condition that the

View File

@ -0,0 +1,157 @@
// SPDX-License-Identifier: MPL-2.0
//! Statically-allocated CPU-local objects.
use core::marker::PhantomData;
use super::{AnyStorage, CpuLocal, __cpu_local_end, __cpu_local_start};
use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard};
/// Defines one or more statically-allocated CPU-local variables.
///
/// Each `static NAME: T = INIT;` item given to the macro (with optional
/// attributes and visibility) is expanded into a `static` of type
/// `StaticCpuLocal<T>` that is placed in the `.cpu_local` link section.
///
/// The accessors of the CPU-local variables are defined with [`CpuLocal`].
///
/// You can get a reference to the inner object on a given CPU by calling
/// [`CpuLocal::get_on_cpu`]. If you intend to access the inner object
/// on the current CPU, you can use [`CpuLocal::get_with`] instead. The latter
/// accessor can be used even if the inner object is not `Sync`.
///
/// # Example
///
/// ```rust
/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap};
/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell};
///
/// cpu_local! {
///     static FOO: AtomicU32 = AtomicU32::new(1);
///     pub static BAR: Cell<usize> = Cell::new(2);
/// }
///
/// fn not_an_atomic_function() {
///     let preempt_guard = disable_preempt();
///     let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu());
///     let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
///     println!("FOO VAL: {}", val_of_foo);
///
///     let irq_guard = trap::disable_local();
///     let bar_guard = BAR.get_with(&irq_guard);
///     let val_of_bar = bar_guard.get();
///     println!("BAR VAL: {}", val_of_bar);
/// }
/// ```
#[macro_export]
macro_rules! cpu_local {
($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => {
$(
#[link_section = ".cpu_local"]
$(#[$attr])* $vis static $name: $crate::cpu::local::StaticCpuLocal<$t> = {
// The user-provided initializer is evaluated here, outside the
// `unsafe` block below, so that block covers only the constructor call.
let val = $init;
// SAFETY: The per-CPU variable instantiated is statically
// stored in the special `.cpu_local` section.
unsafe {
$crate::cpu::local::CpuLocal::__new_static(val)
}
};
)*
};
}
/// A static storage for a CPU-local variable of type `T`.
///
/// Such a CPU-local storage is not intended to be allocated directly.
/// Use the `cpu_local` macro instead.
///
/// # Layout
///
/// The storage is `#[repr(transparent)]` over `T`. The pointer arithmetic in
/// this module derives the address of the inner `T` from the address of the
/// `StaticStorage<T>` itself, so the wrapper must have exactly the same
/// address, size, and alignment as the value it wraps. The default Rust
/// representation does not formally promise that; `repr(transparent)` does.
#[repr(transparent)]
pub struct StaticStorage<T: 'static>(T);
impl<T: 'static> StaticStorage<T> {
    /// Returns a raw pointer to the current CPU's copy of the value.
    ///
    /// The address is this object's offset inside the CPU-local area added to
    /// the base reported by `arch::cpu::local::get_base()`. The access is also
    /// recorded through `is_used::debug_set_true()`.
    ///
    /// Calling this method is safe; dereferencing the returned pointer is not.
    fn as_ptr(&self) -> *const T {
        super::is_used::debug_set_true();

        let current_cpu_va = self.get_offset() + arch::cpu::local::get_base() as usize;

        // Sanity check: the computed address must be suitably aligned for `T`.
        debug_assert_eq!(current_cpu_va % core::mem::align_of::<T>(), 0);

        current_cpu_va as *const T
    }

    /// Returns how far this object lies from the start of the CPU-local area.
    ///
    /// The offset is measured on the statically linked copy, i.e. from the
    /// `__cpu_local_start` symbol to the address of `self`.
    fn get_offset(&self) -> usize {
        let object_va = self as *const Self as usize;
        let section_start = __cpu_local_start as usize;

        // The object is expected to end before `__cpu_local_end`, i.e. to
        // reside in the `.cpu_local` section.
        debug_assert!(object_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);

        object_va - section_start
    }
}
unsafe impl<T: 'static> AnyStorage<T> for StaticStorage<T> {
    /// Returns a pointer to the copy of the value that belongs to the
    /// current CPU.
    fn get_ptr_on_current(&self, _guard: &DisabledLocalIrqGuard) -> *const T {
        self.as_ptr()
    }

    /// Returns a pointer to the copy of the value that belongs to `cpu_id`.
    ///
    /// CPU 0 (the BSP) is served directly from the statically linked object;
    /// every other CPU is served from its area recorded in
    /// `CPU_LOCAL_STORAGES`, at the same offset as in the static area.
    fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T {
        super::is_used::debug_set_true();

        let ap_index = match cpu_id.as_usize() {
            // If on the BSP, just use the statically linked storage.
            0 => return &self.0 as *const T,
            // The storages of the APs are indexed starting from CPU 1.
            ap_id => ap_id - 1,
        };

        // SAFETY: At this time we have a non-BSP `CpuId`, which means that
        // `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must
        // also have been called (see the implementation of `cpu::init_on_bsp`),
        // so `CPU_LOCAL_STORAGES` must already be initialized.
        let ap_areas = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() };
        // SAFETY: `cpu_id` is guaranteed to be in range because the type
        // invariant of `CpuId`.
        let area_paddr = unsafe { *ap_areas.get_unchecked(ap_index) };
        let area_vaddr = crate::mm::paddr_to_vaddr(area_paddr);

        (area_vaddr + self.get_offset()) as *const T
    }

    /// Always panics.
    ///
    /// `StaticStorage<T>` does not support this operation, because
    /// statically-allocated CPU-local objects do not require per-CPU
    /// initialization.
    fn get_mut_ptr_on_target(&mut self, _: CpuId) -> *mut T {
        panic!("Can't get the mutable pointer of StaticStorage<T> on a target CPU.");
    }
}
impl<T: 'static> CpuLocal<T, StaticStorage<T>> {
/// Creates a new statically-allocated CPU-local object.
///
/// Please do not call this function directly. Instead, use the
/// `cpu_local!` macro.
///
/// # Safety
///
/// The caller should ensure that the object initialized by this
/// function resides in the `.cpu_local` section. Otherwise the
/// behavior is undefined.
#[doc(hidden)]
pub const unsafe fn __new_static(val: T) -> Self {
Self {
storage: StaticStorage(val),
phantom: PhantomData,
}
}
/// Gets access to the underlying value through a raw pointer.
///
/// This method is safe, but using the returned pointer will be unsafe.
pub(crate) fn as_ptr(&self) -> *const T {
self.storage.as_ptr()
}
}