mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-08 04:55:03 +00:00
Add dynamically-allocated CPU-local objects
This commit is contained in:
parent
f24bc718fa
commit
dfd3042276
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1316,6 +1316,7 @@ dependencies = [
|
|||||||
"align_ext",
|
"align_ext",
|
||||||
"bit_field",
|
"bit_field",
|
||||||
"bitflags 1.3.2",
|
"bitflags 1.3.2",
|
||||||
|
"bitvec",
|
||||||
"buddy_system_allocator",
|
"buddy_system_allocator",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"fdt",
|
"fdt",
|
||||||
|
@ -8,7 +8,7 @@ use core::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink};
|
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink};
|
||||||
use ostd::{cpu::local::CpuLocal, cpu_local, trap};
|
use ostd::{cpu::local::StaticCpuLocal, cpu_local, trap};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
softirq_id::{TASKLESS_SOFTIRQ_ID, TASKLESS_URGENT_SOFTIRQ_ID},
|
softirq_id::{TASKLESS_SOFTIRQ_ID, TASKLESS_URGENT_SOFTIRQ_ID},
|
||||||
@ -123,7 +123,7 @@ impl Taskless {
|
|||||||
|
|
||||||
fn do_schedule(
|
fn do_schedule(
|
||||||
taskless: &Arc<Taskless>,
|
taskless: &Arc<Taskless>,
|
||||||
taskless_list: &'static CpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
|
taskless_list: &'static StaticCpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
|
||||||
) {
|
) {
|
||||||
if taskless.is_disabled.load(Ordering::Acquire) {
|
if taskless.is_disabled.load(Ordering::Acquire) {
|
||||||
return;
|
return;
|
||||||
@ -158,7 +158,7 @@ pub(super) fn init() {
|
|||||||
/// If the `Taskless` is ready to be executed, it will be set to not scheduled
|
/// If the `Taskless` is ready to be executed, it will be set to not scheduled
|
||||||
/// and can be scheduled again.
|
/// and can be scheduled again.
|
||||||
fn taskless_softirq_handler(
|
fn taskless_softirq_handler(
|
||||||
taskless_list: &'static CpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
|
taskless_list: &'static StaticCpuLocal<RefCell<LinkedList<TasklessAdapter>>>,
|
||||||
softirq_id: u8,
|
softirq_id: u8,
|
||||||
) {
|
) {
|
||||||
let mut processing_list = {
|
let mut processing_list = {
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
//! A fast and scalable SMP counter.
|
//! A fast and scalable SMP counter.
|
||||||
|
|
||||||
use ostd::cpu::{all_cpus, local::CpuLocal, CpuId};
|
use ostd::cpu::{all_cpus, local::StaticCpuLocal, CpuId};
|
||||||
|
|
||||||
use core::sync::atomic::{AtomicIsize, Ordering};
|
use core::sync::atomic::{AtomicIsize, Ordering};
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ macro_rules! fast_smp_counter {
|
|||||||
/// Nevertheless, if the sum of added value exceeds [`usize::MAX`] the counter
|
/// Nevertheless, if the sum of added value exceeds [`usize::MAX`] the counter
|
||||||
/// will wrap on overflow.
|
/// will wrap on overflow.
|
||||||
pub struct FastSmpCounter {
|
pub struct FastSmpCounter {
|
||||||
per_cpu_counter: &'static CpuLocal<AtomicIsize>,
|
per_cpu_counter: &'static StaticCpuLocal<AtomicIsize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FastSmpCounter {
|
impl FastSmpCounter {
|
||||||
@ -51,7 +51,7 @@ impl FastSmpCounter {
|
|||||||
///
|
///
|
||||||
/// This function should only be used by the [`fast_smp_counter!`] macro.
|
/// This function should only be used by the [`fast_smp_counter!`] macro.
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub const fn new(per_cpu_counter: &'static CpuLocal<AtomicIsize>) -> Self {
|
pub const fn new(per_cpu_counter: &'static StaticCpuLocal<AtomicIsize>) -> Self {
|
||||||
Self { per_cpu_counter }
|
Self { per_cpu_counter }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ use crate::slab_cache::SlabCache;
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
#[repr(usize)]
|
#[repr(usize)]
|
||||||
enum CommonSizeClass {
|
pub(crate) enum CommonSizeClass {
|
||||||
Bytes8 = 8,
|
Bytes8 = 8,
|
||||||
Bytes16 = 16,
|
Bytes16 = 16,
|
||||||
Bytes32 = 32,
|
Bytes32 = 32,
|
||||||
@ -34,7 +34,7 @@ enum CommonSizeClass {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl CommonSizeClass {
|
impl CommonSizeClass {
|
||||||
const fn from_layout(layout: Layout) -> Option<Self> {
|
pub(crate) const fn from_layout(layout: Layout) -> Option<Self> {
|
||||||
let size_class = match layout.size() {
|
let size_class = match layout.size() {
|
||||||
0..=8 => CommonSizeClass::Bytes8,
|
0..=8 => CommonSizeClass::Bytes8,
|
||||||
9..=16 => CommonSizeClass::Bytes16,
|
9..=16 => CommonSizeClass::Bytes16,
|
||||||
@ -67,7 +67,7 @@ impl CommonSizeClass {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_size(size: usize) -> Option<Self> {
|
pub(crate) const fn from_size(size: usize) -> Option<Self> {
|
||||||
match size {
|
match size {
|
||||||
8 => Some(CommonSizeClass::Bytes8),
|
8 => Some(CommonSizeClass::Bytes8),
|
||||||
16 => Some(CommonSizeClass::Bytes16),
|
16 => Some(CommonSizeClass::Bytes16),
|
||||||
|
126
osdk/deps/heap-allocator/src/cpu_local_allocator.rs
Normal file
126
osdk/deps/heap-allocator/src/cpu_local_allocator.rs
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
use crate::allocator::CommonSizeClass;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use core::ops::Deref;
|
||||||
|
use ostd::{
|
||||||
|
cpu::{
|
||||||
|
local::{DynCpuLocalChunk, DynamicCpuLocal},
|
||||||
|
CpuId,
|
||||||
|
},
|
||||||
|
prelude::*,
|
||||||
|
sync::SpinLock,
|
||||||
|
Error,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Allocator for dynamically-allocated CPU-local objects.
|
||||||
|
struct CpuLocalAllocator<const ITEM_SIZE: usize> {
|
||||||
|
chunks: SpinLock<Vec<DynCpuLocalChunk<ITEM_SIZE>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const ITEM_SIZE: usize> CpuLocalAllocator<ITEM_SIZE> {
|
||||||
|
/// Creates a new allocator for dynamically-allocated CPU-local objects.
|
||||||
|
pub(self) const fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
chunks: SpinLock::new(Vec::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocates a CPU-local object and initializes it with `init_values`.
|
||||||
|
pub(self) fn alloc<T>(
|
||||||
|
&'static self,
|
||||||
|
init_values: &mut impl FnMut(CpuId) -> T,
|
||||||
|
) -> Result<DynamicCpuLocal<T>> {
|
||||||
|
let mut chunks = self.chunks.lock();
|
||||||
|
|
||||||
|
for chunk in chunks.iter_mut() {
|
||||||
|
if !chunk.is_full() {
|
||||||
|
let cpu_local = chunk.alloc::<T>(init_values).unwrap();
|
||||||
|
return Ok(cpu_local);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut new_chunk = DynCpuLocalChunk::<ITEM_SIZE>::new()?;
|
||||||
|
let cpu_local = new_chunk.alloc::<T>(init_values).unwrap();
|
||||||
|
chunks.push(new_chunk);
|
||||||
|
|
||||||
|
Ok(cpu_local)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deallocates a CPU-local object.
|
||||||
|
pub(self) fn dealloc<T>(&self, cpu_local: DynamicCpuLocal<T>) {
|
||||||
|
let mut cpu_local = cpu_local;
|
||||||
|
let mut chunks = self.chunks.lock();
|
||||||
|
|
||||||
|
let mut chunk_index = None;
|
||||||
|
for (i, chunk) in chunks.iter_mut().enumerate() {
|
||||||
|
match chunk.try_dealloc(cpu_local) {
|
||||||
|
Ok(()) => {
|
||||||
|
chunk_index = Some(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(returned) => cpu_local = returned,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let chunk_index = chunk_index.unwrap();
|
||||||
|
if chunks[chunk_index].is_empty() && chunks.iter().filter(|c| c.is_empty()).count() > 1 {
|
||||||
|
chunks.swap_remove(chunk_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A wrapper over [`DynamicCpuLocal<T>`] to deallocate CPU-local objects on
|
||||||
|
/// drop automatically.
|
||||||
|
pub struct CpuLocalBox<T>(Option<DynamicCpuLocal<T>>);
|
||||||
|
|
||||||
|
impl<T> Deref for CpuLocalBox<T> {
|
||||||
|
type Target = DynamicCpuLocal<T>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.0.as_ref().unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Drop for CpuLocalBox<T> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let cpu_local = self.0.take().unwrap();
|
||||||
|
dealloc_cpu_local(cpu_local);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Global allocators for dynamically-allocated CPU-local objects, one per
/// supported item size (8, 16 and 32 bytes).
|
||||||
|
static ALLOCATOR_8: CpuLocalAllocator<8> = CpuLocalAllocator::new();
|
||||||
|
static ALLOCATOR_16: CpuLocalAllocator<16> = CpuLocalAllocator::new();
|
||||||
|
static ALLOCATOR_32: CpuLocalAllocator<32> = CpuLocalAllocator::new();
|
||||||
|
|
||||||
|
/// Allocates a dynamically-allocated CPU-local object of type `T` and
|
||||||
|
/// initializes it with `init_values`.
|
||||||
|
///
|
||||||
|
/// Currently, the size of `T` must be no larger than 32 bytes.
|
||||||
|
pub fn alloc_cpu_local<T>(mut init_values: impl FnMut(CpuId) -> T) -> Result<CpuLocalBox<T>> {
|
||||||
|
let size = core::mem::size_of::<T>();
|
||||||
|
let class = CommonSizeClass::from_size(size).ok_or(Error::InvalidArgs)?;
|
||||||
|
let cpu_local = match class {
|
||||||
|
CommonSizeClass::Bytes8 => ALLOCATOR_8.alloc::<T>(&mut init_values),
|
||||||
|
CommonSizeClass::Bytes16 => ALLOCATOR_16.alloc::<T>(&mut init_values),
|
||||||
|
CommonSizeClass::Bytes32 => ALLOCATOR_32.alloc::<T>(&mut init_values),
|
||||||
|
// TODO: Support contiguous allocations for larger sizes.
|
||||||
|
// Since cache lines are normally 64 bytes, when allocating CPU-local
|
||||||
|
// objects with larger sizes, we should allocate a `Vec` with size
|
||||||
|
// `num_cpus()` instead.
|
||||||
|
_ => Err(Error::InvalidArgs),
|
||||||
|
}?;
|
||||||
|
Ok(CpuLocalBox(Some(cpu_local)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deallocates a dynamically-allocated CPU-local object of type `T`.
|
||||||
|
fn dealloc_cpu_local<T>(cpu_local: DynamicCpuLocal<T>) {
|
||||||
|
let size = core::mem::size_of::<T>();
|
||||||
|
let class = CommonSizeClass::from_size(size).unwrap();
|
||||||
|
match class {
|
||||||
|
CommonSizeClass::Bytes8 => ALLOCATOR_8.dealloc(cpu_local),
|
||||||
|
CommonSizeClass::Bytes16 => ALLOCATOR_16.dealloc(cpu_local),
|
||||||
|
CommonSizeClass::Bytes32 => ALLOCATOR_32.dealloc(cpu_local),
|
||||||
|
_ => todo!(),
|
||||||
|
}
|
||||||
|
}
|
@ -4,7 +4,11 @@
|
|||||||
#![no_std]
|
#![no_std]
|
||||||
#![deny(unsafe_code)]
|
#![deny(unsafe_code)]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
|
||||||
mod allocator;
|
mod allocator;
|
||||||
|
mod cpu_local_allocator;
|
||||||
mod slab_cache;
|
mod slab_cache;
|
||||||
|
|
||||||
pub use allocator::{type_from_layout, HeapAllocator};
|
pub use allocator::{type_from_layout, HeapAllocator};
|
||||||
|
pub use cpu_local_allocator::{alloc_cpu_local, CpuLocalBox};
|
||||||
|
@ -37,6 +37,7 @@ spin = "0.9.4"
|
|||||||
smallvec = "1.13.2"
|
smallvec = "1.13.2"
|
||||||
unwinding = { version = "=0.2.5", default-features = false, features = ["fde-gnu-eh-frame-hdr", "hide-trace", "panic", "personality", "unwinder"] }
|
unwinding = { version = "=0.2.5", default-features = false, features = ["fde-gnu-eh-frame-hdr", "hide-trace", "panic", "personality", "unwinder"] }
|
||||||
volatile = "0.6.1"
|
volatile = "0.6.1"
|
||||||
|
bitvec = { version = "1.0", default-features = false, features = ["alloc"] }
|
||||||
|
|
||||||
[target.x86_64-unknown-none.dependencies]
|
[target.x86_64-unknown-none.dependencies]
|
||||||
x86_64 = "0.14.13"
|
x86_64 = "0.14.13"
|
||||||
|
@ -18,7 +18,7 @@ use x86_64::{
|
|||||||
PrivilegeLevel, VirtAddr,
|
PrivilegeLevel, VirtAddr,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::cpu::local::CpuLocal;
|
use crate::cpu::local::{CpuLocal, StaticCpuLocal};
|
||||||
|
|
||||||
/// Initializes and loads the GDT and TSS.
|
/// Initializes and loads the GDT and TSS.
|
||||||
///
|
///
|
||||||
@ -95,10 +95,10 @@ pub(super) unsafe fn init() {
|
|||||||
// No other special initialization is required because the kernel stack information is stored in
|
// No other special initialization is required because the kernel stack information is stored in
|
||||||
// the TSS when we start the userspace program. See `syscall.S` for details.
|
// the TSS when we start the userspace program. See `syscall.S` for details.
|
||||||
#[link_section = ".cpu_local_tss"]
|
#[link_section = ".cpu_local_tss"]
|
||||||
static LOCAL_TSS: CpuLocal<TaskStateSegment> = {
|
static LOCAL_TSS: StaticCpuLocal<TaskStateSegment> = {
|
||||||
let tss = TaskStateSegment::new();
|
let tss = TaskStateSegment::new();
|
||||||
// SAFETY: The `.cpu_local_tss` section is part of the CPU-local area.
|
// SAFETY: The `.cpu_local_tss` section is part of the CPU-local area.
|
||||||
unsafe { CpuLocal::__new(tss) }
|
unsafe { CpuLocal::__new_static(tss) }
|
||||||
};
|
};
|
||||||
|
|
||||||
// Kernel code and data descriptors.
|
// Kernel code and data descriptors.
|
||||||
|
@ -1,201 +0,0 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
|
||||||
|
|
||||||
//! The CPU-local variable implementation.
|
|
||||||
|
|
||||||
use core::{marker::Sync, ops::Deref};
|
|
||||||
|
|
||||||
use super::{__cpu_local_end, __cpu_local_start};
|
|
||||||
use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard};
|
|
||||||
|
|
||||||
/// Defines a CPU-local variable.
|
|
||||||
///
|
|
||||||
/// The accessors of the CPU-local variables are defined with [`CpuLocal`].
|
|
||||||
///
|
|
||||||
/// You can get the reference to the inner object on one CPU by calling
|
|
||||||
/// [`CpuLocal::get_on_cpu`]. Also if you intend to access the inner object
|
|
||||||
/// on the current CPU, you can use [`CpuLocal::get_with`]. The latter
|
|
||||||
/// accessors can be used even if the inner object is not `Sync`.
|
|
||||||
///
|
|
||||||
/// # Example
|
|
||||||
///
|
|
||||||
/// ```rust
|
|
||||||
/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap};
|
|
||||||
/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell};
|
|
||||||
///
|
|
||||||
/// cpu_local! {
|
|
||||||
/// static FOO: AtomicU32 = AtomicU32::new(1);
|
|
||||||
/// pub static BAR: Cell<usize> = Cell::new(2);
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// fn not_an_atomic_function() {
|
|
||||||
/// let preempt_guard = disable_preempt();
|
|
||||||
/// let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu());
|
|
||||||
/// let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
|
|
||||||
/// println!("FOO VAL: {}", val_of_foo);
|
|
||||||
///
|
|
||||||
/// let irq_guard = trap::disable_local();
|
|
||||||
/// let bar_guard = BAR.get_with(&irq_guard);
|
|
||||||
/// let val_of_bar = bar_guard.get();
|
|
||||||
/// println!("BAR VAL: {}", val_of_bar);
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! cpu_local {
|
|
||||||
($( $(#[$attr:meta])* $vis:vis static $name:ident: $t:ty = $init:expr; )*) => {
|
|
||||||
$(
|
|
||||||
#[link_section = ".cpu_local"]
|
|
||||||
$(#[$attr])* $vis static $name: $crate::cpu::local::CpuLocal<$t> = {
|
|
||||||
let val = $init;
|
|
||||||
// SAFETY: The per-CPU variable instantiated is statically
|
|
||||||
// stored in the special `.cpu_local` section.
|
|
||||||
unsafe {
|
|
||||||
$crate::cpu::local::CpuLocal::__new(val)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
)*
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// CPU-local objects.
|
|
||||||
///
|
|
||||||
/// CPU-local objects are instantiated once per CPU core. They can be shared to
|
|
||||||
/// other cores. In the context of a preemptible kernel task, when holding the
|
|
||||||
/// reference to the inner object, the object is always the one in the original
|
|
||||||
/// core (when the reference is created), no matter which core the code is
|
|
||||||
/// currently running on.
|
|
||||||
///
|
|
||||||
/// For the difference between [`CpuLocal`] and [`super::CpuLocalCell`], see
|
|
||||||
/// [`super`].
|
|
||||||
pub struct CpuLocal<T: 'static>(T);
|
|
||||||
|
|
||||||
impl<T: 'static> CpuLocal<T> {
|
|
||||||
/// Creates a new CPU-local object.
|
|
||||||
///
|
|
||||||
/// Please do not call this function directly. Instead, use the
|
|
||||||
/// `cpu_local!` macro.
|
|
||||||
///
|
|
||||||
/// # Safety
|
|
||||||
///
|
|
||||||
/// The caller should ensure that the object initialized by this
|
|
||||||
/// function resides in the `.cpu_local` section. Otherwise the
|
|
||||||
/// behavior is undefined.
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub const unsafe fn __new(val: T) -> Self {
|
|
||||||
Self(val)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets access to the underlying value on the current CPU with a
|
|
||||||
/// provided IRQ guard.
|
|
||||||
///
|
|
||||||
/// By this method, you can borrow a reference to the underlying value
|
|
||||||
/// even if `T` is not `Sync`. Because it is per-CPU and IRQs are
|
|
||||||
/// disabled, no other running tasks can access it.
|
|
||||||
pub fn get_with<'a>(
|
|
||||||
&'static self,
|
|
||||||
guard: &'a DisabledLocalIrqGuard,
|
|
||||||
) -> CpuLocalDerefGuard<'a, T> {
|
|
||||||
CpuLocalDerefGuard {
|
|
||||||
cpu_local: self,
|
|
||||||
guard,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets access to the underlying value through a raw pointer.
|
|
||||||
///
|
|
||||||
/// This method is safe, but using the returned pointer will be unsafe.
|
|
||||||
pub(crate) fn as_ptr(&'static self) -> *const T {
|
|
||||||
super::is_used::debug_set_true();
|
|
||||||
|
|
||||||
let offset = self.get_offset();
|
|
||||||
|
|
||||||
let local_base = arch::cpu::local::get_base() as usize;
|
|
||||||
let local_va = local_base + offset;
|
|
||||||
|
|
||||||
// A sanity check about the alignment.
|
|
||||||
debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
|
|
||||||
|
|
||||||
local_va as *mut T
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets the offset of the CPU-local object in the CPU-local area.
|
|
||||||
fn get_offset(&'static self) -> usize {
|
|
||||||
let bsp_va = self as *const _ as usize;
|
|
||||||
let bsp_base = __cpu_local_start as usize;
|
|
||||||
// The implementation should ensure that the CPU-local object resides in the `.cpu_local`.
|
|
||||||
debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
|
|
||||||
|
|
||||||
bsp_va - bsp_base
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: 'static + Sync> CpuLocal<T> {
|
|
||||||
/// Gets access to the CPU-local value on a specific CPU.
|
|
||||||
///
|
|
||||||
/// This allows the caller to access CPU-local data from a remote CPU,
|
|
||||||
/// so the data type must be `Sync`.
|
|
||||||
pub fn get_on_cpu(&'static self, cpu_id: CpuId) -> &'static T {
|
|
||||||
super::is_used::debug_set_true();
|
|
||||||
|
|
||||||
let cpu_id = cpu_id.as_usize();
|
|
||||||
|
|
||||||
// If on the BSP, just use the statically linked storage.
|
|
||||||
if cpu_id == 0 {
|
|
||||||
return &self.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// SAFETY: At this time we have a non-BSP `CpuId`, which means that
|
|
||||||
// `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must
|
|
||||||
// also have been called (see the implementation of `cpu::init_on_bsp`),
|
|
||||||
// so `CPU_LOCAL_STORAGES` must already be initialized.
|
|
||||||
let storages = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() };
|
|
||||||
// SAFETY: `cpu_id` is guaranteed to be in range because the type
|
|
||||||
// invariant of `CpuId`.
|
|
||||||
let storage = unsafe { *storages.get_unchecked(cpu_id - 1) };
|
|
||||||
let base = crate::mm::paddr_to_vaddr(storage);
|
|
||||||
|
|
||||||
let offset = self.get_offset();
|
|
||||||
let ptr = (base + offset) as *const T;
|
|
||||||
|
|
||||||
// SAFETY: `ptr` represents CPU-local data on a remote CPU. It
|
|
||||||
// contains valid data, the type is `Sync`, and no one will mutably
|
|
||||||
// borrow it, so creating an immutable borrow here is valid.
|
|
||||||
unsafe { &*ptr }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SAFETY: At any given time, only one task can access the inner value `T` of a
|
|
||||||
// CPU-local variable if `T` is not `Sync`. We guarantee it by disabling the
|
|
||||||
// reference to the inner value, or turning off preemptions when creating
|
|
||||||
// the reference.
|
|
||||||
unsafe impl<T: 'static> Sync for CpuLocal<T> {}
|
|
||||||
|
|
||||||
// Prevent valid instances of `CpuLocal` from being copied to any memory areas
|
|
||||||
// outside the `.cpu_local` section.
|
|
||||||
impl<T: 'static> !Copy for CpuLocal<T> {}
|
|
||||||
impl<T: 'static> !Clone for CpuLocal<T> {}
|
|
||||||
|
|
||||||
// In general, it does not make any sense to send instances of `CpuLocal` to
|
|
||||||
// other tasks as they should live on other CPUs to make sending useful.
|
|
||||||
impl<T: 'static> !Send for CpuLocal<T> {}
|
|
||||||
|
|
||||||
/// A guard for accessing the CPU-local object.
|
|
||||||
///
|
|
||||||
/// It ensures that the CPU-local object is accessed with IRQs disabled.
|
|
||||||
/// It is created by [`CpuLocal::get_with`].
|
|
||||||
#[must_use]
|
|
||||||
pub struct CpuLocalDerefGuard<'a, T: 'static> {
|
|
||||||
cpu_local: &'static CpuLocal<T>,
|
|
||||||
#[expect(dead_code)]
|
|
||||||
guard: &'a DisabledLocalIrqGuard,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: 'static> Deref for CpuLocalDerefGuard<'_, T> {
|
|
||||||
type Target = T;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
// SAFETY: it should be properly initialized before accesses.
|
|
||||||
// And we do not create a mutable reference over it. The IRQs
|
|
||||||
// are disabled so it can only be referenced from this task.
|
|
||||||
unsafe { &*self.cpu_local.as_ptr() }
|
|
||||||
}
|
|
||||||
}
|
|
238
ostd/src/cpu/local/dyn_cpu_local.rs
Normal file
238
ostd/src/cpu/local/dyn_cpu_local.rs
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
//! Dynamically-allocated CPU-local objects.
|
||||||
|
|
||||||
|
use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull};
|
||||||
|
|
||||||
|
use bitvec::prelude::{bitvec, BitVec};
|
||||||
|
|
||||||
|
use super::{AnyStorage, CpuLocal};
|
||||||
|
use crate::{
|
||||||
|
cpu::{all_cpus, num_cpus, CpuId, PinCurrentCpu},
|
||||||
|
mm::{paddr_to_vaddr, FrameAllocOptions, Segment, Vaddr, PAGE_SIZE},
|
||||||
|
trap::DisabledLocalIrqGuard,
|
||||||
|
Result,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A dynamically-allocated storage for a CPU-local variable of type `T`.
|
||||||
|
///
|
||||||
|
/// Such a CPU-local storage should be allocated and deallocated by
|
||||||
|
/// [`DynCpuLocalChunk`], not directly. Dropping it without deallocation
|
||||||
|
/// will cause panic.
|
||||||
|
///
|
||||||
|
/// When dropping a `CpuLocal<T, DynamicStorage<T>>`, we have no way to know
|
||||||
|
/// which `DynCpuLocalChunk` the CPU-local object was originally allocated
|
||||||
|
/// from. Therefore, we rely on the user to correctly manage the corresponding
|
||||||
|
/// `DynCpuLocalChunk`, ensuring that both allocation and deallocation of
|
||||||
|
/// `CpuLocal<T, DynamicStorage<T>>` occur within the same chunk.
|
||||||
|
///
|
||||||
|
/// To properly deallocate the CPU-local object, the user must explicitly call
|
||||||
|
/// the appropriate `DynCpuLocalChunk`'s `try_dealloc<T>()`. Otherwise,
|
||||||
|
/// dropping it directly will cause a panic.
|
||||||
|
pub struct DynamicStorage<T>(NonNull<T>);
|
||||||
|
|
||||||
|
unsafe impl<T> AnyStorage<T> for DynamicStorage<T> {
|
||||||
|
fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T {
|
||||||
|
self.get_ptr_on_target(guard.current_cpu())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T {
|
||||||
|
let bsp_va = self.0.as_ptr() as usize;
|
||||||
|
let va = bsp_va + cpu_id.as_usize() * CHUNK_SIZE;
|
||||||
|
va as *mut T
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T {
|
||||||
|
self.get_ptr_on_target(cpu).cast_mut()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Drop for DynamicStorage<T> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
panic!(
|
||||||
|
"Do not drop `DynamicStorage<T>` directly. \
|
||||||
|
Use `DynCpuLocalChunk::try_dealloc<T>` instead."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Sync + alloc::fmt::Debug + 'static> alloc::fmt::Debug for CpuLocal<T, DynamicStorage<T>> {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
let mut list = f.debug_list();
|
||||||
|
for cpu in all_cpus() {
|
||||||
|
let val = self.get_on_cpu(cpu);
|
||||||
|
list.entry(&(&cpu, val));
|
||||||
|
}
|
||||||
|
list.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> CpuLocal<T, DynamicStorage<T>> {
|
||||||
|
/// Creates a new dynamically-allocated CPU-local object, and
|
||||||
|
/// initializes it with `init_values`.
|
||||||
|
///
|
||||||
|
/// The given `ptr` points to the variable located on the BSP.
|
||||||
|
///
|
||||||
|
/// Please do not call this function directly. Instead, use
|
||||||
|
/// `DynCpuLocalChunk::alloc`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The caller must ensure that the new per-CPU object belongs to an
|
||||||
|
/// existing [`DynCpuLocalChunk`], and does not overlap with any existing
|
||||||
|
/// CPU-local object.
|
||||||
|
unsafe fn __new_dynamic(ptr: *mut T, init_values: &mut impl FnMut(CpuId) -> T) -> Self {
|
||||||
|
let mut storage = DynamicStorage(NonNull::new(ptr).unwrap());
|
||||||
|
for cpu in all_cpus() {
|
||||||
|
let ptr = storage.get_mut_ptr_on_target(cpu);
|
||||||
|
// SAFETY: `ptr` points to valid, uninitialized per-CPU memory
|
||||||
|
// reserved for CPU-local storage. This initialization occurs
|
||||||
|
// before any other code can access the memory. References to
|
||||||
|
// the data may only be created after `Self` is created, ensuring
|
||||||
|
// exclusive access by the current task. Each per-CPU memory
|
||||||
|
// region is written exactly once using `ptr::write`, which is
|
||||||
|
// safe for uninitialized memory.
|
||||||
|
unsafe {
|
||||||
|
core::ptr::write(ptr, init_values(cpu));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Self {
|
||||||
|
storage,
|
||||||
|
phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const CHUNK_SIZE: usize = PAGE_SIZE;
|
||||||
|
|
||||||
|
/// Frame metadata for the frames that back dynamically-allocated CPU-local chunks.
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
struct DynCpuLocalMeta;
|
||||||
|
crate::impl_frame_meta_for!(DynCpuLocalMeta);
|
||||||
|
|
||||||
|
/// Manages dynamically-allocated CPU-local chunks.
|
||||||
|
///
|
||||||
|
/// Each CPU owns a chunk of size `CHUNK_SIZE`, and the chunks are laid
|
||||||
|
/// out contiguously in the order of CPU IDs. Per-CPU variables lie within
|
||||||
|
/// the chunks.
|
||||||
|
pub struct DynCpuLocalChunk<const ITEM_SIZE: usize> {
|
||||||
|
segment: ManuallyDrop<Segment<DynCpuLocalMeta>>,
|
||||||
|
bitmap: BitVec,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const ITEM_SIZE: usize> DynCpuLocalChunk<ITEM_SIZE> {
|
||||||
|
/// Creates a new dynamically-allocated CPU-local chunk.
|
||||||
|
pub fn new() -> Result<Self> {
|
||||||
|
let total_chunk_size = CHUNK_SIZE * num_cpus();
|
||||||
|
let segment = FrameAllocOptions::new()
|
||||||
|
.zeroed(false)
|
||||||
|
.alloc_segment_with(total_chunk_size.div_ceil(PAGE_SIZE), |_| DynCpuLocalMeta)?;
|
||||||
|
|
||||||
|
let num_items = CHUNK_SIZE / ITEM_SIZE;
|
||||||
|
const { assert!(CHUNK_SIZE % ITEM_SIZE == 0) };
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
segment: ManuallyDrop::new(segment),
|
||||||
|
bitmap: bitvec![0; num_items],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a pointer to the local chunk owned by the BSP.
|
||||||
|
fn start_vaddr(&self) -> Vaddr {
|
||||||
|
paddr_to_vaddr(self.segment.start_paddr())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocates a CPU-local object from the chunk, and
|
||||||
|
/// initializes it with `init_values`.
|
||||||
|
///
|
||||||
|
/// Returns `None` if the chunk is full.
|
||||||
|
pub fn alloc<T>(
|
||||||
|
&mut self,
|
||||||
|
init_values: &mut impl FnMut(CpuId) -> T,
|
||||||
|
) -> Option<CpuLocal<T, DynamicStorage<T>>> {
|
||||||
|
const {
|
||||||
|
assert!(ITEM_SIZE.is_power_of_two());
|
||||||
|
assert!(core::mem::size_of::<T>() <= ITEM_SIZE);
|
||||||
|
assert!(core::mem::align_of::<T>() <= ITEM_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = self.bitmap.first_zero()?;
|
||||||
|
self.bitmap.set(index, true);
|
||||||
|
// SAFETY: `index` refers to an available position in the chunk
|
||||||
|
// for allocating a new CPU-local object.
|
||||||
|
unsafe {
|
||||||
|
let vaddr = self.start_vaddr() + index * ITEM_SIZE;
|
||||||
|
Some(CpuLocal::__new_dynamic(vaddr as *mut T, init_values))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the index of a dynamically-allocated CPU-local object
|
||||||
|
/// within the chunk.
|
||||||
|
///
|
||||||
|
/// Returns `None` if the object does not belong to the chunk.
|
||||||
|
fn get_item_index<T>(&mut self, cpu_local: &CpuLocal<T, DynamicStorage<T>>) -> Option<usize> {
|
||||||
|
let vaddr = cpu_local.storage.0.as_ptr() as Vaddr;
|
||||||
|
let start_vaddr = self.start_vaddr();
|
||||||
|
|
||||||
|
let offset = vaddr.checked_sub(start_vaddr)?;
|
||||||
|
if offset > CHUNK_SIZE {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug_assert_eq!(offset % ITEM_SIZE, 0);
|
||||||
|
|
||||||
|
Some(offset / ITEM_SIZE)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attempts to deallocate a previously allocated CPU-local object.
|
||||||
|
///
|
||||||
|
/// Returns `Err(cpu_local)` if the object does not belong to this chunk.
|
||||||
|
pub fn try_dealloc<T>(
|
||||||
|
&mut self,
|
||||||
|
mut cpu_local: CpuLocal<T, DynamicStorage<T>>,
|
||||||
|
) -> core::result::Result<(), CpuLocal<T, DynamicStorage<T>>> {
|
||||||
|
let Some(index) = self.get_item_index(&cpu_local) else {
|
||||||
|
return Err(cpu_local);
|
||||||
|
};
|
||||||
|
self.bitmap.set(index, false);
|
||||||
|
for cpu in all_cpus() {
|
||||||
|
let ptr = cpu_local.storage.get_mut_ptr_on_target(cpu);
|
||||||
|
// SAFETY: `ptr` points to the valid CPU-local object. We can
|
||||||
|
// mutably borrow the CPU-local object on `cpu` because we have
|
||||||
|
// the exclusive access to `cpu_local`. Each CPU-local object
|
||||||
|
// is dropped exactly once. After the deallocation, no one will
|
||||||
|
// access the dropped CPU-local object, since we explicitly
|
||||||
|
// forget the `cpu_local`.
|
||||||
|
unsafe {
|
||||||
|
core::ptr::drop_in_place(ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let _ = ManuallyDrop::new(cpu_local);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks whether the chunk is full.
|
||||||
|
pub fn is_full(&self) -> bool {
|
||||||
|
self.bitmap.all()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks whether the chunk is empty.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.bitmap.not_any()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const ITEM_SIZE: usize> Drop for DynCpuLocalChunk<ITEM_SIZE> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if self.is_empty() {
|
||||||
|
// SAFETY: The `segment` does not contain any CPU-local objects.
|
||||||
|
// It is the last time the `segment` is accessed, and it will be
|
||||||
|
// dropped only once.
|
||||||
|
unsafe { ManuallyDrop::drop(&mut self.segment) }
|
||||||
|
} else {
|
||||||
|
// Leak the `segment` and panic.
|
||||||
|
panic!("Dropping `DynCpuLocalChunk` while some CPU-local objects are still alive");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,47 +2,68 @@
|
|||||||
|
|
||||||
//! CPU local storage.
|
//! CPU local storage.
|
||||||
//!
|
//!
|
||||||
//! This module provides a mechanism to define CPU-local objects, by the macro
|
//! This module provides a mechanism to define CPU-local objects. Users can
|
||||||
//! [`crate::cpu_local!`].
|
//! define a statically-allocated CPU-local object by the macro
|
||||||
|
//! [`crate::cpu_local!`], or allocate a dynamically-allocated CPU-local
|
||||||
|
//! object with the function [`osdk_heap_allocator::alloc_cpu_local`].
|
||||||
//!
|
//!
|
||||||
//! Such a mechanism exploits the fact that constant values of non-[`Copy`]
|
//! The mechanism for statically-allocated CPU-local objects exploits the fact
|
||||||
//! types can be bitwise copied. For example, a [`Option<T>`] object, though
|
//! that constant values of non-[`Copy`] types can be bitwise copied. For
|
||||||
//! being not [`Copy`], have a constant constructor [`Option::None`] that
|
//! example, an [`Option<T>`] object, though not [`Copy`], has a constant
|
||||||
//! produces a value that can be bitwise copied to create a new instance.
|
//! constructor [`Option::None`] that produces a value that can be bitwise
|
||||||
//! [`alloc::sync::Arc`] however, don't have such a constructor, and thus cannot
|
//! copied to create a new instance. [`alloc::sync::Arc`], however, doesn't have
|
||||||
//! be directly used as a CPU-local object. Wrapping it in a type that has a
|
//! such a constructor, and thus cannot be directly used as a statically-
|
||||||
//! constant constructor, like [`Option<T>`], can make it CPU-local.
|
//! allocated CPU-local object. Wrapping it in a type that has a constant
|
||||||
|
//! constructor, like [`Option<T>`], can make it statically-allocated CPU-local.
|
||||||
//!
|
//!
|
||||||
//! # Implementation
|
//! # Implementation
|
||||||
//!
|
//!
|
||||||
//! These APIs are implemented by placing the CPU-local objects in a special
|
//! These APIs are implemented by the methods as follows:
|
||||||
//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects linked
|
//! 1. For statically-allocated CPU-local objects, we place them in a special
|
||||||
//! in this section, and these objects are copied to dynamically allocated
|
//! section `.cpu_local`. The bootstrap processor (BSP) uses the objects
|
||||||
//! local storage of each application processors (AP) during the initialization
|
//! linked in this section, and these objects are copied to dynamically
|
||||||
//! process.
|
//! allocated local storage of each application processor (AP) during the
|
||||||
|
//! initialization process.
|
||||||
|
//! 2. For dynamically-allocated CPU-local objects, we prepare a fixed-size
|
||||||
|
//! chunk for each CPU. These per-CPU memory chunks are laid out contiguously
|
||||||
|
//! in memory in the order of the CPU IDs. A dynamically-allocated CPU-local
|
||||||
|
//! object can be allocated by occupying the same offset in each per-CPU
|
||||||
|
//! memory chunk.
|
||||||
|
|
||||||
// This module also provides CPU-local cell objects that have inner mutability.
|
// This module also provides CPU-local cell objects that have inner mutability.
|
||||||
//
|
//
|
||||||
// The difference between CPU-local objects (defined by [`crate::cpu_local!`])
|
// The difference between statically-allocated CPU-local objects (defined by
|
||||||
// and CPU-local cell objects (defined by [`crate::cpu_local_cell!`]) is that
|
// [`crate::cpu_local!`]) and CPU-local cell objects (defined by
|
||||||
// the CPU-local objects can be shared across CPUs. While through a CPU-local
|
// [`crate::cpu_local_cell!`]) is that the CPU-local objects can be shared
|
||||||
// cell object you can only access the value on the current CPU, therefore
|
// across CPUs. While through a CPU-local cell object you can only access the
|
||||||
// enabling inner mutability without locks.
|
// value on the current CPU, therefore enabling inner mutability without locks.
|
||||||
|
|
||||||
mod cell;
|
mod cell;
|
||||||
mod cpu_local;
|
mod dyn_cpu_local;
|
||||||
|
mod static_cpu_local;
|
||||||
|
|
||||||
pub(crate) mod single_instr;
|
pub(crate) mod single_instr;
|
||||||
|
|
||||||
use core::alloc::Layout;
|
use core::{alloc::Layout, marker::PhantomData, ops::Deref};
|
||||||
|
|
||||||
use align_ext::AlignExt;
|
use align_ext::AlignExt;
|
||||||
pub use cell::CpuLocalCell;
|
pub use cell::CpuLocalCell;
|
||||||
pub use cpu_local::{CpuLocal, CpuLocalDerefGuard};
|
pub use dyn_cpu_local::DynCpuLocalChunk;
|
||||||
|
use dyn_cpu_local::DynamicStorage;
|
||||||
use spin::Once;
|
use spin::Once;
|
||||||
|
use static_cpu_local::StaticStorage;
|
||||||
|
|
||||||
use super::CpuId;
|
use super::CpuId;
|
||||||
use crate::mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE};
|
use crate::{
|
||||||
|
mm::{frame::allocator, paddr_to_vaddr, Paddr, PAGE_SIZE},
|
||||||
|
trap::DisabledLocalIrqGuard,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Dynamically-allocated CPU-local objects.
///
/// This is a [`CpuLocal`] whose storage parameter is `DynamicStorage<T>`.
|
||||||
|
pub type DynamicCpuLocal<T> = CpuLocal<T, DynamicStorage<T>>;
|
||||||
|
|
||||||
|
/// Statically-allocated CPU-local objects.
///
/// This is a [`CpuLocal`] whose storage parameter is `StaticStorage<T>`.
/// Objects of this type are defined with the [`crate::cpu_local!`] macro.
|
||||||
|
pub type StaticCpuLocal<T> = CpuLocal<T, static_cpu_local::StaticStorage<T>>;
|
||||||
|
|
||||||
// These symbols are provided by the linker script.
|
// These symbols are provided by the linker script.
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -50,10 +71,120 @@ extern "C" {
|
|||||||
fn __cpu_local_end();
|
fn __cpu_local_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The CPU-local areas for APs.
|
/// A trait to abstract any type that can be used as a slot for a CPU-local
|
||||||
|
/// variable of type `T`.
|
||||||
|
///
|
||||||
|
/// Each slot provides the memory space for storing `num_cpus` instances
|
||||||
|
/// of type `T`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The implementor must ensure that the returned pointer refers to the
|
||||||
|
/// variable on the correct CPU.
|
||||||
|
pub unsafe trait AnyStorage<T> {
|
||||||
|
/// Gets the `const` pointer for the object on the current CPU.
|
||||||
|
fn get_ptr_on_current(&self, guard: &DisabledLocalIrqGuard) -> *const T;
|
||||||
|
|
||||||
|
/// Gets the `const` pointer for the object on a target CPU.
|
||||||
|
fn get_ptr_on_target(&self, cpu: CpuId) -> *const T;
|
||||||
|
|
||||||
|
/// Gets the `mut` pointer for the object on a target CPU.
|
||||||
|
///
|
||||||
|
/// This method is intended for use when initializing or dropping the storage.
|
||||||
|
fn get_mut_ptr_on_target(&mut self, cpu: CpuId) -> *mut T;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A CPU-local variable for type `T`, backed by a storage of type `S`.
|
||||||
|
///
|
||||||
|
/// CPU-local objects are instantiated once per CPU core. They can be shared to
|
||||||
|
/// other cores. In the context of a preemptible kernel task, when holding the
|
||||||
|
/// reference to the inner object, the object is always the one in the original
|
||||||
|
/// core (when the reference is created), no matter which core the code is
|
||||||
|
/// currently running on.
|
||||||
|
pub struct CpuLocal<T, S: AnyStorage<T>> {
|
||||||
|
storage: S,
|
||||||
|
phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: 'static, S: AnyStorage<T>> CpuLocal<T, S> {
|
||||||
|
/// Gets access to the underlying value on the current CPU with a
|
||||||
|
/// provided IRQ guard.
|
||||||
|
///
|
||||||
|
/// By this method, you can borrow a reference to the underlying value
|
||||||
|
/// on the current CPU even if `T` is not `Sync`.
|
||||||
|
pub fn get_with<'a>(
|
||||||
|
&'a self,
|
||||||
|
guard: &'a DisabledLocalIrqGuard,
|
||||||
|
) -> CpuLocalDerefGuard<'a, T, S> {
|
||||||
|
CpuLocalDerefGuard {
|
||||||
|
cpu_local: self,
|
||||||
|
guard,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: 'static + Sync, S: AnyStorage<T>> CpuLocal<T, S> {
|
||||||
|
/// Gets access to the CPU-local value on a specific CPU.
|
||||||
|
///
|
||||||
|
/// This allows the caller to access CPU-local data from a remote CPU,
|
||||||
|
/// so the data type must be `Sync`.
|
||||||
|
pub fn get_on_cpu(&self, target_cpu_id: CpuId) -> &T {
|
||||||
|
let ptr = self.storage.get_ptr_on_target(target_cpu_id);
|
||||||
|
// SAFETY: `ptr` represents CPU-local data on a remote CPU. It
|
||||||
|
// contains valid data, the type is `Sync`, and no one will mutably
|
||||||
|
// borrow it, so creating an immutable borrow here is valid.
|
||||||
|
unsafe { &*ptr }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A guard for accessing the CPU-local object.
|
||||||
|
///
|
||||||
|
/// It ensures that the CPU-local object is accessed with IRQs disabled.
|
||||||
|
/// It is created by [`CpuLocal::get_with`].
|
||||||
|
#[must_use]
|
||||||
|
pub struct CpuLocalDerefGuard<'a, T: 'static, S: AnyStorage<T>> {
|
||||||
|
cpu_local: &'a CpuLocal<T, S>,
|
||||||
|
guard: &'a DisabledLocalIrqGuard,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: 'static, S: AnyStorage<T>> Deref for CpuLocalDerefGuard<'a, T, S> {
|
||||||
|
type Target = T;
|
||||||
|
|
||||||
|
fn deref(&self) -> &'a Self::Target {
|
||||||
|
is_used::debug_set_true();
|
||||||
|
|
||||||
|
let ptr = self.cpu_local.storage.get_ptr_on_current(self.guard);
|
||||||
|
// SAFETY: `ptr` represents CPU-local data on the current CPU. It
|
||||||
|
// contains valid data, only the current task can reference the data
|
||||||
|
// (due to `self.guard`), and no one will mutably borrow it, so
|
||||||
|
// creating an immutable borrow here is valid.
|
||||||
|
unsafe { &*ptr }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: At any given time, only one task can access the inner value `T` of a
|
||||||
|
// CPU-local variable if `T` is not `Sync`: borrowing the value on the current
|
||||||
|
// CPU (`get_with`) requires a `DisabledLocalIrqGuard`, and accessing the value
|
||||||
|
// of another CPU (`get_on_cpu`) is only available when `T: Sync`.
|
||||||
|
unsafe impl<T: 'static, S: AnyStorage<T>> Sync for CpuLocal<T, S> {}
|
||||||
|
// NOTE(review): this `Send` impl has no `SAFETY` justification of its own and
// places no `Send` bound on `T` — confirm that this is intended.
unsafe impl<T: 'static> Send for CpuLocal<T, DynamicStorage<T>> {}
|
||||||
|
|
||||||
|
// Implement `!Copy` and `!Clone` for `CpuLocal` to ensure memory safety:
|
||||||
|
// - Prevent valid instances of `CpuLocal<T, StaticStorage<T>>` from being copied
|
||||||
|
// to any memory areas outside the `.cpu_local` section.
|
||||||
|
// - Prevent multiple valid instances of `CpuLocal<T, DynamicStorage<T>>` from
|
||||||
|
// referring to the same CPU-local object, avoiding double deallocation.
//
// NOTE: the `impl !Trait for Type` syntax used below is a negative impl, which
// relies on the unstable `negative_impls` language feature.
|
||||||
|
impl<T: 'static, S: AnyStorage<T>> !Copy for CpuLocal<T, S> {}
|
||||||
|
impl<T: 'static, S: AnyStorage<T>> !Clone for CpuLocal<T, S> {}
|
||||||
|
|
||||||
|
// In general, it does not make any sense to send instances of static `CpuLocal`
|
||||||
|
// to other tasks as they should live on other CPUs to make sending useful.
|
||||||
|
impl<T: 'static> !Send for CpuLocal<T, StaticStorage<T>> {}
|
||||||
|
|
||||||
|
/// The static CPU-local areas for APs.
|
||||||
static CPU_LOCAL_STORAGES: Once<&'static [Paddr]> = Once::new();
|
static CPU_LOCAL_STORAGES: Once<&'static [Paddr]> = Once::new();
|
||||||
|
|
||||||
/// Copies the CPU-local data on the bootstrap processor (BSP)
|
/// Copies the static CPU-local data on the bootstrap processor (BSP)
|
||||||
/// for application processors (APs).
|
/// for application processors (APs).
|
||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
@ -123,7 +254,7 @@ pub(crate) unsafe fn copy_bsp_for_ap(num_cpus: usize) {
|
|||||||
CPU_LOCAL_STORAGES.call_once(|| res);
|
CPU_LOCAL_STORAGES.call_once(|| res);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the pointer to the CPU-local storage for the given AP.
|
/// Gets the pointer to the static CPU-local storage for the given AP.
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
@ -148,7 +279,8 @@ pub(crate) fn get_ap(cpu_id: CpuId) -> Paddr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mod is_used {
|
mod is_used {
|
||||||
//! This module tracks whether any CPU-local variables are used.
|
//! This module tracks whether any statically-allocated CPU-local
|
||||||
|
//! variables are used.
|
||||||
//!
|
//!
|
||||||
//! [`copy_bsp_for_ap`] copies the CPU local data from the BSP
|
//! [`copy_bsp_for_ap`] copies the CPU local data from the BSP
|
||||||
//! to the APs, so it requires as a safety condition that the
|
//! to the APs, so it requires as a safety condition that the
|
||||||
|
157
ostd/src/cpu/local/static_cpu_local.rs
Normal file
157
ostd/src/cpu/local/static_cpu_local.rs
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
//! Statically-allocated CPU-local objects.
|
||||||
|
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
|
||||||
|
use super::{AnyStorage, CpuLocal, __cpu_local_end, __cpu_local_start};
|
||||||
|
use crate::{arch, cpu::CpuId, trap::DisabledLocalIrqGuard};
|
||||||
|
|
||||||
|
/// Declares one or more statically-allocated CPU-local variables.
///
/// Every declared `static` gets the type `StaticCpuLocal<T>` and is placed in
/// the `.cpu_local` link section. The variables are accessed through the
/// methods of [`CpuLocal`]:
///
/// - [`CpuLocal::get_on_cpu`] returns a reference to the inner object that
///   belongs to a given CPU;
/// - [`CpuLocal::get_with`] gives access to the inner object on the current
///   CPU, and can be used even if the inner object is not `Sync`.
///
/// # Example
///
/// ```rust
/// use ostd::{cpu_local, cpu::PinCurrentCpu, task::disable_preempt, trap};
/// use core::{sync::atomic::{AtomicU32, Ordering}, cell::Cell};
///
/// cpu_local! {
///     static FOO: AtomicU32 = AtomicU32::new(1);
///     pub static BAR: Cell<usize> = Cell::new(2);
/// }
///
/// fn not_an_atomic_function() {
///     let preempt_guard = disable_preempt();
///     let ref_of_foo = FOO.get_on_cpu(preempt_guard.current_cpu());
///     let val_of_foo = ref_of_foo.load(Ordering::Relaxed);
///     println!("FOO VAL: {}", val_of_foo);
///
///     let irq_guard = trap::disable_local();
///     let bar_guard = BAR.get_with(&irq_guard);
///     let val_of_bar = bar_guard.get();
///     println!("BAR VAL: {}", val_of_bar);
/// }
/// ```
#[macro_export]
macro_rules! cpu_local {
    ($( $(#[$meta:meta])* $visibility:vis static $ident:ident: $ty:ty = $value:expr; )*) => {
        $(
            #[link_section = ".cpu_local"]
            $(#[$meta])* $visibility static $ident: $crate::cpu::local::StaticCpuLocal<$ty> = {
                // The initializer is evaluated outside of the `unsafe` block
                // so that it cannot silently perform unsafe operations.
                let initial = $value;
                // SAFETY: The per-CPU variable created here is statically
                // stored in the special `.cpu_local` section.
                unsafe {
                    $crate::cpu::local::CpuLocal::__new_static(initial)
                }
            };
        )*
    };
}
|
||||||
|
|
||||||
|
/// A static storage for a CPU-local variable of type `T`.
|
||||||
|
///
|
||||||
|
/// Such a CPU-local storage is not intended to be allocated directly.
|
||||||
|
/// Use the `cpu_local` macro instead.
///
/// The wrapped value is the statically linked instance, which is the one
/// returned directly for the bootstrap processor (CPU ID 0).
|
||||||
|
pub struct StaticStorage<T: 'static>(T);
|
||||||
|
|
||||||
|
impl<T: 'static> StaticStorage<T> {
|
||||||
|
/// Gets access to the underlying value through a raw pointer.
|
||||||
|
///
|
||||||
|
/// This method is safe, but using the returned pointer will be unsafe.
|
||||||
|
fn as_ptr(&self) -> *const T {
|
||||||
|
super::is_used::debug_set_true();
|
||||||
|
|
||||||
|
let offset = self.get_offset();
|
||||||
|
|
||||||
|
let local_base = arch::cpu::local::get_base() as usize;
|
||||||
|
let local_va = local_base + offset;
|
||||||
|
|
||||||
|
// A sanity check about the alignment.
|
||||||
|
debug_assert_eq!(local_va % core::mem::align_of::<T>(), 0);
|
||||||
|
|
||||||
|
local_va as *const T
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets the offset of the CPU-local object in the CPU-local area.
|
||||||
|
fn get_offset(&self) -> usize {
|
||||||
|
let bsp_va = self as *const _ as usize;
|
||||||
|
let bsp_base = __cpu_local_start as usize;
|
||||||
|
// The implementation should ensure that the CPU-local object resides in the `.cpu_local`.
|
||||||
|
debug_assert!(bsp_va + core::mem::size_of::<T>() <= __cpu_local_end as usize);
|
||||||
|
|
||||||
|
bsp_va - bsp_base
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl<T: 'static> AnyStorage<T> for StaticStorage<T> {
|
||||||
|
fn get_ptr_on_current(&self, _guard: &DisabledLocalIrqGuard) -> *const T {
|
||||||
|
self.as_ptr()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_ptr_on_target(&self, cpu_id: CpuId) -> *const T {
|
||||||
|
super::is_used::debug_set_true();
|
||||||
|
|
||||||
|
let cpu_id = cpu_id.as_usize();
|
||||||
|
|
||||||
|
// If on the BSP, just use the statically linked storage.
|
||||||
|
if cpu_id == 0 {
|
||||||
|
return &self.0 as *const T;
|
||||||
|
}
|
||||||
|
|
||||||
|
let base = {
|
||||||
|
// SAFETY: At this time we have a non-BSP `CpuId`, which means that
|
||||||
|
// `init_cpu_nums` must have been called, so `copy_bsp_for_ap` must
|
||||||
|
// also have been called (see the implementation of `cpu::init_on_bsp`),
|
||||||
|
// so `CPU_LOCAL_STORAGES` must already be initialized.
|
||||||
|
let storages = unsafe { super::CPU_LOCAL_STORAGES.get_unchecked() };
|
||||||
|
// SAFETY: `cpu_id` is guaranteed to be in range because the type
|
||||||
|
// invariant of `CpuId`.
|
||||||
|
let storage = unsafe { *storages.get_unchecked(cpu_id - 1) };
|
||||||
|
crate::mm::paddr_to_vaddr(storage)
|
||||||
|
};
|
||||||
|
|
||||||
|
let offset = self.get_offset();
|
||||||
|
(base + offset) as *const T
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_mut_ptr_on_target(&mut self, _: CpuId) -> *mut T {
|
||||||
|
// `StaticStorage<T>` does not support `get_mut_ptr_on_target`, because
|
||||||
|
// statically-allocated CPU-local objects do not require per-CPU initialization.
|
||||||
|
panic!("Can't get the mutable pointer of StaticStorage<T> on a target CPU.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: 'static> CpuLocal<T, StaticStorage<T>> {
|
||||||
|
/// Creates a new statically-allocated CPU-local object.
|
||||||
|
///
|
||||||
|
/// Please do not call this function directly. Instead, use the
|
||||||
|
/// `cpu_local!` macro.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The caller should ensure that the object initialized by this
|
||||||
|
/// function resides in the `.cpu_local` section. Otherwise the
|
||||||
|
/// behavior is undefined.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub const unsafe fn __new_static(val: T) -> Self {
|
||||||
|
Self {
|
||||||
|
storage: StaticStorage(val),
|
||||||
|
phantom: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets access to the underlying value through a raw pointer.
|
||||||
|
///
|
||||||
|
/// This method is safe, but using the returned pointer will be unsafe.
|
||||||
|
pub(crate) fn as_ptr(&self) -> *const T {
|
||||||
|
self.storage.as_ptr()
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user