diff --git a/Cargo.lock b/Cargo.lock
index 5ce72d1c..f0196be5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1261,6 +1261,7 @@ version = "0.14.1"
 dependencies = [
  "log",
  "ostd",
+ "paste",
 ]
 
 [[package]]
diff --git a/osdk/deps/frame-allocator/Cargo.toml b/osdk/deps/frame-allocator/Cargo.toml
index 875db9d6..d6fa8e33 100644
--- a/osdk/deps/frame-allocator/Cargo.toml
+++ b/osdk/deps/frame-allocator/Cargo.toml
@@ -10,6 +10,7 @@ readme = "README.md"
 [dependencies]
 log = "0.4"
 ostd = { version = "0.14.1", path = "../../../ostd" }
+paste = "1.0.15"
 
 [lints]
 workspace = true
diff --git a/osdk/deps/frame-allocator/src/lib.rs b/osdk/deps/frame-allocator/src/lib.rs
index 1262d081..d91f9c3e 100644
--- a/osdk/deps/frame-allocator/src/lib.rs
+++ b/osdk/deps/frame-allocator/src/lib.rs
@@ -31,22 +31,28 @@ extern crate alloc;
 use core::alloc::Layout;
 
 use ostd::{
+    cpu::PinCurrentCpu,
     mm::{frame::GlobalFrameAllocator, Paddr},
     trap,
 };
 
 mod cache;
 mod chunk;
-mod per_cpu_counter;
 mod pools;
 mod set;
+mod smp_counter;
 
 #[cfg(ktest)]
 mod test;
 
+fast_smp_counter! {
+    /// The total size of free memory.
+    pub static TOTAL_FREE_SIZE: usize;
+}
+
 /// Loads the total size (in bytes) of free memory in the allocator.
 pub fn load_total_free_size() -> usize {
-    per_cpu_counter::read_total_free_size()
+    TOTAL_FREE_SIZE.get()
 }
 
 /// The global frame allocator provided by OSDK.
@@ -61,20 +67,20 @@ impl GlobalFrameAllocator for FrameAllocator {
         let guard = trap::disable_local();
         let res = cache::alloc(&guard, layout);
         if res.is_some() {
-            per_cpu_counter::sub_free_size(&guard, layout.size());
+            TOTAL_FREE_SIZE.sub(guard.current_cpu(), layout.size());
         }
         res
     }
 
     fn dealloc(&self, addr: Paddr, size: usize) {
         let guard = trap::disable_local();
-        per_cpu_counter::add_free_size(&guard, size);
+        TOTAL_FREE_SIZE.add(guard.current_cpu(), size);
         cache::dealloc(&guard, addr, size);
     }
 
     fn add_free_memory(&self, addr: Paddr, size: usize) {
         let guard = trap::disable_local();
-        per_cpu_counter::add_free_size(&guard, size);
+        TOTAL_FREE_SIZE.add(guard.current_cpu(), size);
         pools::add_free_memory(&guard, addr, size);
     }
 }
diff --git a/osdk/deps/frame-allocator/src/per_cpu_counter.rs b/osdk/deps/frame-allocator/src/per_cpu_counter.rs
deleted file mode 100644
index 8c7d47aa..00000000
--- a/osdk/deps/frame-allocator/src/per_cpu_counter.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-
-//! A per-CPU counter for the total size of free memory.
-//!
-//! If all CPUs are updating the same counter, it causes serious contention.
-//! We address it by using per-CPU counters and summing them up when needed.
-//!
-//! Updating is fast and scalable, but reading is slow and inaccurate.
-//!
-//! If we constantly allocates on one CPU and deallocates on another CPU,
-//! it may cause the counters to wrap. However it is fine since if you
-//! add them together, it will be correct. It will lead to inconsistency
-//! or surprising values for a short period of time.
-
-use core::sync::atomic::{AtomicIsize, Ordering};
-
-use ostd::{cpu::all_cpus, cpu_local, trap::DisabledLocalIrqGuard};
-
-cpu_local! {
-    static FREE_SIZE: AtomicIsize = AtomicIsize::new(0);
-}
-
-/// Adds the given size to a global total free size.
-pub(super) fn add_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
-    FREE_SIZE
-        .get_with(irq_guard)
-        .fetch_add(size as isize, Ordering::Relaxed);
-}
-
-/// Subtracts the given size from a global total free size.
-pub(super) fn sub_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
-    FREE_SIZE
-        .get_with(irq_guard)
-        .fetch_sub(size as isize, Ordering::Relaxed);
-}
-
-/// Reads the total size of free memory.
-///
-/// This function is not atomic and may be inaccurate since other CPUs may be
-/// updating the counter while we are reading it.
-pub(super) fn read_total_free_size() -> usize {
-    let mut total: isize = 0;
-    for cpu in all_cpus() {
-        total = total.wrapping_add(FREE_SIZE.get_on_cpu(cpu).load(Ordering::Relaxed));
-    }
-    if total < 0 {
-        0
-    } else {
-        total as usize
-    }
-}
diff --git a/osdk/deps/frame-allocator/src/smp_counter.rs b/osdk/deps/frame-allocator/src/smp_counter.rs
new file mode 100644
index 00000000..142dfe74
--- /dev/null
+++ b/osdk/deps/frame-allocator/src/smp_counter.rs
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! A fast and scalable SMP counter.
+
+use ostd::cpu::{all_cpus, local::CpuLocal, CpuId};
+
+use core::sync::atomic::{AtomicIsize, Ordering};
+
+/// Defines a static fast SMP counter.
+///
+/// See [`FastSmpCounter`] for more details.
+#[macro_export]
+macro_rules! fast_smp_counter {
+    ($(#[$attr:meta])* $vis:vis static $name:ident : usize;) => { paste::paste!{
+        ostd::cpu_local! {
+            static [< __LOCAL_COUNTER_ $name >]: core::sync::atomic::AtomicIsize
+                = core::sync::atomic::AtomicIsize::new(0);
+        }
+
+        $(#[$attr])*
+        $vis static $name: $crate::smp_counter::FastSmpCounter =
+            $crate::smp_counter::FastSmpCounter::new(
+                & [< __LOCAL_COUNTER_ $name >],
+            );
+    }};
+}
+
+/// A fast, SMP-friendly, global counter.
+///
+/// Users should use the [`fast_smp_counter!`] macro to define a static counter.
+///
+/// Updating it is fast and scalable, but reading is slow and inaccurate.
+///
+/// An alternative is to use a global atomic, but if all CPUs are updating the
+/// same atomic, it causes serious contention. This method addresses it by using
+/// per-CPU counters and summing them up when needed.
+///
+/// If we constantly add on one CPU and subtract on another CPU, it may cause
+/// the counters to wrap. However, it is fine since if you add them together, it
+/// will be correct. It may lead to inconsistent or surprising values for a
+/// short period of time.
+///
+/// Nevertheless, if the sum of added values exceeds [`usize::MAX`], the counter
+/// will wrap on overflow.
+pub struct FastSmpCounter {
+    per_cpu_counter: &'static CpuLocal<AtomicIsize>,
+}
+
+impl FastSmpCounter {
+    /// Creates a new [`FastSmpCounter`] with the given per-CPU counter.
+    ///
+    /// This function should only be used by the [`fast_smp_counter!`] macro.
+    #[doc(hidden)]
+    pub const fn new(per_cpu_counter: &'static CpuLocal<AtomicIsize>) -> Self {
+        Self { per_cpu_counter }
+    }
+
+    /// Adds `a` to the counter on the given CPU.
+    pub fn add(&self, on_cpu: CpuId, a: usize) {
+        self.per_cpu_counter
+            .get_on_cpu(on_cpu)
+            .fetch_add(a as isize, Ordering::Relaxed);
+    }
+
+    /// Subtracts `a` from the counter on the given CPU.
+    pub fn sub(&self, on_cpu: CpuId, a: usize) {
+        self.per_cpu_counter
+            .get_on_cpu(on_cpu)
+            .fetch_sub(a as isize, Ordering::Relaxed);
+    }
+
+    /// Gets the total counter value.
+    ///
+    /// This function may be inaccurate since other CPUs may be
+    /// updating the counter.
+    pub fn get(&self) -> usize {
+        let mut total: isize = 0;
+        for cpu in all_cpus() {
+            total =
+                total.wrapping_add(self.per_cpu_counter.get_on_cpu(cpu).load(Ordering::Relaxed));
+        }
+        if total < 0 {
+            0
+        } else {
+            total as usize
+        }
+    }
+}
+
+#[cfg(ktest)]
+mod test {
+    use ostd::{cpu::PinCurrentCpu, prelude::*, trap};
+
+    #[ktest]
+    fn test_per_cpu_counter() {
+        fast_smp_counter! {
+            /// The total size of free memory.
+            pub static FREE_SIZE_COUNTER: usize;
+        }
+
+        let guard = trap::disable_local();
+        let cur_cpu = guard.current_cpu();
+        FREE_SIZE_COUNTER.add(cur_cpu, 10);
+        assert_eq!(FREE_SIZE_COUNTER.get(), 10);
+        FREE_SIZE_COUNTER.add(cur_cpu, 20);
+        assert_eq!(FREE_SIZE_COUNTER.get(), 30);
+        FREE_SIZE_COUNTER.sub(cur_cpu, 5);
+        assert_eq!(FREE_SIZE_COUNTER.get(), 25);
+    }
+}
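
Note (not part of the patch): for readers unfamiliar with the `paste` crate, the `fast_smp_counter!` invocation in `lib.rs` expands to roughly the following. This is a sketch only; the exact code emitted by `ostd::cpu_local!` and `paste::paste!` may differ, and `$crate` is written as `crate` here because the macro is invoked inside the defining crate.

    // Rough expansion of `fast_smp_counter! { pub static TOTAL_FREE_SIZE: usize; }`.
    // `paste::paste!` splices the counter name into the per-CPU static's identifier.
    ostd::cpu_local! {
        // One AtomicIsize per CPU; updated locally, summed up by `get()`.
        static __LOCAL_COUNTER_TOTAL_FREE_SIZE: core::sync::atomic::AtomicIsize =
            core::sync::atomic::AtomicIsize::new(0);
    }

    /// The total size of free memory.
    pub static TOTAL_FREE_SIZE: crate::smp_counter::FastSmpCounter =
        crate::smp_counter::FastSmpCounter::new(&__LOCAL_COUNTER_TOTAL_FREE_SIZE);

As the doc comment on `FastSmpCounter` explains, an individual per-CPU value may go negative (for example, a 4 KiB frame allocated on CPU 0 but freed on CPU 1 leaves CPU 0's counter at -4096 and CPU 1's at +4096); only the wrapping sum computed by `get()`, clamped to zero when negative, is meaningful.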