Make per-cpu counter a macro

This commit is contained in:
Zhang Junyang 2025-03-20 19:27:06 +08:00 committed by Tate, Hongliang Tian
parent abc74151fb
commit e5be154ca8
5 changed files with 123 additions and 56 deletions

1
Cargo.lock generated
View File

@ -1261,6 +1261,7 @@ version = "0.14.1"
dependencies = [
"log",
"ostd",
"paste",
]
[[package]]

View File

@ -10,6 +10,7 @@ readme = "README.md"
[dependencies]
log = "0.4"
ostd = { version = "0.14.1", path = "../../../ostd" }
paste = "1.0.15"
[lints]
workspace = true

View File

@ -31,22 +31,28 @@ extern crate alloc;
use core::alloc::Layout;
use ostd::{
cpu::PinCurrentCpu,
mm::{frame::GlobalFrameAllocator, Paddr},
trap,
};
mod cache;
mod chunk;
mod per_cpu_counter;
mod pools;
mod set;
mod smp_counter;
#[cfg(ktest)]
mod test;
fast_smp_counter! {
/// The total size of free memory.
///
/// It is increased by the `dealloc` and `add_free_memory` paths of the
/// frame allocator in this file and decreased after a successful
/// allocation. Read it through [`load_total_free_size`].
pub static TOTAL_FREE_SIZE: usize;
}
/// Loads the total size (in bytes) of free memory in the allocator.
///
/// The value is assembled from per-CPU counters, so it may be slightly
/// out of date while other CPUs are allocating or freeing memory.
pub fn load_total_free_size() -> usize {
per_cpu_counter::read_total_free_size()
TOTAL_FREE_SIZE.get()
}
/// The global frame allocator provided by OSDK.
@ -61,20 +67,20 @@ impl GlobalFrameAllocator for FrameAllocator {
let guard = trap::disable_local();
let res = cache::alloc(&guard, layout);
if res.is_some() {
per_cpu_counter::sub_free_size(&guard, layout.size());
TOTAL_FREE_SIZE.sub(guard.current_cpu(), layout.size());
}
res
}
/// Hands the memory region described by `addr` and `size` back to the
/// allocator.
///
/// The free-size counter is credited with `size` before the region is
/// passed on to `cache::dealloc`.
fn dealloc(&self, addr: Paddr, size: usize) {
// NOTE(review): the guard presumably keeps local IRQs disabled until it
// is dropped at the end of this function — confirm against `ostd::trap`.
let guard = trap::disable_local();
per_cpu_counter::add_free_size(&guard, size);
TOTAL_FREE_SIZE.add(guard.current_cpu(), size);
cache::dealloc(&guard, addr, size);
}
/// Registers the memory region described by `addr` and `size` as free
/// memory of the allocator.
///
/// The free-size counter is credited with `size` before the region is
/// passed on to `pools::add_free_memory`.
fn add_free_memory(&self, addr: Paddr, size: usize) {
// NOTE(review): the guard presumably keeps local IRQs disabled until it
// is dropped at the end of this function — confirm against `ostd::trap`.
let guard = trap::disable_local();
per_cpu_counter::add_free_size(&guard, size);
TOTAL_FREE_SIZE.add(guard.current_cpu(), size);
pools::add_free_memory(&guard, addr, size);
}
}

View File

@ -1,51 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
//! A per-CPU counter for the total size of free memory.
//!
//! If all CPUs are updating the same counter, it causes serious contention.
//! We address it by using per-CPU counters and summing them up when needed.
//!
//! Updating is fast and scalable, but reading is slow and inaccurate.
//!
//! If we constantly allocate on one CPU and deallocate on another CPU,
//! the individual per-CPU counters may wrap. This is fine: once they are
//! added together, the total is correct again. It may only lead to
//! inconsistent or surprising values for a short period of time.
use core::sync::atomic::{AtomicIsize, Ordering};
use ostd::{cpu::all_cpus, cpu_local, trap::DisabledLocalIrqGuard};
// Per-CPU share of the free-memory total. It is signed because a single
// CPU's share can drop below zero when memory added on one CPU is later
// subtracted on another; only the sum over all CPUs is meaningful.
cpu_local! {
static FREE_SIZE: AtomicIsize = AtomicIsize::new(0);
}
/// Credits `size` to the global total free size.
///
/// Only the counter of the CPU identified through `irq_guard` is touched.
pub(super) fn add_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
    let local_counter = FREE_SIZE.get_with(irq_guard);
    // The cast reinterprets the bits and the atomic add wraps on overflow.
    let delta = size as isize;
    local_counter.fetch_add(delta, Ordering::Relaxed);
}
/// Debits `size` from the global total free size.
///
/// Only the counter of the CPU identified through `irq_guard` is touched.
pub(super) fn sub_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
    let local_counter = FREE_SIZE.get_with(irq_guard);
    // The cast reinterprets the bits and the atomic subtract wraps on
    // overflow.
    let delta = size as isize;
    local_counter.fetch_sub(delta, Ordering::Relaxed);
}
/// Sums the per-CPU counters into the total size of free memory.
///
/// The result is not an atomic snapshot: other CPUs may update their
/// counters while the sum is being computed, so the value may be
/// inaccurate. A negative sum is reported as zero.
pub(super) fn read_total_free_size() -> usize {
    let sum = all_cpus()
        .map(|cpu| FREE_SIZE.get_on_cpu(cpu).load(Ordering::Relaxed))
        .fold(0isize, isize::wrapping_add);

    // After clamping to zero the value is non-negative, so the cast is
    // lossless.
    sum.max(0) as usize
}

View File

@ -0,0 +1,110 @@
// SPDX-License-Identifier: MPL-2.0
//! A fast and scalable SMP counter.
use ostd::cpu::{all_cpus, local::CpuLocal, CpuId};
use core::sync::atomic::{AtomicIsize, Ordering};
/// Declares a static [`FastSmpCounter`] together with its per-CPU storage.
///
/// The accepted form is `#[attrs] <vis> static NAME: usize;`. The expansion
/// contains a CPU-local `AtomicIsize` named `__LOCAL_COUNTER_NAME`,
/// initialized to zero, and a static `NAME` that carries the given
/// attributes and visibility and refers to that CPU-local storage.
///
/// See [`FastSmpCounter`] for more details.
#[macro_export]
macro_rules! fast_smp_counter {
    (
        $(#[$meta:meta])*
        $visibility:vis static $counter:ident : usize;
    ) => {
        paste::paste! {
            ostd::cpu_local! {
                static [< __LOCAL_COUNTER_ $counter >]: core::sync::atomic::AtomicIsize =
                    core::sync::atomic::AtomicIsize::new(0);
            }

            $(#[$meta])*
            $visibility static $counter: $crate::smp_counter::FastSmpCounter =
                $crate::smp_counter::FastSmpCounter::new(&[< __LOCAL_COUNTER_ $counter >]);
        }
    };
}
/// A fast, SMP-friendly, global counter.
///
/// Users should use the [`fast_smp_counter!`] macro to define a static
/// counter.
///
/// Updating it is fast and scalable, but reading is slow and inaccurate.
///
/// An alternative is to use a global atomic, but if all CPUs are updating the
/// same atomic, it causes serious contention. This type addresses it by using
/// per-CPU counters and summing them up when needed.
///
/// If we constantly add on one CPU and subtract on another CPU, the
/// individual per-CPU counters may wrap. This is fine: the per-CPU values are
/// summed with wrapping arithmetic, so the total is still correct. It may
/// only lead to inconsistent or surprising values for a short period of time.
///
/// Nevertheless, the per-CPU counters and their sum are signed
/// ([`AtomicIsize`]), and [`FastSmpCounter::get`] reports a negative sum as
/// zero. A true total above [`isize::MAX`] therefore cannot be reported
/// correctly.
pub struct FastSmpCounter {
// Per-CPU signed partial counts; `get` combines them into the total.
per_cpu_counter: &'static CpuLocal<AtomicIsize>,
}
impl FastSmpCounter {
    /// Wraps the given per-CPU counter into a [`FastSmpCounter`].
    ///
    /// The [`fast_smp_counter!`] macro is the only intended caller of this
    /// function.
    #[doc(hidden)]
    pub const fn new(per_cpu_counter: &'static CpuLocal<AtomicIsize>) -> Self {
        Self { per_cpu_counter }
    }

    /// Adds `a` to the counter on the given CPU.
    pub fn add(&self, on_cpu: CpuId, a: usize) {
        let slot = self.per_cpu_counter.get_on_cpu(on_cpu);
        // The cast reinterprets the bits and the atomic add wraps on
        // overflow.
        let delta = a as isize;
        slot.fetch_add(delta, Ordering::Relaxed);
    }

    /// Subtracts `a` from the counter on the given CPU.
    pub fn sub(&self, on_cpu: CpuId, a: usize) {
        let slot = self.per_cpu_counter.get_on_cpu(on_cpu);
        // The cast reinterprets the bits and the atomic subtract wraps on
        // overflow.
        let delta = a as isize;
        slot.fetch_sub(delta, Ordering::Relaxed);
    }

    /// Gets the total counter value.
    ///
    /// The per-CPU values are read one by one, so the result may be
    /// inaccurate while other CPUs are updating the counter. A negative sum
    /// is reported as zero.
    pub fn get(&self) -> usize {
        let sum = all_cpus()
            .map(|cpu| self.per_cpu_counter.get_on_cpu(cpu).load(Ordering::Relaxed))
            .fold(0isize, isize::wrapping_add);

        // After clamping to zero the value is non-negative, so the cast is
        // lossless.
        sum.max(0) as usize
    }
}
#[cfg(ktest)]
mod test {
    use ostd::{cpu::PinCurrentCpu, prelude::*, trap};

    /// Checks that additions and subtractions made on the current CPU are
    /// reflected in the summed value.
    #[ktest]
    fn test_per_cpu_counter() {
        fast_smp_counter! {
            /// The total size of free memory.
            pub static FREE_SIZE_COUNTER: usize;
        }

        // The guard is kept alive for the whole test body.
        let irq_guard = trap::disable_local();
        let this_cpu = irq_guard.current_cpu();

        FREE_SIZE_COUNTER.add(this_cpu, 10);
        assert_eq!(FREE_SIZE_COUNTER.get(), 10);

        FREE_SIZE_COUNTER.add(this_cpu, 20);
        assert_eq!(FREE_SIZE_COUNTER.get(), 30);

        FREE_SIZE_COUNTER.sub(this_cpu, 5);
        assert_eq!(FREE_SIZE_COUNTER.get(), 25);
    }
}