Add a fixed-size cache for frame allocation

Zhang Junyang 2025-02-19 10:08:37 +08:00 committed by Tate, Hongliang Tian
parent 5f05963ee5
commit 28e7c0ff1f
8 changed files with 373 additions and 141 deletions

View File

@ -0,0 +1,151 @@
// SPDX-License-Identifier: MPL-2.0
//! A fixed-size local cache for frame allocation.
use core::{alloc::Layout, cell::RefCell};
use ostd::{
cpu_local,
mm::{Paddr, PAGE_SIZE},
trap::DisabledLocalIrqGuard,
};
cpu_local! {
static CACHE: RefCell<CacheOfSizes> = RefCell::new(CacheOfSizes::new());
}
struct CacheOfSizes {
cache1: CacheArray<1, 12>,
cache2: CacheArray<2, 6>,
cache3: CacheArray<3, 6>,
cache4: CacheArray<4, 6>,
}
/// A fixed-size local cache for frame allocation.
///
/// Each cache array contains at most `COUNT` segments. Each segment contains
/// `NR_CONT_FRAMES` contiguous frames.
struct CacheArray<const NR_CONT_FRAMES: usize, const COUNT: usize> {
inner: [Option<Paddr>; COUNT],
size: usize,
}
impl<const NR_CONT_FRAMES: usize, const COUNT: usize> CacheArray<NR_CONT_FRAMES, COUNT> {
const fn new() -> Self {
Self {
inner: [const { None }; COUNT],
size: 0,
}
}
/// The size of the segments that this cache manages.
const fn segment_size() -> usize {
NR_CONT_FRAMES * PAGE_SIZE
}
/// Allocates a segment of frames.
///
/// It allocates directly from this cache whenever possible. If the cache is
/// empty, it refills the cache from the global pools first.
fn alloc(&mut self, guard: &DisabledLocalIrqGuard) -> Option<Paddr> {
if let Some(frame) = self.pop_front() {
return Some(frame);
}
let nr_to_alloc = COUNT * 2 / 3;
let allocated = super::pools::alloc(
guard,
Layout::from_size_align(nr_to_alloc * Self::segment_size(), PAGE_SIZE).unwrap(),
)?;
for i in 1..nr_to_alloc {
self.push_front(allocated + i * Self::segment_size());
}
Some(allocated)
}
/// Deallocates a segment of frames.
///
/// It may deallocate directly to this cache. If the cache is full, it will
/// deallocate to the global pool.
fn add_free_memory(&mut self, guard: &DisabledLocalIrqGuard, addr: Paddr) {
if self.push_front(addr).is_none() {
super::pools::add_free_memory(guard, addr, Self::segment_size());
let nr_to_dealloc = COUNT * 2 / 3;
for _ in 0..nr_to_dealloc {
let frame = self.pop_front().unwrap();
super::pools::add_free_memory(guard, frame, Self::segment_size());
}
};
}
fn push_front(&mut self, frame: Paddr) -> Option<()> {
if self.size == COUNT {
return None;
}
self.inner[self.size] = Some(frame);
self.size += 1;
Some(())
}
fn pop_front(&mut self) -> Option<Paddr> {
if self.size == 0 {
return None;
}
let frame = self.inner[self.size - 1].take().unwrap();
self.size -= 1;
Some(frame)
}
}
impl CacheOfSizes {
const fn new() -> Self {
Self {
cache1: CacheArray::new(),
cache2: CacheArray::new(),
cache3: CacheArray::new(),
cache4: CacheArray::new(),
}
}
}
pub(super) fn alloc(guard: &DisabledLocalIrqGuard, layout: Layout) -> Option<Paddr> {
let nr_frames = layout.size() / PAGE_SIZE;
if layout.align() > layout.size() {
return super::pools::alloc(guard, layout);
}
let cache_cell = CACHE.get_with(guard);
let mut cache = cache_cell.borrow_mut();
match nr_frames {
1 => cache.cache1.alloc(guard),
2 => cache.cache2.alloc(guard),
3 => cache.cache3.alloc(guard),
4 => cache.cache4.alloc(guard),
_ => super::pools::alloc(guard, layout),
}
}
pub(super) fn add_free_memory(guard: &DisabledLocalIrqGuard, addr: Paddr, size: usize) {
let nr_frames = size / PAGE_SIZE;
if nr_frames > 4 {
super::pools::add_free_memory(guard, addr, size);
return;
}
let cache_cell = CACHE.get_with(guard);
let mut cache = cache_cell.borrow_mut();
match nr_frames {
1 => cache.cache1.add_free_memory(guard, addr),
2 => cache.cache2.add_free_memory(guard, addr),
3 => cache.cache3.add_free_memory(guard, addr),
4 => cache.cache4.add_free_memory(guard, addr),
_ => super::pools::add_free_memory(guard, addr, size),
}
}
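The cache above is, per CPU and per segment size, a small LIFO stack of at most COUNT segment addresses that is refilled or flushed in batches of COUNT * 2 / 3, so a burst of allocations followed by a burst of deallocations does not hit the global pools on every call. The standalone sketch below models that policy outside of ostd; it is hypothetical, treats physical addresses as plain usize values, uses a Vec as a stand-in for the global pools, and pops individual addresses where the real code allocates one contiguous run and splits it:

/// A standalone model of the fixed-size cache: a LIFO stack of `COUNT`
/// segment addresses, refilled/flushed in batches of `COUNT * 2 / 3`.
struct ModelCache<const COUNT: usize> {
    slots: [Option<usize>; COUNT],
    size: usize,
}

impl<const COUNT: usize> ModelCache<COUNT> {
    fn alloc(&mut self, global: &mut Vec<usize>) -> Option<usize> {
        if self.size > 0 {
            self.size -= 1;
            return self.slots[self.size].take();
        }
        // Cache miss: pull a batch from the "global pool", return one
        // address to the caller and keep the rest cached.
        let batch = COUNT * 2 / 3;
        let first = global.pop()?;
        for _ in 1..batch {
            if let Some(addr) = global.pop() {
                self.slots[self.size] = Some(addr);
                self.size += 1;
            }
        }
        Some(first)
    }

    fn dealloc(&mut self, global: &mut Vec<usize>, addr: usize) {
        if self.size < COUNT {
            self.slots[self.size] = Some(addr);
            self.size += 1;
            return;
        }
        // Cache full: spill the incoming address plus a batch back to the pool.
        global.push(addr);
        for _ in 0..COUNT * 2 / 3 {
            self.size -= 1;
            global.push(self.slots[self.size].take().unwrap());
        }
    }
}

fn main() {
    let mut global: Vec<usize> = (0..32).map(|i| i * 0x1000).collect();
    let mut cache: ModelCache<6> = ModelCache { slots: [None; 6], size: 0 };

    // A miss pulls a batch of 4 addresses: one goes to the caller, 3 are cached.
    let a = cache.alloc(&mut global).unwrap();
    assert_eq!(cache.size, 3);

    // Fill the cache to capacity; the next free then triggers a batch flush.
    for addr in [a, 0xa000, 0xb000] {
        cache.dealloc(&mut global, addr);
    }
    assert_eq!(cache.size, 6);
    cache.dealloc(&mut global, 0xc000);
    assert_eq!(cache.size, 2); // the incoming address plus 4 cached entries went back
}

With COUNT = 6 the batch size is 4: a refill leaves three segments cached and a flush leaves two, so the very next allocation or deallocation is still served locally.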

View File

@ -15,6 +15,30 @@ pub(crate) const fn size_of_order(order: BuddyOrder) -> usize {
(1 << order) * PAGE_SIZE
}
/// Returns an order that covers at least the given size.
pub(crate) fn greater_order_of(size: usize) -> BuddyOrder {
let size = size / PAGE_SIZE;
size.next_power_of_two().trailing_zeros() as BuddyOrder
}
/// Returns an order that covers at most the given size.
pub(crate) fn lesser_order_of(size: usize) -> BuddyOrder {
let size = size / PAGE_SIZE;
(usize::BITS - size.leading_zeros() - 1) as BuddyOrder
}
/// Returns the maximum order starting from the address.
///
/// If the start address is not aligned to the order, the address/order pair
/// cannot form a buddy chunk.
///
/// # Panics
///
/// Panics if the address is not page-aligned in debug mode.
pub(crate) fn max_order_from(addr: Paddr) -> BuddyOrder {
(addr.trailing_zeros() - PAGE_SIZE.trailing_zeros()) as BuddyOrder
}
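To make these helpers concrete, here is a small self-contained check of what they compute for a 3-page request and an order-3-aligned address. It is a hypothetical sketch: PAGE_SIZE is assumed to be 4096 bytes, BuddyOrder is assumed to be a usize alias, and the function bodies are copied from above:

const PAGE_SIZE: usize = 4096; // assumed; stand-in for ostd::mm::PAGE_SIZE
type BuddyOrder = usize; // assumed alias

fn greater_order_of(size: usize) -> BuddyOrder {
    let size = size / PAGE_SIZE;
    size.next_power_of_two().trailing_zeros() as BuddyOrder
}

fn lesser_order_of(size: usize) -> BuddyOrder {
    let size = size / PAGE_SIZE;
    (usize::BITS - size.leading_zeros() - 1) as BuddyOrder
}

fn max_order_from(addr: usize) -> BuddyOrder {
    (addr.trailing_zeros() - PAGE_SIZE.trailing_zeros()) as BuddyOrder
}

fn main() {
    let size = 3 * PAGE_SIZE;
    assert_eq!(greater_order_of(size), 2); // 4 pages are needed to cover 3 pages
    assert_eq!(lesser_order_of(size), 1);  // only 2 pages fit entirely inside 3 pages
    // A chunk starting at page 8 can be at most order 3 (8 pages), since
    // that address is aligned to 8 pages but not to 16 pages.
    assert_eq!(max_order_from(8 * PAGE_SIZE), 3);
}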
/// The metadata of the head frame in a free buddy chunk.
#[derive(Debug)]
pub(crate) struct FreeHeadMeta {

View File

@ -22,11 +22,51 @@
//! [`GlobalFrameAllocator`]: ostd::mm::GlobalFrameAllocator
//! [`global_frame_allocator`]: ostd::global_frame_allocator
mod allocator;
use core::alloc::Layout;
use ostd::{
mm::{frame::GlobalFrameAllocator, Paddr},
trap,
};
mod cache;
mod chunk;
mod per_cpu_counter;
mod pools;
mod set;
#[cfg(ktest)]
mod test;
pub use allocator::{load_total_free_size, FrameAllocator};
/// Loads the total size (in bytes) of free memory in the allocator.
pub fn load_total_free_size() -> usize {
per_cpu_counter::read_total_free_size()
}
/// The global frame allocator provided by OSDK.
///
/// It is a singleton that provides frame allocation for the kernel. If
/// multiple instances of this struct are created, all the member functions
/// will eventually access the same allocator.
pub struct FrameAllocator;
impl GlobalFrameAllocator for FrameAllocator {
fn alloc(&self, layout: Layout) -> Option<Paddr> {
let guard = trap::disable_local();
let res = cache::alloc(&guard, layout);
if res.is_some() {
per_cpu_counter::sub_free_size(&guard, layout.size());
}
res
}
fn dealloc(&self, addr: Paddr, size: usize) {
self.add_free_memory(addr, size);
}
fn add_free_memory(&self, addr: Paddr, size: usize) {
let guard = trap::disable_local();
per_cpu_counter::add_free_size(&guard, size);
cache::add_free_memory(&guard, addr, size);
}
}
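For context, OSTD finds this singleton through the `global_frame_allocator` attribute referenced in the module docs above. Registration in a kernel crate would look roughly like the sketch below; the crate path and the static's name are illustrative, and only the attribute and the `FrameAllocator` type come from this commit:

use osdk_frame_allocator::FrameAllocator; // crate path illustrative

// Register the OSDK allocator as the single global frame allocator. Every
// frame allocation made through `FrameAllocOptions` is forwarded to it.
#[ostd::global_frame_allocator]
static FRAME_ALLOCATOR: FrameAllocator = FrameAllocator;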

View File

@ -0,0 +1,51 @@
// SPDX-License-Identifier: MPL-2.0
//! A per-CPU counter for the total size of free memory.
//!
//! If all CPUs are updating the same counter, it causes serious contention.
//! We address it by using per-CPU counters and summing them up when needed.
//!
//! Updating is fast and scalable, but reading is slow and inaccurate.
//!
//! If we constantly allocate on one CPU and deallocate on another CPU,
//! individual counters may wrap around. This is fine: summing them still
//! yields the correct total, although the reported value may be inconsistent
//! or surprising for a short period of time.
use core::sync::atomic::{AtomicIsize, Ordering};
use ostd::{cpu::all_cpus, cpu_local, trap::DisabledLocalIrqGuard};
cpu_local! {
static FREE_SIZE: AtomicIsize = AtomicIsize::new(0);
}
/// Adds the given size to a global total free size.
pub(super) fn add_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
FREE_SIZE
.get_with(irq_guard)
.fetch_add(size as isize, Ordering::Relaxed);
}
/// Subtracts the given size from a global total free size.
pub(super) fn sub_free_size(irq_guard: &DisabledLocalIrqGuard, size: usize) {
FREE_SIZE
.get_with(irq_guard)
.fetch_sub(size as isize, Ordering::Relaxed);
}
/// Reads the total size of free memory.
///
/// This function is not atomic and may be inaccurate since other CPUs may be
/// updating the counter while we are reading it.
pub(super) fn read_total_free_size() -> usize {
let mut total: isize = 0;
for cpu in all_cpus() {
total = total.wrapping_add(FREE_SIZE.get_on_cpu(cpu).load(Ordering::Relaxed));
}
if total < 0 {
0
} else {
total as usize
}
}
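The wrap-around remark in the module docs can be checked with a standalone sketch. It is hypothetical and uses two plain `AtomicIsize` values in place of the `cpu_local!` counters: allocations accounted on one CPU drive its counter negative while frees on another drive that counter up, yet the sum stays correct.

use core::sync::atomic::{AtomicIsize, Ordering};

fn main() {
    // Stand-ins for the per-CPU FREE_SIZE counters of two CPUs.
    let cpu0 = AtomicIsize::new(0);
    let cpu1 = AtomicIsize::new(0);

    // Start with 16 pages of free memory, all accounted on CPU 0.
    cpu0.fetch_add(16 * 4096, Ordering::Relaxed);

    // CPU 1 allocates 10 pages; its local counter goes negative.
    cpu1.fetch_sub(10 * 4096, Ordering::Relaxed);

    // CPU 0 later frees 2 of those pages; its counter grows further.
    cpu0.fetch_add(2 * 4096, Ordering::Relaxed);

    // Individual counters are meaningless on their own...
    assert_eq!(cpu1.load(Ordering::Relaxed), -10 * 4096);
    // ...but their (wrapping) sum is the correct total: 16 - 10 + 2 = 8 pages.
    let total = cpu0
        .load(Ordering::Relaxed)
        .wrapping_add(cpu1.load(Ordering::Relaxed));
    assert_eq!(total, 8 * 4096);
}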

View File

@ -8,14 +8,14 @@ use core::{
};
use ostd::{
cpu::{all_cpus, PinCurrentCpu},
cpu::PinCurrentCpu,
cpu_local,
mm::{frame::GlobalFrameAllocator, Paddr, PAGE_SIZE},
mm::Paddr,
sync::{LocalIrqDisabled, SpinLock},
trap,
trap::DisabledLocalIrqGuard,
};
use crate::chunk::{size_of_order, BuddyOrder};
use crate::chunk::{greater_order_of, lesser_order_of, max_order_from, size_of_order, BuddyOrder};
use super::set::BuddySet;
@ -50,81 +50,83 @@ const MAX_BUDDY_ORDER: BuddyOrder = 32;
/// chunks.
const MAX_LOCAL_BUDDY_ORDER: BuddyOrder = 18;
/// The global frame allocator provided by OSDK.
///
/// It is a singleton that provides frame allocation for the kernel. If
/// multiple instances of this struct are created, all the member functions
/// will eventually access the same allocator.
pub struct FrameAllocator;
pub(super) fn alloc(guard: &DisabledLocalIrqGuard, layout: Layout) -> Option<Paddr> {
let local_pool_cell = LOCAL_POOL.get_with(guard);
let mut local_pool = local_pool_cell.borrow_mut();
impl GlobalFrameAllocator for FrameAllocator {
fn alloc(&self, layout: Layout) -> Option<Paddr> {
let irq_guard = trap::disable_local();
let local_pool_cell = LOCAL_POOL.get_with(&irq_guard);
let mut local_pool = local_pool_cell.borrow_mut();
let size_order = greater_order_of(layout.size());
let align_order = greater_order_of(layout.align());
let size_order = greater_order_of(layout.size());
let align_order = greater_order_of(layout.align());
let order = size_order.max(align_order);
let mut chunk_addr = None;
let order = size_order.max(align_order);
let mut chunk_addr = None;
if order < MAX_LOCAL_BUDDY_ORDER {
chunk_addr = local_pool.alloc_chunk(order);
}
// Fall back to the global free lists if the local free lists are empty.
if chunk_addr.is_none() {
chunk_addr = alloc_from_global_pool(order);
}
// TODO: On memory pressure the global pool may not be enough. We may need
// to merge all buddy chunks from the local pools to the global pool and
// try again.
// If the alignment order is larger than the size order, we need to split
// the chunk and return the rest part back to the free lists.
if align_order > size_order {
if let Some(chunk_addr) = chunk_addr {
let addr = chunk_addr + size_of_order(size_order);
let size = size_of_order(align_order) - size_of_order(size_order);
self.add_free_memory(addr, size);
}
} else {
balancing::balance(local_pool.deref_mut());
}
LOCAL_POOL_SIZE
.get_on_cpu(irq_guard.current_cpu())
.store(local_pool.total_size(), Ordering::Relaxed);
chunk_addr
if order < MAX_LOCAL_BUDDY_ORDER {
chunk_addr = local_pool.alloc_chunk(order);
}
fn add_free_memory(&self, mut addr: Paddr, mut size: usize) {
let irq_guard = trap::disable_local();
let local_pool_cell = LOCAL_POOL.get_with(&irq_guard);
let mut local_pool = local_pool_cell.borrow_mut();
// Fall back to the global free lists if the local free lists are empty.
if chunk_addr.is_none() {
chunk_addr = alloc_from_global_pool(order);
}
// TODO: On memory pressure the global pool may not be enough. We may need
// to merge all buddy chunks from the local pools to the global pool and
// try again.
// Split the range into chunks and return them to the local free lists
// respectively.
while size > 0 {
let next_chunk_order = max_order_from(addr).min(lesser_order_of(size));
if next_chunk_order >= MAX_LOCAL_BUDDY_ORDER {
dealloc_to_global_pool(addr, next_chunk_order);
} else {
local_pool.insert_chunk(addr, next_chunk_order);
}
size -= size_of_order(next_chunk_order);
addr += size_of_order(next_chunk_order);
// If the alignment order is larger than the size order, we need to split
// the chunk and return the rest part back to the free lists.
let allocated_size = size_of_order(order);
if allocated_size > layout.size() {
if let Some(chunk_addr) = chunk_addr {
dealloc_in(
&mut local_pool,
guard,
chunk_addr + layout.size(),
allocated_size - layout.size(),
);
}
} else {
balancing::balance(local_pool.deref_mut());
LOCAL_POOL_SIZE
.get_on_cpu(irq_guard.current_cpu())
.store(local_pool.total_size(), Ordering::Relaxed);
}
LOCAL_POOL_SIZE
.get_on_cpu(guard.current_cpu())
.store(local_pool.total_size(), Ordering::Relaxed);
chunk_addr
}
pub(super) fn add_free_memory(guard: &DisabledLocalIrqGuard, addr: Paddr, size: usize) {
let local_pool_cell = LOCAL_POOL.get_with(guard);
let mut local_pool = local_pool_cell.borrow_mut();
dealloc_in(&mut local_pool, guard, addr, size);
}
fn dealloc_in(
local_pool: &mut BuddySet<MAX_LOCAL_BUDDY_ORDER>,
guard: &DisabledLocalIrqGuard,
mut addr: Paddr,
mut size: usize,
) {
// Split the range into chunks and return them to the local free lists
// respectively.
while size > 0 {
let next_chunk_order = max_order_from(addr).min(lesser_order_of(size));
if next_chunk_order >= MAX_LOCAL_BUDDY_ORDER {
dealloc_to_global_pool(addr, next_chunk_order);
} else {
local_pool.insert_chunk(addr, next_chunk_order);
}
size -= size_of_order(next_chunk_order);
addr += size_of_order(next_chunk_order);
}
balancing::balance(local_pool);
LOCAL_POOL_SIZE
.get_on_cpu(guard.current_cpu())
.store(local_pool.total_size(), Ordering::Relaxed);
}
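A concrete trace of the split in `alloc` above, with hypothetical numbers for a 3-page, page-aligned request:

// layout = Layout::from_size_align(3 * PAGE_SIZE, PAGE_SIZE).unwrap()
//   size_order  = greater_order_of(3 * PAGE_SIZE) = 2   // rounded up to 4 pages
//   align_order = greater_order_of(PAGE_SIZE)     = 0
//   order       = 2, so allocated_size = size_of_order(2) = 4 * PAGE_SIZE
// allocated_size > layout.size(), so the trailing page starting at
// chunk_addr + 3 * PAGE_SIZE is handed back to the free lists via dealloc_in().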
fn alloc_from_global_pool(order: BuddyOrder) -> Option<Paddr> {
@ -140,40 +142,6 @@ fn dealloc_to_global_pool(addr: Paddr, order: BuddyOrder) {
GLOBAL_POOL_SIZE.store(lock_guard.total_size(), Ordering::Relaxed);
}
/// Loads the total size (in bytes) of free memory in the allocator.
pub fn load_total_free_size() -> usize {
let mut total = 0;
total += GLOBAL_POOL_SIZE.load(Ordering::Relaxed);
for cpu in all_cpus() {
total += LOCAL_POOL_SIZE.get_on_cpu(cpu).load(Ordering::Relaxed);
}
total
}
/// Returns an order that covers at least the given size.
fn greater_order_of(size: usize) -> BuddyOrder {
let size = size / PAGE_SIZE;
size.next_power_of_two().trailing_zeros() as BuddyOrder
}
/// Returns an order that covers at most the given size.
fn lesser_order_of(size: usize) -> BuddyOrder {
let size = size / PAGE_SIZE;
(usize::BITS - size.leading_zeros() - 1) as BuddyOrder
}
/// Returns the maximum order starting from the address.
///
/// If the start address is not aligned to the order, the address/order pair
/// cannot form a buddy chunk.
///
/// # Panics
///
/// Panics if the address is not page-aligned in debug mode.
fn max_order_from(addr: Paddr) -> BuddyOrder {
(addr.trailing_zeros() - PAGE_SIZE.trailing_zeros()) as BuddyOrder
}
pub mod balancing {
//! Controlling the balancing between CPU-local free pools and the global free pool.

View File

@ -53,7 +53,8 @@ impl FrameAllocOptions {
/// Allocates a single frame with additional metadata.
pub fn alloc_frame_with<M: AnyFrameMeta>(&self, metadata: M) -> Result<Frame<M>> {
let single_layout = Layout::from_size_align(PAGE_SIZE, PAGE_SIZE).unwrap();
let frame = alloc_upcall(single_layout)
let frame = get_global_frame_allocator()
.alloc(single_layout)
.map(|paddr| Frame::from_unused(paddr, metadata).unwrap())
.ok_or(Error::NoMemory)?;
@ -87,7 +88,8 @@ impl FrameAllocOptions {
return Err(Error::InvalidArgs);
}
let layout = Layout::from_size_align(nframes * PAGE_SIZE, PAGE_SIZE).unwrap();
let segment = alloc_upcall(layout)
let segment = get_global_frame_allocator()
.alloc(layout)
.map(|start| {
Segment::from_unused(start..start + nframes * PAGE_SIZE, metadata_fn).unwrap()
})
@ -132,12 +134,12 @@ fn test_alloc_dealloc() {
///
/// The API mimics the standard Rust allocator API ([`GlobalAlloc`] and
/// [`global_allocator`]). However, this trait is much safer. Double free
/// or freeing in-use memory through this trait only mess up the allocator's
/// or freeing in-use memory through this trait only messes up the allocator's
/// state rather than causing undefined behavior.
///
/// Whenever OSTD or other modules need to allocate or deallocate frames via
/// [`FrameAllocOptions`], they are forwarded to the global frame allocator.
/// It is not encoraged to call the global allocator directly.
/// It is not encouraged to call the global allocator directly.
///
/// [`global_frame_allocator`]: crate::global_frame_allocator
/// [`GlobalAlloc`]: core::alloc::GlobalAlloc
@ -146,21 +148,30 @@ pub trait GlobalFrameAllocator: Sync {
///
/// The caller guarantees that `layout.size()` is aligned to [`PAGE_SIZE`].
///
/// When the allocated memory is not in use, OSTD return them by calling
/// [`GlobalFrameAllocator::add_free_memory`].
/// When any of the allocated memory is no longer in use, OSTD returns it by
/// calling [`GlobalFrameAllocator::dealloc`]. If multiple frames are
/// allocated, they may be returned in any order and over any number of calls.
fn alloc(&self, layout: Layout) -> Option<Paddr>;
/// Deallocates a contiguous range of frames.
///
/// The caller guarantees that `addr` and `size` are both aligned to
/// [`PAGE_SIZE`]. The deallocated memory must have been allocated by
/// [`GlobalFrameAllocator::alloc`]. However, if
/// [`GlobalFrameAllocator::alloc`] returns multiple frames, some of them may
/// be deallocated before others. The deallocated memory must never overlap
/// with any memory that is already deallocated or added, without being
/// allocated in between.
///
/// The deallocated memory can be uninitialized.
fn dealloc(&self, addr: Paddr, size: usize);
/// Adds a contiguous range of frames to the allocator.
///
/// The caller guarantees that `addr` and `size` are both aligned to
/// [`PAGE_SIZE`]. The added memory can be uninitialized.
/// The memory being added must never overlap with any memory that was
/// added before.
///
/// The memory being added would never overlap with any memory that is
/// already added, i.e., a frame cannot be added twice without being
/// allocated in between.
///
/// However, if [`GlobalFrameAllocator::alloc`] returns multiple frames,
/// it is possible that some of them are added back before others.
/// The added memory can be uninitialized.
fn add_free_memory(&self, addr: Paddr, size: usize);
}
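To make the trait contract concrete, here is a minimal, hypothetical implementer (deliberately not the OSDK allocator). It bump-allocates from a single region handed to `add_free_memory`, leaks frames on `dealloc`, and relies on `Paddr` being a `usize` alias; it only illustrates the shape of the three methods:

use core::alloc::Layout;
use core::sync::atomic::{AtomicUsize, Ordering};

use ostd::mm::{frame::GlobalFrameAllocator, Paddr, PAGE_SIZE};

/// A deliberately simple bump allocator: it serves frames from one region and
/// never reuses freed frames. Only the trait shape matters here.
struct BumpFrameAllocator {
    next: AtomicUsize,
    end: AtomicUsize,
}

impl GlobalFrameAllocator for BumpFrameAllocator {
    fn alloc(&self, layout: Layout) -> Option<Paddr> {
        let align = layout.align().max(PAGE_SIZE);
        loop {
            let cur = self.next.load(Ordering::Relaxed);
            let start = cur.next_multiple_of(align);
            let new_next = start.checked_add(layout.size())?;
            if new_next > self.end.load(Ordering::Relaxed) {
                return None; // Out of memory.
            }
            // Retry if another CPU advanced the bump pointer concurrently.
            if self
                .next
                .compare_exchange(cur, new_next, Ordering::Relaxed, Ordering::Relaxed)
                .is_ok()
            {
                return Some(start);
            }
        }
    }

    fn dealloc(&self, _addr: Paddr, _size: usize) {
        // A real allocator would reclaim the frames; a bump allocator leaks them.
    }

    fn add_free_memory(&self, addr: Paddr, size: usize) {
        // Accept only the first contiguous region, for simplicity.
        if self.end.load(Ordering::Relaxed) == 0 {
            self.next.store(addr, Ordering::Relaxed);
            self.end.store(addr + size, Ordering::Relaxed);
        }
    }
}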
@ -170,24 +181,11 @@ extern "Rust" {
static __GLOBAL_FRAME_ALLOCATOR_REF: &'static dyn GlobalFrameAllocator;
}
/// Directly allocates a contiguous range of frames.
fn alloc_upcall(layout: core::alloc::Layout) -> Option<Paddr> {
// SAFETY: We believe that the global frame allocator is set up correctly
// with the `global_frame_allocator` attribute. If they use safe code only
// then the up-call is safe.
unsafe { __GLOBAL_FRAME_ALLOCATOR_REF.alloc(layout) }
}
/// Up-call to add a range of frames to the global frame allocator.
///
/// It returns the frame to the allocator for further use. This should be done
/// after the metadata is released, to avoid re-allocation before the metadata
/// is reset.
pub(super) fn add_free_memory_upcall(addr: Paddr, size: usize) {
// SAFETY: We believe that the global frame allocator is set up correctly
// with the `global_frame_allocator` attribute. If they use safe code only
// then the up-call is safe.
unsafe { __GLOBAL_FRAME_ALLOCATOR_REF.add_free_memory(addr, size) }
pub(super) fn get_global_frame_allocator() -> &'static dyn GlobalFrameAllocator {
// SAFETY: The global frame allocator is set up correctly with the
// `global_frame_allocator` attribute. If they use safe code only, the
// up-call is safe.
unsafe { __GLOBAL_FRAME_ALLOCATOR_REF }
}
/// Initializes the global frame allocator.
@ -215,7 +213,7 @@ pub(crate) unsafe fn init() {
for r1 in range_difference(&(region.base()..region.end()), &range_1) {
for r2 in range_difference(&r1, &range_2) {
log::info!("Adding free frames to the allocator: {:x?}", r2);
add_free_memory_upcall(r2.start, r2.len());
get_global_frame_allocator().add_free_memory(r2.start, r2.len());
}
}
}

View File

@ -222,7 +222,7 @@ impl<M: AnyFrameMeta + ?Sized> Drop for Frame<M> {
// SAFETY: this is the last reference and is about to be dropped.
unsafe { self.slot().drop_last_in_place() };
allocator::add_free_memory_upcall(self.start_paddr(), PAGE_SIZE);
allocator::get_global_frame_allocator().dealloc(self.start_paddr(), PAGE_SIZE);
}
}
}

View File

@ -155,7 +155,7 @@ impl<M: AnyFrameMeta + ?Sized> Drop for UniqueFrame<M> {
// The slot is initialized.
unsafe { self.slot().drop_last_in_place() };
super::allocator::add_free_memory_upcall(self.start_paddr(), PAGE_SIZE);
super::allocator::get_global_frame_allocator().dealloc(self.start_paddr(), PAGE_SIZE);
}
}