Add per-CPU counters and track RSS for each Vmar

This commit is contained in:
Wang Siyuan
2025-06-03 14:05:34 +00:00
committed by Ruihan Li
parent dfd3042276
commit 58e4f45bb1
10 changed files with 422 additions and 40 deletions

View File

@ -9,6 +9,7 @@ use crate::{
},
prelude::*,
process::posix_thread::AsPosixThread,
vm::vmar::RssType,
Process,
};
@ -97,6 +98,20 @@ impl FileOps for StatusFileOps {
process.tasks().lock().as_slice().len()
)
.unwrap();
{
let vmar = process.lock_root_vmar();
let anon = vmar.unwrap().get_rss_counter(RssType::RSS_ANONPAGES) * (PAGE_SIZE / 1024);
let file = vmar.unwrap().get_rss_counter(RssType::RSS_FILEPAGES) * (PAGE_SIZE / 1024);
let rss = anon + file;
writeln!(
status_output,
"VmRSS:\t{} kB\nRssAnon:\t{} kB\nRssFile:\t{} kB",
rss, anon, file
)
.unwrap();
}
Ok(status_output.into_bytes())
}
}

View File

@ -2,6 +2,7 @@
mod iovec;
pub mod net;
pub mod per_cpu_counter;
pub mod random;
pub mod ring_buffer;

View File

@ -0,0 +1,54 @@
// SPDX-License-Identifier: MPL-2.0
//! A fast and scalable per-CPU counter.
use core::sync::atomic::{AtomicIsize, Ordering};
use osdk_heap_allocator::{alloc_cpu_local, CpuLocalBox};
use ostd::cpu::{all_cpus, CpuId};
/// A fast, SMP-friendly, dynamically allocated, per-CPU counter.
///
/// Updating it is fast and scalable, but reading is slow and inaccurate:
/// `add` touches only one CPU's slot, while `get` must sum all slots
/// while other CPUs may still be updating them.
///
// TODO: Reuse the code from [`osdk_frame_allocator::fast_smp_counter`],
// which may need to extract that code into a separate crate that needs
// to be published. Do that after we somehow stabilize the per-CPU counter.
pub struct PerCpuCounter {
    // One slot per CPU; the logical value is the wrapping sum of all
    // slots, which may transiently appear negative under races.
    per_cpu_counter: CpuLocalBox<AtomicIsize>,
}

impl PerCpuCounter {
    /// Creates a new, zero-valued per-CPU counter.
    pub fn new() -> Self {
        Self {
            per_cpu_counter: alloc_cpu_local(|_| AtomicIsize::new(0)).unwrap(),
        }
    }

    /// Adds `increment` to the counter on the given CPU.
    ///
    /// `increment` may be negative, so the counter can also be decremented.
    pub fn add(&self, on_cpu: CpuId, increment: isize) {
        self.per_cpu_counter
            .get_on_cpu(on_cpu)
            .fetch_add(increment, Ordering::Relaxed);
    }

    /// Gets the total counter value by summing every CPU's slot.
    ///
    /// This function may be inaccurate since other CPUs may be
    /// updating the counter concurrently.
    pub fn get(&self) -> usize {
        let total = all_cpus().fold(0isize, |acc, cpu| {
            acc.wrapping_add(self.per_cpu_counter.get_on_cpu(cpu).load(Ordering::Relaxed))
        });
        // The counter is logically unsigned, but an observer may see a
        // negative sum due to races between concurrent updates. Clamp to
        // zero if that happens.
        total.max(0) as usize
    }
}

impl Default for PerCpuCounter {
    fn default() -> Self {
        Self::new()
    }
}

View File

@ -7,11 +7,12 @@ mod interval_set;
mod static_cap;
pub mod vm_mapping;
use core::{num::NonZeroUsize, ops::Range};
use core::{array, num::NonZeroUsize, ops::Range};
use align_ext::AlignExt;
use aster_rights::Rights;
use ostd::{
cpu::CpuId,
mm::{tlb::TlbFlushOp, PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
task::disable_preempt,
};
@ -24,6 +25,7 @@ use crate::{
prelude::*,
process::{Process, ResourceType},
thread::exception::PageFaultInfo,
util::per_cpu_counter::PerCpuCounter,
vm::{
perms::VmPerms,
vmo::{Vmo, VmoRightsOp},
@ -102,6 +104,8 @@ pub(super) struct Vmar_ {
size: usize,
/// The attached `VmSpace`
vm_space: Arc<VmSpace>,
/// The RSS counters.
rss_counters: [PerCpuCounter; NUM_RSS_COUNTERS],
}
struct VmarInner {
@ -195,6 +199,7 @@ impl VmarInner {
vm_space: &VmSpace,
offset: Vaddr,
size: usize,
rss_delta: &mut RssDelta,
) -> Result<Range<Vaddr>> {
let range = offset..offset + size;
let mut mappings_to_remove = Vec::new();
@ -215,7 +220,7 @@ impl VmarInner {
self.insert(right);
}
taken.unmap(vm_space)?;
rss_delta.add(taken.rss_type(), -(taken.unmap(vm_space)? as isize));
}
Ok(offset..(offset + size))
@ -280,19 +285,32 @@ impl Interval<usize> for Arc<Vmar_> {
}
impl Vmar_ {
fn new(inner: VmarInner, vm_space: Arc<VmSpace>, base: usize, size: usize) -> Arc<Self> {
fn new(
inner: VmarInner,
vm_space: Arc<VmSpace>,
base: usize,
size: usize,
rss_counters: [PerCpuCounter; NUM_RSS_COUNTERS],
) -> Arc<Self> {
Arc::new(Vmar_ {
inner: RwMutex::new(inner),
base,
size,
vm_space,
rss_counters,
})
}
fn new_root() -> Arc<Self> {
let vmar_inner = VmarInner::new();
let vm_space = VmSpace::new();
Vmar_::new(vmar_inner, Arc::new(vm_space), 0, ROOT_VMAR_CAP_ADDR)
Vmar_::new(
vmar_inner,
Arc::new(vm_space),
0,
ROOT_VMAR_CAP_ADDR,
array::from_fn(|_| PerCpuCounter::new()),
)
}
fn protect(&self, perms: VmPerms, range: Range<usize>) -> Result<()> {
@ -350,7 +368,10 @@ impl Vmar_ {
if let Some(vm_mapping) = inner.vm_mappings.find_one(&address) {
debug_assert!(vm_mapping.range().contains(&address));
return vm_mapping.handle_page_fault(&self.vm_space, page_fault_info);
let rss_increment = vm_mapping.handle_page_fault(&self.vm_space, page_fault_info)?;
self.add_rss_counter(vm_mapping.rss_type(), rss_increment as isize);
return Ok(());
}
return_errno_with_message!(Errno::EACCES, "page fault addr is not in current vmar");
@ -376,7 +397,14 @@ impl Vmar_ {
pub fn remove_mapping(&self, range: Range<usize>) -> Result<()> {
let mut inner = self.inner.write();
inner.alloc_free_region_exact_truncate(&self.vm_space, range.start, range.len())?;
let mut rss_delta = RssDelta::new();
inner.alloc_free_region_exact_truncate(
&self.vm_space,
range.start,
range.len(),
&mut rss_delta,
)?;
self.add_rss_delta(rss_delta);
Ok(())
}
@ -426,7 +454,13 @@ impl Vmar_ {
let new_vmar_ = {
let vmar_inner = VmarInner::new();
let new_space = VmSpace::new();
Vmar_::new(vmar_inner, Arc::new(new_space), self.base, self.size)
Vmar_::new(
vmar_inner,
Arc::new(new_space),
self.base,
self.size,
array::from_fn(|_| PerCpuCounter::new()),
)
};
{
@ -440,6 +474,8 @@ impl Vmar_ {
let mut new_cursor = new_vmspace.cursor_mut(&preempt_guard, &range).unwrap();
let cur_vmspace = self.vm_space();
let mut cur_cursor = cur_vmspace.cursor_mut(&preempt_guard, &range).unwrap();
let mut rss_delta = RssDelta::new();
for vm_mapping in inner.vm_mappings.iter() {
let base = vm_mapping.map_to_addr();
@ -453,8 +489,12 @@ impl Vmar_ {
let mut op = |page: &mut PageProperty| {
page.flags -= PageFlags::W;
};
new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
let num_mapped =
new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
rss_delta.add(vm_mapping.rss_type(), num_mapped as isize);
}
new_vmar_.add_rss_delta(rss_delta);
cur_cursor.flusher().issue_tlb_flush(TlbFlushOp::All);
cur_cursor.flusher().dispatch_tlb_flush();
cur_cursor.flusher().sync_tlb_flush();
@ -462,6 +502,24 @@ impl Vmar_ {
Ok(new_vmar_)
}
/// Returns the current value of the RSS counter of the given type.
///
/// The value may be slightly inaccurate because other CPUs can be
/// updating the underlying per-CPU counters concurrently.
pub fn get_rss_counter(&self, rss_type: RssType) -> usize {
self.rss_counters[rss_type as usize].get()
}

/// Adds `val` (possibly negative) to the RSS counter of the given type,
/// charging the update to the CPU this task currently appears to run on.
fn add_rss_counter(&self, rss_type: RssType, val: isize) {
// There are races but updating a remote counter won't cause any problems.
let cpu_id = CpuId::current_racy();
self.rss_counters[rss_type as usize].add(cpu_id, val);
}

/// Applies an accumulated `RssDelta` to this VMAR's per-type RSS counters.
fn add_rss_delta(&self, rss_delta: RssDelta) {
for i in 0..NUM_RSS_COUNTERS {
let rss_type = RssType::try_from(i).unwrap();
let delta = rss_delta.get(rss_type);
self.add_rss_counter(rss_type, delta);
}
}
}
impl<R> Vmar<R> {
@ -476,6 +534,11 @@ impl<R> Vmar<R> {
pub fn size(&self) -> usize {
self.0.size
}
/// Returns the current RSS count for the given RSS type.
///
/// The returned value may be slightly stale: reads sum per-CPU counters
/// that other CPUs may be updating concurrently.
pub fn get_rss_counter(&self, rss_type: RssType) -> usize {
self.0.get_rss_counter(rss_type)
}
}
/// Options for creating a new mapping. The mapping is not allowed to overlap
@ -659,7 +722,14 @@ where
Errno::EINVAL,
"offset cannot be None since can overwrite is set",
))?;
inner.alloc_free_region_exact_truncate(parent.vm_space(), offset, map_size)?;
let mut rss_delta = RssDelta::new();
inner.alloc_free_region_exact_truncate(
parent.vm_space(),
offset,
map_size,
&mut rss_delta,
)?;
parent.0.add_rss_delta(rss_delta);
offset
} else if let Some(offset) = offset {
inner.alloc_free_region_exact(offset, map_size)?;
@ -735,3 +805,32 @@ pub fn get_intersected_range(range1: &Range<usize>, range2: &Range<usize>) -> Ra
debug_assert!(is_intersected(range1, range2));
range1.start.max(range2.start)..range1.end.min(range2.end)
}
/// The type representing categories of Resident Set Size (RSS).
///
/// See <https://github.com/torvalds/linux/blob/fac04efc5c793dccbd07e2d59af9f90b7fc0dca4/include/linux/mm_types_task.h#L26..L32>
#[repr(usize)]
#[expect(non_camel_case_types)]
#[derive(Debug, Clone, Copy, TryFromInt)]
pub enum RssType {
    /// Pages of file-backed mappings (the mapping has a backing VMO).
    RSS_FILEPAGES = 0,
    /// Pages of anonymous mappings (no backing VMO).
    RSS_ANONPAGES = 1,
}

/// The number of `RssType` variants (and thus of per-VMAR RSS counters).
const NUM_RSS_COUNTERS: usize = 2;

/// A batch of pending RSS changes.
///
/// Deltas are accumulated locally (e.g., across an unmap loop) and then
/// applied to a VMAR's per-CPU RSS counters in a single pass.
struct RssDelta([isize; NUM_RSS_COUNTERS]);

impl RssDelta {
    /// Creates an all-zero delta.
    fn new() -> Self {
        Self([0; NUM_RSS_COUNTERS])
    }

    /// Accumulates `increment` (possibly negative) for the given RSS type.
    fn add(&mut self, rss_type: RssType, increment: isize) {
        self.0[rss_type as usize] += increment;
    }

    /// Returns the accumulated delta for the given RSS type.
    fn get(&self, rss_type: RssType) -> isize {
        self.0[rss_type as usize]
    }
}

View File

@ -15,7 +15,7 @@ use ostd::{
task::disable_preempt,
};
use super::interval_set::Interval;
use super::{interval_set::Interval, RssType};
use crate::{
prelude::*,
thread::exception::PageFaultInfo,
@ -124,16 +124,26 @@ impl VmMapping {
pub fn perms(&self) -> VmPerms {
self.perms
}
/// Returns the mapping's RSS type.
///
/// A mapping with no backing VMO is anonymous memory (`RSS_ANONPAGES`);
/// a mapping with a VMO is counted as file-backed (`RSS_FILEPAGES`).
pub fn rss_type(&self) -> RssType {
if self.vmo.is_none() {
RssType::RSS_ANONPAGES
} else {
RssType::RSS_FILEPAGES
}
}
}
/****************************** Page faults **********************************/
impl VmMapping {
/// Handles a page fault and returns the number of pages mapped.
pub fn handle_page_fault(
&self,
vm_space: &VmSpace,
page_fault_info: &PageFaultInfo,
) -> Result<()> {
) -> Result<usize> {
if !self.perms.contains(page_fault_info.required_perms) {
trace!(
"self.perms {:?}, page_fault_info.required_perms {:?}, self.range {:?}",
@ -150,7 +160,7 @@ impl VmMapping {
let is_write = page_fault_info.required_perms.contains(VmPerms::WRITE);
if !is_write && self.vmo.is_some() && self.handle_page_faults_around {
let res = self.handle_page_faults_around(vm_space, address);
let (rss_increment, res) = self.handle_page_faults_around(vm_space, address);
// Errors caused by the "around" pages should be ignored, so here we
// only return the error if the faulting page is still not mapped.
@ -161,13 +171,14 @@ impl VmMapping {
&(page_aligned_addr..page_aligned_addr + PAGE_SIZE),
)?;
if let VmItem::Mapped { .. } = cursor.query().unwrap() {
return Ok(());
return Ok(rss_increment);
}
}
return res;
return res.map(|_| rss_increment);
}
let mut rss_increment: usize = 0;
'retry: loop {
let preempt_guard = disable_preempt();
let mut cursor = vm_space.cursor_mut(
@ -185,14 +196,14 @@ impl VmMapping {
// The page fault is already handled maybe by other threads.
// Just flush the TLB and return.
TlbFlushOp::Address(va).perform_on_current();
return Ok(());
return Ok(0);
}
assert!(is_write);
// Perform COW if it is a write access to a shared mapping.
// Skip if the page fault is already handled.
if prop.flags.contains(PageFlags::W) {
return Ok(());
return Ok(0);
}
// If the forked child or parent immediately unmaps the page after
@ -212,6 +223,7 @@ impl VmMapping {
let new_frame = duplicate_frame(&frame)?;
prop.flags |= new_flags;
cursor.map(new_frame.into(), prop);
rss_increment += 1;
}
cursor.flusher().sync_tlb_flush();
}
@ -248,11 +260,13 @@ impl VmMapping {
let map_prop = PageProperty::new_user(page_flags, CachePolicy::Writeback);
cursor.map(frame, map_prop);
rss_increment += 1;
}
}
break 'retry;
}
Ok(())
Ok(rss_increment)
}
fn prepare_page(
@ -285,7 +299,15 @@ impl VmMapping {
}
}
fn handle_page_faults_around(&self, vm_space: &VmSpace, page_fault_addr: Vaddr) -> Result<()> {
/// Handles a page fault and maps additional surrounding pages.
///
/// Returns a tuple `(mapped_pages, result)`, where `mapped_pages` is the number
/// of pages mapped successfully, even if the `result` is some error.
fn handle_page_faults_around(
&self,
vm_space: &VmSpace,
page_fault_addr: Vaddr,
) -> (usize, Result<()>) {
const SURROUNDING_PAGE_NUM: usize = 16;
const SURROUNDING_PAGE_ADDR_MASK: usize = !(SURROUNDING_PAGE_NUM * PAGE_SIZE - 1);
@ -300,9 +322,19 @@ impl VmMapping {
);
let vm_perms = self.perms - VmPerms::WRITE;
let mut rss_increment: usize = 0;
'retry: loop {
let preempt_guard = disable_preempt();
let mut cursor = vm_space.cursor_mut(&preempt_guard, &(start_addr..end_addr))?;
let mut cursor = match vm_space.cursor_mut(&preempt_guard, &(start_addr..end_addr)) {
Ok(cursor) => cursor,
Err(e) => {
return (rss_increment, Err(e.into()));
}
};
let rss_increment_ref = &mut rss_increment;
let operate =
move |commit_fn: &mut dyn FnMut()
-> core::result::Result<UFrame, VmoCommitError>| {
@ -314,6 +346,7 @@ impl VmMapping {
let page_prop = PageProperty::new_user(page_flags, CachePolicy::Writeback);
let frame = commit_fn()?;
cursor.map(frame, page_prop);
*rss_increment_ref += 1;
} else {
let next_addr = cursor.virt_addr() + PAGE_SIZE;
if next_addr < end_addr {
@ -326,14 +359,16 @@ impl VmMapping {
let start_offset = start_addr - self.map_to_addr;
let end_offset = end_addr - self.map_to_addr;
match vmo.try_operate_on_range(&(start_offset..end_offset), operate) {
Ok(_) => return Ok(()),
Ok(_) => return (rss_increment, Ok(())),
Err(VmoCommitError::NeedIo(index)) => {
drop(preempt_guard);
vmo.commit_on(index, CommitFlags::empty())?;
if let Err(e) = vmo.commit_on(index, CommitFlags::empty()) {
return (rss_increment, Err(e));
}
start_addr = index * PAGE_SIZE + self.map_to_addr;
continue 'retry;
}
Err(VmoCommitError::Err(e)) => return Err(e),
Err(VmoCommitError::Err(e)) => return (rss_increment, Err(e)),
}
}
}
@ -429,17 +464,18 @@ impl VmMapping {
/************************** VM Space operations ******************************/
impl VmMapping {
/// Unmaps the mapping from the VM space.
pub(super) fn unmap(self, vm_space: &VmSpace) -> Result<()> {
/// Unmaps the mapping from the VM space,
/// and returns the number of unmapped pages.
pub(super) fn unmap(self, vm_space: &VmSpace) -> Result<usize> {
let preempt_guard = disable_preempt();
let range = self.range();
let mut cursor = vm_space.cursor_mut(&preempt_guard, &range)?;
cursor.unmap(range.len());
let num_unmapped = cursor.unmap(range.len());
cursor.flusher().dispatch_tlb_flush();
cursor.flusher().sync_tlb_flush();
Ok(())
Ok(num_unmapped)
}
/// Change the perms of the mapping.