From 58e4f45bb17dff6ea0dbe266f9045155ee54edb8 Mon Sep 17 00:00:00 2001 From: Wang Siyuan Date: Tue, 3 Jun 2025 14:05:34 +0000 Subject: [PATCH] Add per-CPU counters and track RSS for each `Vmar` --- kernel/src/fs/procfs/pid/status.rs | 15 +++ kernel/src/util/mod.rs | 1 + kernel/src/util/per_cpu_counter.rs | 54 ++++++++ kernel/src/vm/vmar/mod.rs | 117 +++++++++++++++-- kernel/src/vm/vmar/vm_mapping.rs | 70 ++++++++--- ostd/src/mm/page_table/cursor/locking.rs | 13 +- ostd/src/mm/page_table/cursor/mod.rs | 15 ++- ostd/src/mm/vm_space.rs | 22 +++- test/apps/mmap/mmap_vmrss.c | 152 +++++++++++++++++++++++ test/apps/scripts/process.sh | 3 +- 10 files changed, 422 insertions(+), 40 deletions(-) create mode 100644 kernel/src/util/per_cpu_counter.rs create mode 100644 test/apps/mmap/mmap_vmrss.c diff --git a/kernel/src/fs/procfs/pid/status.rs b/kernel/src/fs/procfs/pid/status.rs index 83c87b06..12ca3fcd 100644 --- a/kernel/src/fs/procfs/pid/status.rs +++ b/kernel/src/fs/procfs/pid/status.rs @@ -9,6 +9,7 @@ use crate::{ }, prelude::*, process::posix_thread::AsPosixThread, + vm::vmar::RssType, Process, }; @@ -97,6 +98,20 @@ impl FileOps for StatusFileOps { process.tasks().lock().as_slice().len() ) .unwrap(); + + { + let vmar = process.lock_root_vmar(); + let anon = vmar.unwrap().get_rss_counter(RssType::RSS_ANONPAGES) * (PAGE_SIZE / 1024); + let file = vmar.unwrap().get_rss_counter(RssType::RSS_FILEPAGES) * (PAGE_SIZE / 1024); + let rss = anon + file; + writeln!( + status_output, + "VmRSS:\t{} kB\nRssAnon:\t{} kB\nRssFile:\t{} kB", + rss, anon, file + ) + .unwrap(); + } + Ok(status_output.into_bytes()) } } diff --git a/kernel/src/util/mod.rs b/kernel/src/util/mod.rs index cbe498c9..8520b9a9 100644 --- a/kernel/src/util/mod.rs +++ b/kernel/src/util/mod.rs @@ -2,6 +2,7 @@ mod iovec; pub mod net; +pub mod per_cpu_counter; pub mod random; pub mod ring_buffer; diff --git a/kernel/src/util/per_cpu_counter.rs b/kernel/src/util/per_cpu_counter.rs new file mode 100644 index 
00000000..af264b29 --- /dev/null +++ b/kernel/src/util/per_cpu_counter.rs @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! A fast and scalable per-CPU counter. + +use core::sync::atomic::{AtomicIsize, Ordering}; + +use osdk_heap_allocator::{alloc_cpu_local, CpuLocalBox}; +use ostd::cpu::{all_cpus, CpuId}; + +/// A fast, SMP-friendly, dynamically allocated, per-CPU counter. +/// +/// Updating it is fast and scalable, but reading is slow and inaccurate. +/// +// TODO: Reuse the code from [`osdk_frame_allocator::fast_smp_counter`], +// which may need to extract that code into a separate crate that needs +// to be published. Do that after we somehow stabilize the per-CPU counter. +pub struct PerCpuCounter { + per_cpu_counter: CpuLocalBox, +} + +impl PerCpuCounter { + /// Creates a new, zero-valued per-CPU counter. + pub fn new() -> Self { + Self { + per_cpu_counter: alloc_cpu_local(|_| AtomicIsize::new(0)).unwrap(), + } + } + + /// Adds `increment` to the counter on the given CPU. + pub fn add(&self, on_cpu: CpuId, increment: isize) { + self.per_cpu_counter + .get_on_cpu(on_cpu) + .fetch_add(increment, Ordering::Relaxed); + } + + /// Gets the total counter value. + /// + /// This function may be inaccurate since other CPUs may be + /// updating the counter. + pub fn get(&self) -> usize { + let mut total: isize = 0; + for cpu in all_cpus() { + total = + total.wrapping_add(self.per_cpu_counter.get_on_cpu(cpu).load(Ordering::Relaxed)); + } + if total < 0 { + // The counter is unsigned. But an observer may see a negative + // value due to race conditions. We return zero if it happens. 
+ 0 + } else { + total as usize + } + } +} diff --git a/kernel/src/vm/vmar/mod.rs b/kernel/src/vm/vmar/mod.rs index da086ca0..712ecdc2 100644 --- a/kernel/src/vm/vmar/mod.rs +++ b/kernel/src/vm/vmar/mod.rs @@ -7,11 +7,12 @@ mod interval_set; mod static_cap; pub mod vm_mapping; -use core::{num::NonZeroUsize, ops::Range}; +use core::{array, num::NonZeroUsize, ops::Range}; use align_ext::AlignExt; use aster_rights::Rights; use ostd::{ + cpu::CpuId, mm::{tlb::TlbFlushOp, PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR}, task::disable_preempt, }; @@ -24,6 +25,7 @@ use crate::{ prelude::*, process::{Process, ResourceType}, thread::exception::PageFaultInfo, + util::per_cpu_counter::PerCpuCounter, vm::{ perms::VmPerms, vmo::{Vmo, VmoRightsOp}, @@ -102,6 +104,8 @@ pub(super) struct Vmar_ { size: usize, /// The attached `VmSpace` vm_space: Arc, + /// The RSS counters. + rss_counters: [PerCpuCounter; NUM_RSS_COUNTERS], } struct VmarInner { @@ -195,6 +199,7 @@ impl VmarInner { vm_space: &VmSpace, offset: Vaddr, size: usize, + rss_delta: &mut RssDelta, ) -> Result> { let range = offset..offset + size; let mut mappings_to_remove = Vec::new(); @@ -215,7 +220,7 @@ impl VmarInner { self.insert(right); } - taken.unmap(vm_space)?; + rss_delta.add(taken.rss_type(), -(taken.unmap(vm_space)? 
as isize)); } Ok(offset..(offset + size)) @@ -280,19 +285,32 @@ impl Interval for Arc { } impl Vmar_ { - fn new(inner: VmarInner, vm_space: Arc, base: usize, size: usize) -> Arc { + fn new( + inner: VmarInner, + vm_space: Arc, + base: usize, + size: usize, + rss_counters: [PerCpuCounter; NUM_RSS_COUNTERS], + ) -> Arc { Arc::new(Vmar_ { inner: RwMutex::new(inner), base, size, vm_space, + rss_counters, }) } fn new_root() -> Arc { let vmar_inner = VmarInner::new(); let vm_space = VmSpace::new(); - Vmar_::new(vmar_inner, Arc::new(vm_space), 0, ROOT_VMAR_CAP_ADDR) + Vmar_::new( + vmar_inner, + Arc::new(vm_space), + 0, + ROOT_VMAR_CAP_ADDR, + array::from_fn(|_| PerCpuCounter::new()), + ) } fn protect(&self, perms: VmPerms, range: Range) -> Result<()> { @@ -350,7 +368,10 @@ impl Vmar_ { if let Some(vm_mapping) = inner.vm_mappings.find_one(&address) { debug_assert!(vm_mapping.range().contains(&address)); - return vm_mapping.handle_page_fault(&self.vm_space, page_fault_info); + + let rss_increment = vm_mapping.handle_page_fault(&self.vm_space, page_fault_info)?; + self.add_rss_counter(vm_mapping.rss_type(), rss_increment as isize); + return Ok(()); } return_errno_with_message!(Errno::EACCES, "page fault addr is not in current vmar"); @@ -376,7 +397,14 @@ impl Vmar_ { pub fn remove_mapping(&self, range: Range) -> Result<()> { let mut inner = self.inner.write(); - inner.alloc_free_region_exact_truncate(&self.vm_space, range.start, range.len())?; + let mut rss_delta = RssDelta::new(); + inner.alloc_free_region_exact_truncate( + &self.vm_space, + range.start, + range.len(), + &mut rss_delta, + )?; + self.add_rss_delta(rss_delta); Ok(()) } @@ -426,7 +454,13 @@ impl Vmar_ { let new_vmar_ = { let vmar_inner = VmarInner::new(); let new_space = VmSpace::new(); - Vmar_::new(vmar_inner, Arc::new(new_space), self.base, self.size) + Vmar_::new( + vmar_inner, + Arc::new(new_space), + self.base, + self.size, + array::from_fn(|_| PerCpuCounter::new()), + ) }; { @@ -440,6 +474,8 @@ impl 
Vmar_ { let mut new_cursor = new_vmspace.cursor_mut(&preempt_guard, &range).unwrap(); let cur_vmspace = self.vm_space(); let mut cur_cursor = cur_vmspace.cursor_mut(&preempt_guard, &range).unwrap(); + let mut rss_delta = RssDelta::new(); + for vm_mapping in inner.vm_mappings.iter() { let base = vm_mapping.map_to_addr(); @@ -453,8 +489,12 @@ impl Vmar_ { let mut op = |page: &mut PageProperty| { page.flags -= PageFlags::W; }; - new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op); + let num_mapped = + new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op); + rss_delta.add(vm_mapping.rss_type(), num_mapped as isize); } + new_vmar_.add_rss_delta(rss_delta); + cur_cursor.flusher().issue_tlb_flush(TlbFlushOp::All); cur_cursor.flusher().dispatch_tlb_flush(); cur_cursor.flusher().sync_tlb_flush(); @@ -462,6 +502,24 @@ impl Vmar_ { Ok(new_vmar_) } + + pub fn get_rss_counter(&self, rss_type: RssType) -> usize { + self.rss_counters[rss_type as usize].get() + } + + fn add_rss_counter(&self, rss_type: RssType, val: isize) { + // There are races but updating a remote counter won't cause any problems. + let cpu_id = CpuId::current_racy(); + self.rss_counters[rss_type as usize].add(cpu_id, val); + } + + fn add_rss_delta(&self, rss_delta: RssDelta) { + for i in 0..NUM_RSS_COUNTERS { + let rss_type = RssType::try_from(i).unwrap(); + let delta = rss_delta.get(rss_type); + self.add_rss_counter(rss_type, delta); + } + } } impl Vmar { @@ -476,6 +534,11 @@ impl Vmar { pub fn size(&self) -> usize { self.0.size } + + /// Returns the current RSS count for the given RSS type. + pub fn get_rss_counter(&self, rss_type: RssType) -> usize { + self.0.get_rss_counter(rss_type) + } } /// Options for creating a new mapping. 
The mapping is not allowed to overlap @@ -659,7 +722,14 @@ where Errno::EINVAL, "offset cannot be None since can overwrite is set", ))?; - inner.alloc_free_region_exact_truncate(parent.vm_space(), offset, map_size)?; + let mut rss_delta = RssDelta::new(); + inner.alloc_free_region_exact_truncate( + parent.vm_space(), + offset, + map_size, + &mut rss_delta, + )?; + parent.0.add_rss_delta(rss_delta); offset } else if let Some(offset) = offset { inner.alloc_free_region_exact(offset, map_size)?; @@ -735,3 +805,32 @@ pub fn get_intersected_range(range1: &Range, range2: &Range) -> Ra debug_assert!(is_intersected(range1, range2)); range1.start.max(range2.start)..range1.end.min(range2.end) } + +/// The type representing categories of Resident Set Size (RSS). +/// +/// See the `MM_FILEPAGES`/`MM_ANONPAGES` counter indices in Linux's `include/linux/mm_types_task.h`. +#[repr(usize)] +#[expect(non_camel_case_types)] +#[derive(Debug, Clone, Copy, TryFromInt)] +pub enum RssType { + RSS_FILEPAGES = 0, + RSS_ANONPAGES = 1, +} + +const NUM_RSS_COUNTERS: usize = 2; + +struct RssDelta([isize; NUM_RSS_COUNTERS]); + +impl RssDelta { + pub(self) fn new() -> Self { + Self([0; NUM_RSS_COUNTERS]) + } + + pub(self) fn add(&mut self, rss_type: RssType, increment: isize) { + self.0[rss_type as usize] += increment; + } + + pub(self) fn get(&self, rss_type: RssType) -> isize { + self.0[rss_type as usize] + } +} diff --git a/kernel/src/vm/vmar/vm_mapping.rs b/kernel/src/vm/vmar/vm_mapping.rs index 82ac1a94..369ee6e8 100644 --- a/kernel/src/vm/vmar/vm_mapping.rs +++ b/kernel/src/vm/vmar/vm_mapping.rs @@ -15,7 +15,7 @@ use ostd::{ task::disable_preempt, }; -use super::interval_set::Interval; +use super::{interval_set::Interval, RssType}; use crate::{ prelude::*, thread::exception::PageFaultInfo, @@ -124,16 +124,26 @@ impl VmMapping { pub fn perms(&self) -> VmPerms { self.perms } + + /// Returns the mapping's RSS type. 
+ pub fn rss_type(&self) -> RssType { + if self.vmo.is_none() { + RssType::RSS_ANONPAGES + } else { + RssType::RSS_FILEPAGES + } + } } /****************************** Page faults **********************************/ impl VmMapping { + /// Handles a page fault and returns the number of pages mapped. pub fn handle_page_fault( &self, vm_space: &VmSpace, page_fault_info: &PageFaultInfo, - ) -> Result<()> { + ) -> Result { if !self.perms.contains(page_fault_info.required_perms) { trace!( "self.perms {:?}, page_fault_info.required_perms {:?}, self.range {:?}", @@ -150,7 +160,7 @@ impl VmMapping { let is_write = page_fault_info.required_perms.contains(VmPerms::WRITE); if !is_write && self.vmo.is_some() && self.handle_page_faults_around { - let res = self.handle_page_faults_around(vm_space, address); + let (rss_increment, res) = self.handle_page_faults_around(vm_space, address); // Errors caused by the "around" pages should be ignored, so here we // only return the error if the faulting page is still not mapped. @@ -161,13 +171,14 @@ impl VmMapping { &(page_aligned_addr..page_aligned_addr + PAGE_SIZE), )?; if let VmItem::Mapped { .. } = cursor.query().unwrap() { - return Ok(()); + return Ok(rss_increment); } } - return res; + return res.map(|_| rss_increment); } + let mut rss_increment: usize = 0; 'retry: loop { let preempt_guard = disable_preempt(); let mut cursor = vm_space.cursor_mut( @@ -185,14 +196,14 @@ impl VmMapping { // The page fault is already handled maybe by other threads. // Just flush the TLB and return. TlbFlushOp::Address(va).perform_on_current(); - return Ok(()); + return Ok(0); } assert!(is_write); // Perform COW if it is a write access to a shared mapping. // Skip if the page fault is already handled. 
if prop.flags.contains(PageFlags::W) { - return Ok(()); + return Ok(0); } // If the forked child or parent immediately unmaps the page after @@ -212,6 +223,7 @@ impl VmMapping { let new_frame = duplicate_frame(&frame)?; prop.flags |= new_flags; cursor.map(new_frame.into(), prop); + rss_increment += 1; } cursor.flusher().sync_tlb_flush(); } @@ -248,11 +260,13 @@ impl VmMapping { let map_prop = PageProperty::new_user(page_flags, CachePolicy::Writeback); cursor.map(frame, map_prop); + rss_increment += 1; } } break 'retry; } - Ok(()) + + Ok(rss_increment) } fn prepare_page( @@ -285,7 +299,15 @@ impl VmMapping { } } - fn handle_page_faults_around(&self, vm_space: &VmSpace, page_fault_addr: Vaddr) -> Result<()> { + /// Handles a page fault and maps additional surrounding pages. + /// + /// Returns a tuple `(mapped_pages, result)`, where `mapped_pages` is the number + /// of pages mapped successfully, even if the `result` is some error. + fn handle_page_faults_around( + &self, + vm_space: &VmSpace, + page_fault_addr: Vaddr, + ) -> (usize, Result<()>) { const SURROUNDING_PAGE_NUM: usize = 16; const SURROUNDING_PAGE_ADDR_MASK: usize = !(SURROUNDING_PAGE_NUM * PAGE_SIZE - 1); @@ -300,9 +322,19 @@ impl VmMapping { ); let vm_perms = self.perms - VmPerms::WRITE; + let mut rss_increment: usize = 0; + 'retry: loop { let preempt_guard = disable_preempt(); - let mut cursor = vm_space.cursor_mut(&preempt_guard, &(start_addr..end_addr))?; + + let mut cursor = match vm_space.cursor_mut(&preempt_guard, &(start_addr..end_addr)) { + Ok(cursor) => cursor, + Err(e) => { + return (rss_increment, Err(e.into())); + } + }; + + let rss_increment_ref = &mut rss_increment; let operate = move |commit_fn: &mut dyn FnMut() -> core::result::Result| { @@ -314,6 +346,7 @@ impl VmMapping { let page_prop = PageProperty::new_user(page_flags, CachePolicy::Writeback); let frame = commit_fn()?; cursor.map(frame, page_prop); + *rss_increment_ref += 1; } else { let next_addr = cursor.virt_addr() + PAGE_SIZE; 
if next_addr < end_addr { @@ -326,14 +359,16 @@ impl VmMapping { let start_offset = start_addr - self.map_to_addr; let end_offset = end_addr - self.map_to_addr; match vmo.try_operate_on_range(&(start_offset..end_offset), operate) { - Ok(_) => return Ok(()), + Ok(_) => return (rss_increment, Ok(())), Err(VmoCommitError::NeedIo(index)) => { drop(preempt_guard); - vmo.commit_on(index, CommitFlags::empty())?; + if let Err(e) = vmo.commit_on(index, CommitFlags::empty()) { + return (rss_increment, Err(e)); + } start_addr = index * PAGE_SIZE + self.map_to_addr; continue 'retry; } - Err(VmoCommitError::Err(e)) => return Err(e), + Err(VmoCommitError::Err(e)) => return (rss_increment, Err(e)), } } } @@ -429,17 +464,18 @@ impl VmMapping { /************************** VM Space operations ******************************/ impl VmMapping { - /// Unmaps the mapping from the VM space. - pub(super) fn unmap(self, vm_space: &VmSpace) -> Result<()> { + /// Unmaps the mapping from the VM space, + /// and returns the number of unmapped pages. + pub(super) fn unmap(self, vm_space: &VmSpace) -> Result { let preempt_guard = disable_preempt(); let range = self.range(); let mut cursor = vm_space.cursor_mut(&preempt_guard, &range)?; - cursor.unmap(range.len()); + let num_unmapped = cursor.unmap(range.len()); cursor.flusher().dispatch_tlb_flush(); cursor.flusher().sync_tlb_flush(); - Ok(()) + Ok(num_unmapped) } /// Change the perms of the mapping. diff --git a/ostd/src/mm/page_table/cursor/locking.rs b/ostd/src/mm/page_table/cursor/locking.rs index edbda7bd..8d318571 100644 --- a/ostd/src/mm/page_table/cursor/locking.rs +++ b/ostd/src/mm/page_table/cursor/locking.rs @@ -246,7 +246,8 @@ unsafe fn dfs_release_lock<'rcu, E: PageTableEntryTrait, C: PagingConstsTrait>( } } -/// Marks all the nodes in the sub-tree rooted at the node as stray. +/// Marks all the nodes in the sub-tree rooted at the node as stray, and +/// returns the number of pages mapped within the sub-tree. 
/// /// This function must be called upon the node after the node is removed /// from the parent page table. @@ -263,13 +264,15 @@ unsafe fn dfs_release_lock<'rcu, E: PageTableEntryTrait, C: PagingConstsTrait>( pub(super) unsafe fn dfs_mark_stray_and_unlock( rcu_guard: &dyn InAtomicMode, mut sub_tree: PageTableGuard, -) { +) -> usize { *sub_tree.stray_mut() = true; if sub_tree.level() == 1 { - return; + return sub_tree.nr_children() as usize; } + let mut num_pages = 0; + for i in (0..nr_subpage_per_huge::()).rev() { let child = sub_tree.entry(i); match child.to_ref() { @@ -278,11 +281,13 @@ pub(super) unsafe fn dfs_mark_stray_and_unlock {} } } + + num_pages } fn dfs_get_idx_range( diff --git a/ostd/src/mm/page_table/cursor/mod.rs b/ostd/src/mm/page_table/cursor/mod.rs index 69c58fce..06e38acf 100644 --- a/ostd/src/mm/page_table/cursor/mod.rs +++ b/ostd/src/mm/page_table/cursor/mod.rs @@ -102,6 +102,7 @@ pub enum PageTableItem { pt: Frame, va: Vaddr, len: usize, + num_pages: usize, }, } @@ -600,12 +601,14 @@ impl<'rcu, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> // SAFETY: // - We checked that we are not unmapping shared kernel page table nodes. // - We must have locked the entire sub-tree since the range is locked. - unsafe { locking::dfs_mark_stray_and_unlock(rcu_guard, locked_pt) }; + let num_pages = + unsafe { locking::dfs_mark_stray_and_unlock(rcu_guard, locked_pt) }; PageTableItem::StrayPageTable { pt: (*pt).clone().into(), va: self.0.va, len: page_size::(self.0.level), + num_pages, } } Child::None | Child::PageTableRef(_) => unreachable!(), @@ -705,7 +708,8 @@ impl<'rcu, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> None } - /// Copies the mapping from the given cursor to the current cursor. + /// Copies the mapping from the given cursor to the current cursor, + /// and returns the number of pages mapped by the current cursor. /// /// All the mappings in the current cursor's range must be empty. 
The /// function allows the source cursor to operate on the mapping before @@ -737,7 +741,7 @@ impl<'rcu, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> src: &mut Self, len: usize, op: &mut impl FnMut(&mut PageProperty), - ) { + ) -> usize { assert!(len % page_size::(1) == 0); let this_end = self.0.va + len; assert!(this_end <= self.0.barrier_va.end); @@ -746,6 +750,7 @@ impl<'rcu, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> let rcu_guard = self.0.rcu_guard; + let mut num_mapped: usize = 0; while self.0.va < this_end && src.0.va < src_end { let src_va = src.0.va; let mut src_entry = src.0.cur_entry(); @@ -789,9 +794,13 @@ impl<'rcu, M: PageTableMode, E: PageTableEntryTrait, C: PagingConstsTrait> // This assertion is to ensure that they move by the same length. debug_assert_eq!(mapped_page_size, page_size::(src.0.level)); src.0.move_forward(); + + num_mapped += 1; } } } + + num_mapped } } diff --git a/ostd/src/mm/vm_space.rs b/ostd/src/mm/vm_space.rs index 36ad95cc..0236596a 100644 --- a/ostd/src/mm/vm_space.rs +++ b/ostd/src/mm/vm_space.rs @@ -289,7 +289,8 @@ impl<'a> CursorMut<'a> { } } - /// Clear the mapping starting from the current slot. + /// Clears the mapping starting from the current slot, + /// and returns the number of unmapped pages. /// /// This method will bring the cursor forward by `len` bytes in the virtual /// address space after the modification. @@ -305,15 +306,16 @@ impl<'a> CursorMut<'a> { /// # Panics /// /// This method will panic if `len` is not page-aligned. - pub fn unmap(&mut self, len: usize) { + pub fn unmap(&mut self, len: usize) -> usize { assert!(len % super::PAGE_SIZE == 0); let end_va = self.virt_addr() + len; - + let mut num_unmapped: usize = 0; loop { // SAFETY: It is safe to un-map memory in the userspace. let result = unsafe { self.pt_cursor.take_next(end_va - self.virt_addr()) }; match result { PageTableItem::Mapped { va, page, .. 
} => { + num_unmapped += 1; self.flusher .issue_tlb_flush_with(TlbFlushOp::Address(va), page); } @@ -323,7 +325,13 @@ impl<'a> CursorMut<'a> { PageTableItem::MappedUntracked { .. } => { panic!("found untracked memory mapped into `VmSpace`"); } - PageTableItem::StrayPageTable { pt, va, len } => { + PageTableItem::StrayPageTable { + pt, + va, + len, + num_pages, + } => { + num_unmapped += num_pages; self.flusher .issue_tlb_flush_with(TlbFlushOp::Range(va..va + len), pt); } @@ -331,6 +339,7 @@ impl<'a> CursorMut<'a> { } self.flusher.dispatch_tlb_flush(); + num_unmapped } /// Applies the operation to the next slot of mapping within the range. @@ -365,7 +374,8 @@ impl<'a> CursorMut<'a> { unsafe { self.pt_cursor.protect_next(len, &mut op) } } - /// Copies the mapping from the given cursor to the current cursor. + /// Copies the mapping from the given cursor to the current cursor, + /// and returns the number of pages mapped by the current cursor. /// /// All the mappings in the current cursor's range must be empty. The /// function allows the source cursor to operate on the mapping before @@ -391,7 +401,7 @@ impl<'a> CursorMut<'a> { src: &mut Self, len: usize, op: &mut impl FnMut(&mut PageProperty), - ) { + ) -> usize { // SAFETY: Operations on user memory spaces are safe if it doesn't // involve dropping any pages. 
unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) } diff --git a/test/apps/mmap/mmap_vmrss.c b/test/apps/mmap/mmap_vmrss.c new file mode 100644 index 00000000..6de2a903 --- /dev/null +++ b/test/apps/mmap/mmap_vmrss.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: MPL-2.0 + +#define _GNU_SOURCE + +#include "../network/test.h" + +#include +#include +#include +#include +#include +#include + +#define PAGE_SIZE 4096 +#define NUM_PAGES 1024 +#define TOTAL_SIZE (PAGE_SIZE * NUM_PAGES) + +typedef enum rss_type { + anon, + file, + total, +} rss_type; + +long get_vm_rss_kb(rss_type type) +{ + pid_t pid = getpid(); + char path[64]; + snprintf(path, sizeof(path), "/proc/%d/status", pid); + + FILE *f = fopen(path, "r"); + if (!f) { + perror("fopen /proc/[pid]/status"); + exit(1); + } + + char line[256]; + long rss_kb = -1; + const char *target_field = NULL; + switch (type) { + case anon: + target_field = "RssAnon:"; + break; + case file: + target_field = "RssFile:"; + break; + case total: + target_field = "VmRSS:"; + break; + default: + perror("Unknown rss_type\n"); + exit(1); + } + + while (fgets(line, sizeof(line), f)) { + if (strncmp(line, target_field, strlen(target_field)) == 0) { + sscanf(line + strlen(target_field), "%ld", &rss_kb); + break; + } + } + + fclose(f); + + if (rss_kb < 0) { + fprintf(stderr, "Failed to parse VmRSS\n"); + exit(1); + } + + return rss_kb; +} + +FN_TEST(rss_anon) +{ + void *mem = mmap(NULL, TOTAL_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + // The first call to `TEST_SUCC` and `get_vm_rss_kb()` may trigger + // lazy mapping of additional pages, such as shared libraries or files. + // These pages are not counted in RSS until they are actually accessed. 
+ TEST_SUCC(get_vm_rss_kb(anon)); + + long rss_anon_before = TEST_SUCC(get_vm_rss_kb(anon)); + long rss_file_before = TEST_SUCC(get_vm_rss_kb(file)); + long rss_before = TEST_SUCC(get_vm_rss_kb(total)); + + // Trigger page faults + for (int i = 0; i < NUM_PAGES; ++i) { + volatile char *p = (char *)mem + i * PAGE_SIZE; + *p = 42; + } + + TEST_RES(get_vm_rss_kb(anon), + _ret - rss_anon_before == NUM_PAGES * (PAGE_SIZE / 1024)); + TEST_RES(get_vm_rss_kb(file), _ret == rss_file_before); + TEST_RES(get_vm_rss_kb(total), + _ret - rss_before == NUM_PAGES * (PAGE_SIZE / 1024)); + + TEST_SUCC(munmap(mem, TOTAL_SIZE)); + + TEST_RES(get_vm_rss_kb(anon), _ret == rss_anon_before); + TEST_RES(get_vm_rss_kb(file), _ret == rss_file_before); + TEST_RES(get_vm_rss_kb(total), _ret == rss_before); +} +END_TEST() + +FN_TEST(rss_file) +{ + const char *filename = "rss_test_file"; + int fd = TEST_SUCC(open(filename, O_CREAT | O_RDWR, 0600)); + + TEST_SUCC(ftruncate(fd, TOTAL_SIZE)); + + // The first call to `TEST_SUCC` and `get_vm_rss_kb()` may trigger + // lazy mapping of additional pages, such as shared libraries or files. + // These pages are not counted in RSS until they are actually accessed. 
+ TEST_SUCC(get_vm_rss_kb(anon)); + + long rss_anon_before = TEST_SUCC(get_vm_rss_kb(anon)); + long rss_file_before = TEST_SUCC(get_vm_rss_kb(file)); + long rss_before = TEST_SUCC(get_vm_rss_kb(total)); + + void *mem = mmap(NULL, TOTAL_SIZE, PROT_READ, MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + // Trigger page faults + for (int i = 0; i < NUM_PAGES; ++i) { + volatile char x = *((char *)mem + i * PAGE_SIZE); + x++; + } + + TEST_RES(get_vm_rss_kb(file), + _ret - rss_file_before == NUM_PAGES * (PAGE_SIZE / 1024)); + TEST_RES(get_vm_rss_kb(anon), _ret == rss_anon_before); + TEST_RES(get_vm_rss_kb(total), + _ret - rss_before == NUM_PAGES * (PAGE_SIZE / 1024)); + + TEST_SUCC(munmap(mem, TOTAL_SIZE)); + + TEST_RES(get_vm_rss_kb(anon), _ret == rss_anon_before); + TEST_RES(get_vm_rss_kb(file), _ret == rss_file_before); + TEST_RES(get_vm_rss_kb(total), _ret == rss_before); + + close(fd); + unlink(filename); +} +END_TEST() diff --git a/test/apps/scripts/process.sh b/test/apps/scripts/process.sh index 235a18df..ef8c40ee 100755 --- a/test/apps/scripts/process.sh +++ b/test/apps/scripts/process.sh @@ -30,6 +30,7 @@ itimer/timer_create mmap/mmap_and_fork mmap/mmap_shared_filebacked mmap/mmap_readahead +mmap/mmap_vmrss process/group_session process/job_control pthread/pthread_test @@ -41,7 +42,7 @@ signal_c/signal_test " for testcase in ${tests} -do +do echo "Running test ${testcase}......" ${SCRIPT_DIR}/${testcase} done