From 5b7637eac33453dcfc9c927031edcca8a6f41e74 Mon Sep 17 00:00:00 2001
From: Zhang Junyang
Date: Sun, 23 Mar 2025 21:15:18 +0800
Subject: [PATCH] Remove the activation lock and use RCU to protect PT removal

The activation lock was used to ensure that a `VmSpace` could not be
activated or cleared while mutable cursors were alive. This change
removes the lock: `VmSpace::clear` now detaches the root entries while
other cursors may still be reading the sub-trees, issues a full TLB
flush to the CPUs recorded in the `VmSpace`'s CPU set, and defers the
release of the detached page-table nodes with RCU (`RcuDrop`).

The `TlbFlusher` now borrows the `AtomicCpuSet` of the `VmSpace` and
loads it when issuing and dispatching flushes. Activation adds the
current CPU to the set with `Acquire` ordering, pairing with the
`Release` operations in the flusher, so CPUs that activate the page
table afterwards observe the updated entries. As a result,
`VmSpace::clear` is infallible and `VmSpaceClearError` is removed.
---
 kernel/src/vm/vmar/mod.rs      |  2 +-
 ostd/src/mm/page_table/mod.rs  | 20 ++++----
 ostd/src/mm/page_table/test.rs |  9 ++--
 ostd/src/mm/test.rs            | 18 +------
 ostd/src/mm/tlb.rs             | 93 +++++++++++++---------------
 ostd/src/mm/vm_space.rs        | 90 ++++++--------------------
 6 files changed, 71 insertions(+), 161 deletions(-)

diff --git a/kernel/src/vm/vmar/mod.rs b/kernel/src/vm/vmar/mod.rs
index a0d0efbd..7c666bb1 100644
--- a/kernel/src/vm/vmar/mod.rs
+++ b/kernel/src/vm/vmar/mod.rs
@@ -355,7 +355,7 @@ impl Vmar_ {
     /// Clears all content of the root VMAR.
     fn clear_root_vmar(&self) -> Result<()> {
-        self.vm_space.clear().unwrap();
+        self.vm_space.clear();
         let mut inner = self.inner.write();
         inner.vm_mappings.clear();
         Ok(())
     }
diff --git a/ostd/src/mm/page_table/mod.rs b/ostd/src/mm/page_table/mod.rs
index f0550b09..8547de71 100644
--- a/ostd/src/mm/page_table/mod.rs
+++ b/ostd/src/mm/page_table/mod.rs
@@ -14,6 +14,8 @@ use super::{
 };
 use crate::{
     arch::mm::{PageTableEntry, PagingConsts},
+    cpu::PinCurrentCpu,
+    task::disable_preempt,
     util::marker::SameSizeAs,
     Pod,
 };
@@ -99,22 +101,20 @@ impl PageTable {
     }
 
     /// Clear the page table.
-    ///
-    /// # Safety
-    ///
-    /// The caller must ensure that:
-    /// 1. No other cursors are accessing the page table.
-    /// 2. No other CPUs activates the page table.
-    pub(in crate::mm) unsafe fn clear(&self) {
-        let _guard = crate::task::disable_preempt();
+    pub(super) fn clear<G: PinCurrentCpu>(&self, flusher: &mut super::tlb::TlbFlusher<'_, G>) {
+        let _guard = disable_preempt();
         let mut root_node = self.root.lock();
         const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::();
         for i in 0..NR_PTES_PER_NODE / 2 {
             let mut root_entry = root_node.entry(i);
             if !root_entry.is_none() {
                 let old = root_entry.replace(Child::None);
-                // Since no others are accessing the old child, dropping it is fine.
-                drop(old);
+                if let Child::PageTable(pt) = old {
+                    flusher
+                        .issue_tlb_flush_with(crate::mm::tlb::TlbFlushOp::All, pt.clone().into());
+                    // There may be other cursors accessing the old child; delay its release with RCU.
+                    let _ = crate::sync::RcuDrop::new(pt);
+                }
             }
         }
     }
diff --git a/ostd/src/mm/page_table/test.rs b/ostd/src/mm/page_table/test.rs
index 29ddb766..3dedf135 100644
--- a/ostd/src/mm/page_table/test.rs
+++ b/ostd/src/mm/page_table/test.rs
@@ -5,6 +5,7 @@ use crate::{
     mm::{
         kspace::LINEAR_MAPPING_BASE_VADDR,
         page_prop::{CachePolicy, PageFlags},
+        tlb::TlbFlusher,
         FrameAllocOptions, MAX_USERSPACE_VADDR, PAGE_SIZE,
     },
     prelude::*,
@@ -129,6 +130,7 @@ mod create_page_table {
     use super::{test_utils::*, *};
+    use crate::cpu::{AtomicCpuSet, CpuSet};
 
     #[ktest]
     fn init_user_page_table() {
@@ -192,10 +194,11 @@ mod create_page_table {
         // Confirms that the mapping exists.
         assert!(user_pt.query(PAGE_SIZE + 10).is_some());
 
+        let set = AtomicCpuSet::new(CpuSet::new_empty());
+        let mut tlb_flusher = TlbFlusher::new(&set, disable_preempt());
+
         // Clears the page table.
-        unsafe {
-            user_pt.clear();
-        }
+        user_pt.clear(&mut tlb_flusher);
 
         // Confirms that the mapping is cleared.
         assert!(user_pt.query(PAGE_SIZE + 10).is_none());
diff --git a/ostd/src/mm/test.rs b/ostd/src/mm/test.rs
index 5aea472c..1e2ffbae 100644
--- a/ostd/src/mm/test.rs
+++ b/ostd/src/mm/test.rs
@@ -9,7 +9,7 @@ use crate::{
     mm::{
         io::{VmIo, VmReader, VmWriter},
         tlb::TlbFlushOp,
-        vm_space::{get_activated_vm_space, VmItem, VmSpaceClearError},
+        vm_space::{get_activated_vm_space, VmItem},
         CachePolicy, FallibleVmRead, FallibleVmWrite, FrameAllocOptions, PageFlags, PageProperty,
         UFrame, VmSpace,
     },
@@ -689,7 +689,7 @@ mod vmspace {
         }
 
         // Clears the VmSpace.
-        assert!(vmspace.clear().is_ok());
+        vmspace.clear();
 
         // Verifies that the mapping is cleared.
         let mut cursor = vmspace.cursor(&range).expect("Failed to create cursor");
         assert!(matches!(
             cursor.query().unwrap(),
             VmItem::NotMapped { .. }
         ));
     }
 
@@ -702,20 +702,6 @@ mod vmspace {
-    /// Verifies that `VmSpace::clear` returns an error when cursors are active.
-    #[ktest]
-    fn vmspace_clear_with_alive_cursors() {
-        let vmspace = VmSpace::new();
-        let range = 0x3000..0x4000;
-        let _cursor_mut = vmspace
-            .cursor_mut(&range)
-            .expect("Failed to create mutable cursor");
-
-        // Attempts to clear the VmSpace while a cursor is active.
-        let result = vmspace.clear();
-        assert!(matches!(result, Err(VmSpaceClearError::CursorsAlive)));
-    }
-
     /// Activates and deactivates the `VmSpace` in single-CPU scenarios.
     #[ktest]
     fn vmspace_activate() {
diff --git a/ostd/src/mm/tlb.rs b/ostd/src/mm/tlb.rs
index 28105034..8151eb51 100644
--- a/ostd/src/mm/tlb.rs
+++ b/ostd/src/mm/tlb.rs
@@ -14,7 +14,7 @@ use super::{
 };
 use crate::{
     arch::irq,
-    cpu::{CpuSet, PinCurrentCpu},
+    cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
     cpu_local,
     sync::{LocalIrqDisabled, SpinLock},
 };
@@ -22,40 +22,25 @@
 /// A TLB flusher that is aware of which CPUs are needed to be flushed.
 ///
 /// The flusher needs to stick to the current CPU.
-pub struct TlbFlusher<G: PinCurrentCpu> {
-    target_cpus: CpuSet,
-    // Better to store them here since loading and counting them from the CPUs
-    // list brings non-trivial overhead.
-    need_remote_flush: bool,
-    need_self_flush: bool,
-    have_unsynced_flush: bool,
-    _pin_current: G,
+pub struct TlbFlusher<'a, G: PinCurrentCpu> {
+    /// The CPUs to be flushed.
+    ///
+    /// The flusher reads this set when issuing and dispatching TLB flushes.
+    target_cpus: &'a AtomicCpuSet,
+    have_unsynced_flush: CpuSet,
+    pin_current: G,
 }
 
-impl<G: PinCurrentCpu> TlbFlusher<G> {
+impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
     /// Creates a new TLB flusher with the specified CPUs to be flushed.
     ///
     /// The flusher needs to stick to the current CPU. So please provide a
     /// guard that implements [`PinCurrentCpu`].
-    pub fn new(target_cpus: CpuSet, pin_current_guard: G) -> Self {
-        let current_cpu = pin_current_guard.current_cpu();
-
-        let mut need_self_flush = false;
-        let mut need_remote_flush = false;
-
-        for cpu in target_cpus.iter() {
-            if cpu == current_cpu {
-                need_self_flush = true;
-            } else {
-                need_remote_flush = true;
-            }
-        }
+    pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
         Self {
             target_cpus,
-            need_remote_flush,
-            need_self_flush,
-            have_unsynced_flush: false,
-            _pin_current: pin_current_guard,
+            have_unsynced_flush: CpuSet::new_empty(),
+            pin_current: pin_current_guard,
         }
     }
 
@@ -75,19 +60,31 @@
     /// synchronous. Upon the return of this function, the TLB entries may not
     /// be coherent.
     pub fn dispatch_tlb_flush(&mut self) {
-        if !self.need_remote_flush {
-            return;
+        // `Release` to make sure our modifications to the PT are visible to
+        // the CPUs that are going to activate the PT.
+        let mut target_cpus = self.target_cpus.load(Ordering::Release);
+
+        let cur_cpu = self.pin_current.current_cpu();
+        let mut need_flush_on_self = false;
+
+        if target_cpus.contains(cur_cpu) {
+            target_cpus.remove(cur_cpu);
+            need_flush_on_self = true;
         }
 
-        for cpu in self.target_cpus.iter() {
+        for cpu in target_cpus.iter() {
             ACK_REMOTE_FLUSH
                 .get_on_cpu(cpu)
                 .store(false, Ordering::Relaxed);
+            self.have_unsynced_flush.add(cpu);
         }
 
-        crate::smp::inter_processor_call(&self.target_cpus, do_remote_flush);
+        crate::smp::inter_processor_call(&target_cpus, do_remote_flush);
 
-        self.have_unsynced_flush = true;
+        // Flush ourselves after sending all IPIs to save some time.
+        if need_flush_on_self {
+            do_remote_flush();
+        }
     }
 
     /// Waits for all the previous TLB flush requests to be completed.
@@ -106,22 +103,18 @@
     /// processed in IRQs, two CPUs may deadlock if they are waiting for each
     /// other's TLB coherence.
     pub fn sync_tlb_flush(&mut self) {
-        if !self.have_unsynced_flush {
-            return;
-        }
-
         assert!(
             irq::is_local_enabled(),
             "Waiting for remote flush with IRQs disabled"
         );
 
-        for cpu in self.target_cpus.iter() {
+        for cpu in self.have_unsynced_flush.iter() {
             while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Acquire) {
                 core::hint::spin_loop();
             }
         }
 
-        self.have_unsynced_flush = false;
+        self.have_unsynced_flush = CpuSet::new_empty();
     }
 
     /// Issues a TLB flush request that must happen before dropping the page.
@@ -135,29 +128,15 @@
         self.issue_tlb_flush_(op, Some(drop_after_flush));
     }
 
-    /// Whether the TLB flusher needs to flush the TLB entries on other CPUs.
-    pub fn need_remote_flush(&self) -> bool {
-        self.need_remote_flush
-    }
-
-    /// Whether the TLB flusher needs to flush the TLB entries on the current CPU.
-    pub fn need_self_flush(&self) -> bool {
-        self.need_self_flush
-    }
-
     fn issue_tlb_flush_(&self, op: TlbFlushOp, drop_after_flush: Option>) {
         let op = op.optimize_for_large_range();
 
-        // Fast path for single CPU cases.
-        if !self.need_remote_flush {
-            if self.need_self_flush {
-                op.perform_on_current();
-            }
-            return;
-        }
+        // `Release` to make sure our modifications to the PT are visible to
+        // the CPUs that are going to activate the PT.
+        let target_cpus = self.target_cpus.load(Ordering::Release);
 
         // Slow path for multi-CPU cases.
-        for cpu in self.target_cpus.iter() {
+        for cpu in target_cpus.iter() {
             let mut op_queue = FLUSH_OPS.get_on_cpu(cpu).lock();
             op_queue.push(op.clone(), drop_after_flush.clone());
         }
@@ -221,7 +200,7 @@ fn do_remote_flush() {
 }
 
 /// If a TLB flushing request exceeds this threshold, we flush all.
-pub(crate) const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;
+const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;
 
 /// If the number of pending requests exceeds this threshold, we flush all the
 /// TLB entries instead of flushing them one by one.
diff --git a/ostd/src/mm/vm_space.rs b/ostd/src/mm/vm_space.rs
index 16f81dfd..a6338704 100644
--- a/ostd/src/mm/vm_space.rs
+++ b/ostd/src/mm/vm_space.rs
@@ -12,20 +12,17 @@
 use core::{ops::Range, sync::atomic::Ordering};
 
 use crate::{
-    arch::mm::{
-        current_page_table_paddr, tlb_flush_all_excluding_global, PageTableEntry, PagingConsts,
-    },
+    arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
     cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
     cpu_local_cell,
     mm::{
         io::Fallible,
         kspace::KERNEL_PAGE_TABLE,
         page_table::{self, PageTable, PageTableItem, UserMode},
-        tlb::{TlbFlushOp, TlbFlusher, FLUSH_ALL_RANGE_THRESHOLD},
+        tlb::{TlbFlushOp, TlbFlusher},
         PageProperty, UFrame, VmReader, VmWriter, MAX_USERSPACE_VADDR,
     },
     prelude::*,
-    sync::{PreemptDisabled, RwLock, RwLockReadGuard},
     task::{disable_preempt, DisabledPreemptGuard},
     Error,
 };
@@ -68,9 +65,6 @@ use crate::{
 #[derive(Debug)]
 pub struct VmSpace {
     pt: PageTable,
-    /// A CPU can only activate a `VmSpace` when no mutable cursors are alive.
-    /// Cursors hold read locks and activation require a write lock.
-    activation_lock: RwLock<()>,
     cpus: AtomicCpuSet,
 }
 
@@ -79,38 +73,16 @@ impl VmSpace {
     pub fn new() -> Self {
         Self {
             pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(),
-            activation_lock: RwLock::new(()),
             cpus: AtomicCpuSet::new(CpuSet::new_empty()),
         }
     }
 
     /// Clears the user space mappings in the page table.
-    ///
-    /// This method returns error if the page table is activated on any other
-    /// CPUs or there are any cursors alive.
-    pub fn clear(&self) -> core::result::Result<(), VmSpaceClearError> {
-        let preempt_guard = disable_preempt();
-        let _guard = self
-            .activation_lock
-            .try_write()
-            .ok_or(VmSpaceClearError::CursorsAlive)?;
-
-        let cpus = self.cpus.load(Ordering::Relaxed);
-        let cpu = preempt_guard.current_cpu();
-        let cpus_set_is_empty = cpus.is_empty();
-        let cpus_set_is_single_self = cpus.count() == 1 && cpus.contains(cpu);
-
-        if cpus_set_is_empty || cpus_set_is_single_self {
-            // SAFETY: We have ensured that the page table is not activated on
-            // other CPUs and no cursors are alive.
-            unsafe { self.pt.clear() };
-            if cpus_set_is_single_self {
-                tlb_flush_all_excluding_global();
-            }
-            Ok(())
-        } else {
-            Err(VmSpaceClearError::PageTableActivated(cpus))
-        }
+    pub fn clear(&self) {
+        let mut flusher = TlbFlusher::new(&self.cpus, disable_preempt());
+        self.pt.clear(&mut flusher);
+        flusher.dispatch_tlb_flush();
+        flusher.sync_tlb_flush();
     }
 
     /// Gets an immutable cursor in the virtual address range.
@@ -136,14 +108,9 @@
     /// overlapping range is alive. The modification to the mapping by the
     /// cursor may also block or be overridden the mapping of another cursor.
    pub fn cursor_mut(&self, va: &Range) -> Result> {
-        Ok(self.pt.cursor_mut(va).map(|pt_cursor| {
-            let activation_lock = self.activation_lock.read();
-
-            CursorMut {
-                pt_cursor,
-                activation_lock,
-                flusher: TlbFlusher::new(self.cpus.load(Ordering::Relaxed), disable_preempt()),
-            }
+        Ok(self.pt.cursor_mut(va).map(|pt_cursor| CursorMut {
+            pt_cursor,
+            flusher: TlbFlusher::new(&self.cpus, disable_preempt()),
         })?)
     }
 
@@ -158,12 +125,10 @@
             return;
         }
 
-        // Ensure no mutable cursors (which holds read locks) are alive before
-        // we add the CPU to the CPU set.
-        let _activation_lock = self.activation_lock.write();
-
         // Record ourselves in the CPU set and the activated VM space pointer.
-        self.cpus.add(cpu, Ordering::Relaxed);
+        // `Acquire` to ensure the modification to the PT is visible to this CPU.
+        self.cpus.add(cpu, Ordering::Acquire);
+
         let self_ptr = Arc::into_raw(Arc::clone(self)) as *mut VmSpace;
         ACTIVATED_VM_SPACE.store(self_ptr);
@@ -228,17 +193,6 @@ impl Default for VmSpace {
     }
 }
 
-/// An error that may occur when doing [`VmSpace::clear`].
-#[derive(Debug)]
-pub enum VmSpaceClearError {
-    /// The page table is activated on other CPUs.
-    ///
-    /// The activated CPUs detected are contained in the error.
-    PageTableActivated(CpuSet),
-    /// There are still cursors alive.
-    CursorsAlive,
-}
-
 /// The cursor for querying over the VM space without modifying it.
 ///
 /// It exclusively owns a sub-tree of the page table, preventing others from
@@ -284,14 +238,12 @@ impl Cursor<'_> {
 /// reading or modifying the same sub-tree.
 pub struct CursorMut<'a, 'b> {
     pt_cursor: page_table::CursorMut<'a, UserMode, PageTableEntry, PagingConsts>,
-    #[expect(dead_code)]
-    activation_lock: RwLockReadGuard<'b, (), PreemptDisabled>,
     // We have a read lock so the CPU set in the flusher is always a superset
     // of actual activated CPUs.
-    flusher: TlbFlusher<DisabledPreemptGuard>,
+    flusher: TlbFlusher<'b, DisabledPreemptGuard>,
 }
 
-impl CursorMut<'_, '_> {
+impl<'b> CursorMut<'_, 'b> {
     /// Query about the current slot.
     ///
     /// This is the same as [`Cursor::query`].
@@ -318,7 +270,7 @@
     }
 
     /// Get the dedicated TLB flusher for this cursor.
-    pub fn flusher(&mut self) -> &mut TlbFlusher<DisabledPreemptGuard> {
+    pub fn flusher(&mut self) -> &mut TlbFlusher<'b, DisabledPreemptGuard> {
         &mut self.flusher
     }
 
@@ -356,18 +308,12 @@
     pub fn unmap(&mut self, len: usize) {
         assert!(len % super::PAGE_SIZE == 0);
         let end_va = self.virt_addr() + len;
-        let tlb_prefer_flush_all = len > FLUSH_ALL_RANGE_THRESHOLD;
 
         loop {
             // SAFETY: It is safe to un-map memory in the userspace.
             let result = unsafe { self.pt_cursor.take_next(end_va - self.virt_addr()) };
             match result {
                 PageTableItem::Mapped { va, page, .. } => {
-                    if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
-                        // Only on single-CPU cases we can drop the page immediately before flushing.
-                        drop(page);
-                        continue;
-                    }
                     self.flusher
                         .issue_tlb_flush_with(TlbFlushOp::Address(va), page);
                 }
@@ -380,10 +326,6 @@
             }
         }
 
-        if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
-            self.flusher.issue_tlb_flush(TlbFlushOp::All);
-        }
-
         self.flusher.dispatch_tlb_flush();
     }