Remove the activation lock and use RCU to protect PT removal

Authored by Zhang Junyang (2025-03-23 21:15:18 +08:00), committed by Tate, Hongliang Tian
parent d873e121ff
commit 5b7637eac3
6 changed files with 71 additions and 161 deletions

File 1 of 6

@@ -355,7 +355,7 @@ impl Vmar_ {
     /// Clears all content of the root VMAR.
     fn clear_root_vmar(&self) -> Result<()> {
-        self.vm_space.clear().unwrap();
+        self.vm_space.clear();
         let mut inner = self.inner.write();
         inner.vm_mappings.clear();
         Ok(())

File 2 of 6

@@ -14,6 +14,8 @@ use super::{
 };
 use crate::{
     arch::mm::{PageTableEntry, PagingConsts},
+    cpu::PinCurrentCpu,
+    task::disable_preempt,
     util::marker::SameSizeAs,
     Pod,
 };
@@ -99,22 +101,20 @@ impl PageTable<UserMode> {
     }

     /// Clear the page table.
-    ///
-    /// # Safety
-    ///
-    /// The caller must ensure that:
-    /// 1. No other cursors are accessing the page table.
-    /// 2. No other CPUs activates the page table.
-    pub(in crate::mm) unsafe fn clear(&self) {
-        let _guard = crate::task::disable_preempt();
+    pub(super) fn clear<G: PinCurrentCpu>(&self, flusher: &mut super::tlb::TlbFlusher<'_, G>) {
+        let _guard = disable_preempt();
         let mut root_node = self.root.lock();
         const NR_PTES_PER_NODE: usize = nr_subpage_per_huge::<PagingConsts>();
         for i in 0..NR_PTES_PER_NODE / 2 {
             let mut root_entry = root_node.entry(i);
             if !root_entry.is_none() {
                 let old = root_entry.replace(Child::None);
-                // Since no others are accessing the old child, dropping it is fine.
-                drop(old);
+                if let Child::PageTable(pt) = old {
+                    flusher
+                        .issue_tlb_flush_with(crate::mm::tlb::TlbFlushOp::All, pt.clone().into());
+                    // There may be other cursors accessing the old child, delay it with RCU.
+                    let _ = crate::sync::RcuDrop::new(pt);
+                }
             }
         }
     }
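The heart of the change is above: a root-level subtree that concurrent cursors may still be traversing is no longer dropped in place; its TLB entries are flushed and its destruction is deferred through `RcuDrop`. Below is a minimal std-only sketch of that deferred-reclamation idea, using `Arc` reference counting as a stand-in for the RCU grace period (the `PtNode` type and the threads are illustrative, not ostd APIs):

use std::sync::Arc;
use std::thread;

// A detached "page-table node" that concurrent readers may still hold.
struct PtNode(Vec<u64>);

impl Drop for PtNode {
    fn drop(&mut self) {
        println!("node reclaimed after all readers are done");
    }
}

fn main() {
    let node = Arc::new(PtNode(vec![0; 512]));

    // A concurrent "cursor" that was reading the subtree when the
    // writer unlinked it from the root.
    let reader = {
        let node = Arc::clone(&node);
        thread::spawn(move || {
            let sum: u64 = node.0.iter().sum();
            println!("reader finished, sum = {sum}");
        })
    };

    // The writer unlinks the node and gives up its reference at once,
    // as with `RcuDrop::new(pt)`: actual reclamation happens only when
    // the last reader drops its handle.
    drop(node);

    reader.join().unwrap();
}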

File 3 of 6

@@ -5,6 +5,7 @@ use crate::{
     mm::{
         kspace::LINEAR_MAPPING_BASE_VADDR,
         page_prop::{CachePolicy, PageFlags},
+        tlb::TlbFlusher,
         FrameAllocOptions, MAX_USERSPACE_VADDR, PAGE_SIZE,
     },
     prelude::*,
@@ -129,6 +130,7 @@ mod test_utils {
 mod create_page_table {
     use super::{test_utils::*, *};
+    use crate::cpu::{AtomicCpuSet, CpuSet};

     #[ktest]
     fn init_user_page_table() {
@@ -192,10 +194,11 @@ mod create_page_table {
         // Confirms that the mapping exists.
         assert!(user_pt.query(PAGE_SIZE + 10).is_some());

+        let set = AtomicCpuSet::new(CpuSet::new_empty());
+        let mut tlb_flusher = TlbFlusher::new(&set, disable_preempt());
+
         // Clears the page table.
-        unsafe {
-            user_pt.clear();
-        }
+        user_pt.clear(&mut tlb_flusher);

         // Confirms that the mapping is cleared.
         assert!(user_pt.query(PAGE_SIZE + 10).is_none());

File 4 of 6

@@ -9,7 +9,7 @@ use crate::{
     mm::{
         io::{VmIo, VmReader, VmWriter},
         tlb::TlbFlushOp,
-        vm_space::{get_activated_vm_space, VmItem, VmSpaceClearError},
+        vm_space::{get_activated_vm_space, VmItem},
         CachePolicy, FallibleVmRead, FallibleVmWrite, FrameAllocOptions, PageFlags, PageProperty,
         UFrame, VmSpace,
     },
@@ -689,7 +689,7 @@ mod vmspace {
         }

         // Clears the VmSpace.
-        assert!(vmspace.clear().is_ok());
+        vmspace.clear();

         // Verifies that the mapping is cleared.
         let mut cursor = vmspace.cursor(&range).expect("Failed to create cursor");
@@ -702,20 +702,6 @@ mod vmspace {
         );
     }

-    /// Verifies that `VmSpace::clear` returns an error when cursors are active.
-    #[ktest]
-    fn vmspace_clear_with_alive_cursors() {
-        let vmspace = VmSpace::new();
-        let range = 0x3000..0x4000;
-        let _cursor_mut = vmspace
-            .cursor_mut(&range)
-            .expect("Failed to create mutable cursor");
-
-        // Attempts to clear the VmSpace while a cursor is active.
-        let result = vmspace.clear();
-        assert!(matches!(result, Err(VmSpaceClearError::CursorsAlive)));
-    }
-
     /// Activates and deactivates the `VmSpace` in single-CPU scenarios.
     #[ktest]
     fn vmspace_activate() {

File 5 of 6

@@ -14,7 +14,7 @@ use super::{
 };
 use crate::{
     arch::irq,
-    cpu::{CpuSet, PinCurrentCpu},
+    cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
     cpu_local,
     sync::{LocalIrqDisabled, SpinLock},
 };
@@ -22,40 +22,25 @@ use crate::{
 /// A TLB flusher that is aware of which CPUs are needed to be flushed.
 ///
 /// The flusher needs to stick to the current CPU.
-pub struct TlbFlusher<G: PinCurrentCpu> {
-    target_cpus: CpuSet,
-    // Better to store them here since loading and counting them from the CPUs
-    // list brings non-trivial overhead.
-    need_remote_flush: bool,
-    need_self_flush: bool,
-    have_unsynced_flush: bool,
-    _pin_current: G,
+pub struct TlbFlusher<'a, G: PinCurrentCpu> {
+    /// The CPUs to be flushed.
+    ///
+    /// If the targets is `None`, the flusher will flush all the CPUs.
+    target_cpus: &'a AtomicCpuSet,
+    have_unsynced_flush: CpuSet,
+    pin_current: G,
 }

-impl<G: PinCurrentCpu> TlbFlusher<G> {
+impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
     /// Creates a new TLB flusher with the specified CPUs to be flushed.
     ///
     /// The flusher needs to stick to the current CPU. So please provide a
     /// guard that implements [`PinCurrentCpu`].
-    pub fn new(target_cpus: CpuSet, pin_current_guard: G) -> Self {
-        let current_cpu = pin_current_guard.current_cpu();
-
-        let mut need_self_flush = false;
-        let mut need_remote_flush = false;
-
-        for cpu in target_cpus.iter() {
-            if cpu == current_cpu {
-                need_self_flush = true;
-            } else {
-                need_remote_flush = true;
-            }
-        }
+    pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
         Self {
             target_cpus,
-            need_remote_flush,
-            need_self_flush,
-            have_unsynced_flush: false,
-            _pin_current: pin_current_guard,
+            have_unsynced_flush: CpuSet::new_empty(),
+            pin_current: pin_current_guard,
         }
     }
@@ -75,19 +60,31 @@ impl<G: PinCurrentCpu> TlbFlusher<G> {
     /// synchronous. Upon the return of this function, the TLB entries may not
     /// be coherent.
     pub fn dispatch_tlb_flush(&mut self) {
-        if !self.need_remote_flush {
-            return;
-        }
+        // `Release` to make sure our modification on the PT is visible to CPUs
+        // that are going to activate the PT.
+        let mut target_cpus = self.target_cpus.load(Ordering::Release);
+
+        let cur_cpu = self.pin_current.current_cpu();
+        let mut need_flush_on_self = false;
+
+        if target_cpus.contains(cur_cpu) {
+            target_cpus.remove(cur_cpu);
+            need_flush_on_self = true;
+        }

-        for cpu in self.target_cpus.iter() {
+        for cpu in target_cpus.iter() {
             ACK_REMOTE_FLUSH
                 .get_on_cpu(cpu)
                 .store(false, Ordering::Relaxed);
+            self.have_unsynced_flush.add(cpu);
         }

-        crate::smp::inter_processor_call(&self.target_cpus, do_remote_flush);
-        self.have_unsynced_flush = true;
+        crate::smp::inter_processor_call(&target_cpus, do_remote_flush);
+
+        // Flush ourselves after sending all IPIs to save some time.
+        if need_flush_on_self {
+            do_remote_flush();
+        }
     }

     /// Waits for all the previous TLB flush requests to be completed.
@@ -106,22 +103,18 @@ impl<G: PinCurrentCpu> TlbFlusher<G> {
     /// processed in IRQs, two CPUs may deadlock if they are waiting for each
     /// other's TLB coherence.
     pub fn sync_tlb_flush(&mut self) {
-        if !self.have_unsynced_flush {
-            return;
-        }
-
         assert!(
             irq::is_local_enabled(),
             "Waiting for remote flush with IRQs disabled"
         );

-        for cpu in self.target_cpus.iter() {
+        for cpu in self.have_unsynced_flush.iter() {
             while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Acquire) {
                 core::hint::spin_loop();
             }
         }

-        self.have_unsynced_flush = false;
+        self.have_unsynced_flush = CpuSet::new_empty();
     }

     /// Issues a TLB flush request that must happen before dropping the page.
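Note how dispatch and sync now pair up: `dispatch_tlb_flush` records every IPI'd CPU in `have_unsynced_flush`, and `sync_tlb_flush` spins only on those. Below is a self-contained sketch of this acknowledgement protocol, where a static `ACK` array plays the role of the per-CPU `ACK_REMOTE_FLUSH` flags and threads stand in for the remote CPUs (a toy model, not the ostd IPI machinery):

use std::sync::atomic::{AtomicBool, Ordering};
use std::{hint, thread};

// One "ACK_REMOTE_FLUSH"-style flag per simulated remote CPU.
static ACK: [AtomicBool; 2] = [AtomicBool::new(false), AtomicBool::new(false)];

fn main() {
    // Dispatch: clear the ack flags, then "send the IPIs".
    for ack in &ACK {
        ack.store(false, Ordering::Relaxed);
    }
    let handles: Vec<_> = (0..2)
        .map(|cpu| {
            thread::spawn(move || {
                // The remote handler flushes its TLB (elided) and acks.
                ACK[cpu].store(true, Ordering::Release);
            })
        })
        .collect();

    // Sync: spin until every dispatched CPU has acknowledged, mirroring
    // the `Acquire` loads in `sync_tlb_flush`.
    for ack in &ACK {
        while !ack.load(Ordering::Acquire) {
            hint::spin_loop();
        }
    }
    println!("all remote flushes acknowledged");

    for h in handles {
        h.join().unwrap();
    }
}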
@@ -135,29 +128,15 @@ impl<G: PinCurrentCpu> TlbFlusher<G> {
         self.issue_tlb_flush_(op, Some(drop_after_flush));
     }

-    /// Whether the TLB flusher needs to flush the TLB entries on other CPUs.
-    pub fn need_remote_flush(&self) -> bool {
-        self.need_remote_flush
-    }
-
-    /// Whether the TLB flusher needs to flush the TLB entries on the current CPU.
-    pub fn need_self_flush(&self) -> bool {
-        self.need_self_flush
-    }
-
     fn issue_tlb_flush_(&self, op: TlbFlushOp, drop_after_flush: Option<Frame<dyn AnyFrameMeta>>) {
         let op = op.optimize_for_large_range();

-        // Fast path for single CPU cases.
-        if !self.need_remote_flush {
-            if self.need_self_flush {
-                op.perform_on_current();
-            }
-            return;
-        }
+        // `Release` to make sure our modification on the PT is visible to CPUs
+        // that are going to activate the PT.
+        let target_cpus = self.target_cpus.load(Ordering::Release);

         // Slow path for multi-CPU cases.
-        for cpu in self.target_cpus.iter() {
+        for cpu in target_cpus.iter() {
             let mut op_queue = FLUSH_OPS.get_on_cpu(cpu).lock();
             op_queue.push(op.clone(), drop_after_flush.clone());
         }
@@ -221,7 +200,7 @@ fn do_remote_flush() {
 }

 /// If a TLB flushing request exceeds this threshold, we flush all.
-pub(crate) const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;
+const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;

 /// If the number of pending requests exceeds this threshold, we flush all the
 /// TLB entries instead of flushing them one by one.
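With the single-CPU fast path removed from `issue_tlb_flush_` (and, in the next file, from `CursorMut::unmap`), large unmappings stay cheap only because `optimize_for_large_range` collapses oversized requests into one flush-all, governed by `FLUSH_ALL_RANGE_THRESHOLD`. Here is a sketch of that coalescing rule; the `Range` variant and the exact comparison are assumptions for illustration, since only `All`, `Address`, and the threshold constant appear in this diff:

use std::ops::Range;

const PAGE_SIZE: usize = 4096;
const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;

#[derive(Clone, Debug, PartialEq)]
enum TlbFlushOp {
    All,
    Address(usize),
    Range(Range<usize>),
}

impl TlbFlushOp {
    // Assumed behavior: ranges beyond the threshold degrade to flush-all.
    fn optimize_for_large_range(self) -> Self {
        match self {
            TlbFlushOp::Range(r) if r.end - r.start > FLUSH_ALL_RANGE_THRESHOLD => TlbFlushOp::All,
            op => op,
        }
    }
}

fn main() {
    let small = TlbFlushOp::Range(0..2 * PAGE_SIZE).optimize_for_large_range();
    let large = TlbFlushOp::Range(0..64 * PAGE_SIZE).optimize_for_large_range();
    let addr = TlbFlushOp::Address(PAGE_SIZE).optimize_for_large_range();
    assert_eq!(small, TlbFlushOp::Range(0..2 * PAGE_SIZE));
    assert_eq!(large, TlbFlushOp::All);
    assert_eq!(addr, TlbFlushOp::Address(PAGE_SIZE));
    println!("small -> {small:?}, large -> {large:?}");
}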

File 6 of 6

@@ -12,20 +12,17 @@
 use core::{ops::Range, sync::atomic::Ordering};

 use crate::{
-    arch::mm::{
-        current_page_table_paddr, tlb_flush_all_excluding_global, PageTableEntry, PagingConsts,
-    },
+    arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
     cpu::{AtomicCpuSet, CpuSet, PinCurrentCpu},
     cpu_local_cell,
     mm::{
         io::Fallible,
         kspace::KERNEL_PAGE_TABLE,
         page_table::{self, PageTable, PageTableItem, UserMode},
-        tlb::{TlbFlushOp, TlbFlusher, FLUSH_ALL_RANGE_THRESHOLD},
+        tlb::{TlbFlushOp, TlbFlusher},
         PageProperty, UFrame, VmReader, VmWriter, MAX_USERSPACE_VADDR,
     },
     prelude::*,
-    sync::{PreemptDisabled, RwLock, RwLockReadGuard},
     task::{disable_preempt, DisabledPreemptGuard},
     Error,
 };
@@ -68,9 +65,6 @@ use crate::{
 #[derive(Debug)]
 pub struct VmSpace {
     pt: PageTable<UserMode>,
-    /// A CPU can only activate a `VmSpace` when no mutable cursors are alive.
-    /// Cursors hold read locks and activation require a write lock.
-    activation_lock: RwLock<()>,
     cpus: AtomicCpuSet,
 }
@@ -79,38 +73,16 @@
     pub fn new() -> Self {
         Self {
             pt: KERNEL_PAGE_TABLE.get().unwrap().create_user_page_table(),
-            activation_lock: RwLock::new(()),
             cpus: AtomicCpuSet::new(CpuSet::new_empty()),
         }
     }

     /// Clears the user space mappings in the page table.
-    ///
-    /// This method returns error if the page table is activated on any other
-    /// CPUs or there are any cursors alive.
-    pub fn clear(&self) -> core::result::Result<(), VmSpaceClearError> {
-        let preempt_guard = disable_preempt();
-        let _guard = self
-            .activation_lock
-            .try_write()
-            .ok_or(VmSpaceClearError::CursorsAlive)?;
-
-        let cpus = self.cpus.load(Ordering::Relaxed);
-        let cpu = preempt_guard.current_cpu();
-
-        let cpus_set_is_empty = cpus.is_empty();
-        let cpus_set_is_single_self = cpus.count() == 1 && cpus.contains(cpu);
-
-        if cpus_set_is_empty || cpus_set_is_single_self {
-            // SAFETY: We have ensured that the page table is not activated on
-            // other CPUs and no cursors are alive.
-            unsafe { self.pt.clear() };
-
-            if cpus_set_is_single_self {
-                tlb_flush_all_excluding_global();
-            }
-
-            Ok(())
-        } else {
-            Err(VmSpaceClearError::PageTableActivated(cpus))
-        }
+    pub fn clear(&self) {
+        let mut flusher = TlbFlusher::new(&self.cpus, disable_preempt());
+        self.pt.clear(&mut flusher);
+        flusher.dispatch_tlb_flush();
+        flusher.sync_tlb_flush();
     }

     /// Gets an immutable cursor in the virtual address range.
@@ -136,14 +108,9 @@
     /// overlapping range is alive. The modification to the mapping by the
     /// cursor may also block or be overridden the mapping of another cursor.
     pub fn cursor_mut(&self, va: &Range<Vaddr>) -> Result<CursorMut<'_, '_>> {
-        Ok(self.pt.cursor_mut(va).map(|pt_cursor| {
-            let activation_lock = self.activation_lock.read();
-
-            CursorMut {
-                pt_cursor,
-                activation_lock,
-                flusher: TlbFlusher::new(self.cpus.load(Ordering::Relaxed), disable_preempt()),
-            }
+        Ok(self.pt.cursor_mut(va).map(|pt_cursor| CursorMut {
+            pt_cursor,
+            flusher: TlbFlusher::new(&self.cpus, disable_preempt()),
         })?)
     }
@@ -158,12 +125,10 @@
             return;
         }

-        // Ensure no mutable cursors (which holds read locks) are alive before
-        // we add the CPU to the CPU set.
-        let _activation_lock = self.activation_lock.write();
-
         // Record ourselves in the CPU set and the activated VM space pointer.
-        self.cpus.add(cpu, Ordering::Relaxed);
+        // `Acquire` to ensure the modification to the PT is visible by this CPU.
+        self.cpus.add(cpu, Ordering::Acquire);
         let self_ptr = Arc::into_raw(Arc::clone(self)) as *mut VmSpace;
         ACTIVATED_VM_SPACE.store(self_ptr);
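The activation lock is thus replaced by an ordering discipline on the CPU set: the flusher's `Release` operations publish page-table modifications, and the `Acquire` in `cpus.add` makes them visible to the activating CPU. Below is a classic message-passing sketch of this Release/Acquire pairing, with plain std atomics standing in for `AtomicCpuSet` and the page table:

use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::{hint, thread};

// Stands in for the page-table contents modified before publication.
static PT_GENERATION: AtomicUsize = AtomicUsize::new(0);
// Stands in for the CPU-set word that activation synchronizes on.
static PUBLISHED: AtomicBool = AtomicBool::new(false);

fn main() {
    let writer = thread::spawn(|| {
        // Modify the "page table" first...
        PT_GENERATION.store(1, Ordering::Relaxed);
        // ...then publish with `Release`, so the write above cannot be
        // reordered after the publication.
        PUBLISHED.store(true, Ordering::Release);
    });

    let activator = thread::spawn(|| {
        // `Acquire` pairs with the writer's `Release`: once the
        // publication is observed, the modification is visible too.
        while !PUBLISHED.load(Ordering::Acquire) {
            hint::spin_loop();
        }
        assert_eq!(PT_GENERATION.load(Ordering::Relaxed), 1);
    });

    writer.join().unwrap();
    activator.join().unwrap();
}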
@@ -228,17 +193,6 @@
     }
 }

-/// An error that may occur when doing [`VmSpace::clear`].
-#[derive(Debug)]
-pub enum VmSpaceClearError {
-    /// The page table is activated on other CPUs.
-    ///
-    /// The activated CPUs detected are contained in the error.
-    PageTableActivated(CpuSet),
-    /// There are still cursors alive.
-    CursorsAlive,
-}
-
 /// The cursor for querying over the VM space without modifying it.
 ///
 /// It exclusively owns a sub-tree of the page table, preventing others from
@@ -284,14 +238,12 @@
 /// reading or modifying the same sub-tree.
 pub struct CursorMut<'a, 'b> {
     pt_cursor: page_table::CursorMut<'a, UserMode, PageTableEntry, PagingConsts>,
-    #[expect(dead_code)]
-    activation_lock: RwLockReadGuard<'b, (), PreemptDisabled>,
     // We have a read lock so the CPU set in the flusher is always a superset
     // of actual activated CPUs.
-    flusher: TlbFlusher<DisabledPreemptGuard>,
+    flusher: TlbFlusher<'b, DisabledPreemptGuard>,
 }

-impl CursorMut<'_, '_> {
+impl<'b> CursorMut<'_, 'b> {
     /// Query about the current slot.
     ///
     /// This is the same as [`Cursor::query`].
@@ -318,7 +270,7 @@ impl CursorMut<'_, '_> {
     }

     /// Get the dedicated TLB flusher for this cursor.
-    pub fn flusher(&mut self) -> &mut TlbFlusher<DisabledPreemptGuard> {
+    pub fn flusher(&mut self) -> &mut TlbFlusher<'b, DisabledPreemptGuard> {
         &mut self.flusher
     }
@@ -356,18 +308,12 @@ impl CursorMut<'_, '_> {
     pub fn unmap(&mut self, len: usize) {
         assert!(len % super::PAGE_SIZE == 0);
         let end_va = self.virt_addr() + len;
-        let tlb_prefer_flush_all = len > FLUSH_ALL_RANGE_THRESHOLD;

         loop {
             // SAFETY: It is safe to un-map memory in the userspace.
             let result = unsafe { self.pt_cursor.take_next(end_va - self.virt_addr()) };
             match result {
                 PageTableItem::Mapped { va, page, .. } => {
-                    if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
-                        // Only on single-CPU cases we can drop the page immediately before flushing.
-                        drop(page);
-                        continue;
-                    }
                     self.flusher
                         .issue_tlb_flush_with(TlbFlushOp::Address(va), page);
                 }
@@ -380,10 +326,6 @@ impl CursorMut<'_, '_> {
             }
         }

-        if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
-            self.flusher.issue_tlb_flush(TlbFlushOp::All);
-        }
-
         self.flusher.dispatch_tlb_flush();
     }