From fad39fdf7aa8cf30e5ca9d0a914b429802349423 Mon Sep 17 00:00:00 2001
From: Zhang Junyang
Date: Tue, 24 Sep 2024 22:45:29 +0800
Subject: [PATCH] Make TLB flush policy relaxed

---
 kernel/src/vm/vmar/mod.rs        |   4 +-
 kernel/src/vm/vmar/vm_mapping.rs |  19 ++-
 ostd/src/mm/mod.rs               |   1 +
 ostd/src/mm/page_table/cursor.rs |   4 -
 ostd/src/mm/tlb.rs               | 222 ++++++++++++++++++++++++++++++
 ostd/src/mm/vm_space.rs          | 223 +++++++++----------------------
 6 files changed, 302 insertions(+), 171 deletions(-)
 create mode 100644 ostd/src/mm/tlb.rs

diff --git a/kernel/src/vm/vmar/mod.rs b/kernel/src/vm/vmar/mod.rs
index c6d98e063..4cdd850de 100644
--- a/kernel/src/vm/vmar/mod.rs
+++ b/kernel/src/vm/vmar/mod.rs
@@ -16,7 +16,7 @@ use align_ext::AlignExt;
 use aster_rights::Rights;
 use ostd::{
     cpu::CpuExceptionInfo,
-    mm::{PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
+    mm::{tlb::TlbFlushOp, PageFlags, PageProperty, VmSpace, MAX_USERSPACE_VADDR},
 };
 
 use self::{
@@ -706,6 +706,8 @@ impl Vmar_ {
                 };
                 new_cursor.copy_from(&mut cur_cursor, vm_mapping.map_size(), &mut op);
             }
+            cur_cursor.flusher().issue_tlb_flush(TlbFlushOp::All);
+            cur_cursor.flusher().dispatch_tlb_flush();
         }
 
         drop(new_inner);
diff --git a/kernel/src/vm/vmar/vm_mapping.rs b/kernel/src/vm/vmar/vm_mapping.rs
index b5d818e66..87ed53d48 100644
--- a/kernel/src/vm/vmar/vm_mapping.rs
+++ b/kernel/src/vm/vmar/vm_mapping.rs
@@ -11,7 +11,8 @@ use core::{
 use align_ext::AlignExt;
 use aster_rights::Rights;
 use ostd::mm::{
-    vm_space::VmItem, CachePolicy, Frame, FrameAllocOptions, PageFlags, PageProperty, VmSpace,
+    tlb::TlbFlushOp, vm_space::VmItem, CachePolicy, Frame, FrameAllocOptions, PageFlags,
+    PageProperty, VmSpace,
 };
 
 use super::{interval::Interval, is_intersected, Vmar, Vmar_};
@@ -224,7 +225,7 @@ impl VmMapping {
 
         match cursor.query().unwrap() {
             VmItem::Mapped {
-                va: _,
+                va,
                 frame,
                 mut prop,
             } if is_write => {
@@ -245,7 +246,9 @@ impl VmMapping {
                 let new_flags = PageFlags::W | PageFlags::ACCESSED | PageFlags::DIRTY;
 
                 if self.is_shared || only_reference {
-                    cursor.protect(PAGE_SIZE, |p| p.flags |= new_flags);
+                    cursor.protect_next(PAGE_SIZE, |p| p.flags |= new_flags);
+                    cursor.flusher().issue_tlb_flush(TlbFlushOp::Address(va));
+                    cursor.flusher().dispatch_tlb_flush();
                 } else {
                     let new_frame = duplicate_frame(&frame)?;
                     prop.flags |= new_flags;
@@ -558,7 +561,15 @@ impl VmMappingInner {
         debug_assert!(range.start % PAGE_SIZE == 0);
         debug_assert!(range.end % PAGE_SIZE == 0);
         let mut cursor = vm_space.cursor_mut(&range).unwrap();
-        cursor.protect(range.len(), |p| p.flags = perms.into());
+        let op = |p: &mut PageProperty| p.flags = perms.into();
+        while cursor.virt_addr() < range.end {
+            if let Some(va) = cursor.protect_next(range.end - cursor.virt_addr(), op) {
+                cursor.flusher().issue_tlb_flush(TlbFlushOp::Range(va));
+            } else {
+                break;
+            }
+        }
+        cursor.flusher().dispatch_tlb_flush();
         Ok(())
     }
 
diff --git a/ostd/src/mm/mod.rs b/ostd/src/mm/mod.rs
index 6bea920ae..bfe2cafec 100644
--- a/ostd/src/mm/mod.rs
+++ b/ostd/src/mm/mod.rs
@@ -18,6 +18,7 @@ pub(crate) mod page;
 pub(crate) mod page_prop;
 pub(crate) mod page_table;
 pub mod stat;
+pub mod tlb;
 pub mod vm_space;
 
 use core::{fmt::Debug, ops::Range};
diff --git a/ostd/src/mm/page_table/cursor.rs b/ostd/src/mm/page_table/cursor.rs
index 750a26b82..9a643ee1d 100644
--- a/ostd/src/mm/page_table/cursor.rs
+++ b/ostd/src/mm/page_table/cursor.rs
@@ -823,10 +823,6 @@ where
         }
     }
 
-    pub fn preempt_guard(&self) -> &DisabledPreemptGuard {
-        &self.0.preempt_guard
-    }
-
     /// Goes down a level assuming the current slot is absent.
     ///
     /// This method will create a new child page table node and go down to it.
diff --git a/ostd/src/mm/tlb.rs b/ostd/src/mm/tlb.rs
new file mode 100644
index 000000000..4ec58e091
--- /dev/null
+++ b/ostd/src/mm/tlb.rs
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! TLB flush operations.
+
+use alloc::vec::Vec;
+use core::ops::Range;
+
+use super::{page::DynPage, Vaddr, PAGE_SIZE};
+use crate::{
+    cpu::{CpuSet, PinCurrentCpu},
+    cpu_local,
+    sync::SpinLock,
+    task::disable_preempt,
+};
+
+/// A TLB flusher that is aware of which CPUs need to be flushed.
+///
+/// The flusher needs to stick to the current CPU.
+pub struct TlbFlusher<G: PinCurrentCpu> {
+    target_cpus: CpuSet,
+    // Better to store them here since loading and counting them from the CPUs
+    // list brings non-trivial overhead.
+    need_remote_flush: bool,
+    need_self_flush: bool,
+    _pin_current: G,
+}
+
+impl<G: PinCurrentCpu> TlbFlusher<G> {
+    /// Creates a new TLB flusher with the specified CPUs to be flushed.
+    ///
+    /// The flusher needs to stick to the current CPU, so please provide a
+    /// guard that implements [`PinCurrentCpu`].
+    pub fn new(target_cpus: CpuSet, pin_current_guard: G) -> Self {
+        let current_cpu = pin_current_guard.current_cpu();
+
+        let mut need_self_flush = false;
+        let mut need_remote_flush = false;
+
+        for cpu in target_cpus.iter() {
+            if cpu == current_cpu {
+                need_self_flush = true;
+            } else {
+                need_remote_flush = true;
+            }
+        }
+        Self {
+            target_cpus,
+            need_remote_flush,
+            need_self_flush,
+            _pin_current: pin_current_guard,
+        }
+    }
+
+    /// Issues a pending TLB flush request.
+    ///
+    /// On SMP systems, the notification is sent to all the relevant CPUs only
+    /// when [`Self::dispatch_tlb_flush`] is called.
+    pub fn issue_tlb_flush(&self, op: TlbFlushOp) {
+        self.issue_tlb_flush_(op, None);
+    }
+
+    /// Dispatches all the pending TLB flush requests.
+    ///
+    /// The pending requests are issued by [`Self::issue_tlb_flush`].
+    pub fn dispatch_tlb_flush(&self) {
+        if !self.need_remote_flush {
+            return;
+        }
+
+        crate::smp::inter_processor_call(&self.target_cpus, do_remote_flush);
+    }
+
+    /// Issues a TLB flush request that must happen before dropping the page.
+    ///
+    /// If we need to remove a mapped page from the page table, we can only
+    /// recycle the page after all the relevant TLB entries in all CPUs are
+    /// flushed. Otherwise, if the page is recycled for other purposes, the user
+    /// space program can still access the page through the TLB entries. This
+    /// method is designed to be used in such cases.
+    pub fn issue_tlb_flush_with(&self, op: TlbFlushOp, drop_after_flush: DynPage) {
+        self.issue_tlb_flush_(op, Some(drop_after_flush));
+    }
+
+    /// Whether the TLB flusher needs to flush the TLB entries on other CPUs.
+    pub fn need_remote_flush(&self) -> bool {
+        self.need_remote_flush
+    }
+
+    /// Whether the TLB flusher needs to flush the TLB entries on the current CPU.
+    pub fn need_self_flush(&self) -> bool {
+        self.need_self_flush
+    }
+
+    fn issue_tlb_flush_(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
+        let op = op.optimize_for_large_range();
+
+        // Fast path for single CPU cases.
+        if !self.need_remote_flush {
+            if self.need_self_flush {
+                op.perform_on_current();
+            }
+            return;
+        }
+
+        // Slow path for multi-CPU cases.
+        for cpu in self.target_cpus.iter() {
+            let mut op_queue = FLUSH_OPS.get_on_cpu(cpu).lock();
+            if let Some(drop_after_flush) = drop_after_flush.clone() {
+                PAGE_KEEPER.get_on_cpu(cpu).lock().push(drop_after_flush);
+            }
+            op_queue.push(op.clone());
+        }
+    }
+}
+
+/// The operation to flush TLB entries.
+#[derive(Debug, Clone)]
+pub enum TlbFlushOp {
+    /// Flush all TLB entries except for the global entries.
+    All,
+    /// Flush the TLB entry for the specified virtual address.
+    Address(Vaddr),
+    /// Flush the TLB entries for the specified virtual address range.
+    Range(Range<Vaddr>),
+}
+
+impl TlbFlushOp {
+    /// Performs the TLB flush operation on the current CPU.
+    pub fn perform_on_current(&self) {
+        use crate::arch::mm::{
+            tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
+        };
+        match self {
+            TlbFlushOp::All => tlb_flush_all_excluding_global(),
+            TlbFlushOp::Address(addr) => tlb_flush_addr(*addr),
+            TlbFlushOp::Range(range) => tlb_flush_addr_range(range),
+        }
+    }
+
+    fn optimize_for_large_range(self) -> Self {
+        match self {
+            TlbFlushOp::Range(range) => {
+                if range.len() > FLUSH_ALL_RANGE_THRESHOLD {
+                    TlbFlushOp::All
+                } else {
+                    TlbFlushOp::Range(range)
+                }
+            }
+            _ => self,
+        }
+    }
+}
+
+// The queues of pending requests on each CPU.
+//
+// Lock ordering: lock FLUSH_OPS before PAGE_KEEPER.
+cpu_local! {
+    static FLUSH_OPS: SpinLock<OpsStack> = SpinLock::new(OpsStack::new());
+    static PAGE_KEEPER: SpinLock<Vec<DynPage>> = SpinLock::new(Vec::new());
+}
+
+fn do_remote_flush() {
+    let preempt_guard = disable_preempt();
+    let current_cpu = preempt_guard.current_cpu();
+
+    let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
+    op_queue.flush_all();
+    PAGE_KEEPER.get_on_cpu(current_cpu).lock().clear();
+}
+
+/// If a TLB flush request covers a range larger than this threshold, we flush all.
+pub(crate) const FLUSH_ALL_RANGE_THRESHOLD: usize = 32 * PAGE_SIZE;
+
+/// If the number of pending requests exceeds this threshold, we flush all the
+/// TLB entries instead of flushing them one by one.
+const FLUSH_ALL_OPS_THRESHOLD: usize = 32;
+
+struct OpsStack {
+    ops: [Option<TlbFlushOp>; FLUSH_ALL_OPS_THRESHOLD],
+    need_flush_all: bool,
+    size: usize,
+}
+
+impl OpsStack {
+    const fn new() -> Self {
+        const ARRAY_REPEAT_VALUE: Option<TlbFlushOp> = None;
+        Self {
+            ops: [ARRAY_REPEAT_VALUE; FLUSH_ALL_OPS_THRESHOLD],
+            need_flush_all: false,
+            size: 0,
+        }
+    }
+
+    fn push(&mut self, op: TlbFlushOp) {
+        if self.need_flush_all {
+            return;
+        }
+
+        if self.size < FLUSH_ALL_OPS_THRESHOLD {
+            self.ops[self.size] = Some(op);
+            self.size += 1;
+        } else {
+            self.need_flush_all = true;
+            self.size = 0;
+        }
+    }
+
+    fn flush_all(&mut self) {
+        if self.need_flush_all {
+            crate::arch::mm::tlb_flush_all_excluding_global();
+            self.need_flush_all = false;
+        } else {
+            for i in 0..self.size {
+                if let Some(op) = &self.ops[i] {
+                    op.perform_on_current();
+                }
+            }
+        }
+
+        self.size = 0;
+    }
+}
diff --git a/ostd/src/mm/vm_space.rs b/ostd/src/mm/vm_space.rs
index 68c7ccb10..3c55d6c3b 100644
--- a/ostd/src/mm/vm_space.rs
+++ b/ostd/src/mm/vm_space.rs
@@ -9,30 +9,25 @@
 //! powerful concurrent accesses to the page table, and suffers from the same
 //! validity concerns as described in [`super::page_table::cursor`].
-use alloc::collections::vec_deque::VecDeque;
 use core::{
     ops::Range,
     sync::atomic::{AtomicPtr, Ordering},
 };
 
-use super::{
-    io::Fallible,
-    kspace::KERNEL_PAGE_TABLE,
-    page::DynPage,
-    page_table::{PageTable, UserMode},
-    PageProperty, VmReader, VmWriter, PAGE_SIZE,
-};
 use crate::{
     arch::mm::{current_page_table_paddr, PageTableEntry, PagingConsts},
     cpu::{num_cpus, CpuExceptionInfo, CpuSet, PinCurrentCpu},
     cpu_local,
     mm::{
-        page_table::{self, PageTableItem},
-        Frame, MAX_USERSPACE_VADDR,
+        io::Fallible,
+        kspace::KERNEL_PAGE_TABLE,
+        page_table::{self, PageTable, PageTableItem, UserMode},
+        tlb::{TlbFlushOp, TlbFlusher, FLUSH_ALL_RANGE_THRESHOLD},
+        Frame, PageProperty, VmReader, VmWriter, MAX_USERSPACE_VADDR,
     },
     prelude::*,
-    sync::{RwLock, RwLockReadGuard, SpinLock},
-    task::disable_preempt,
+    sync::{RwLock, RwLockReadGuard},
+    task::{disable_preempt, DisabledPreemptGuard},
     Error,
 };
 
@@ -96,11 +91,7 @@ impl VmSpace {
         Ok(self.pt.cursor_mut(va).map(|pt_cursor| {
             let activation_lock = self.activation_lock.read();
 
-            let cur_cpu = pt_cursor.preempt_guard().current_cpu();
-
             let mut activated_cpus = CpuSet::new_empty();
-            let mut need_self_flush = false;
-            let mut need_remote_flush = false;
 
             for cpu in 0..num_cpus() {
                 // The activation lock is held; other CPUs cannot activate this `VmSpace`.
@@ -108,20 +99,13 @@ impl VmSpace {
                     ACTIVATED_VM_SPACE.get_on_cpu(cpu).load(Ordering::Relaxed) as *const VmSpace;
                 if ptr == self as *const VmSpace {
                     activated_cpus.add(cpu);
-                    if cpu == cur_cpu {
-                        need_self_flush = true;
-                    } else {
-                        need_remote_flush = true;
-                    }
                 }
             }
 
             CursorMut {
                 pt_cursor,
                 activation_lock,
-                activated_cpus,
-                need_remote_flush,
-                need_self_flush,
+                flusher: TlbFlusher::new(activated_cpus, disable_preempt()),
             }
         })?)
     }
@@ -264,12 +248,9 @@ pub struct CursorMut<'a, 'b> {
     pt_cursor: page_table::CursorMut<'a, UserMode, PageTableEntry, PagingConsts>,
     #[allow(dead_code)]
    activation_lock: RwLockReadGuard<'b, ()>,
-    // Better to store them here since loading and counting them from the CPUs
-    // list brings non-trivial overhead. We have a read lock so the stored set
-    // is always a superset of actual activated CPUs.
-    activated_cpus: CpuSet,
-    need_remote_flush: bool,
-    need_self_flush: bool,
+    // We have a read lock, so the CPU set in the flusher is always a superset
+    // of actual activated CPUs.
+    flusher: TlbFlusher<DisabledPreemptGuard>,
 }
 
 impl CursorMut<'_, '_> {
@@ -298,6 +279,11 @@ impl CursorMut<'_, '_> {
         self.pt_cursor.virt_addr()
     }
 
+    /// Gets the dedicated TLB flusher for this cursor.
+    pub fn flusher(&self) -> &TlbFlusher<DisabledPreemptGuard> {
+        &self.flusher
+    }
+
     /// Map a frame into the current slot.
     ///
     /// This method will bring the cursor to the next slot after the modification.
@@ -306,9 +292,10 @@ impl CursorMut<'_, '_> {
         // SAFETY: It is safe to map untyped memory into the userspace.
         let old = unsafe { self.pt_cursor.map(frame.into(), prop) };
 
-        if old.is_some() {
-            self.issue_tlb_flush(TlbFlushOp::Address(start_va), old);
-            self.dispatch_tlb_flush();
+        if let Some(old) = old {
+            self.flusher
+                .issue_tlb_flush_with(TlbFlushOp::Address(start_va), old);
+            self.flusher.dispatch_tlb_flush();
         }
     }
 
@@ -320,25 +307,31 @@ impl CursorMut<'_, '_> {
     /// Already-absent mappings encountered by the cursor will be skipped. It
     /// is valid to unmap a range that is not mapped.
     ///
+    /// This method will issue and dispatch a TLB flush after the operation;
+    /// otherwise, memory safety would be compromised. Since TLB flushes are
+    /// costly, prefer unmapping a large `len` in a single call over splitting
+    /// the operation into multiple smaller ones.
+    ///
     /// # Panics
     ///
     /// This method will panic if `len` is not page-aligned.
     pub fn unmap(&mut self, len: usize) {
         assert!(len % super::PAGE_SIZE == 0);
         let end_va = self.virt_addr() + len;
-        let tlb_prefer_flush_all = len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE;
+        let tlb_prefer_flush_all = len > FLUSH_ALL_RANGE_THRESHOLD;
 
         loop {
             // SAFETY: It is safe to un-map memory in the userspace.
             let result = unsafe { self.pt_cursor.take_next(end_va - self.virt_addr()) };
             match result {
                 PageTableItem::Mapped { va, page, .. } => {
-                    if !self.need_remote_flush && tlb_prefer_flush_all {
+                    if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
                         // Only on single-CPU cases we can drop the page immediately before flushing.
                         drop(page);
                         continue;
                     }
-                    self.issue_tlb_flush(TlbFlushOp::Address(va), Some(page));
+                    self.flusher
+                        .issue_tlb_flush_with(TlbFlushOp::Address(va), page);
                 }
                 PageTableItem::NotMapped { .. } => {
                     break;
@@ -349,41 +342,43 @@
                 }
             }
         }
 
-        if !self.need_remote_flush && tlb_prefer_flush_all {
-            self.issue_tlb_flush(TlbFlushOp::All, None);
+        if !self.flusher.need_remote_flush() && tlb_prefer_flush_all {
+            self.flusher.issue_tlb_flush(TlbFlushOp::All);
         }
-        self.dispatch_tlb_flush();
+        self.flusher.dispatch_tlb_flush();
     }
 
-    /// Change the mapping property starting from the current slot.
+    /// Applies the operation to the next mapped slot within the range.
     ///
-    /// This method will bring the cursor forward by `len` bytes in the virtual
-    /// address space after the modification.
+    /// The range to operate on starts at the current virtual address and
+    /// spans the provided length.
     ///
-    /// The way to change the property is specified by the closure `op`.
+    /// The function stops and yields the actually protected range once it has
+    /// protected a page, regardless of whether the following pages also need
+    /// to be protected.
+    ///
+    /// It also moves the cursor forward to the next page after the protected
+    /// one. If no mapped pages exist in the following range, the cursor will
+    /// stop at the end of the range and return [`None`].
+    ///
+    /// Note that it will **NOT** flush the TLB after the operation. Please
+    /// decide for yourself when and how to flush the TLB using
+    /// [`Self::flusher`].
    ///
     /// # Panics
     ///
-    /// This method will panic if `len` is not page-aligned.
-    pub fn protect(&mut self, len: usize, mut op: impl FnMut(&mut PageProperty)) {
-        assert!(len % super::PAGE_SIZE == 0);
-        let end = self.virt_addr() + len;
-        let tlb_prefer_flush_all = len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE;
-
+    /// This function will panic if:
+    /// - the range to be protected is out of the range where the cursor
+    ///   is required to operate;
+    /// - the specified virtual address range only covers a part of a page.
+    pub fn protect_next(
+        &mut self,
+        len: usize,
+        mut op: impl FnMut(&mut PageProperty),
+    ) -> Option<Range<Vaddr>> {
         // SAFETY: It is safe to protect memory in the userspace.
-        while let Some(range) =
-            unsafe { self.pt_cursor.protect_next(end - self.virt_addr(), &mut op) }
-        {
-            if !tlb_prefer_flush_all {
-                self.issue_tlb_flush(TlbFlushOp::Range(range), None);
-            }
-        }
-
-        if tlb_prefer_flush_all {
-            self.issue_tlb_flush(TlbFlushOp::All, None);
-        }
-        self.dispatch_tlb_flush();
+        unsafe { self.pt_cursor.protect_next(len, &mut op) }
     }
 
     /// Copies the mapping from the given cursor to the current cursor.
@@ -395,6 +390,10 @@ impl CursorMut<'_, '_> {
     ///
     /// After the operation, both cursors will advance by the specified length.
     ///
+    /// Note that it will **NOT** flush the TLB after the operation. Please
+    /// make the decision yourself on when and how to flush the TLB using
+    /// the source's [`CursorMut::flusher`].
+    ///
     /// # Panics
     ///
     /// This function will panic if:
@@ -409,81 +408,13 @@ impl CursorMut<'_, '_> {
         len: usize,
         op: &mut impl FnMut(&mut PageProperty),
     ) {
-        let va = src.virt_addr();
-
         // SAFETY: Operations on user memory spaces are safe if it doesn't
         // involve dropping any pages.
-        unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) };
-
-        if len > TLB_FLUSH_ALL_THRESHOLD * PAGE_SIZE {
-            src.issue_tlb_flush(TlbFlushOp::All, None);
-        } else {
-            src.issue_tlb_flush(TlbFlushOp::Range(va..va + len), None);
-        }
-
-        src.dispatch_tlb_flush();
-    }
-
-    fn issue_tlb_flush(&self, op: TlbFlushOp, drop_after_flush: Option<DynPage>) {
-        let request = TlbFlushRequest {
-            op,
-            drop_after_flush,
-        };
-
-        // Fast path for single CPU cases.
-        if !self.need_remote_flush {
-            if self.need_self_flush {
-                request.do_flush();
-            }
-            return;
-        }
-
-        // Slow path for multi-CPU cases.
-        for cpu in self.activated_cpus.iter() {
-            let mut queue = TLB_FLUSH_REQUESTS.get_on_cpu(cpu).lock();
-            queue.push_back(request.clone());
-        }
-    }
-
-    fn dispatch_tlb_flush(&self) {
-        if !self.need_remote_flush {
-            return;
-        }
-
-        fn do_remote_flush() {
-            let preempt_guard = disable_preempt();
-            let mut requests = TLB_FLUSH_REQUESTS
-                .get_on_cpu(preempt_guard.current_cpu())
-                .lock();
-            if requests.len() > TLB_FLUSH_ALL_THRESHOLD {
-                // TODO: in most cases, we need only to flush all the TLB entries
-                // for an ASID if it is enabled.
-                crate::arch::mm::tlb_flush_all_excluding_global();
-                requests.clear();
-            } else {
-                while let Some(request) = requests.pop_front() {
-                    request.do_flush();
-                    if matches!(request.op, TlbFlushOp::All) {
-                        requests.clear();
-                        break;
-                    }
-                }
-            }
-        }
-
-        crate::smp::inter_processor_call(&self.activated_cpus.clone(), do_remote_flush);
+        unsafe { self.pt_cursor.copy_from(&mut src.pt_cursor, len, op) }
     }
 }
 
-/// The threshold used to determine whether we need to flush all TLB entries
-/// when handling a bunch of TLB flush requests. If the number of requests
-/// exceeds this threshold, the overhead incurred by flushing pages
-/// individually would surpass the overhead of flushing all entries at once.
-const TLB_FLUSH_ALL_THRESHOLD: usize = 32;
-
 cpu_local! {
-    /// The queue of pending requests.
-    static TLB_FLUSH_REQUESTS: SpinLock<VecDeque<TlbFlushRequest>> = SpinLock::new(VecDeque::new());
     /// The `Arc` pointer to the activated VM space on this CPU. If the pointer
     /// is NULL, it means that the activated page table is merely the kernel
     /// page table.
@@ -493,38 +424,6 @@ cpu_local! {
     static ACTIVATED_VM_SPACE: AtomicPtr<VmSpace> = AtomicPtr::new(core::ptr::null_mut());
 }
 
-#[derive(Debug, Clone)]
-struct TlbFlushRequest {
-    op: TlbFlushOp,
-    // If we need to remove a mapped page from the page table, we can only
-    // recycle the page after all the relevant TLB entries in all CPUs are
-    // flushed. Otherwise if the page is recycled for other purposes, the user
-    // space program can still access the page through the TLB entries.
-    #[allow(dead_code)]
-    drop_after_flush: Option<DynPage>,
-}
-
-#[derive(Debug, Clone)]
-enum TlbFlushOp {
-    All,
-    Address(Vaddr),
-    Range(Range<Vaddr>),
-}
-
-impl TlbFlushRequest {
-    /// Perform the TLB flush operation on the current CPU.
-    fn do_flush(&self) {
-        use crate::arch::mm::{
-            tlb_flush_addr, tlb_flush_addr_range, tlb_flush_all_excluding_global,
-        };
-        match &self.op {
-            TlbFlushOp::All => tlb_flush_all_excluding_global(),
-            TlbFlushOp::Address(addr) => tlb_flush_addr(*addr),
-            TlbFlushOp::Range(range) => tlb_flush_addr_range(range),
-        }
-    }
-}
-
 /// The result of a query over the VM space.
 #[derive(Debug)]
 pub enum VmItem {
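
Usage sketch (editor's note, not part of the patch): with the relaxed policy, a
cursor's `TlbFlusher` lets callers queue per-page flush requests with
`issue_tlb_flush` and pay the inter-processor notification cost once with
`dispatch_tlb_flush`, instead of flushing eagerly inside every page-table
operation. The hypothetical helper below mirrors the `VmMappingInner::protect`
change above; the function name and the read-only policy are illustrative only,
under the assumption that the caller already holds a valid `VmSpace` and a
page-aligned user-space range.

    use ostd::mm::{tlb::TlbFlushOp, PageFlags, PageProperty, Vaddr, VmSpace};

    /// Hypothetical helper: downgrade every mapped page in `range` to read-only,
    /// batching the TLB shootdowns behind a single dispatch.
    fn make_read_only(vm_space: &VmSpace, range: core::ops::Range<Vaddr>) {
        let mut cursor = vm_space.cursor_mut(&range).unwrap();
        let op = |prop: &mut PageProperty| prop.flags = PageFlags::R;

        while cursor.virt_addr() < range.end {
            if let Some(protected) = cursor.protect_next(range.end - cursor.virt_addr(), op) {
                // Queue a flush for the page range that was actually protected;
                // on SMP systems nothing is sent to other CPUs until the dispatch below.
                cursor.flusher().issue_tlb_flush(TlbFlushOp::Range(protected));
            } else {
                // No more mapped pages within the range.
                break;
            }
        }
        // A single dispatch makes the pending requests visible to every CPU
        // that has activated this `VmSpace`.
        cursor.flusher().dispatch_tlb_flush();
    }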