Strict TLB coherence

Zhang Junyang
2025-03-21 17:22:05 +08:00
committed by Tate, Hongliang Tian
parent 30ec0be210
commit 4f0acddfd4
8 changed files with 109 additions and 19 deletions

View File

@@ -457,6 +457,7 @@ impl Vmar_ {
            }
            cur_cursor.flusher().issue_tlb_flush(TlbFlushOp::All);
            cur_cursor.flusher().dispatch_tlb_flush();
+           cur_cursor.flusher().sync_tlb_flush();
        }
        Ok(new_vmar_)
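
For context, this commit completes the flusher protocol into a three-step sequence. A minimal sketch of the pattern, assuming a `CursorMut` over the just-modified region (the helper function is hypothetical; `issue_tlb_flush`, `dispatch_tlb_flush`, and `sync_tlb_flush` are from the diff):

    // Hypothetical helper illustrating the protocol; not code from the commit.
    fn flush_after_modification(cursor: &mut CursorMut<'_, '_>) {
        // 1. Record what must be flushed (here: all TLB entries).
        cursor.flusher().issue_tlb_flush(TlbFlushOp::All);
        // 2. Send IPIs to the target CPUs; returns without waiting.
        cursor.flusher().dispatch_tlb_flush();
        // 3. New in this commit: spin until every target CPU acknowledges,
        //    so the TLB is strictly coherent when this returns.
        cursor.flusher().sync_tlb_flush();
    }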

View File

@ -188,6 +188,7 @@ impl VmMapping {
prop.flags |= new_flags; prop.flags |= new_flags;
cursor.map(new_frame.into(), prop); cursor.map(new_frame.into(), prop);
} }
cursor.flusher().sync_tlb_flush();
} }
VmItem::NotMapped { .. } => { VmItem::NotMapped { .. } => {
// Map a new frame to the page fault address. // Map a new frame to the page fault address.
@ -385,6 +386,8 @@ impl VmMapping {
let range = self.range(); let range = self.range();
let mut cursor = vm_space.cursor_mut(&range)?; let mut cursor = vm_space.cursor_mut(&range)?;
cursor.unmap(range.len()); cursor.unmap(range.len());
cursor.flusher().dispatch_tlb_flush();
cursor.flusher().sync_tlb_flush();
Ok(()) Ok(())
} }
@ -404,6 +407,7 @@ impl VmMapping {
} }
} }
cursor.flusher().dispatch_tlb_flush(); cursor.flusher().dispatch_tlb_flush();
cursor.flusher().sync_tlb_flush();
Self { perms, ..self } Self { perms, ..self }
} }
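
The reason the unmap path must now both dispatch and synchronize: without the wait, a frame can be recycled while a remote CPU still holds a stale TLB entry pointing at it. A sketched interleaving (illustrative, not from the diff):

    // CPU 0 (unmapping)                        CPU 1 (running the same VmSpace)
    // cursor.unmap(range.len());
    // cursor.flusher().dispatch_tlb_flush();   // IPI still in flight
    //                                          touches page via stale TLB entry
    // frame freed and reused                   -> use-after-free
    // cursor.flusher().sync_tlb_flush();       // waiting here closes the window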

View File

@@ -136,6 +136,8 @@ fn ap_early_entry(local_apic_id: u32) -> ! {
    crate::arch::irq::enable_local();

+   crate::mm::tlb::register_timer_callbacks_this_cpu();
+
    // SAFETY: this function is only called once on this AP.
    unsafe {
        crate::mm::kspace::activate_kernel_page_table();

View File

@@ -106,14 +106,25 @@ pub fn all_cpus() -> impl Iterator<Item = CpuId> {
 /// The implementor must ensure that the current task is pinned to the current
 /// CPU while any one of the instances of the implemented structure exists.
 pub unsafe trait PinCurrentCpu {
-    /// Returns the number of the current CPU.
+    /// Returns the ID of the current CPU.
     fn current_cpu(&self) -> CpuId {
-        let id = CURRENT_CPU.load();
-        debug_assert_ne!(id, u32::MAX, "This CPU is not initialized");
-        CpuId(id)
+        current_cpu_racy()
     }
 }

+/// Returns the ID of the current CPU.
+///
+/// This function is safe to call, but is vulnerable to races. The returned
+/// CPU ID may be outdated if the task migrates to another CPU.
+///
+/// To ensure that the CPU ID is up-to-date, obtain it under any guard that
+/// implements the [`PinCurrentCpu`] trait.
+pub fn current_cpu_racy() -> CpuId {
+    let id = CURRENT_CPU.load();
+    debug_assert_ne!(id, u32::MAX, "This CPU is not initialized");
+    CpuId(id)
+}
+
 // SAFETY: When IRQs are disabled, the task cannot be passively preempted and
 // migrates to another CPU. If the task actively calls `yield`, it will not be
 // successful either.
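
A hedged usage sketch of the two ways to read the CPU ID (the surrounding functions are hypothetical; `current_cpu_racy`, `PinCurrentCpu`, and `disable_preempt` are from the codebase):

    // In IRQ context the task cannot migrate, so the racy read is stable.
    fn in_irq_handler() {
        let cpu = current_cpu_racy();
        // ... index per-CPU state with `cpu` ...
    }

    // In task context, pin the task first with a guard that implements
    // `PinCurrentCpu`, e.g. a preemption guard.
    fn in_task_context() {
        let guard = crate::task::disable_preempt();
        let cpu = guard.current_cpu(); // stable while `guard` is alive
        // ...
    }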

View File

@@ -106,6 +106,7 @@ unsafe fn init() {
    boot::init_after_heap();
    mm::dma::init();
+   mm::tlb::register_timer_callbacks_this_cpu();
    unsafe { arch::late_init_on_bsp() };

View File

@@ -215,7 +215,7 @@ impl KVirtArea<Tracked> {
         assert!(self.start() <= range.start && self.end() >= range.end);
         let page_table = KERNEL_PAGE_TABLE.get().unwrap();
         let mut cursor = page_table.cursor_mut(&range).unwrap();
-        let flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
+        let mut flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
         let mut va = self.start();
         for page in pages.into_iter() {
             // SAFETY: The constructor of the `KVirtArea<Tracked>` structure has already ensured this
@@ -223,11 +223,11 @@ impl KVirtArea<Tracked> {
             if let Some(old) = unsafe { cursor.map(page.into(), prop) } {
                 flusher.issue_tlb_flush_with(TlbFlushOp::Address(va), old);
                 flusher.dispatch_tlb_flush();
+                // FIXME: We should synchronize the TLB here. But we may
+                // disable IRQs here, making deadlocks very likely.
             }
             va += PAGE_SIZE;
         }
-        flusher.issue_tlb_flush(TlbFlushOp::Range(range));
-        flusher.dispatch_tlb_flush();
     }

     /// Gets the mapped tracked page.
@@ -278,13 +278,15 @@ impl KVirtArea<Untracked> {
         let page_table = KERNEL_PAGE_TABLE.get().unwrap();
         let mut cursor = page_table.cursor_mut(&va_range).unwrap();
-        let flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
+        let mut flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());

         // SAFETY: The caller of `map_untracked_pages` has ensured the safety of this mapping.
         unsafe {
             cursor.map_pa(&pa_range, prop);
         }
         flusher.issue_tlb_flush(TlbFlushOp::Range(va_range.clone()));
         flusher.dispatch_tlb_flush();
+        // FIXME: We should synchronize the TLB here. But we may
+        // disable IRQs here, making deadlocks very likely.
     }

     /// Gets the mapped untracked page.
@@ -318,7 +320,7 @@ impl<M: AllocatorSelector + 'static> Drop for KVirtArea<M> {
         let page_table = KERNEL_PAGE_TABLE.get().unwrap();
         let range = self.start()..self.end();
         let mut cursor = page_table.cursor_mut(&range).unwrap();
-        let flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
+        let mut flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
         let tlb_prefer_flush_all = self.end() - self.start() > FLUSH_ALL_RANGE_THRESHOLD;

         loop {
@@ -361,6 +363,8 @@ impl<M: AllocatorSelector + 'static> Drop for KVirtArea<M> {
             }
             flusher.dispatch_tlb_flush();
+            // FIXME: We should synchronize the TLB here. But we may
+            // disable IRQs here, making deadlocks very likely.

             // 2. free the virtual block
             let allocator = M::select_allocator();
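
The deadlock these FIXME comments anticipate, sketched (a hypothetical interleaving): `sync_tlb_flush` spins until remote CPUs acknowledge, but the acknowledgment is produced by an IRQ handler.

    // CPU 0: takes a spinlock with IRQs disabled,
    //        then waits for CPU 1's flush acknowledgment.
    // CPU 1: spins on the same spinlock with IRQs disabled,
    //        so it never runs the flush IPI handler and never acknowledges.
    // Neither makes progress. Hence `sync_tlb_flush` asserts that local IRQs
    // are enabled, and these kernel-space paths only issue and dispatch.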

View File

@@ -3,17 +3,20 @@
 //! TLB flush operations.

 use alloc::vec::Vec;
-use core::ops::Range;
+use core::{
+    ops::Range,
+    sync::atomic::{AtomicBool, Ordering},
+};

 use super::{
     frame::{meta::AnyFrameMeta, Frame},
     Vaddr, PAGE_SIZE,
 };
 use crate::{
+    arch::irq,
     cpu::{CpuSet, PinCurrentCpu},
     cpu_local,
     sync::{LocalIrqDisabled, SpinLock},
-    task::disable_preempt,
 };

 /// A TLB flusher that is aware of which CPUs are needed to be flushed.
@@ -25,6 +28,7 @@ pub struct TlbFlusher<G: PinCurrentCpu> {
     // list brings non-trivial overhead.
     need_remote_flush: bool,
     need_self_flush: bool,
+    have_unsynced_flush: bool,
     _pin_current: G,
 }
@@ -50,27 +54,74 @@ impl<G: PinCurrentCpu> TlbFlusher<G> {
             target_cpus,
             need_remote_flush,
             need_self_flush,
+            have_unsynced_flush: false,
             _pin_current: pin_current_guard,
         }
     }

     /// Issues a pending TLB flush request.
     ///
-    /// On SMP systems, the notification is sent to all the relevant CPUs only
-    /// when [`Self::dispatch_tlb_flush`] is called.
+    /// This function does not guarantee to flush the TLB entries on either
+    /// this CPU or remote CPUs; the flush requests are only performed when
+    /// [`Self::dispatch_tlb_flush`] is called.
     pub fn issue_tlb_flush(&self, op: TlbFlushOp) {
         self.issue_tlb_flush_(op, None);
     }

     /// Dispatches all the pending TLB flush requests.
     ///
-    /// The pending requests are issued by [`Self::issue_tlb_flush`].
-    pub fn dispatch_tlb_flush(&self) {
+    /// All previous pending requests issued by [`Self::issue_tlb_flush`]
+    /// start to be processed after this function. But the processing may not
+    /// be synchronous: upon the return of this function, the TLB entries may
+    /// not yet be coherent.
+    pub fn dispatch_tlb_flush(&mut self) {
         if !self.need_remote_flush {
             return;
         }

+        for cpu in self.target_cpus.iter() {
+            ACK_REMOTE_FLUSH
+                .get_on_cpu(cpu)
+                .store(false, Ordering::Relaxed);
+        }
+
         crate::smp::inter_processor_call(&self.target_cpus, do_remote_flush);
+
+        self.have_unsynced_flush = true;
+    }
+
+    /// Waits for all the previous TLB flush requests to be completed.
+    ///
+    /// After this function returns, all TLB entries corresponding to
+    /// previously dispatched TLB flush requests are guaranteed to be
+    /// coherent.
+    ///
+    /// The TLB flush requests are issued with [`Self::issue_tlb_flush`] and
+    /// dispatched with [`Self::dispatch_tlb_flush`]. This method does not
+    /// dispatch any issued requests, so it does not guarantee TLB coherence
+    /// for requests that have not been dispatched.
+    ///
+    /// # Panics
+    ///
+    /// This method panics if IRQs are disabled, since remote flushes are
+    /// processed in IRQ handlers: two CPUs waiting for each other's TLB
+    /// coherence with IRQs disabled would deadlock.
+    pub fn sync_tlb_flush(&mut self) {
+        if !self.have_unsynced_flush {
+            return;
+        }
+
+        assert!(
+            irq::is_local_enabled(),
+            "Waiting for remote flush with IRQs disabled"
+        );
+
+        for cpu in self.target_cpus.iter() {
+            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Acquire) {
+                core::hint::spin_loop();
+            }
+        }
+
+        self.have_unsynced_flush = false;
     }

     /// Issues a TLB flush request that must happen before dropping the page.
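
A minimal sketch of driving a `TlbFlusher` directly, as the kernel virtual area code above does (the address range is made up; `CpuSet::new_full` and `disable_preempt` are from the diff):

    let mut flusher = TlbFlusher::new(CpuSet::new_full(), disable_preempt());
    flusher.issue_tlb_flush(TlbFlushOp::Range(0xffff_a000_0000..0xffff_a000_1000));
    flusher.dispatch_tlb_flush(); // non-blocking; IPIs are now in flight
    flusher.sync_tlb_flush();     // blocks until all target CPUs acknowledge
                                  // (requires local IRQs to be enabled)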
@@ -154,21 +205,37 @@ impl TlbFlushOp {
         }
     }
 }

+/// Registers the timer callbacks for the TLB flush operations.
+///
+/// The timer callback checks whether there are pending TLB flush requests on
+/// the CPU. This ensures that the requests are eventually processed even if
+/// the IPIs are not received.
+///
+/// This function should be called once for each CPU during initialization.
+pub(crate) fn register_timer_callbacks_this_cpu() {
+    crate::timer::register_callback(do_remote_flush);
+}
+
 // The queues of pending requests on each CPU.
 //
 // Lock ordering: lock FLUSH_OPS before PAGE_KEEPER.
 cpu_local! {
     static FLUSH_OPS: SpinLock<OpsStack, LocalIrqDisabled> = SpinLock::new(OpsStack::new());
     static PAGE_KEEPER: SpinLock<Vec<Frame<dyn AnyFrameMeta>>, LocalIrqDisabled> = SpinLock::new(Vec::new());
+    /// Whether this CPU has finished the last remote flush request.
+    static ACK_REMOTE_FLUSH: AtomicBool = AtomicBool::new(true);
 }

 fn do_remote_flush() {
-    let preempt_guard = disable_preempt();
-    let current_cpu = preempt_guard.current_cpu();
+    let current_cpu = crate::cpu::current_cpu_racy(); // Safe because we are in IRQs.

     let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
     op_queue.flush_all();
     PAGE_KEEPER.get_on_cpu(current_cpu).lock().clear();
+
+    ACK_REMOTE_FLUSH
+        .get_on_cpu(current_cpu)
+        .store(true, Ordering::Release);
 }

 /// If a TLB flushing request exceeds this threshold, we flush all.
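
The `Release` store in `do_remote_flush` pairs with the `Acquire` load in `sync_tlb_flush`, so the waiter observes the completed flush before it stops spinning. A self-contained sketch of this acknowledgment pattern (generic, not the commit's per-CPU variant):

    use core::sync::atomic::{AtomicBool, Ordering};

    static ACK: AtomicBool = AtomicBool::new(false);

    fn responder(do_work: impl FnOnce()) {
        do_work(); // e.g. flush the local TLB
        ACK.store(true, Ordering::Release); // publish completion
    }

    fn waiter() {
        while !ACK.load(Ordering::Acquire) {
            core::hint::spin_loop();
        }
        // Everything `do_work` wrote is now visible on this CPU.
    }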

View File

@ -318,8 +318,8 @@ impl CursorMut<'_, '_> {
} }
/// Get the dedicated TLB flusher for this cursor. /// Get the dedicated TLB flusher for this cursor.
pub fn flusher(&self) -> &TlbFlusher<DisabledPreemptGuard> { pub fn flusher(&mut self) -> &mut TlbFlusher<DisabledPreemptGuard> {
&self.flusher &mut self.flusher
} }
/// Map a frame into the current slot. /// Map a frame into the current slot.