From 3472560c55540aafad785ee7e7a59081f2857b83 Mon Sep 17 00:00:00 2001
From: Zhang Junyang
Date: Sun, 23 Mar 2025 21:44:30 +0800
Subject: [PATCH] Make TLB issuing less contended

---
 ostd/src/mm/tlb.rs | 151 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 96 insertions(+), 55 deletions(-)

diff --git a/ostd/src/mm/tlb.rs b/ostd/src/mm/tlb.rs
index 8151eb51c..73407fcdb 100644
--- a/ostd/src/mm/tlb.rs
+++ b/ostd/src/mm/tlb.rs
@@ -23,24 +23,26 @@ use crate::{
 ///
 /// The flusher needs to stick to the current CPU.
 pub struct TlbFlusher<'a, G: PinCurrentCpu> {
-    /// The CPUs to be flushed.
-    ///
-    /// If the targets is `None`, the flusher will flush all the CPUs.
     target_cpus: &'a AtomicCpuSet,
     have_unsynced_flush: CpuSet,
-    pin_current: G,
+    ops_stack: OpsStack,
+    _pin_current: G,
 }
 
 impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
     /// Creates a new TLB flusher with the specified CPUs to be flushed.
     ///
+    /// The target CPUs should be a reference to an [`AtomicCpuSet`] that will
+    /// be loaded upon [`Self::dispatch_tlb_flush`].
+    ///
     /// The flusher needs to stick to the current CPU. So please provide a
    /// guard that implements [`PinCurrentCpu`].
     pub fn new(target_cpus: &'a AtomicCpuSet, pin_current_guard: G) -> Self {
         Self {
             target_cpus,
             have_unsynced_flush: CpuSet::new_empty(),
-            pin_current: pin_current_guard,
+            ops_stack: OpsStack::new(),
+            _pin_current: pin_current_guard,
         }
     }
 
@@ -49,22 +51,43 @@ impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
     /// This function does not guarantee to flush the TLB entries on either
     /// this CPU or remote CPUs. The flush requests are only performed when
     /// [`Self::dispatch_tlb_flush`] is called.
-    pub fn issue_tlb_flush(&self, op: TlbFlushOp) {
-        self.issue_tlb_flush_(op, None);
+    pub fn issue_tlb_flush(&mut self, op: TlbFlushOp) {
+        self.ops_stack.push(op, None);
+    }
+
+    /// Issues a TLB flush request that must happen before dropping the page.
+    ///
+    /// If we need to remove a mapped page from the page table, we can only
+    /// recycle the page after all the relevant TLB entries on all CPUs are
+    /// flushed. Otherwise, if the page is recycled for other purposes, the
+    /// user space program can still access the page through the stale TLB
+    /// entries. This method is designed to be used in such cases.
+    pub fn issue_tlb_flush_with(
+        &mut self,
+        op: TlbFlushOp,
+        drop_after_flush: Frame<dyn AnyFrameMeta>,
+    ) {
+        self.ops_stack.push(op, Some(drop_after_flush));
     }
 
     /// Dispatches all the pending TLB flush requests.
     ///
-    /// All previous pending requests issued by [`Self::issue_tlb_flush`]
-    /// starts to be processed after this function. But it may not be
-    /// synchronous. Upon the return of this function, the TLB entries may not
-    /// be coherent.
+    /// All previous pending requests issued by [`Self::issue_tlb_flush`] or
+    /// [`Self::issue_tlb_flush_with`] start to be processed after this
+    /// function. But the processing may not be synchronous: upon the return
+    /// of this function, the TLB entries may not yet be coherent.
     pub fn dispatch_tlb_flush(&mut self) {
+        let irq_guard = crate::trap::disable_local();
+
+        if self.ops_stack.is_empty() {
+            return;
+        }
+
         // `Release` to make sure our modification on the PT is visible to CPUs
         // that are going to activate the PT.
         let mut target_cpus = self.target_cpus.load(Ordering::Release);
 
-        let cur_cpu = self.pin_current.current_cpu();
+        let cur_cpu = irq_guard.current_cpu();
         let mut need_flush_on_self = false;
 
         if target_cpus.contains(cur_cpu) {
@@ -73,9 +96,15 @@ impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
         }
 
         for cpu in target_cpus.iter() {
-            ACK_REMOTE_FLUSH
-                .get_on_cpu(cpu)
-                .store(false, Ordering::Relaxed);
+            {
+                let mut flush_ops = FLUSH_OPS.get_on_cpu(cpu).lock();
+                flush_ops.push_from(&self.ops_stack);
+
+                // Clear ACK before dropping the lock to avoid false ACKs.
+                ACK_REMOTE_FLUSH
+                    .get_on_cpu(cpu)
+                    .store(false, Ordering::Relaxed);
+            }
             self.have_unsynced_flush.add(cpu);
         }
 
@@ -83,7 +112,9 @@ impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
 
         // Flush ourselves after sending all IPIs to save some time.
         if need_flush_on_self {
-            do_remote_flush();
+            self.ops_stack.flush_all();
+        } else {
+            self.ops_stack.clear_without_flush();
         }
     }
 
@@ -109,42 +140,17 @@ impl<'a, G: PinCurrentCpu> TlbFlusher<'a, G> {
         );
 
         for cpu in self.have_unsynced_flush.iter() {
-            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Acquire) {
+            while !ACK_REMOTE_FLUSH.get_on_cpu(cpu).load(Ordering::Relaxed) {
                 core::hint::spin_loop();
             }
         }
 
         self.have_unsynced_flush = CpuSet::new_empty();
     }
-
-    /// Issues a TLB flush request that must happen before dropping the page.
-    ///
-    /// If we need to remove a mapped page from the page table, we can only
-    /// recycle the page after all the relevant TLB entries in all CPUs are
-    /// flushed. Otherwise if the page is recycled for other purposes, the user
-    /// space program can still access the page through the TLB entries. This
-    /// method is designed to be used in such cases.
-    pub fn issue_tlb_flush_with(&self, op: TlbFlushOp, drop_after_flush: Frame<dyn AnyFrameMeta>) {
-        self.issue_tlb_flush_(op, Some(drop_after_flush));
-    }
-
-    fn issue_tlb_flush_(&self, op: TlbFlushOp, drop_after_flush: Option<Frame<dyn AnyFrameMeta>>) {
-        let op = op.optimize_for_large_range();
-
-        // `Release` to make sure our modification on the PT is visible to CPUs
-        // that are going to activate the PT.
-        let target_cpus = self.target_cpus.load(Ordering::Release);
-
-        // Slow path for multi-CPU cases.
-        for cpu in target_cpus.iter() {
-            let mut op_queue = FLUSH_OPS.get_on_cpu(cpu).lock();
-            op_queue.push(op.clone(), drop_after_flush.clone());
-        }
-    }
 }
 
 /// The operation to flush TLB entries.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub enum TlbFlushOp {
     /// Flush all TLB entries except for the global entries.
     All,
@@ -189,14 +195,23 @@ cpu_local! {
 }
 
 fn do_remote_flush() {
-    let current_cpu = crate::cpu::current_cpu_racy(); // Safe because we are in IRQs.
+    // No races because we are in IRQ context or have preemption disabled.
+    let current_cpu = crate::cpu::current_cpu_racy();
 
-    let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
-    op_queue.flush_all();
+    let mut new_op_queue = OpsStack::new();
+    {
+        let mut op_queue = FLUSH_OPS.get_on_cpu(current_cpu).lock();
 
-    ACK_REMOTE_FLUSH
-        .get_on_cpu(current_cpu)
-        .store(true, Ordering::Release);
+        core::mem::swap(&mut *op_queue, &mut new_op_queue);
+
+        // ACK before dropping the lock so that we won't miss flush requests.
+        ACK_REMOTE_FLUSH
+            .get_on_cpu(current_cpu)
+            .store(true, Ordering::Relaxed);
+    }
+    // Drop the lock quickly to avoid contention. ACKing before flushing is
+    // fine since we cannot switch back to user space now.
+    new_op_queue.flush_all();
 }
 
 /// If a TLB flushing request exceeds this threshold, we flush all.
@@ -223,6 +238,10 @@ impl OpsStack {
         }
     }
 
+    fn is_empty(&self) -> bool {
+        !self.need_flush_all && self.size == 0
+    }
+
     fn push(&mut self, op: TlbFlushOp, drop_after_flush: Option<Frame<dyn AnyFrameMeta>>) {
         if let Some(frame) = drop_after_flush {
             self.page_keeper.push(frame);
@@ -231,13 +250,32 @@ impl OpsStack {
         if self.need_flush_all {
             return;
         }
-
-        if self.size < FLUSH_ALL_OPS_THRESHOLD {
-            self.ops[self.size] = Some(op);
-            self.size += 1;
-        } else {
+        let op = op.optimize_for_large_range();
+        if op == TlbFlushOp::All || self.size >= FLUSH_ALL_OPS_THRESHOLD {
             self.need_flush_all = true;
             self.size = 0;
+            return;
+        }
+
+        self.ops[self.size] = Some(op);
+        self.size += 1;
+    }
+
+    fn push_from(&mut self, other: &OpsStack) {
+        self.page_keeper.extend(other.page_keeper.iter().cloned());
+
+        if self.need_flush_all {
+            return;
+        }
+        if other.need_flush_all || self.size + other.size >= FLUSH_ALL_OPS_THRESHOLD {
+            self.need_flush_all = true;
+            self.size = 0;
+            return;
+        }
+
+        for i in 0..other.size {
+            self.ops[self.size] = other.ops[i].clone();
+            self.size += 1;
         }
     }
 
@@ -252,9 +290,12 @@ impl OpsStack {
             }
         }
 
+        self.clear_without_flush();
+    }
+
+    fn clear_without_flush(&mut self) {
         self.need_flush_all = false;
         self.size = 0;
-        self.page_keeper.clear();
     }
 }
 
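Usage note (an illustration, not part of the diff above): the reworked API is meant to be driven in an issue/dispatch/sync pattern. Issuing now only appends to the flusher's private `OpsStack` without touching any per-CPU lock; a single `dispatch_tlb_flush` publishes the whole batch to each target CPU's `FLUSH_OPS` queue, clears the per-CPU ACK flags, and sends the IPIs; waiting for the ACKs is only needed when the caller must know remote TLBs are coherent, e.g. before reusing unmapped frames. The sketch below shows such a caller under assumptions: the helper name `unmap_and_recycle`, its parameters, and the exact import paths are hypothetical, and `TlbFlushOp::Address`, `disable_preempt`, and the waiting method `sync_tlb_flush` come from the surrounding ostd code rather than from this patch.

// A hypothetical caller inside ostd, sketched for illustration only.
use alloc::vec::Vec;

use crate::{
    cpu::AtomicCpuSet,                      // assumed import path
    mm::{
        frame::{meta::AnyFrameMeta, Frame}, // assumed import path
        tlb::{TlbFlushOp, TlbFlusher},
        Vaddr,
    },
    task::disable_preempt,                  // assumed to yield a `PinCurrentCpu` guard
};

fn unmap_and_recycle(
    activated_cpus: &AtomicCpuSet,                   // CPUs that may cache the old mappings
    unmapped: Vec<(Vaddr, Frame<dyn AnyFrameMeta>)>, // pages already removed from the page table
) {
    // Pin to the current CPU for the lifetime of the flusher.
    let mut flusher = TlbFlusher::new(activated_cpus, disable_preempt());

    for (va, frame) in unmapped {
        // Each issue only appends to the flusher's CPU-local `OpsStack`; the
        // frame is kept alive by the flusher until the flush is performed.
        flusher.issue_tlb_flush_with(TlbFlushOp::Address(va), frame);
    }

    // One pass over the target CPUs: lock each remote `FLUSH_OPS` queue once,
    // push the whole batch, clear its ACK flag, then send the IPIs and flush
    // the local TLB from the same batch.
    flusher.dispatch_tlb_flush();

    // Spin until every remote CPU has ACKed its flush; after this, the frames
    // queued above are safe to recycle.
    flusher.sync_tlb_flush();
}

The contention win comes from the batching: each remote `FLUSH_OPS` spin lock is taken once per dispatch (and once per remote drain in `do_remote_flush`) instead of once per issued operation, and a dispatch whose only target is the current CPU takes no remote lock at all.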