From 60440d106249d1e422a457f8eac087a95003c0d5 Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Thu, 24 Oct 2024 23:21:19 +0800 Subject: [PATCH] Avoid deadlocks due to drops --- kernel/src/fs/epoll/epoll_file.rs | 317 +++++++++++++++++------------- 1 file changed, 183 insertions(+), 134 deletions(-) diff --git a/kernel/src/fs/epoll/epoll_file.rs b/kernel/src/fs/epoll/epoll_file.rs index a23fb99f7..754572d2c 100644 --- a/kernel/src/fs/epoll/epoll_file.rs +++ b/kernel/src/fs/epoll/epoll_file.rs @@ -34,27 +34,19 @@ use crate::{ pub struct EpollFile { // All interesting entries. interest: Mutex>, - // Entries that are probably ready (having events happened). - ready: SpinLock>, LocalIrqDisabled>, - // A guard to ensure that ready entries can be popped by one thread at a time. - pop_guard: Mutex, - // EpollFile itself is also pollable - pollee: Pollee, - // Any EpollFile is wrapped with Arc when created. - weak_self: Weak, + // A set of ready entries. + // + // Keep this in a separate `Arc` to avoid dropping `EpollFile` in the observer callback, which + // may cause deadlocks. + ready: Arc, } -struct PopGuard; - impl EpollFile { /// Creates a new epoll file. pub fn new() -> Arc { - Arc::new_cyclic(|me| Self { + Arc::new(Self { interest: Mutex::new(BTreeSet::new()), - ready: SpinLock::new(VecDeque::new()), - pop_guard: Mutex::new(PopGuard), - pollee: Pollee::new(IoEvents::empty()), - weak_self: me.clone(), + ready: Arc::new(ReadySet::new()), }) } @@ -103,7 +95,7 @@ impl EpollFile { ); } - let entry = EpollEntry::new(fd, Arc::downgrade(&file).into(), self.weak_self.clone()); + let entry = EpollEntry::new(fd, Arc::downgrade(&file).into(), self.ready.clone()); let events = entry.update(ep_event, ep_flags); let ready_entry = if !events.is_empty() { @@ -120,7 +112,7 @@ impl EpollFile { // Add the new entry to the ready list if the file is ready if let Some(entry) = ready_entry { - self.push_ready(entry); + self.ready.push(entry.observer()); } Ok(()) @@ -176,7 +168,7 @@ impl EpollFile { // Add the updated entry to the ready list if the file is ready if let Some(entry) = ready_entry { - self.push_ready(entry); + self.ready.push(entry.observer()); } Ok(()) @@ -211,50 +203,20 @@ impl EpollFile { Ok(ep_events) } - fn push_ready(&self, entry: Arc) { - // Note that we cannot take the `EpollEntryInner` lock because we are in the callback of - // the event observer. Doing so will cause dead locks due to inconsistent locking orders. - // - // We don't need to take the lock because - // - We always call `file.poll()` immediately after calling `self.set_enabled()` and - // `file.register_observer()`, so all events are caught either here or by the immediate - // poll; in other words, we don't lose any events. - // - Catching spurious events here is always fine because we always check them later before - // returning events to the user (in `EpollEntry::poll`). - if !entry.is_enabled() { - return; - } - - let mut ready = self.ready.lock(); - - if !entry.is_ready() { - entry.set_ready(&ready); - ready.push_back(Arc::downgrade(&entry)); - } - - // Even if the entry is already set to ready, - // there might be new events that we are interested in. - // Wake the poller anyway. - self.pollee.add_events(IoEvents::IN); - } - fn pop_multi_ready(&self, max_events: usize, ep_events: &mut Vec) { - let pop_guard = self.pop_guard.lock(); - - let mut limit = None; + let mut pop_iter = self.ready.lock_pop(); loop { if ep_events.len() >= max_events { break; } - // Since we're holding `pop_guard`, no one else can pop the entries from the ready - // list. This guarantees that `pop_one_ready` will pop the ready entries we see when - // `pop_multi_ready` starts executing, so that such entries are never duplicated. - let Some((entry, new_limit)) = self.pop_one_ready(limit, &pop_guard) else { + // Since we're holding `pop_guard` (in `pop_iter`), no one else can pop the entries + // from the ready list. This guarantees that `next` will pop the ready entries we see + // when `pop_multi_ready` starts executing, so that such entries are never duplicated. + let Some(entry) = pop_iter.next() else { break; }; - limit = Some(new_limit); // Poll the events. If the file is dead, we will remove the entry. let Some((ep_event, is_still_ready)) = entry.poll() else { @@ -274,50 +236,11 @@ impl EpollFile { // Add the entry back to the ready list, if necessary. if is_still_ready { - self.push_ready(entry); + self.ready.push(entry.observer()); } } } - fn pop_one_ready( - &self, - limit: Option, - _guard: &MutexGuard, - ) -> Option<(Arc, usize)> { - if limit == Some(0) { - return None; - } - - let mut ready = self.ready.lock(); - let mut limit = limit.unwrap_or_else(|| ready.len()); - - while limit > 0 { - limit -= 1; - - // Pop the front entry. Note that `_guard` and `limit` guarantee that this entry must - // exist, so we can just unwrap it. - let weak_entry = ready.pop_front().unwrap(); - - // Clear the epoll file's events if there are no ready entries. - if ready.len() == 0 { - self.pollee.del_events(IoEvents::IN); - } - - let Some(entry) = Weak::upgrade(&weak_entry) else { - // The entry has been deleted. - continue; - }; - - // Mark the entry as not ready. We can invoke `push_ready` later to add it back to the - // ready list if we need to. - entry.reset_ready(&ready); - - return Some((entry, limit)); - } - - None - } - fn warn_unsupported_flags(&self, flags: &EpollFlags) { if flags.intersects(EpollFlags::EXCLUSIVE | EpollFlags::WAKE_UP) { warn!("{:?} contains unsupported flags", flags); @@ -327,7 +250,7 @@ impl EpollFile { impl Pollable for EpollFile { fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents { - self.pollee.poll(mask, poller) + self.ready.poll(mask, poller) } } @@ -355,6 +278,115 @@ impl FileLike for EpollFile { } } +/// A set of ready epoll entries. +pub struct ReadySet { + // Entries that are probably ready (having events happened). + entries: SpinLock>, LocalIrqDisabled>, + // A guard to ensure that ready entries can be popped by one thread at a time. + pop_guard: Mutex, + // A pollee for the ready set (i.e., for `EpollFile` itself). + pollee: Pollee, +} + +struct PopGuard; + +impl ReadySet { + pub fn new() -> Self { + Self { + entries: SpinLock::new(VecDeque::new()), + pop_guard: Mutex::new(PopGuard), + pollee: Pollee::new(IoEvents::empty()), + } + } + + pub fn push(&self, observer: &EpollEntryObserver) { + // Note that we cannot take the `EpollEntryInner` lock because we are in the callback of + // the event observer. Doing so will cause dead locks due to inconsistent locking orders. + // + // We don't need to take the lock because + // - We always call `file.poll()` immediately after calling `self.set_enabled()` and + // `file.register_observer()`, so all events are caught either here or by the immediate + // poll; in other words, we don't lose any events. + // - Catching spurious events here is always fine because we always check them later before + // returning events to the user (in `EpollEntry::poll`). + if !observer.is_enabled() { + return; + } + + let mut entries = self.entries.lock(); + + if !observer.is_ready() { + observer.set_ready(&entries); + entries.push_back(observer.weak_entry().clone()) + } + + // Even if the entry is already set to ready, + // there might be new events that we are interested in. + // Wake the poller anyway. + self.pollee.add_events(IoEvents::IN); + } + + pub fn lock_pop(&self) -> ReadySetPopIter { + ReadySetPopIter { + ready_set: self, + _pop_guard: self.pop_guard.lock(), + limit: None, + } + } + + pub fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents { + self.pollee.poll(mask, poller) + } +} + +/// An iterator to pop ready entries from a [`ReadySet`]. +pub struct ReadySetPopIter<'a> { + ready_set: &'a ReadySet, + _pop_guard: MutexGuard<'a, PopGuard>, + limit: Option, +} + +impl Iterator for ReadySetPopIter<'_> { + type Item = Arc; + + fn next(&mut self) -> Option { + if self.limit == Some(0) { + return None; + } + + let mut entries = self.ready_set.entries.lock(); + let mut limit = self.limit.unwrap_or_else(|| entries.len()); + + while limit > 0 { + limit -= 1; + + // Pop the front entry. Note that `_pop_guard` and `limit` guarantee that this entry + // must exist, so we can just unwrap it. + let weak_entry = entries.pop_front().unwrap(); + + // Clear the epoll file's events if there are no ready entries. + if entries.len() == 0 { + self.ready_set.pollee.del_events(IoEvents::IN); + } + + let Some(entry) = Weak::upgrade(&weak_entry) else { + // The entry has been deleted. + continue; + }; + + // Mark the entry as not ready. We can invoke `ReadySet::push` later to add it back to + // the ready list if we need to. + entry.observer().reset_ready(&entries); + + self.limit = Some(limit); + return Some(entry); + } + + self.limit = None; + None + } +} + /// An epoll entry that is contained in an epoll file. /// /// Each epoll entry can be added, modified, or deleted by the `EpollCtl` command. @@ -363,14 +395,11 @@ pub struct EpollEntry { key: EpollEntryKey, // The event masks and flags inner: Mutex, - // Whether the entry is enabled - is_enabled: AtomicBool, - // Whether the entry is in the ready list - is_ready: AtomicBool, - // The epoll file that contains this epoll entry - weak_epoll: Weak, - // The epoll entry itself (always inside an `Arc`) - weak_self: Weak, + // The observer that receives events. + // + // Keep this in a separate `Arc` to avoid dropping `EpollEntry` in the observer callback, which + // may cause deadlocks. + observer: Arc, } #[derive(PartialEq, Eq, PartialOrd, Ord)] @@ -408,39 +437,28 @@ impl EpollEntry { pub fn new( fd: FileDesc, file: KeyableWeak, - weak_epoll: Weak, + ready_set: Arc, ) -> Arc { Arc::new_cyclic(|me| { + let observer = Arc::new(EpollEntryObserver::new(ready_set, me.clone())); + let inner = EpollEntryInner { event: EpollEvent { events: IoEvents::empty(), user_data: 0, }, flags: EpollFlags::empty(), - poller: PollHandle::new(me.clone() as _), + poller: PollHandle::new(Arc::downgrade(&observer) as _), }; Self { key: EpollEntryKey { fd, file }, inner: Mutex::new(inner), - is_enabled: AtomicBool::new(false), - is_ready: AtomicBool::new(false), - weak_epoll, - weak_self: me.clone(), + observer, } }) } - /// Get the epoll file associated with this epoll entry. - pub fn epoll_file(&self) -> Option> { - self.weak_epoll.upgrade() - } - - /// Get an instance of `Arc` that refers to this epoll entry. - pub fn self_arc(&self) -> Arc { - self.weak_self.upgrade().unwrap() - } - /// Get the file associated with this epoll entry. /// /// Since an epoll entry only holds a weak reference to the file, @@ -459,7 +477,7 @@ impl EpollEntry { let inner = self.inner.lock(); // There are no events if the entry is disabled. - if !self.is_enabled() { + if !self.observer.is_enabled() { return Some((None, false)); } @@ -483,7 +501,7 @@ impl EpollEntry { // If there are events and the epoll entry is one-shot, we need to disable the entry until // the user enables it again via `EpollCtl::Mod`. if ep_event.is_some() && inner.flags.contains(EpollFlags::ONE_SHOT) { - self.reset_enabled(&inner); + self.observer.reset_enabled(&inner); } Some((ep_event, is_still_ready)) @@ -500,7 +518,7 @@ impl EpollEntry { inner.event = event; inner.flags = flags; - self.set_enabled(&inner); + self.observer.set_enabled(&inner); file.poll(event.events, Some(&mut inner.poller)) } @@ -511,10 +529,48 @@ impl EpollEntry { pub fn shutdown(&self) { let mut inner = self.inner.lock(); - self.reset_enabled(&inner); + self.observer.reset_enabled(&inner); inner.poller.reset(); } + /// Gets the underlying observer. + pub fn observer(&self) -> &EpollEntryObserver { + &self.observer + } + + /// Get the file descriptor associated with the epoll entry. + pub fn fd(&self) -> FileDesc { + self.key.fd + } + + /// Get the file associated with this epoll entry. + pub fn file_weak(&self) -> &KeyableWeak { + &self.key.file + } +} + +/// A observer for [`EpollEntry`] that can receive events. +pub struct EpollEntryObserver { + // Whether the entry is enabled + is_enabled: AtomicBool, + // Whether the entry is in the ready list + is_ready: AtomicBool, + // The ready set of the epoll file that contains this epoll entry + ready_set: Arc, + // The epoll entry itself (always inside an `Arc`) + weak_entry: Weak, +} + +impl EpollEntryObserver { + pub fn new(ready_set: Arc, weak_entry: Weak) -> Self { + Self { + is_enabled: AtomicBool::new(false), + is_ready: AtomicBool::new(false), + ready_set, + weak_entry, + } + } + /// Returns whether the epoll entry is in the ready list. /// /// *Caution:* If this method is called without holding the lock of the ready list, the user @@ -572,22 +628,15 @@ impl EpollEntry { self.is_enabled.store(false, Ordering::Relaxed) } - /// Get the file descriptor associated with the epoll entry. - pub fn fd(&self) -> FileDesc { - self.key.fd - } - - /// Get the file associated with this epoll entry. - pub fn file_weak(&self) -> &KeyableWeak { - &self.key.file + /// Gets an instance of `Weak` that refers to the epoll entry. + pub fn weak_entry(&self) -> &Weak { + &self.weak_entry } } -impl Observer for EpollEntry { +impl Observer for EpollEntryObserver { fn on_events(&self, _events: &IoEvents) { - if let Some(epoll_file) = self.epoll_file() { - epoll_file.push_ready(self.self_arc()); - } + self.ready_set.push(self); } }