From 4823b82e4164e6aa4f58a982be8b27c07191b8d9 Mon Sep 17 00:00:00 2001 From: Zhang Junyang Date: Fri, 4 Oct 2024 21:55:05 +0800 Subject: [PATCH] Catch panics in threads as oops --- kernel/src/lib.rs | 1 + kernel/src/oops.rs | 104 +++++++++++++++++++++++++++++ kernel/src/thread/kernel_thread.rs | 4 +- kernel/src/thread/task.rs | 14 ++-- 4 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 kernel/src/oops.rs diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 99bf8065d..63803a692 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -66,6 +66,7 @@ pub mod events; pub mod fs; pub mod ipc; pub mod net; +pub mod oops; pub mod prelude; mod process; mod sched; diff --git a/kernel/src/oops.rs b/kernel/src/oops.rs new file mode 100644 index 000000000..b1b846300 --- /dev/null +++ b/kernel/src/oops.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: MPL-2.0 + +//! Kernel "oops" handling. +//! +//! In Asterinas, a Rust panic leads to a kernel "oops". A kernel oops behaves +//! as an exceptional control flow event. If kernel oopses happen too many +//! times, the kernel panics and the system gets halted. +//! +//! Though we can recover from Rust panics, it is generally not recommended +//! to use Rust panics as a general exception handling mechanism. Handling +//! exceptions with [`Result`] is more idiomatic. + +use alloc::{ + boxed::Box, + string::{String, ToString}, + sync::Arc, +}; +use core::{ + result::Result, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, +}; + +use ostd::panic; + +use crate::{current_thread, Thread}; + +// TODO: Control the kernel commandline parsing from the kernel crate. +// In Linux it can be dynamically changed by writing to +// `/proc/sys/kernel/panic_on_oops`. +static PANIC_ON_OOPS: AtomicBool = AtomicBool::new(true); + +/// The kernel "oops" information. +pub struct OopsInfo { + /// The "oops" message. + pub message: String, + /// The thread where the "oops" happened. 
+ pub thread: Arc, +} + +/// Executes the given function and catches any panics that occur. +/// +/// All the panics in the given function will be regarded as oops. If an oops +/// happens, this function returns `Err` with the oops information. Otherwise, +/// it returns `Ok` with the return value of the given function. +/// +/// If the kernel is configured to panic on oops, this function will not return +/// when an oops happens. +pub fn catch_panics_as_oops(f: F) -> Result +where + F: FnOnce() -> R, +{ + if PANIC_ON_OOPS.load(Ordering::Relaxed) { + return Ok(f()); + } + + let result = panic::catch_unwind(f); + + match result { + Ok(result) => Ok(result), + Err(err) => { + let info = err.downcast::().unwrap(); + + log::error!("Oops! {}", info.message); + + let count = OOPS_COUNT.fetch_add(1, Ordering::Relaxed); + if count >= MAX_OOPS_COUNT { + // Too many oops. Panic the kernel. + // + // Note that for nested `catch_panics_as_oops` it still works as + // expected. The outer `catch_panics_as_oops` will catch the panic + // and find that the oops count is too high, then panic the kernel. + panic!("Too many oops. The kernel panics."); + } + + Err(*info) + } + } +} + +/// The maximum number of oops allowed before the kernel panics. +/// +/// It is the same as Linux's default value. +const MAX_OOPS_COUNT: usize = 10_000; + +static OOPS_COUNT: AtomicUsize = AtomicUsize::new(0); + +#[ostd::panic_handler] +fn panic_handler(info: &core::panic::PanicInfo) -> ! { + let message = info.message().to_string(); + let thread = current_thread!(); + + // Raise the panic and expect it to be caught. + // TODO: eliminate the need for heap allocation. + panic::begin_panic(Box::new(OopsInfo { + message: message.clone(), + thread, + })); + + // Halt the system if the panic is not caught. + log::error!("Uncaught panic! 
{:#?}", message); + + panic::print_stack_trace(); + panic::abort(); +} diff --git a/kernel/src/thread/kernel_thread.rs b/kernel/src/thread/kernel_thread.rs index fc7a342d5..311e126ea 100644 --- a/kernel/src/thread/kernel_thread.rs +++ b/kernel/src/thread/kernel_thread.rs @@ -45,8 +45,8 @@ impl KernelThreadExt for Thread { pub fn create_new_kernel_task(mut thread_options: ThreadOptions) -> Arc { let task_fn = thread_options.take_func(); let thread_fn = move || { - task_fn(); - // Ensures the thread is exit + let _ = crate::oops::catch_panics_as_oops(task_fn); + // Ensure that the thread exits. current_thread!().exit(); }; diff --git a/kernel/src/thread/task.rs b/kernel/src/thread/task.rs index 13136cff6..5e2a9a3ad 100644 --- a/kernel/src/thread/task.rs +++ b/kernel/src/thread/task.rs @@ -89,9 +89,13 @@ pub fn create_new_user_task(user_space: Arc, thread_ref: Arc) debug!("exit user loop"); } - TaskOptions::new(user_task_entry) - .data(thread_ref) - .user_space(Some(user_space)) - .build() - .expect("spawn task failed") + TaskOptions::new(|| { + // TODO: If a kernel "oops" is caught, we should kill the entire + // process rather than just ending the thread. + let _ = crate::oops::catch_panics_as_oops(user_task_entry); + }) + .data(thread_ref) + .user_space(Some(user_space)) + .build() + .expect("spawn task failed") }