Treat thread oops with better care to avoid silent panics

This commit is contained in:
Zhang Junyang
2024-10-21 17:32:28 +08:00
committed by Tate, Hongliang Tian
parent b7d101b98c
commit 4658b62f90
5 changed files with 45 additions and 29 deletions

View File

@ -5,7 +5,7 @@ use ostd::{
task::{Task, TaskOptions},
};
use super::{status::ThreadStatus, Thread};
use super::{oops, status::ThreadStatus, Thread};
use crate::{prelude::*, sched::priority::Priority};
/// The inner data of a kernel thread
@ -45,7 +45,7 @@ impl KernelThreadExt for Thread {
pub fn create_new_kernel_task(mut thread_options: ThreadOptions) -> Arc<Task> {
let task_fn = thread_options.take_func();
let thread_fn = move || {
let _ = crate::oops::catch_panics_as_oops(task_fn);
let _ = oops::catch_panics_as_oops(task_fn);
// Ensure that the thread exits.
current_thread!().exit();
};

View File

@ -14,6 +14,7 @@ use crate::{
pub mod exception;
pub mod kernel_thread;
pub mod oops;
pub mod status;
pub mod task;
pub mod work_queue;

119
kernel/src/thread/oops.rs Normal file
View File

@ -0,0 +1,119 @@
// SPDX-License-Identifier: MPL-2.0
//! Kernel "oops" handling.
//!
//! In Asterinas, a Rust panic leads to a kernel "oops". A kernel oops behaves
//! as an exceptional control flow event. If kernel oopses happen too many
//! times, the kernel panics and the system gets halted. Kernel oopses are
//! per-thread, so one thread's oops does not affect other threads.
//!
//! Though we can recover from Rust panics, it is generally not recommended
//! to use Rust panics as a general exception handling mechanism. Handling
//! exceptions with [`Result`] is more idiomatic.
use alloc::{
boxed::Box,
format,
string::{String, ToString},
sync::Arc,
};
use core::{
result::Result,
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
};
use ostd::panic;
use super::Thread;
/// Whether an oops halts the whole system instead of being raised as a
/// catchable panic. Read by the panic handler; defaults to `true`.
// TODO: Control the kernel commandline parsing from the kernel crate.
// In Linux it can be dynamically changed by writing to
// `/proc/sys/kernel/panic_on_oops`.
static PANIC_ON_OOPS: AtomicBool = AtomicBool::new(true);
/// The kernel "oops" information.
///
/// The panic handler boxes a value of this type and raises it as the panic
/// payload; `catch_panics_as_oops` hands it back to the caller as the error
/// value.
pub struct OopsInfo {
/// The "oops" message: the panic message, followed by ` at <file>:<line>`
/// when the panic location is known.
pub message: String,
/// The thread where the "oops" happened, i.e. the current thread at the
/// time the panic handler ran.
pub thread: Arc<Thread>,
}
/// Executes the given function and catches any panics that occur.
///
/// All the panics in the given function will be regarded as oopses.
///
/// # Returns
///
/// - `Ok(value)` with the return value of `f` if it finishes without
///   panicking.
/// - `Err(info)` with the [`OopsInfo`] carried by the panic if an oops
///   happens.
///
/// If the kernel is configured to panic on oops, this function will not return
/// when an oops happens.
///
/// Every caught oops is logged and counted. Once `MAX_OOPS_COUNT` oopses have
/// already been recorded, the next one aborts the kernel instead of being
/// returned.
///
/// # Panics
///
/// Panics if the caught panic payload is not an [`OopsInfo`].
pub fn catch_panics_as_oops<F, R>(f: F) -> Result<R, OopsInfo>
where
    F: FnOnce() -> R,
{
    let result = panic::catch_unwind(f);

    match result {
        Ok(result) => Ok(result),
        Err(err) => {
            // The panic handler in this module only ever raises `OopsInfo`
            // payloads, so any other payload type is an invariant violation.
            let info = err
                .downcast::<OopsInfo>()
                .expect("the panic payload must be the `OopsInfo` raised by the panic handler");

            log::error!("Oops! {}", info.message);

            // `fetch_add` returns the value *before* the increment, i.e. the
            // number of oopses recorded prior to this one.
            let count = OOPS_COUNT.fetch_add(1, Ordering::Relaxed);
            if count >= MAX_OOPS_COUNT {
                // Too many oops. Abort the kernel.
                log::error!("Too many oops. The kernel panics.");
                panic::abort();
            }

            Err(*info)
        }
    }
}
/// The maximum number of oops allowed before the kernel panics.
///
/// It is the same as Linux's default value.
const MAX_OOPS_COUNT: usize = 10_000;
/// The global number of oopses caught so far; `catch_panics_as_oops`
/// increments it once for every oops it catches.
static OOPS_COUNT: AtomicUsize = AtomicUsize::new(0);
#[ostd::panic_handler]
fn panic_handler(info: &core::panic::PanicInfo) -> ! {
let message = info.message();
if let Some(thread) = Thread::current() {
let panic_on_oops = PANIC_ON_OOPS.load(Ordering::Relaxed);
if !panic_on_oops && info.can_unwind() {
// TODO: eliminate the need for heap allocation.
let message = if let Some(location) = info.location() {
format!("{} at {}:{}", message, location.file(), location.line())
} else {
message.to_string()
};
// Raise the panic and expect it to be caught.
panic::begin_panic(Box::new(OopsInfo { message, thread }));
}
}
// Halt the system if the panic is not caught.
if let Some(location) = info.location() {
log::error!(
"Uncaught panic: {}\nat {}:{}",
message,
location.file(),
location.line(),
);
} else {
log::error!("Uncaught panic: {}", message);
}
if info.can_unwind() {
panic::print_stack_trace();
} else {
log::error!("Backtrace is disabled.");
}
panic::abort();
}

View File

@ -5,7 +5,7 @@ use ostd::{
user::{ReturnReason, UserContextApi, UserMode, UserSpace},
};
use super::Thread;
use super::{oops, Thread};
use crate::{
cpu::LinuxAbi,
get_current_userspace,
@ -92,7 +92,7 @@ pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Arc<Thread>)
TaskOptions::new(|| {
// TODO: If a kernel "oops" is caught, we should kill the entire
// process rather than just ending the thread.
let _ = crate::oops::catch_panics_as_oops(user_task_entry);
let _ = oops::catch_panics_as_oops(user_task_entry);
})
.data(thread_ref)
.user_space(Some(user_space))