Treat thread oops with better care to avoid silent panics

This commit is contained in:
Zhang Junyang 2024-10-21 17:32:28 +08:00 committed by Tate, Hongliang Tian
parent b7d101b98c
commit 4658b62f90
5 changed files with 45 additions and 29 deletions

View File

@ -15,8 +15,11 @@
#![feature(format_args_nl)] #![feature(format_args_nl)]
#![feature(int_roundings)] #![feature(int_roundings)]
#![feature(let_chains)] #![feature(let_chains)]
#![feature(linked_list_cursors)]
#![feature(linked_list_remove)] #![feature(linked_list_remove)]
#![feature(linked_list_retain)]
#![feature(negative_impls)] #![feature(negative_impls)]
#![feature(panic_can_unwind)]
#![feature(register_tool)] #![feature(register_tool)]
// FIXME: This feature is used to support vm capability now as a workaround. // FIXME: This feature is used to support vm capability now as a workaround.
// Since this is an incomplete feature, using this feature is unsafe. // Since this is an incomplete feature, using this feature is unsafe.
@ -25,8 +28,6 @@
#![feature(step_trait)] #![feature(step_trait)]
#![feature(trait_alias)] #![feature(trait_alias)]
#![feature(trait_upcasting)] #![feature(trait_upcasting)]
#![feature(linked_list_retain)]
#![feature(linked_list_cursors)]
#![register_tool(component_access_control)] #![register_tool(component_access_control)]
use core::sync::atomic::Ordering; use core::sync::atomic::Ordering;
@ -66,7 +67,6 @@ pub mod events;
pub mod fs; pub mod fs;
pub mod ipc; pub mod ipc;
pub mod net; pub mod net;
pub mod oops;
pub mod prelude; pub mod prelude;
mod process; mod process;
mod sched; mod sched;

View File

@ -5,7 +5,7 @@ use ostd::{
task::{Task, TaskOptions}, task::{Task, TaskOptions},
}; };
use super::{status::ThreadStatus, Thread}; use super::{oops, status::ThreadStatus, Thread};
use crate::{prelude::*, sched::priority::Priority}; use crate::{prelude::*, sched::priority::Priority};
/// The inner data of a kernel thread /// The inner data of a kernel thread
@ -45,7 +45,7 @@ impl KernelThreadExt for Thread {
pub fn create_new_kernel_task(mut thread_options: ThreadOptions) -> Arc<Task> { pub fn create_new_kernel_task(mut thread_options: ThreadOptions) -> Arc<Task> {
let task_fn = thread_options.take_func(); let task_fn = thread_options.take_func();
let thread_fn = move || { let thread_fn = move || {
let _ = crate::oops::catch_panics_as_oops(task_fn); let _ = oops::catch_panics_as_oops(task_fn);
// Ensure that the thread exits. // Ensure that the thread exits.
current_thread!().exit(); current_thread!().exit();
}; };

View File

@ -14,6 +14,7 @@ use crate::{
pub mod exception; pub mod exception;
pub mod kernel_thread; pub mod kernel_thread;
pub mod oops;
pub mod status; pub mod status;
pub mod task; pub mod task;
pub mod work_queue; pub mod work_queue;

View File

@ -4,7 +4,8 @@
//! //!
//! In Asterinas, a Rust panic leads to a kernel "oops". A kernel oops behaves //! In Asterinas, a Rust panic leads to a kernel "oops". A kernel oops behaves
//! as an exceptional control flow event. If kernel oopses happened too many //! as an exceptional control flow event. If kernel oopses happened too many
//! times, the kernel panics and the system gets halted. //! times, the kernel panics and the system gets halted. Kernel oops are per-
//! thread, so one thread's oops does not affect other threads.
//! //!
//! Though we can recover from Rust panics, it is generally not recommended //! Though we can recover from Rust panics, it is generally not recommended
//! to use Rust panics as a general exception handling mechanism. Handling //! to use Rust panics as a general exception handling mechanism. Handling
@ -12,6 +13,7 @@
use alloc::{ use alloc::{
boxed::Box, boxed::Box,
format,
string::{String, ToString}, string::{String, ToString},
sync::Arc, sync::Arc,
}; };
@ -22,7 +24,7 @@ use core::{
use ostd::panic; use ostd::panic;
use crate::{current_thread, Thread}; use super::Thread;
// TODO: Control the kernel commandline parsing from the kernel crate. // TODO: Control the kernel commandline parsing from the kernel crate.
// In Linux it can be dynamically changed by writing to // In Linux it can be dynamically changed by writing to
@ -49,10 +51,6 @@ pub fn catch_panics_as_oops<F, R>(f: F) -> Result<R, OopsInfo>
where where
F: FnOnce() -> R, F: FnOnce() -> R,
{ {
if PANIC_ON_OOPS.load(Ordering::Relaxed) {
return Ok(f());
}
let result = panic::catch_unwind(f); let result = panic::catch_unwind(f);
match result { match result {
@ -64,12 +62,9 @@ where
let count = OOPS_COUNT.fetch_add(1, Ordering::Relaxed); let count = OOPS_COUNT.fetch_add(1, Ordering::Relaxed);
if count >= MAX_OOPS_COUNT { if count >= MAX_OOPS_COUNT {
// Too many oops. Panic the kernel. // Too many oops. Abort the kernel.
// log::error!("Too many oops. The kernel panics.");
// Note that for nested `catch_panics_as_oops` it still works as panic::abort();
// expected. The outer `catch_panics_as_oops` will catch the panic
// and found that the oops count is too high, then panic the kernel.
panic!("Too many oops. The kernel panics.");
} }
Err(*info) Err(*info)
@ -86,19 +81,39 @@ static OOPS_COUNT: AtomicUsize = AtomicUsize::new(0);
#[ostd::panic_handler] #[ostd::panic_handler]
fn panic_handler(info: &core::panic::PanicInfo) -> ! { fn panic_handler(info: &core::panic::PanicInfo) -> ! {
let message = info.message().to_string(); let message = info.message();
let thread = current_thread!();
// Raise the panic and expect it to be caught. if let Some(thread) = Thread::current() {
// TODO: eliminate the need for heap allocation. let panic_on_oops = PANIC_ON_OOPS.load(Ordering::Relaxed);
panic::begin_panic(Box::new(OopsInfo { if !panic_on_oops && info.can_unwind() {
message: message.clone(), // TODO: eliminate the need for heap allocation.
thread, let message = if let Some(location) = info.location() {
})); format!("{} at {}:{}", message, location.file(), location.line())
} else {
message.to_string()
};
// Raise the panic and expect it to be caught.
panic::begin_panic(Box::new(OopsInfo { message, thread }));
}
}
// Halt the system if the panic is not caught. // Halt the system if the panic is not caught.
log::error!("Uncaught panic! {:#?}", message); if let Some(location) = info.location() {
log::error!(
"Uncaught panic: {}\nat {}:{}",
message,
location.file(),
location.line(),
);
} else {
log::error!("Uncaught panic: {}", message);
}
if info.can_unwind() {
panic::print_stack_trace();
} else {
log::error!("Backtrace is disabled.");
}
panic::print_stack_trace();
panic::abort(); panic::abort();
} }

View File

@ -5,7 +5,7 @@ use ostd::{
user::{ReturnReason, UserContextApi, UserMode, UserSpace}, user::{ReturnReason, UserContextApi, UserMode, UserSpace},
}; };
use super::Thread; use super::{oops, Thread};
use crate::{ use crate::{
cpu::LinuxAbi, cpu::LinuxAbi,
get_current_userspace, get_current_userspace,
@ -92,7 +92,7 @@ pub fn create_new_user_task(user_space: Arc<UserSpace>, thread_ref: Arc<Thread>)
TaskOptions::new(|| { TaskOptions::new(|| {
// TODO: If a kernel "oops" is caught, we should kill the entire // TODO: If a kernel "oops" is caught, we should kill the entire
// process rather than just ending the thread. // process rather than just ending the thread.
let _ = crate::oops::catch_panics_as_oops(user_task_entry); let _ = oops::catch_panics_as_oops(user_task_entry);
}) })
.data(thread_ref) .data(thread_ref)
.user_space(Some(user_space)) .user_space(Some(user_space))