bugfix: 修复smp启动的时候,损坏0号核心的idle进程的内核栈的问题 (#711)

---------

Co-authored-by: longjin <longjin@DragonOS.org>
Co-authored-by: heyicong <heyicong@dragonos.org>
This commit is contained in:
曾俊 2024-04-10 19:00:32 +08:00 committed by GitHub
parent 9365e8017b
commit 3959e94df3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 69 additions and 36 deletions

View File

@ -1,4 +1,5 @@
#include "../common/asm.h"
#include <asm/apu_boot.h>
.align 0x1000 // 4k
@ -113,8 +114,8 @@ _apu_code64_vector:
.word 0x18,0
.align 0x1000
_apu_boot_tmp_stack_start:
// .org 0x400
ENTRY(_apu_boot_tmp_stack_start)
.skip APU_BOOT_TMP_STACK_SIZE
_apu_boot_tmp_stack_end:
ENTRY(_apu_boot_end)

View File

@ -3,6 +3,7 @@
// 2022/01/20
#include "common/asm.h"
#include <asm/apu_boot.h>
// multiboot2
// How many bytes from the start of the file we search for the header.
@ -249,7 +250,6 @@ ENTRY(_start64)
lidt IDT_POINTER(%rip)
//lidt $IDT_POINTER
movq GDT_POINTER(%rip), %r12
movq head_stack_start(%rip), %rsp
// Branch: check whether the current core is an AP (application processor)
movq $0x1b, %rcx // IA32_APIC_BASE.BSP[8]apu
@ -257,6 +257,9 @@ ENTRY(_start64)
bt $8, %rax
jnc load_apu_cr3
// BSP
movq head_stack_start(%rip), %rsp
// 2.
//
mov $__PML4E, %eax
@ -318,11 +321,11 @@ load_cr3:
load_apu_cr3:
// Page table used by AP cores
// smp__APU_START_CR3
// __APU_START_CR3
movq $__APU_START_CR3, %rax
movq 0(%rax), %rax
movq %rax, %cr3
movq _apu_boot_tmp_stack_top_addr(%rip), %rsp
jmp to_switch_seg
to_switch_seg:
@ -348,7 +351,21 @@ entry64:
movq %rax, %gs
movq %rax, %ss
movq head_stack_start(%rip), %rsp //rsp
// Check whether the current core is an AP, and select the matching stack
movq $0x1b, %rcx // IA32_APIC_BASE.BSP[8]apu
rdmsr
bt $8, %rax
jnc __set_ap_tmp_stack_start2
__set_bsp_stack_start2:
movq head_stack_start(%rip), %rsp
jmp __set_stack_start2_ok
__set_ap_tmp_stack_start2:
// AP: use the temporary boot stack
movq _apu_boot_tmp_stack_top_addr(%rip), %rsp
jmp __set_stack_start2_ok
__set_stack_start2_ok:
// GDTIDT
leaq GDT_Table(%rip), %r8
@ -485,6 +502,9 @@ go_to_ignore_int:
ENTRY(head_stack_start)
.quad BSP_IDLE_STACK_SPACE + 32768
ENTRY(_apu_boot_tmp_stack_top_addr)
.quad _apu_boot_tmp_stack_start + APU_BOOT_TMP_STACK_SIZE
//
.align 0x1000 //4k
__PML4E:

View File

@ -0,0 +1,2 @@
#pragma once
// Size, in bytes, of the temporary stack used by application processors (APs)
// while they boot. The assembly sources that include this header use it both
// to reserve the stack area (`.skip`) and to compute the stack's top address.
#define APU_BOOT_TMP_STACK_SIZE 1024

View File

@ -300,13 +300,14 @@ unsafe extern "C" fn do_general_protection(regs: &'static TrapFrame, error_code:
""
};
kerror!(
"do_general_protection(13), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t CPU: {}, \tpid: {:?}
"do_general_protection(13), \tError code: {:#x},\trsp: {:#x},\trip: {:#x},\t rflags: {:#x}\t CPU: {}, \tpid: {:?}
{}{}{}
Segment Selector Index: {:#x}\n
",
error_code,
regs.rsp,
regs.rip,
regs.rflags,
smp_get_processor_id().data(),
ProcessManager::current_pid(),
msg1, msg2, msg3,

View File

@ -460,9 +460,6 @@ unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut Arc
// mov fs, [rsi + {off_fs}]
// mov gs, [rsi + {off_gs}]
push rbp
push rax
mov [rdi + {off_rbp}], rbp
mov rbp, [rsi + {off_rbp}]
@ -509,17 +506,9 @@ unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut Arc
);
}
/// 从`switch_to_inner`返回后,执行这个函数
///
/// 也就是说,当进程再次被调度时,会从这里开始执行
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
asm!(concat!(
"
pop rax
pop rbp
"
))
#[naked]
unsafe extern "sysv64" fn switch_back() -> ! {
asm!("ret", options(noreturn));
}
pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {

View File

@ -46,11 +46,12 @@ struct ApStartStackInfo {
#[no_mangle]
unsafe extern "C" fn smp_ap_start() -> ! {
CurrentIrqArch::interrupt_disable();
let vaddr = if let Some(t) = smp_cpu_manager()
.cpuhp_state(smp_get_processor_id())
.thread()
{
t.kernel_stack().stack_max_address().data() - 16
t.kernel_stack_force_ref().stack_max_address().data() - 16
} else {
// 没有设置ap核心的栈那么就进入死循环。
loop {
@ -214,15 +215,16 @@ impl SMPArch for X86_64SMPArch {
}
fn start_cpu(cpu_id: ProcessorId, _cpu_hpstate: &CpuHpCpuState) -> Result<(), SystemError> {
kdebug!("start_cpu: cpu_id: {:#x}\n", cpu_id.data());
Self::copy_smp_start_code();
fence(Ordering::SeqCst);
ipi_send_smp_init();
fence(Ordering::SeqCst);
ipi_send_smp_startup(cpu_id)?;
fence(Ordering::SeqCst);
ipi_send_smp_startup(cpu_id)?;
fence(Ordering::SeqCst);
return Ok(());

View File

@ -33,7 +33,6 @@ fn kernel_init() -> Result<(), SystemError> {
// 由于目前加锁,速度过慢,所以先不开启双缓冲
// scm_enable_double_buffer().expect("Failed to enable double buffer");
stdio_init().expect("Failed to initialize stdio");
ahci_init().expect("Failed to initialize AHCI");
@ -55,7 +54,7 @@ fn kenrel_init_freeable() -> Result<(), SystemError> {
do_initcalls().unwrap_or_else(|err| {
panic!("Failed to initialize subsystems: {:?}", err);
});
stdio_init().expect("Failed to initialize stdio");
smp_init();
return Ok(());

View File

@ -158,11 +158,13 @@ impl IndexNode for LockedPipeInode {
_offset: usize,
len: usize,
buf: &mut [u8],
data: SpinLockGuard<FilePrivateData>,
data_guard: SpinLockGuard<FilePrivateData>,
) -> Result<usize, SystemError> {
let data = data_guard.clone();
drop(data_guard);
// 获取mode
let mode: FileMode;
if let FilePrivateData::Pipefs(pdata) = &*data {
if let FilePrivateData::Pipefs(pdata) = &data {
mode = pdata.mode;
} else {
return Err(SystemError::EBADF);

View File

@ -337,6 +337,7 @@ impl TextuiBuf<'_> {
return self.guard.as_mut().unwrap().as_mut();
}
}
pub fn put_color_in_pixel(&mut self, color: u32, index: usize) {
let index = index as isize;
match self.bit_depth {
@ -363,7 +364,7 @@ impl TextuiBuf<'_> {
};
}
_ => {
panic!("不支持的位深度!")
panic!("bidepth unsupported!")
}
}
}
@ -373,6 +374,7 @@ impl TextuiBuf<'_> {
/// Maps an `(x, y)` position to a linear, row-major buffer index.
///
/// The result is `width * y + x`, where `width` is the buffer width read from
/// the textui framework's metadata.
pub fn get_index_by_x_y(x: usize, y: usize) -> usize {
    let width = textui_framework().metadata.read().buf_info().width() as usize;
    width * y + x
}
pub fn get_start_index_by_lineid_lineindex(lineid: LineId, lineindex: LineIndex) -> usize {
// x 左上角列像素点位置
// y 左上角行像素点位置
@ -622,7 +624,6 @@ impl TextuiWindow {
/// - vline_id 要刷新的虚拟行号
/// - start 起始字符号
/// - count 要刷新的字符数量
fn textui_refresh_characters(
&mut self,
vline_id: LineId,
@ -708,7 +709,6 @@ impl TextuiWindow {
/// ## 参数
/// - window 窗口结构体
/// - vline_id 虚拟行号
fn textui_new_line(&mut self) -> Result<i32, SystemError> {
// todo: 支持在两个虚拟行之间插入一个新行
let actual_line_sum = textui_framework().actual_line.load(Ordering::SeqCst);
@ -753,7 +753,6 @@ impl TextuiWindow {
/// ## 参数
/// - window
/// - character
fn true_textui_putchar_window(
&mut self,
character: char,

View File

@ -371,6 +371,11 @@ impl<T> RwLock<T> {
pub unsafe fn get_mut(&mut self) -> &mut T {
unsafe { &mut *self.data.get() }
}
/// Returns a shared reference to the protected data without acquiring the lock.
///
/// # Safety
///
/// This bypasses the lock entirely. The caller must guarantee that the data is
/// not mutated through the lock while the returned reference is alive.
#[allow(dead_code)]
pub unsafe fn force_get_ref(&self) -> &T {
    let raw = self.data.get();
    // SAFETY: the caller upholds the aliasing contract documented above.
    unsafe { &*raw }
}
}
impl<T: Default> Default for RwLock<T> {

View File

@ -466,8 +466,13 @@ impl ProcessManager {
.expect("next_pcb is None");
// 由于进程切换前使用了SpinLockGuard::leak(),所以这里需要手动释放锁
fence(Ordering::SeqCst);
prev_pcb.arch_info.force_unlock();
fence(Ordering::SeqCst);
next_pcb.arch_info.force_unlock();
fence(Ordering::SeqCst);
}
/// 如果目标进程正在目标CPU上运行那么就让这个cpu陷入内核态
@ -818,6 +823,10 @@ impl ProcessControlBlock {
return self.kernel_stack.read();
}
/// Returns a reference to this PCB's kernel stack without taking the
/// `kernel_stack` lock.
///
/// # Safety
///
/// The lock is bypassed via `force_get_ref`. The caller must guarantee that the
/// kernel stack is not modified through the lock while the returned reference
/// is alive.
pub unsafe fn kernel_stack_force_ref(&self) -> &KernelStack {
    // SAFETY: the caller upholds the contract documented above.
    unsafe { self.kernel_stack.force_get_ref() }
}
#[inline(always)]
#[allow(dead_code)]
pub fn kernel_stack_mut(&self) -> RwLockWriteGuard<KernelStack> {

View File

@ -809,7 +809,7 @@ pub fn scheduler_tick() {
/// Runs the scheduler once with the given scheduling mode.
///
/// Asserts that the current process's preempt count is zero and then
/// delegates to `__schedule`.
///
/// # Panics
///
/// Panics if the current PCB's `preempt_count()` is not `0`.
#[inline]
pub fn schedule(sched_mod: SchedMode) {
// The guard returned by `save_and_disable_irq` is kept alive until this function returns.
let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
// Both assertions below check the same condition: the preempt count must be zero.
assert!(ProcessManager::current_pcb().preempt_count() == 0);
assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
__schedule(sched_mod);
}

View File

@ -261,6 +261,7 @@ impl SmpCpuManager {
// todo: 等待CPU启动完成
ProcessManager::wakeup(cpu_state.thread.as_ref().unwrap())?;
CurrentSMPArch::start_cpu(cpu_id, cpu_state)?;
assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
self.wait_for_ap_thread(cpu_state, cpu_state.bringup);
@ -270,7 +271,10 @@ impl SmpCpuManager {
fn wait_for_ap_thread(&self, cpu_state: &mut CpuHpCpuState, bringup: bool) {
if bringup {
cpu_state.comp_done_up.wait_for_completion().ok();
cpu_state
.comp_done_up
.wait_for_completion()
.expect("failed to wait ap thread");
} else {
todo!("wait_for_ap_thread")
}

View File

@ -259,7 +259,7 @@ pub fn schedule_timeout(mut timeout: i64) -> Result<i64, SystemError> {
let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
ProcessManager::mark_sleep(true).ok();
drop(irq_guard);
schedule(SchedMode::SM_PREEMPT);
schedule(SchedMode::SM_NONE);
return Ok(MAX_TIMEOUT);
} else if timeout < 0 {
kerror!("timeout can't less than 0");
@ -278,7 +278,7 @@ pub fn schedule_timeout(mut timeout: i64) -> Result<i64, SystemError> {
drop(irq_guard);
schedule(SchedMode::SM_PREEMPT);
schedule(SchedMode::SM_NONE);
let time_remaining: i64 = timeout - TIMER_JIFFIES.load(Ordering::SeqCst) as i64;
if time_remaining >= 0 {
// 被提前唤醒,返回剩余时间