From f0c87a897fe813b7f06bf5a9e93c43ad9519dafd Mon Sep 17 00:00:00 2001 From: GnoCiYeH Date: Fri, 5 Apr 2024 17:54:48 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8D=E5=86=99=E8=B0=83=E5=BA=A6=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=20(#679)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## PR:重写调度模块 --- ### 完成的部分 - 实现cfs调度策略 - 搭建框架,后续功能可以迭代开发 - 目前能跑,未测试性能 ### 需要后续接力的部分 - 实现组内调度(task_group) - 实现跨核负载均衡(pelt算法) - 接入sysfs,实现参数动态调节(sched_stat等) - nice值以及priority等参数的设置及调优 --- .../src/arch/x86_64/driver/apic/apic_timer.rs | 13 +- kernel/src/arch/x86_64/driver/apic/x2apic.rs | 5 + kernel/src/arch/x86_64/interrupt/handle.rs | 8 +- kernel/src/arch/x86_64/interrupt/ipi.rs | 4 +- kernel/src/arch/x86_64/ipc/signal.rs | 4 +- kernel/src/arch/x86_64/process/idle.rs | 12 +- kernel/src/arch/x86_64/process/mod.rs | 35 + kernel/src/arch/x86_64/sched.rs | 23 +- kernel/src/driver/tty/kthread.rs | 19 +- kernel/src/exception/ipi.rs | 12 +- kernel/src/exception/softirq.rs | 8 +- kernel/src/filesystem/procfs/mod.rs | 10 +- kernel/src/include/bindings/wrapper.h | 1 - kernel/src/init/init.rs | 5 +- kernel/src/ipc/pipe.rs | 7 +- kernel/src/libs/futex/futex.rs | 5 +- kernel/src/libs/mutex.rs | 5 +- kernel/src/libs/rbtree.rs | 133 +- kernel/src/libs/spinlock.rs | 4 + kernel/src/libs/wait_queue.rs | 21 +- kernel/src/net/event_poll/mod.rs | 4 +- kernel/src/net/socket/mod.rs | 11 +- kernel/src/process/exit.rs | 4 +- kernel/src/process/fork.rs | 13 + kernel/src/process/idle.rs | 16 + kernel/src/process/kthread.rs | 12 +- kernel/src/process/mod.rs | 261 ++- kernel/src/sched/cfs.rs | 283 --- kernel/src/sched/clock.rs | 38 + kernel/src/sched/core.rs | 225 -- kernel/src/sched/cputime.rs | 107 + kernel/src/sched/fair.rs | 1801 +++++++++++++++++ kernel/src/sched/idle.rs | 67 + kernel/src/sched/mod.rs | 994 ++++++++- kernel/src/sched/pelt.rs | 260 +++ kernel/src/sched/prio.rs | 33 + kernel/src/sched/rt.rs | 235 --- kernel/src/sched/sched.h | 23 - kernel/src/sched/syscall.rs | 39 - kernel/src/syscall/mod.rs | 21 +- kernel/src/time/clocksource.rs | 5 +- kernel/src/time/jiffies.rs | 1 + kernel/src/time/sleep.rs | 5 +- kernel/src/time/timer.rs | 7 +- 44 files changed, 3733 insertions(+), 1066 deletions(-) delete mode 100644 kernel/src/sched/cfs.rs create mode 100644 kernel/src/sched/clock.rs delete mode 100644 kernel/src/sched/core.rs create mode 100644 kernel/src/sched/cputime.rs create mode 100644 kernel/src/sched/fair.rs create mode 100644 kernel/src/sched/idle.rs create mode 100644 kernel/src/sched/pelt.rs create mode 100644 kernel/src/sched/prio.rs delete mode 100644 kernel/src/sched/rt.rs delete mode 100644 kernel/src/sched/sched.h delete mode 100644 kernel/src/sched/syscall.rs diff --git a/kernel/src/arch/x86_64/driver/apic/apic_timer.rs b/kernel/src/arch/x86_64/driver/apic/apic_timer.rs index 75229f98..81928da4 100644 --- a/kernel/src/arch/x86_64/driver/apic/apic_timer.rs +++ b/kernel/src/arch/x86_64/driver/apic/apic_timer.rs @@ -1,4 +1,5 @@ use core::cell::RefCell; +use core::sync::atomic::{fence, Ordering}; use crate::arch::driver::tsc::TSCManager; use crate::arch::interrupt::TrapFrame; @@ -12,7 +13,7 @@ use crate::exception::IrqNumber; use crate::kdebug; use crate::mm::percpu::PerCpu; -use crate::sched::core::sched_update_jiffies; +use crate::process::ProcessManager; use crate::smp::core::smp_get_processor_id; use crate::smp::cpu::ProcessorId; use crate::time::clocksource::HZ; @@ -66,9 +67,10 @@ impl IrqHandler for LocalApicTimerHandler { struct LocalApicTimerIrqFlowHandler; impl IrqFlowHandler for LocalApicTimerIrqFlowHandler { - fn handle(&self, _irq_desc: &Arc, _trap_frame: &mut TrapFrame) { - LocalApicTimer::handle_irq().ok(); + fn handle(&self, _irq_desc: &Arc, trap_frame: &mut TrapFrame) { + LocalApicTimer::handle_irq(trap_frame).ok(); CurrentApic.send_eoi(); + fence(Ordering::SeqCst) } } @@ -274,8 +276,9 @@ impl LocalApicTimer { return (res.ecx & (1 << 24)) != 0; } - pub(super) fn handle_irq() -> Result { - sched_update_jiffies(); + pub(super) fn handle_irq(trap_frame: &TrapFrame) -> Result { + // sched_update_jiffies(); + ProcessManager::update_process_times(trap_frame.is_from_user()); return Ok(IrqReturn::Handled); } } diff --git a/kernel/src/arch/x86_64/driver/apic/x2apic.rs b/kernel/src/arch/x86_64/driver/apic/x2apic.rs index 83025921..3f01d2e5 100644 --- a/kernel/src/arch/x86_64/driver/apic/x2apic.rs +++ b/kernel/src/arch/x86_64/driver/apic/x2apic.rs @@ -1,3 +1,5 @@ +use core::sync::atomic::{fence, Ordering}; + use x86::msr::{ rdmsr, wrmsr, IA32_APIC_BASE, IA32_X2APIC_APICID, IA32_X2APIC_EOI, IA32_X2APIC_SIVR, IA32_X2APIC_VERSION, @@ -62,9 +64,12 @@ impl LocalAPIC for X2Apic { /// 发送 EOI (End Of Interrupt) fn send_eoi(&self) { + fence(Ordering::SeqCst); unsafe { wrmsr(IA32_X2APIC_EOI, 0); } + + fence(Ordering::SeqCst); } /// 获取 x2APIC 版本 diff --git a/kernel/src/arch/x86_64/interrupt/handle.rs b/kernel/src/arch/x86_64/interrupt/handle.rs index eeb97c14..4696aa0c 100644 --- a/kernel/src/arch/x86_64/interrupt/handle.rs +++ b/kernel/src/arch/x86_64/interrupt/handle.rs @@ -1,15 +1,13 @@ use core::intrinsics::likely; use crate::{ - arch::{ - driver::apic::{apic_timer::APIC_TIMER_IRQ_NUM, CurrentApic, LocalAPIC}, - sched::sched, - }, + arch::driver::apic::{apic_timer::APIC_TIMER_IRQ_NUM, CurrentApic, LocalAPIC}, exception::{irqdesc::irq_desc_manager, softirq::do_softirq, IrqNumber}, process::{ utils::{current_pcb_flags, current_pcb_preempt_count}, ProcessFlags, }, + sched::{SchedMode, __schedule}, }; use super::TrapFrame; @@ -47,6 +45,6 @@ unsafe extern "C" fn x86_64_do_irq(trap_frame: &mut TrapFrame, vector: u32) { if (current_pcb_flags().contains(ProcessFlags::NEED_SCHEDULE)) && vector == APIC_TIMER_IRQ_NUM.data() { - sched(); + __schedule(SchedMode::SM_PREEMPT); } } diff --git a/kernel/src/arch/x86_64/interrupt/ipi.rs b/kernel/src/arch/x86_64/interrupt/ipi.rs index 073ee2b6..87cc785d 100644 --- a/kernel/src/arch/x86_64/interrupt/ipi.rs +++ b/kernel/src/arch/x86_64/interrupt/ipi.rs @@ -254,12 +254,12 @@ impl IrqFlowHandler for X86_64IpiIrqFlowHandler { } IPI_NUM_FLUSH_TLB => { FlushTLBIpiHandler.handle(irq, None, None).ok(); + CurrentApic.send_eoi(); } _ => { kerror!("Unknown IPI: {}", irq.data()); + CurrentApic.send_eoi(); } } - - CurrentApic.send_eoi(); } } diff --git a/kernel/src/arch/x86_64/ipc/signal.rs b/kernel/src/arch/x86_64/ipc/signal.rs index c874fbe6..af4c6026 100644 --- a/kernel/src/arch/x86_64/ipc/signal.rs +++ b/kernel/src/arch/x86_64/ipc/signal.rs @@ -7,7 +7,6 @@ use crate::{ fpu::FpState, interrupt::TrapFrame, process::table::{USER_CS, USER_DS}, - sched::sched, CurrentIrqArch, MMArch, }, exception::InterruptArch, @@ -18,6 +17,7 @@ use crate::{ kerror, mm::MemoryManagementArch, process::ProcessManager, + sched::{schedule, SchedMode}, syscall::{user_access::UserBufferWriter, Syscall}, }; @@ -715,7 +715,7 @@ fn sig_stop(sig: Signal) { ); }); drop(guard); - sched(); + schedule(SchedMode::SM_NONE); // TODO 暂停进程 } /// 信号默认处理函数——继续进程 diff --git a/kernel/src/arch/x86_64/process/idle.rs b/kernel/src/arch/x86_64/process/idle.rs index b3335040..2ce5f509 100644 --- a/kernel/src/arch/x86_64/process/idle.rs +++ b/kernel/src/arch/x86_64/process/idle.rs @@ -1,11 +1,21 @@ use core::hint::spin_loop; -use crate::{arch::CurrentIrqArch, exception::InterruptArch, kBUG, process::ProcessManager}; +use crate::{ + arch::CurrentIrqArch, + exception::InterruptArch, + kBUG, + process::{ProcessFlags, ProcessManager}, + sched::{SchedMode, __schedule}, +}; impl ProcessManager { /// 每个核的idle进程 pub fn arch_idle_func() -> ! { loop { + let pcb = ProcessManager::current_pcb(); + if pcb.flags().contains(ProcessFlags::NEED_SCHEDULE) { + __schedule(SchedMode::SM_NONE); + } if CurrentIrqArch::is_irq_enabled() { unsafe { x86::halt(); diff --git a/kernel/src/arch/x86_64/process/mod.rs b/kernel/src/arch/x86_64/process/mod.rs index 32d50a99..7ee6d44e 100644 --- a/kernel/src/arch/x86_64/process/mod.rs +++ b/kernel/src/arch/x86_64/process/mod.rs @@ -563,6 +563,8 @@ pub unsafe fn arch_switch_to_user(path: String, argv: Vec, envp: Vec ! { *(trapframe_vaddr as *mut TrapFrame) = trap_frame; + compiler_fence(Ordering::SeqCst); asm!( "swapgs", "mov rsp, {trapframe_vaddr}", @@ -601,3 +604,35 @@ unsafe extern "sysv64" fn ready_to_switch_to_user( ); unreachable!() } + +// bitflags! { +// pub struct ProcessThreadFlags: u32 { +// /* +// * thread information flags +// * - these are process state flags that various assembly files +// * may need to access +// */ +// const TIF_NOTIFY_RESUME = 1 << 1; /* callback before returning to user */ +// const TIF_SIGPENDING = 1 << 2; /* signal pending */ +// const TIF_NEED_RESCHED = 1 << 3; /* rescheduling necessary */ +// const TIF_SINGLESTEP = 1 << 4; /* reenable singlestep on user return*/ +// const TIF_SSBD = 1 << 5; /* Speculative store bypass disable */ +// const TIF_SPEC_IB = 1 << 9; /* Indirect branch speculation mitigation */ +// const TIF_SPEC_L1D_FLUSH = 1 << 10; /* Flush L1D on mm switches (processes) */ +// const TIF_USER_RETURN_NOTIFY = 1 << 11; /* notify kernel of userspace return */ +// const TIF_UPROBE = 1 << 12; /* breakpointed or singlestepping */ +// const TIF_PATCH_PENDING = 1 << 13; /* pending live patching update */ +// const TIF_NEED_FPU_LOAD = 1 << 14; /* load FPU on return to userspace */ +// const TIF_NOCPUID = 1 << 15; /* CPUID is not accessible in userland */ +// const TIF_NOTSC = 1 << 16; /* TSC is not accessible in userland */ +// const TIF_NOTIFY_SIGNAL = 1 << 17; /* signal notifications exist */ +// const TIF_MEMDIE = 1 << 20; /* is terminating due to OOM killer */ +// const TIF_POLLING_NRFLAG = 1 << 21; /* idle is polling for TIF_NEED_RESCHED */ +// const TIF_IO_BITMAP = 1 << 22; /* uses I/O bitmap */ +// const TIF_SPEC_FORCE_UPDATE = 1 << 23; /* Force speculation MSR update in context switch */ +// const TIF_FORCED_TF = 1 << 24; /* true if TF in eflags artificially */ +// const TIF_BLOCKSTEP = 1 << 25; /* set when we want DEBUGCTLMSR_BTF */ +// const TIF_LAZY_MMU_UPDATES = 1 << 27; /* task is updating the mmu lazily */ +// const TIF_ADDR32 = 1 << 29; /* 32-bit address space on 64 bits */ +// } +// } diff --git a/kernel/src/arch/x86_64/sched.rs b/kernel/src/arch/x86_64/sched.rs index 0a49e9e6..68f4da6d 100644 --- a/kernel/src/arch/x86_64/sched.rs +++ b/kernel/src/arch/x86_64/sched.rs @@ -1,20 +1,19 @@ use core::hint::spin_loop; -use crate::{ - exception::InterruptArch, include::bindings::bindings::enter_syscall_int, sched::SchedArch, - smp::core::smp_get_processor_id, syscall::SYS_SCHED, -}; +use crate::{exception::InterruptArch, sched::SchedArch, smp::core::smp_get_processor_id}; use super::{driver::apic::apic_timer::apic_timer_init, CurrentIrqArch}; -/// @brief 若内核代码不处在中断上下文中,那么将可以使用本函数,发起一个sys_sched系统调用,然后运行调度器。 -/// 由于只能在中断上下文中进行进程切换,因此需要发起一个系统调用SYS_SCHED。 -#[no_mangle] -pub extern "C" fn sched() { - unsafe { - enter_syscall_int(SYS_SCHED as u64, 0, 0, 0, 0, 0, 0); - } -} +// /// @brief 若内核代码不处在中断上下文中,那么将可以使用本函数,发起一个sys_sched系统调用,然后运行调度器。 +// /// 由于只能在中断上下文中进行进程切换,因此需要发起一个系统调用SYS_SCHED。 +// #[no_mangle] +// pub extern "C" fn sched() { +// let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; +// __schedule(SchedMode::SM_NONE); +// // unsafe { +// // enter_syscall_int(SYS_SCHED as u64, 0, 0, 0, 0, 0, 0); +// // } +// } static mut BSP_INIT_OK: bool = false; diff --git a/kernel/src/driver/tty/kthread.rs b/kernel/src/driver/tty/kthread.rs index f155651f..243311c6 100644 --- a/kernel/src/driver/tty/kthread.rs +++ b/kernel/src/driver/tty/kthread.rs @@ -4,12 +4,14 @@ use alloc::{string::ToString, sync::Arc}; use kdepends::thingbuf::StaticThingBuf; use crate::{ - arch::sched::sched, + arch::CurrentIrqArch, driver::tty::virtual_terminal::virtual_console::CURRENT_VCNUM, + exception::InterruptArch, process::{ kthread::{KernelThreadClosure, KernelThreadMechanism}, - ProcessControlBlock, ProcessFlags, + ProcessControlBlock, ProcessManager, }, + sched::{schedule, SchedMode}, }; use super::tty_port::current_tty_port; @@ -35,15 +37,9 @@ fn tty_refresh_thread() -> i32 { loop { if KEYBUF.is_empty() { // 如果缓冲区为空,就休眠 - unsafe { - TTY_REFRESH_THREAD - .as_ref() - .unwrap() - .flags() - .insert(ProcessFlags::NEED_SCHEDULE) - }; - - sched(); + let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + ProcessManager::mark_sleep(true).expect("TTY_REFRESH_THREAD can not mark sleep"); + schedule(SchedMode::SM_NONE); } let to_dequeue = core::cmp::min(KEYBUF.len(), TO_DEQUEUE_MAX); @@ -69,4 +65,5 @@ pub fn send_to_tty_refresh_thread(data: &[u8]) { for item in data { KEYBUF.push(*item).ok(); } + let _ = ProcessManager::wakeup(unsafe { TTY_REFRESH_THREAD.as_ref().unwrap() }); } diff --git a/kernel/src/exception/ipi.rs b/kernel/src/exception/ipi.rs index 4a2ec9c5..81b1a461 100644 --- a/kernel/src/exception/ipi.rs +++ b/kernel/src/exception/ipi.rs @@ -1,9 +1,13 @@ use alloc::sync::Arc; use system_error::SystemError; +#[cfg(target_arch = "x86_64")] +use crate::arch::driver::apic::{CurrentApic, LocalAPIC}; + use crate::{ - arch::{sched::sched, MMArch}, + arch::MMArch, mm::MemoryManagementArch, + sched::{SchedMode, __schedule}, smp::cpu::ProcessorId, }; @@ -47,7 +51,11 @@ impl IrqHandler for KickCpuIpiHandler { _static_data: Option<&dyn IrqHandlerData>, _dynamic_data: Option>, ) -> Result { - sched(); + #[cfg(target_arch = "x86_64")] + CurrentApic.send_eoi(); + + // 被其他cpu kick时应该是抢占调度 + __schedule(SchedMode::SM_PREEMPT); Ok(IrqReturn::Handled) } } diff --git a/kernel/src/exception/softirq.rs b/kernel/src/exception/softirq.rs index 7b9beb22..06e882e8 100644 --- a/kernel/src/exception/softirq.rs +++ b/kernel/src/exception/softirq.rs @@ -3,7 +3,7 @@ use core::{ intrinsics::unlikely, mem::{self, MaybeUninit}, ptr::null_mut, - sync::atomic::{compiler_fence, AtomicI16, Ordering}, + sync::atomic::{compiler_fence, fence, AtomicI16, Ordering}, }; use alloc::{boxed::Box, sync::Arc, vec::Vec}; @@ -17,6 +17,7 @@ use crate::{ libs::rwlock::RwLock, mm::percpu::{PerCpu, PerCpuVar}, process::ProcessManager, + sched::cputime::IrqTime, smp::{core::smp_get_processor_id, cpu::ProcessorId}, time::timer::clock, }; @@ -286,6 +287,11 @@ impl<'a> Drop for RunningCountGuard<'a> { } } +#[inline(never)] pub fn do_softirq() { + fence(Ordering::SeqCst); + IrqTime::irqtime_start(); softirq_vectors().do_softirq(); + IrqTime::irqtime_account_irq(ProcessManager::current_pcb()); + fence(Ordering::SeqCst); } diff --git a/kernel/src/filesystem/procfs/mod.rs b/kernel/src/filesystem/procfs/mod.rs index 908af12f..9db13a1a 100644 --- a/kernel/src/filesystem/procfs/mod.rs +++ b/kernel/src/filesystem/procfs/mod.rs @@ -168,8 +168,8 @@ impl ProcFSInode { .map(|cpu| cpu.data() as i32) .unwrap_or(-1); - let priority = sched_info_guard.priority(); - let vrtime = sched_info_guard.virtual_runtime(); + let priority = sched_info_guard.policy(); + let vrtime = sched_info_guard.sched_entity.vruntime; pdata.append(&mut format!("\nState:\t{:?}", state).as_bytes().to_owned()); pdata.append( @@ -183,11 +183,7 @@ impl ProcFSInode { .to_owned(), ); pdata.append(&mut format!("\ncpu_id:\t{}", cpu_id).as_bytes().to_owned()); - pdata.append( - &mut format!("\npriority:\t{}", priority.data()) - .as_bytes() - .to_owned(), - ); + pdata.append(&mut format!("\npriority:\t{:?}", priority).as_bytes().to_owned()); pdata.append( &mut format!("\npreempt:\t{}", pcb.preempt_count()) .as_bytes() diff --git a/kernel/src/include/bindings/wrapper.h b/kernel/src/include/bindings/wrapper.h index ddc5c501..c82bca63 100644 --- a/kernel/src/include/bindings/wrapper.h +++ b/kernel/src/include/bindings/wrapper.h @@ -24,6 +24,5 @@ #include #include #include -#include #include