diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml index f9113a12..88ad306a 100644 --- a/.github/workflows/makefile.yml +++ b/.github/workflows/makefile.yml @@ -11,14 +11,14 @@ jobs: name: Format check ${{ matrix.arch }} runs-on: ubuntu-latest continue-on-error: true - container: dragonos/dragonos-dev:v1.7 + container: dragonos/dragonos-dev:v1.8 strategy: matrix: arch: [x86_64, riscv64] steps: - - run: echo "Running in dragonos/dragonos-dev:v1.7" + - run: echo "Running in dragonos/dragonos-dev:v1.8" - uses: actions/checkout@v3 - name: Format check @@ -35,14 +35,14 @@ jobs: name: Kernel static test ${{ matrix.arch }} runs-on: ubuntu-latest continue-on-error: true - container: dragonos/dragonos-dev:v1.7 + container: dragonos/dragonos-dev:v1.8 strategy: matrix: arch: [x86_64, riscv64] steps: - - run: echo "Running in dragonos/dragonos-dev:v1.7" + - run: echo "Running in dragonos/dragonos-dev:v1.8" - uses: actions/checkout@v3 @@ -55,10 +55,10 @@ jobs: build-x86_64: runs-on: ubuntu-latest - container: dragonos/dragonos-dev:v1.7 + container: dragonos/dragonos-dev:v1.8 steps: - - run: echo "Running in dragonos/dragonos-dev:v1.7" + - run: echo "Running in dragonos/dragonos-dev:v1.8" - uses: actions/checkout@v3 - name: build the DragonOS @@ -77,10 +77,10 @@ jobs: build-riscv64: runs-on: ubuntu-latest - container: dragonos/dragonos-dev:v1.7 + container: dragonos/dragonos-dev:v1.8 steps: - - run: echo "Running in dragonos/dragonos-dev:v1.7" + - run: echo "Running in dragonos/dragonos-dev:v1.8" - uses: actions/checkout@v3 with: diff --git a/docs/introduction/build_system.md b/docs/introduction/build_system.md index ab0e76c8..8b6cb21c 100644 --- a/docs/introduction/build_system.md +++ b/docs/introduction/build_system.md @@ -215,6 +215,7 @@ make run-docker ### 5.1 创建磁盘镜像   首先,您需要使用**普通用户**权限运行`tools/create_hdd_image.sh`,为DragonOS创建一块磁盘镜像文件。该脚本会自动完成创建磁盘镜像的工作,并将其移动到`bin/`目录下。 +   请注意,由于权限问题,请务必使用**普通用户**权限运行此脚本。(运行后,需要提升权限时,系统可能会要求您输入密码) diff --git a/docs/kernel/debug/debug-kernel-with-gdb.md b/docs/kernel/debug/debug-kernel-with-gdb.md index a6266e6e..99f3813c 100644 --- a/docs/kernel/debug/debug-kernel-with-gdb.md +++ b/docs/kernel/debug/debug-kernel-with-gdb.md @@ -3,6 +3,7 @@ ## 前言   GDB是一个功能强大的开源调试工具,能够帮助您更好的诊断和修复程序中的错误。 +   它提供了一套丰富的功能,使您能够检查程序的执行状态、跟踪代码的执行流程、查看和修改变量的值、分析内存状态等。它可以与编译器配合使用,以便您在调试过程中访问程序的调试信息。   此教程将告诉您如何在DragonOS中使用`rust-gdb`来调试内核,包括如何开始调试以及相应的调试命令。 @@ -29,6 +30,7 @@ debug = true ### 1.2 运行DragonOS   准备工作完成后,您就可以编译、运行DragonOS来开展后续的调试工作了。 +   在DragonOS根目录中开启终端,使用`make run`即可开始编译运行DragonOS,如需更多编译命令方面的帮助,详见 > [构建DragonOS](https://docs.dragonos.org/zh_CN/latest/introduction/build_system.html)。 diff --git a/docs/kernel/process_management/kthread.md b/docs/kernel/process_management/kthread.md index 4e69ca82..f6db6afc 100644 --- a/docs/kernel/process_management/kthread.md +++ b/docs/kernel/process_management/kthread.md @@ -9,6 +9,7 @@   内核线程的创建是通过调用`KernelThreadMechanism::create()`或者`KernelThreadMechanism::create_and_run()`函数,向`kthreadd`守护线程发送创建任务来实现的。也就是说,内核线程的创建,最终是由`kthread_daemon`来完成。   当内核线程被创建后,默认处于睡眠状态,要使用`ProcessManager::wakeup`函数将其唤醒。 +   当内核其他模块想要停止一个内核线程的时候,可以调用`KernelThreadMechanism::stop()`函数,等待内核线程的退出,然后获得返回值并清理内核线程的pcb。   内核线程应当经常检查`KernelThreadMechanism::should_stop()`的结果,以确定其是否要退出。当检测到需要退出时,内核线程返回一个返回码,即可退出。(注意资源的清理) diff --git a/docs/kernel/sched/cfs.md b/docs/kernel/sched/cfs.md index 284f11ec..488ec57e 100644 --- a/docs/kernel/sched/cfs.md +++ b/docs/kernel/sched/cfs.md @@ -16,6 +16,7 @@   ``FairSchedEntity``是完全公平调度器中最重要的结构体,他代表一个实体单位,它不止表示一个进程,它还可以是一个组或者一个用户,但是它在cfs队列中所表示的就单单是一个调度实体。这样的设计可以为上层提供更多的思路,比如上层可以把不同的进程归纳到一个调度实体从而实现组调度等功能而不需要改变调度算法。   在cfs中,整体的结构是**一棵树**,每一个调度实体作为``cfs_rq``中的一个节点,若该调度实体不是单个进程(它可能是一个进程组),则在该调度实体中还需要维护一个自己的``cfs_rq``,这样的嵌套展开后,每一个叶子节点就是一个单独的进程。需要理解这样一棵树,**在后续文档中会以这棵树为核心讲解**。 +   该结构体具体的字段意义请查阅源代码。这里提及几个重要的字段: diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 52a58a5c..efec551c 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -63,7 +63,6 @@ paste = "=1.0.14" slabmalloc = { path = "crates/rust-slabmalloc" } log = "0.4.21" kprobe = { path = "crates/kprobe" } -xarray = "0.1.0" lru = "0.12.3" rbpf = { path = "crates/rbpf" } @@ -76,6 +75,7 @@ unwinding = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/unwi "panic", "personality" ]} +defer = "0.2.1" # target为x86_64时,使用下面的依赖 [target.'cfg(target_arch = "x86_64")'.dependencies] @@ -106,4 +106,4 @@ debug = true # Controls whether the compiler passes `-g` # The release profile, used for `cargo build --release` [profile.release] -debug = false +debug = true diff --git a/kernel/crates/rbpf/src/interpreter.rs b/kernel/crates/rbpf/src/interpreter.rs index cb4bddf3..68b9878e 100644 --- a/kernel/crates/rbpf/src/interpreter.rs +++ b/kernel/crates/rbpf/src/interpreter.rs @@ -660,9 +660,9 @@ pub fn execute_program( // Save the callee saved registers pre_stack.save_registers(®[6..=9]); // Save the return address - pre_stack.save_return_address(insn_ptr as u16); + pre_stack.save_return_address(insn_ptr as u64); // save the stack pointer - pre_stack.save_sp(reg[10] as u16); + pre_stack.save_sp(reg[10]); let mut stack = StackFrame::new(); log::trace!("BPF TO BPF CALL: new pc: {} + {} = {}",insn_ptr ,insn.imm,insn_ptr + insn.imm as usize); reg[10] = stack.as_ptr() as u64 + stack.len() as u64; @@ -695,7 +695,7 @@ pub fn execute_program( // Restore the return address insn_ptr = stack.get_return_address() as usize; // Restore the stack pointer - reg[10] = stack.get_sp() as u64; + reg[10] = stack.get_sp(); log::trace!("EXIT: new pc: {}", insn_ptr); } } diff --git a/kernel/crates/rbpf/src/stack.rs b/kernel/crates/rbpf/src/stack.rs index be173263..c1596856 100644 --- a/kernel/crates/rbpf/src/stack.rs +++ b/kernel/crates/rbpf/src/stack.rs @@ -1,9 +1,9 @@ use crate::{ebpf::STACK_SIZE, vec, Vec}; pub struct StackFrame { - return_address: u16, + return_address: u64, saved_registers: [u64; 4], - sp: u16, + sp: u64, frame: Vec, } @@ -54,22 +54,22 @@ impl StackFrame { } /// Save the return address - pub fn save_return_address(&mut self, address: u16) { + pub fn save_return_address(&mut self, address: u64) { self.return_address = address; } /// Get the return address - pub fn get_return_address(&self) -> u16 { + pub fn get_return_address(&self) -> u64 { self.return_address } /// Save the stack pointer - pub fn save_sp(&mut self, sp: u16) { + pub fn save_sp(&mut self, sp: u64) { self.sp = sp; } /// Get the stack pointer - pub fn get_sp(&self) -> u16 { + pub fn get_sp(&self) -> u64 { self.sp } } diff --git a/kernel/crates/system_error/Cargo.toml b/kernel/crates/system_error/Cargo.toml index d166286a..d143c345 100644 --- a/kernel/crates/system_error/Cargo.toml +++ b/kernel/crates/system_error/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" [dependencies] num-traits = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/num-traits.git", rev="1597c1c", default-features = false } -num-derive = "0.3" \ No newline at end of file +num-derive = "0.3" diff --git a/kernel/crates/system_error/src/lib.rs b/kernel/crates/system_error/src/lib.rs index 441b4b13..a1cd1dd7 100644 --- a/kernel/crates/system_error/src/lib.rs +++ b/kernel/crates/system_error/src/lib.rs @@ -277,31 +277,51 @@ pub enum SystemError { // === 以下错误码不应该被用户态程序使用 === ERESTARTSYS = 512, - // VMX on 虚拟化开启指令出错 - EVMXONFailed = 513, - // VMX off 虚拟化关闭指令出错 - EVMXOFFFailed = 514, - // VMX VMWRITE 写入虚拟化VMCS内存出错 - EVMWRITEFailed = 515, - EVMREADFailed = 516, - EVMPRTLDFailed = 517, - EVMLAUNCHFailed = 518, - KVM_HVA_ERR_BAD = 519, + ERESTARTNOINTR = 513, + /// restart if no handler + ERESTARTNOHAND = 514, + /// 没有对应的ioctlcmd - ENOIOCTLCMD = 520, + ENOIOCTLCMD = 515, + /// restart by calling sys restart syscall + ERESTART_RESTARTBLOCK = 516, + + // === TODO: 这几个KVM的错误码不要放在这里 === + + // VMX on 虚拟化开启指令出错 + EVMXONFailed = 1513, + // VMX off 虚拟化关闭指令出错 + EVMXOFFFailed = 1514, + // VMX VMWRITE 写入虚拟化VMCS内存出错 + EVMWRITEFailed = 1515, + EVMREADFailed = 1516, + EVMPRTLDFailed = 1517, + EVMLAUNCHFailed = 1518, + KVM_HVA_ERR_BAD = 1519, + + MAXERRNO = 4095, } impl SystemError { - /// @brief 把posix错误码转换为系统错误枚举类型。 + /// 判断一个值是否是有效的posix错误码。 + pub fn is_valid_posix_errno(val: T) -> bool + where + T: PartialOrd + From, + { + let max_errno = T::from(-(Self::MAXERRNO as i32)); + val < T::from(0) && val >= max_errno + } + + /// 尝试把posix错误码转换为系统错误枚举类型。 pub fn from_posix_errno(errno: i32) -> Option { // posix 错误码是小于0的 - if errno >= 0 { + if !Self::is_valid_posix_errno(errno) { return None; } return ::from_i32(-errno); } - /// @brief 把系统错误枚举类型转换为负数posix错误码。 + /// 把系统错误枚举类型转换为负数posix错误码。 pub fn to_posix_errno(&self) -> i32 { return -::to_i32(self).unwrap(); } diff --git a/kernel/src/arch/riscv64/ipc/signal.rs b/kernel/src/arch/riscv64/ipc/signal.rs index f902e696..befb23e2 100644 --- a/kernel/src/arch/riscv64/ipc/signal.rs +++ b/kernel/src/arch/riscv64/ipc/signal.rs @@ -1,8 +1,9 @@ use log::error; use crate::{ - arch::{sched::sched, CurrentIrqArch}, + arch::{interrupt::TrapFrame, sched::sched, CurrentIrqArch}, exception::InterruptArch, + ipc::signal_types::SignalArch, process::ProcessManager, }; @@ -339,3 +340,18 @@ fn sig_continue(sig: Signal) { fn sig_ignore(_sig: Signal) { return; } + +pub struct RiscV64SignalArch; + +impl SignalArch for RiscV64SignalArch { + // TODO: 为RISCV64实现信号处理 + // 注意,rv64现在在中断/系统调用返回用户态时,没有进入 irqentry_exit() 函数, + // 到时候实现信号处理时,需要修改中断/系统调用返回用户态的代码,进入 irqentry_exit() 函数 + unsafe fn do_signal_or_restart(_frame: &mut TrapFrame) { + todo!() + } + + fn sys_rt_sigreturn(_trap_frame: &mut TrapFrame) -> u64 { + todo!() + } +} diff --git a/kernel/src/arch/riscv64/mod.rs b/kernel/src/arch/riscv64/mod.rs index a34b99a7..fc4ca249 100644 --- a/kernel/src/arch/riscv64/mod.rs +++ b/kernel/src/arch/riscv64/mod.rs @@ -27,6 +27,8 @@ pub use self::time::RiscV64TimeArch as CurrentTimeArch; pub use self::elf::RiscV64ElfArch as CurrentElfArch; +pub use self::ipc::signal::RiscV64SignalArch as CurrentSignalArch; + pub use crate::arch::smp::RiscV64SMPArch as CurrentSMPArch; pub use crate::arch::sched::RiscV64SchedArch as CurrentSchedArch; diff --git a/kernel/src/arch/x86_64/asm/entry.S b/kernel/src/arch/x86_64/asm/entry.S index 355d9938..8df6566c 100644 --- a/kernel/src/arch/x86_64/asm/entry.S +++ b/kernel/src/arch/x86_64/asm/entry.S @@ -64,9 +64,9 @@ ENTRY(ret_from_intr) // 进入信号处理流程 cli - // 将原本要返回的栈帧的栈指针传入do_signal的第一个参数 + // 将原本要返回的栈帧的栈指针传入irqentry_exit的第一个参数 movq %rsp, %rdi - callq do_signal + callq irqentry_exit cli __entry_ret_from_intr_before_gs_check_2: @@ -375,10 +375,10 @@ ENTRY(syscall_64) sti callq *%rdx //调用服务程序 - // 将原本要返回的栈帧的栈指针传入do_signal的第一个参数 + // 将原本要返回的栈帧的栈指针传入 irqentry_exit 的第一个参数 movq %rsp, %rdi - callq do_signal + callq irqentry_exit cli diff --git a/kernel/src/arch/x86_64/interrupt/mod.rs b/kernel/src/arch/x86_64/interrupt/mod.rs index e83566ca..8198a063 100644 --- a/kernel/src/arch/x86_64/interrupt/mod.rs +++ b/kernel/src/arch/x86_64/interrupt/mod.rs @@ -125,6 +125,8 @@ pub struct TrapFrame { pub es: ::core::ffi::c_ulong, pub rax: ::core::ffi::c_ulong, pub func: ::core::ffi::c_ulong, + /// - 该字段在异常发生时,保存的是错误码 + /// - 在系统调用时,由系统调用入口函数将其设置为系统调用号 pub errcode: ::core::ffi::c_ulong, pub rip: ::core::ffi::c_ulong, pub cs: ::core::ffi::c_ulong, @@ -182,6 +184,31 @@ impl TrapFrame { pub fn set_pc(&mut self, pc: usize) { self.rip = pc as u64; } + + /// 获取系统调用号 + /// + /// # Safety + /// 该函数只能在系统调用上下文中调用, + /// 在其他上下文中,该函数返回值未定义 + pub unsafe fn syscall_nr(&self) -> Option { + if self.errcode == u64::MAX { + return None; + } + Some(self.errcode as usize) + } + + /// 获取系统调用错误码 + /// + /// # Safety + /// 该函数只能在系统调用上下文中调用, + /// 在其他上下文中,该函数返回值未定义 + /// + /// # Returns + /// 返回一个 `Option`,表示系统调用的错误码。 + pub unsafe fn syscall_error(&self) -> Option { + let val = self.rax as i32; + SystemError::from_posix_errno(val) + } } impl ProbeArgs for TrapFrame { diff --git a/kernel/src/arch/x86_64/ipc/signal.rs b/kernel/src/arch/x86_64/ipc/signal.rs index 5bc2db64..25e9eed6 100644 --- a/kernel/src/arch/x86_64/ipc/signal.rs +++ b/kernel/src/arch/x86_64/ipc/signal.rs @@ -1,5 +1,6 @@ use core::{ffi::c_void, intrinsics::unlikely, mem::size_of}; +use defer::defer; use log::error; use system_error::SystemError; @@ -8,11 +9,12 @@ use crate::{ fpu::FpState, interrupt::TrapFrame, process::table::{USER_CS, USER_DS}, + syscall::nr::SYS_RESTART_SYSCALL, CurrentIrqArch, MMArch, }, exception::InterruptArch, ipc::{ - signal::set_current_sig_blocked, + signal::{restore_saved_sigmask, set_current_blocked}, signal_types::{SaHandlerType, SigInfo, Sigaction, SigactionType, SignalArch}, }, mm::MemoryManagementArch, @@ -405,99 +407,147 @@ pub struct SigStack { pub fpstate: FpState, } -#[no_mangle] -unsafe extern "C" fn do_signal(frame: &mut TrapFrame) { - X86_64SignalArch::do_signal(frame); - return; +unsafe fn do_signal(frame: &mut TrapFrame, got_signal: &mut bool) { + let pcb = ProcessManager::current_pcb(); + + let siginfo = pcb.try_siginfo_irqsave(5); + + if unlikely(siginfo.is_none()) { + return; + } + + let siginfo_read_guard = siginfo.unwrap(); + + // 检查sigpending是否为0 + if siginfo_read_guard.sig_pending().signal().bits() == 0 || !frame.is_from_user() { + // 若没有正在等待处理的信号,或者将要返回到的是内核态,则返回 + return; + } + + let mut sig_number: Signal; + let mut info: Option; + let mut sigaction: Option; + let sig_block: SigSet = *siginfo_read_guard.sig_blocked(); + drop(siginfo_read_guard); + + let sig_guard = pcb.try_sig_struct_irqsave(5); + if unlikely(sig_guard.is_none()) { + return; + } + let siginfo_mut = pcb.try_siginfo_mut(5); + if unlikely(siginfo_mut.is_none()) { + return; + } + + let sig_guard = sig_guard.unwrap(); + let mut siginfo_mut_guard = siginfo_mut.unwrap(); + loop { + (sig_number, info) = siginfo_mut_guard.dequeue_signal(&sig_block, &pcb); + + // 如果信号非法,则直接返回 + if sig_number == Signal::INVALID { + return; + } + let sa = sig_guard.handlers[sig_number as usize - 1]; + + match sa.action() { + SigactionType::SaHandler(action_type) => match action_type { + SaHandlerType::Error => { + error!("Trying to handle a Sigerror on Process:{:?}", pcb.pid()); + return; + } + SaHandlerType::Default => { + sigaction = Some(sa); + } + SaHandlerType::Ignore => continue, + SaHandlerType::Customized(_) => { + sigaction = Some(sa); + } + }, + SigactionType::SaSigaction(_) => todo!(), + } + + if sigaction.is_some() { + break; + } + } + + let oldset = *siginfo_mut_guard.sig_blocked(); + //避免死锁 + drop(siginfo_mut_guard); + drop(sig_guard); + drop(pcb); + // 做完上面的检查后,开中断 + CurrentIrqArch::interrupt_enable(); + + if sigaction.is_none() { + return; + } + *got_signal = true; + + let mut sigaction = sigaction.unwrap(); + + // 注意!由于handle_signal里面可能会退出进程, + // 因此这里需要检查清楚:上面所有的锁、arc指针都被释放了。否则会产生资源泄露的问题! + let res: Result = + handle_signal(sig_number, &mut sigaction, &info.unwrap(), &oldset, frame); + if res.is_err() { + error!( + "Error occurred when handling signal: {}, pid={:?}, errcode={:?}", + sig_number as i32, + ProcessManager::current_pcb().pid(), + res.as_ref().unwrap_err() + ); + } +} + +fn try_restart_syscall(frame: &mut TrapFrame) { + defer!({ + // 如果没有信号需要传递,我们只需恢复保存的信号掩码 + restore_saved_sigmask(); + }); + + if unsafe { frame.syscall_nr() }.is_none() { + return; + } + + let syscall_err = unsafe { frame.syscall_error() }; + if syscall_err.is_none() { + return; + } + let syscall_err = syscall_err.unwrap(); + + let mut restart = false; + match syscall_err { + SystemError::ERESTARTSYS | SystemError::ERESTARTNOHAND | SystemError::ERESTARTNOINTR => { + frame.rax = frame.errcode; + frame.rip -= 2; + restart = true; + } + SystemError::ERESTART_RESTARTBLOCK => { + frame.rax = SYS_RESTART_SYSCALL as u64; + frame.rip -= 2; + restart = true; + } + _ => {} + } + log::debug!("try restart syscall: {:?}", restart); } pub struct X86_64SignalArch; impl SignalArch for X86_64SignalArch { - unsafe fn do_signal(frame: &mut TrapFrame) { - let pcb = ProcessManager::current_pcb(); + /// 处理信号,并尝试重启系统调用 + /// + /// 参考: https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/signal.c#865 + unsafe fn do_signal_or_restart(frame: &mut TrapFrame) { + let mut got_signal = false; + do_signal(frame, &mut got_signal); - let siginfo = pcb.try_siginfo_irqsave(5); - - if unlikely(siginfo.is_none()) { + if got_signal { return; } - - let siginfo_read_guard = siginfo.unwrap(); - - // 检查sigpending是否为0 - if siginfo_read_guard.sig_pending().signal().bits() == 0 || !frame.is_from_user() { - // 若没有正在等待处理的信号,或者将要返回到的是内核态,则返回 - return; - } - - let mut sig_number: Signal; - let mut info: Option; - let mut sigaction: Sigaction; - let sig_block: SigSet = *siginfo_read_guard.sig_block(); - drop(siginfo_read_guard); - - let sig_guard = pcb.try_sig_struct_irqsave(5); - if unlikely(sig_guard.is_none()) { - return; - } - let siginfo_mut = pcb.try_siginfo_mut(5); - if unlikely(siginfo_mut.is_none()) { - return; - } - - let sig_guard = sig_guard.unwrap(); - let mut siginfo_mut_guard = siginfo_mut.unwrap(); - loop { - (sig_number, info) = siginfo_mut_guard.dequeue_signal(&sig_block); - // 如果信号非法,则直接返回 - if sig_number == Signal::INVALID { - return; - } - - sigaction = sig_guard.handlers[sig_number as usize - 1]; - - match sigaction.action() { - SigactionType::SaHandler(action_type) => match action_type { - SaHandlerType::Error => { - error!("Trying to handle a Sigerror on Process:{:?}", pcb.pid()); - return; - } - SaHandlerType::Default => { - sigaction = Sigaction::default(); - break; - } - SaHandlerType::Ignore => continue, - SaHandlerType::Customized(_) => { - break; - } - }, - SigactionType::SaSigaction(_) => todo!(), - } - // 如果当前动作是忽略这个信号,就继续循环。 - } - - let oldset = *siginfo_mut_guard.sig_block(); - //避免死锁 - drop(siginfo_mut_guard); - drop(sig_guard); - drop(pcb); - - // 做完上面的检查后,开中断 - CurrentIrqArch::interrupt_enable(); - - // 注意!由于handle_signal里面可能会退出进程, - // 因此这里需要检查清楚:上面所有的锁、arc指针都被释放了。否则会产生资源泄露的问题! - let res: Result = - handle_signal(sig_number, &mut sigaction, &info.unwrap(), &oldset, frame); - if res.is_err() { - error!( - "Error occurred when handling signal: {}, pid={:?}, errcode={:?}", - sig_number as i32, - ProcessManager::current_pcb().pid(), - res.as_ref().unwrap_err() - ); - } + try_restart_syscall(frame); } fn sys_rt_sigreturn(trap_frame: &mut TrapFrame) -> u64 { @@ -511,7 +561,7 @@ impl SignalArch for X86_64SignalArch { return trap_frame.rax; } let mut sigmask: SigSet = unsafe { (*frame).context.oldmask }; - set_current_sig_blocked(&mut sigmask); + set_current_blocked(&mut sigmask); // 从用户栈恢复sigcontext if !unsafe { &mut (*frame).context }.restore_sigcontext(trap_frame) { error!("unable to restore sigcontext"); @@ -533,6 +583,8 @@ impl SignalArch for X86_64SignalArch { /// @param regs 之前的系统调用将要返回的时候,要弹出的栈帧的拷贝 /// /// @return Result<0,SystemError> 若Error, 则返回错误码,否则返回Ok(0) +/// +/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/arch/x86/kernel/signal.c#787 fn handle_signal( sig: Signal, sigaction: &mut Sigaction, @@ -540,8 +592,28 @@ fn handle_signal( oldset: &SigSet, frame: &mut TrapFrame, ) -> Result { - // TODO 这里要补充一段逻辑,好像是为了保证引入线程之后的地址空间不会出问题。详见https://code.dragonos.org.cn/xref/linux-6.1.9/arch/mips/kernel/signal.c#830 - + if unsafe { frame.syscall_nr() }.is_some() { + if let Some(syscall_err) = unsafe { frame.syscall_error() } { + match syscall_err { + SystemError::ERESTARTNOHAND | SystemError::ERESTART_RESTARTBLOCK => { + frame.rax = SystemError::EINTR.to_posix_errno() as i64 as u64; + } + SystemError::ERESTARTSYS => { + if !sigaction.flags().contains(SigFlags::SA_RESTART) { + frame.rax = SystemError::EINTR.to_posix_errno() as i64 as u64; + } else { + frame.rax = frame.errcode; + frame.rip -= 2; + } + } + SystemError::ERESTARTNOINTR => { + frame.rax = frame.errcode; + frame.rip -= 2; + } + _ => {} + } + } + } // 设置栈帧 return setup_frame(sig, sigaction, info, oldset, frame); } diff --git a/kernel/src/arch/x86_64/mm/fault.rs b/kernel/src/arch/x86_64/mm/fault.rs index e38df1f2..e5521624 100644 --- a/kernel/src/arch/x86_64/mm/fault.rs +++ b/kernel/src/arch/x86_64/mm/fault.rs @@ -267,9 +267,10 @@ impl X86_64MMArch { }); } else { log::error!( - "No mapped vma, error_code: {:#b}, address: {:#x}", + "No mapped vma, error_code: {:#b}, address: {:#x}, flags: {:?}", error_code, address.data(), + flags ); let pid = ProcessManager::current_pid(); let mut info = diff --git a/kernel/src/arch/x86_64/syscall/mod.rs b/kernel/src/arch/x86_64/syscall/mod.rs index 5ff161fa..1baf8ca6 100644 --- a/kernel/src/arch/x86_64/syscall/mod.rs +++ b/kernel/src/arch/x86_64/syscall/mod.rs @@ -82,6 +82,8 @@ macro_rules! normal_syscall_return { #[no_mangle] pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) { + // 系统调用进入时,把系统调用号存入errcode字段,以便在syscall_handler退出后,仍能获取到系统调用号 + frame.errcode = frame.rax; let syscall_num = frame.rax as usize; // 防止sys_sched由于超时无法退出导致的死锁 if syscall_num == SYS_SCHED { diff --git a/kernel/src/bpf/helper/consts.rs b/kernel/src/bpf/helper/consts.rs index 69bce7d6..0c368545 100644 --- a/kernel/src/bpf/helper/consts.rs +++ b/kernel/src/bpf/helper/consts.rs @@ -1,6 +1,7 @@ pub const HELPER_MAP_LOOKUP_ELEM: u32 = 1; pub const HELPER_MAP_UPDATE_ELEM: u32 = 2; pub const HELPER_MAP_DELETE_ELEM: u32 = 3; +pub const HELPER_KTIME_GET_NS: u32 = 5; pub const HELPER_MAP_FOR_EACH_ELEM: u32 = 164; pub const HELPER_MAP_LOOKUP_PERCPU_ELEM: u32 = 195; pub const HELPER_PERF_EVENT_OUTPUT: u32 = 25; diff --git a/kernel/src/bpf/helper/mod.rs b/kernel/src/bpf/helper/mod.rs index 2f951793..2c876b25 100644 --- a/kernel/src/bpf/helper/mod.rs +++ b/kernel/src/bpf/helper/mod.rs @@ -6,6 +6,7 @@ use crate::bpf::map::{BpfCallBackFn, BpfMap}; use crate::include::bindings::linux_bpf::BPF_F_CURRENT_CPU; use crate::libs::lazy_init::Lazy; use crate::smp::core::smp_get_processor_id; +use crate::time::Instant; use alloc::{collections::BTreeMap, sync::Arc}; use core::ffi::c_void; use system_error::SystemError; @@ -300,6 +301,10 @@ pub fn map_peek_elem(map: &Arc, value: &mut [u8]) -> Result<()> { value } +pub fn bpf_ktime_get_ns() -> u64 { + (Instant::now().total_micros() * 1000) as u64 +} + pub static BPF_HELPER_FUN_SET: Lazy> = Lazy::new(); /// Initialize the helper functions. @@ -311,6 +316,7 @@ pub fn init_helper_functions() { map.insert(HELPER_MAP_LOOKUP_ELEM, define_func!(raw_map_lookup_elem)); map.insert(HELPER_MAP_UPDATE_ELEM, define_func!(raw_map_update_elem)); map.insert(HELPER_MAP_DELETE_ELEM, define_func!(raw_map_delete_elem)); + map.insert(HELPER_KTIME_GET_NS, define_func!(bpf_ktime_get_ns)); map.insert( HELPER_MAP_FOR_EACH_ELEM, define_func!(raw_map_for_each_elem), diff --git a/kernel/src/bpf/mod.rs b/kernel/src/bpf/mod.rs index 8e84f205..592ad731 100644 --- a/kernel/src/bpf/mod.rs +++ b/kernel/src/bpf/mod.rs @@ -33,7 +33,7 @@ pub fn bpf(cmd: bpf_cmd, attr: &bpf_attr) -> Result { // Program related commands bpf_cmd::BPF_PROG_LOAD => prog::bpf_prog_load(attr), // Object creation commands - bpf_cmd::BPF_BTF_LOAD => { + bpf_cmd::BPF_BTF_LOAD | bpf_cmd::BPF_LINK_CREATE | bpf_cmd::BPF_OBJ_GET_INFO_BY_FD => { error!("bpf cmd {:?} not implemented", cmd); return Err(SystemError::ENOSYS); } diff --git a/kernel/src/driver/base/device/mod.rs b/kernel/src/driver/base/device/mod.rs index 332f7067..7d05e901 100644 --- a/kernel/src/driver/base/device/mod.rs +++ b/kernel/src/driver/base/device/mod.rs @@ -310,6 +310,7 @@ pub enum DeviceType { PlatformDev, Char, Pci, + Other, } /// @brief: 设备标识符类型 diff --git a/kernel/src/driver/base/init.rs b/kernel/src/driver/base/init.rs index 93a32d81..b3856514 100644 --- a/kernel/src/driver/base/init.rs +++ b/kernel/src/driver/base/init.rs @@ -21,7 +21,6 @@ pub fn driver_init() -> Result<(), SystemError> { platform_bus_init()?; serio_bus_init()?; CpuDeviceManager::init()?; - // 至此,已完成设备驱动模型的初始化 return Ok(()); } diff --git a/kernel/src/driver/net/dma.rs b/kernel/src/driver/net/dma.rs index e03fe86a..fcfe1c94 100644 --- a/kernel/src/driver/net/dma.rs +++ b/kernel/src/driver/net/dma.rs @@ -3,7 +3,7 @@ use crate::arch::mm::kernel_page_flags; use crate::arch::MMArch; use crate::mm::kernel_mapper::KernelMapper; -use crate::mm::page::{page_manager_lock_irqsave, EntryFlags}; +use crate::mm::page::EntryFlags; use crate::mm::{ allocator::page_frame::{ allocate_page_frames, deallocate_page_frames, PageFrameCount, PhysPageFrame, @@ -61,11 +61,7 @@ pub unsafe fn dma_dealloc(paddr: usize, vaddr: NonNull, pages: usize) -> i32 flusher.flush(); unsafe { - deallocate_page_frames( - PhysPageFrame::new(PhysAddr::new(paddr)), - page_count, - &mut page_manager_lock_irqsave(), - ); + deallocate_page_frames(PhysPageFrame::new(PhysAddr::new(paddr)), page_count); } return 0; } diff --git a/kernel/src/driver/tty/tty_device.rs b/kernel/src/driver/tty/tty_device.rs index 0185db9f..7b78ef4e 100644 --- a/kernel/src/driver/tty/tty_device.rs +++ b/kernel/src/driver/tty/tty_device.rs @@ -263,7 +263,7 @@ impl IndexNode for TtyDevice { break; } - if pcb.sig_info_irqsave().sig_pending().has_pending() { + if pcb.has_pending_signal_fast() { return Err(SystemError::ERESTARTSYS); } } diff --git a/kernel/src/driver/tty/tty_job_control.rs b/kernel/src/driver/tty/tty_job_control.rs index 4cc078cc..ef242838 100644 --- a/kernel/src/driver/tty/tty_job_control.rs +++ b/kernel/src/driver/tty/tty_job_control.rs @@ -4,7 +4,7 @@ use system_error::SystemError; use crate::{ arch::ipc::signal::{SigSet, Signal}, mm::VirtAddr, - process::{Pid, ProcessManager}, + process::{Pid, ProcessFlags, ProcessManager}, syscall::{ user_access::{UserBufferReader, UserBufferWriter}, Syscall, @@ -51,9 +51,9 @@ impl TtyJobCtrlManager { if tty_pgid.is_some() && tty_pgid.unwrap() != pgid { if pcb .sig_info_irqsave() - .sig_block() + .sig_blocked() .contains(SigSet::from_bits_truncate(1 << sig as u64)) - || pcb.sig_struct_irqsave().handlers[sig as usize].is_ignore() + || pcb.sig_struct_irqsave().handlers[sig as usize - 1].is_ignore() { // 忽略该信号 if sig == Signal::SIGTTIN { @@ -62,7 +62,11 @@ impl TtyJobCtrlManager { } else { // 暂时使用kill而不是killpg Syscall::kill(pgid, sig as i32)?; - return Err(SystemError::ERESTART); + ProcessManager::current_pcb() + .flags() + .insert(ProcessFlags::HAS_PENDING_SIGNAL); + log::debug!("job_ctrl_ioctl: kill. pgid: {pgid}, tty_pgid: {tty_pgid:?}"); + return Err(SystemError::ERESTARTSYS); } } diff --git a/kernel/src/driver/tty/tty_ldisc/ntty.rs b/kernel/src/driver/tty/tty_ldisc/ntty.rs index df684e9d..535e18d3 100644 --- a/kernel/src/driver/tty/tty_ldisc/ntty.rs +++ b/kernel/src/driver/tty/tty_ldisc/ntty.rs @@ -21,7 +21,7 @@ use crate::{ }, mm::VirtAddr, net::event_poll::EPollEventType, - process::ProcessManager, + process::{ProcessFlags, ProcessManager}, syscall::{user_access::UserBufferWriter, Syscall}, }; @@ -1680,11 +1680,11 @@ impl TtyLineDiscipline for NTtyLinediscipline { break; } - if ProcessManager::current_pcb() - .sig_info_irqsave() - .sig_pending() - .has_pending() - { + if ProcessManager::current_pcb().has_pending_signal_fast() { + ProcessManager::current_pcb() + .flags() + .insert(ProcessFlags::HAS_PENDING_SIGNAL); + ret = Err(SystemError::ERESTARTSYS); break; } @@ -1763,7 +1763,11 @@ impl TtyLineDiscipline for NTtyLinediscipline { // drop(ldata); let mut offset = 0; loop { - if pcb.sig_info_irqsave().sig_pending().has_pending() { + if pcb.has_pending_signal_fast() { + ProcessManager::current_pcb() + .flags() + .insert(ProcessFlags::HAS_PENDING_SIGNAL); + return Err(SystemError::ERESTARTSYS); } if core.flags().contains(TtyFlag::HUPPED) { diff --git a/kernel/src/driver/virtio/virtio_impl.rs b/kernel/src/driver/virtio/virtio_impl.rs index 0608a0ce..87814748 100644 --- a/kernel/src/driver/virtio/virtio_impl.rs +++ b/kernel/src/driver/virtio/virtio_impl.rs @@ -3,7 +3,7 @@ use crate::arch::mm::kernel_page_flags; use crate::arch::MMArch; use crate::mm::kernel_mapper::KernelMapper; -use crate::mm::page::{page_manager_lock_irqsave, EntryFlags}; +use crate::mm::page::EntryFlags; use crate::mm::{ allocator::page_frame::{ allocate_page_frames, deallocate_page_frames, PageFrameCount, PhysPageFrame, @@ -72,11 +72,7 @@ unsafe impl Hal for HalImpl { flusher.flush(); unsafe { - deallocate_page_frames( - PhysPageFrame::new(PhysAddr::new(paddr)), - page_count, - &mut page_manager_lock_irqsave(), - ); + deallocate_page_frames(PhysPageFrame::new(PhysAddr::new(paddr)), page_count); } return 0; } diff --git a/kernel/src/exception/entry.rs b/kernel/src/exception/entry.rs new file mode 100644 index 00000000..e54def7d --- /dev/null +++ b/kernel/src/exception/entry.rs @@ -0,0 +1,46 @@ +use crate::{ + arch::{interrupt::TrapFrame, CurrentSignalArch}, + ipc::signal_types::SignalArch, + process::{ProcessFlags, ProcessManager}, +}; + +#[no_mangle] +unsafe extern "C" fn irqentry_exit(frame: &mut TrapFrame) { + if frame.is_from_user() { + irqentry_exit_to_user_mode(frame); + } +} + +/// 退出到用户态之前,在这个函数内做最后的处理 +/// +/// # Safety +/// +/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, +/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 +unsafe fn irqentry_exit_to_user_mode(frame: &mut TrapFrame) { + exit_to_user_mode_prepare(frame); +} + +/// # Safety +/// +/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, +/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 +unsafe fn exit_to_user_mode_prepare(frame: &mut TrapFrame) { + let process_flags_work = *ProcessManager::current_pcb().flags(); + if !process_flags_work.exit_to_user_mode_work().is_empty() { + exit_to_user_mode_loop(frame, process_flags_work); + } +} + +/// # Safety +/// +/// 由于这个函数内可能会直接退出进程,因此,在进入函数之前, +/// 必须保证所有的栈上的Arc/Box指针等,都已经被释放。否则,可能会导致内存泄漏。 +unsafe fn exit_to_user_mode_loop(frame: &mut TrapFrame, mut process_flags_work: ProcessFlags) { + while !process_flags_work.exit_to_user_mode_work().is_empty() { + if process_flags_work.contains(ProcessFlags::HAS_PENDING_SIGNAL) { + unsafe { CurrentSignalArch::do_signal_or_restart(frame) }; + } + process_flags_work = *ProcessManager::current_pcb().flags(); + } +} diff --git a/kernel/src/exception/mod.rs b/kernel/src/exception/mod.rs index 8eb14dd6..a12698b1 100644 --- a/kernel/src/exception/mod.rs +++ b/kernel/src/exception/mod.rs @@ -7,6 +7,7 @@ use crate::arch::CurrentIrqArch; pub mod debug; pub mod dummychip; pub mod ebreak; +pub mod entry; pub mod handle; pub mod init; pub mod ipi; diff --git a/kernel/src/filesystem/eventfd.rs b/kernel/src/filesystem/eventfd.rs index 9143697a..ecce149b 100644 --- a/kernel/src/filesystem/eventfd.rs +++ b/kernel/src/filesystem/eventfd.rs @@ -4,7 +4,7 @@ use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, M use crate::libs::spinlock::{SpinLock, SpinLockGuard}; use crate::libs::wait_queue::WaitQueue; use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData}; -use crate::process::ProcessManager; +use crate::process::{ProcessFlags, ProcessManager}; use crate::sched::SchedMode; use crate::syscall::Syscall; use alloc::collections::LinkedList; @@ -82,6 +82,21 @@ impl EventFdInode { let count = self.eventfd.lock().count; return count > 0; } + + fn do_poll( + &self, + _private_data: &FilePrivateData, + self_guard: &SpinLockGuard<'_, EventFd>, + ) -> Result { + let mut events = EPollEventType::empty(); + if self_guard.count != 0 { + events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM; + } + if self_guard.count != u64::MAX { + events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM; + } + return Ok(events.bits() as usize); + } } impl IndexNode for EventFdInode { @@ -125,8 +140,17 @@ impl IndexNode for EventFdInode { } drop(lock_efd); + + if ProcessManager::current_pcb().has_pending_signal_fast() { + return Err(SystemError::ERESTARTSYS); + } + let r = wq_wait_event_interruptible!(self.wait_queue, self.readable(), {}); if r.is_err() { + ProcessManager::current_pcb() + .flags() + .insert(ProcessFlags::HAS_PENDING_SIGNAL); + return Err(SystemError::ERESTARTSYS); } @@ -134,7 +158,7 @@ impl IndexNode for EventFdInode { } let mut val = lock_efd.count; - let mut eventfd = self.eventfd.lock(); + let mut eventfd = lock_efd; if eventfd.flags.contains(EventFdFlags::EFD_SEMAPHORE) { eventfd.count -= 1; val = 1; @@ -143,8 +167,9 @@ impl IndexNode for EventFdInode { } let val_bytes = val.to_ne_bytes(); buf[..8].copy_from_slice(&val_bytes); + let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32); + drop(eventfd); - let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32); // 唤醒epoll中等待的进程 EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?; @@ -174,6 +199,9 @@ impl IndexNode for EventFdInode { return Err(SystemError::EINVAL); } loop { + if ProcessManager::current_pcb().has_pending_signal() { + return Err(SystemError::ERESTARTSYS); + } let eventfd = self.eventfd.lock(); if u64::MAX - eventfd.count > val { break; @@ -185,13 +213,17 @@ impl IndexNode for EventFdInode { return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); } drop(eventfd); - self.wait_queue.sleep(); + self.wait_queue.sleep().ok(); } let mut eventfd = self.eventfd.lock(); eventfd.count += val; + drop(eventfd); self.wait_queue.wakeup_all(None); - let pollflag = EPollEventType::from_bits_truncate(self.poll(&data)? as u32); + let eventfd = self.eventfd.lock(); + let pollflag = EPollEventType::from_bits_truncate(self.do_poll(&data, &eventfd)? as u32); + drop(eventfd); + // 唤醒epoll中等待的进程 EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))?; return Ok(8); @@ -202,14 +234,8 @@ impl IndexNode for EventFdInode { /// - 如果 counter 的值大于 0 ,那么 fd 的状态就是可读的 /// - 如果能无阻塞地写入一个至少为 1 的值,那么 fd 的状态就是可写的 fn poll(&self, _private_data: &FilePrivateData) -> Result { - let mut events = EPollEventType::empty(); - if self.eventfd.lock().count != 0 { - events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM; - } - if self.eventfd.lock().count != u64::MAX { - events |= EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM; - } - return Ok(events.bits() as usize); + let self_guard = self.eventfd.lock(); + self.do_poll(_private_data, &self_guard) } fn metadata(&self) -> Result { diff --git a/kernel/src/filesystem/fat/fs.rs b/kernel/src/filesystem/fat/fs.rs index fdd43859..88712c45 100644 --- a/kernel/src/filesystem/fat/fs.rs +++ b/kernel/src/filesystem/fat/fs.rs @@ -14,7 +14,7 @@ use alloc::{ use crate::driver::base::block::gendisk::GenDisk; use crate::driver::base::device::device_number::DeviceNumber; -use crate::filesystem::vfs::file::PageCache; +use crate::filesystem::page_cache::PageCache; use crate::filesystem::vfs::utils::DName; use crate::filesystem::vfs::{Magic, SpecialNodeData, SuperBlock}; use crate::ipc::pipe::LockedPipeInode; @@ -129,9 +129,8 @@ pub struct FATInode { } impl FATInode { - /// @brief 更新当前inode的元数据 - pub fn update_metadata(&mut self) { - // todo: 更新文件的访问时间等信息 + /// 将inode的元数据与磁盘同步 + pub fn synchronize_metadata(&mut self) { match &self.inode_type { FATDirEntry::File(f) | FATDirEntry::VolId(f) => { self.metadata.size = f.size() as i64; @@ -146,6 +145,19 @@ impl FATInode { }; } + /// 更新inode的元数据 + pub fn update_metadata(&mut self, size: Option) { + if let Some(new_size) = size { + self.metadata.size = new_size; + } + self.update_time(); + } + + /// 更新访问时间 + pub fn update_time(&mut self) { + // log::warn!("update_time has not yet been implemented"); + } + fn find(&mut self, name: &str) -> Result, SystemError> { match &self.inode_type { FATDirEntry::Dir(d) => { @@ -234,7 +246,7 @@ impl LockedFATInode { inode.0.lock().self_ref = Arc::downgrade(&inode); - inode.0.lock().update_metadata(); + inode.0.lock().synchronize_metadata(); return inode; } @@ -1386,24 +1398,14 @@ impl FATFsInfo { } impl IndexNode for LockedFATInode { - fn read_at( - &self, - offset: usize, - len: usize, - buf: &mut [u8], - _data: SpinLockGuard, - ) -> Result { - let mut guard: SpinLockGuard = self.0.lock(); + fn read_sync(&self, offset: usize, buf: &mut [u8]) -> Result { + let guard: SpinLockGuard = self.0.lock(); match &guard.inode_type { FATDirEntry::File(f) | FATDirEntry::VolId(f) => { - let r = f.read( - &guard.fs.upgrade().unwrap(), - &mut buf[0..len], - offset as u64, - ); - guard.update_metadata(); + let r = f.read(&guard.fs.upgrade().unwrap(), buf, offset as u64); return r; } + FATDirEntry::Dir(_) => { return Err(SystemError::EISDIR); } @@ -1414,30 +1416,93 @@ impl IndexNode for LockedFATInode { } } + fn write_sync(&self, offset: usize, buf: &[u8]) -> Result { + let mut guard: SpinLockGuard = self.0.lock(); + let fs: &Arc = &guard.fs.upgrade().unwrap(); + + match &mut guard.inode_type { + FATDirEntry::File(f) | FATDirEntry::VolId(f) => { + let r = f.write(fs, buf, offset as u64); + return r; + } + + FATDirEntry::Dir(_) => { + return Err(SystemError::EISDIR); + } + + FATDirEntry::UnInit => { + error!("FATFS: param: Inode_type uninitialized."); + return Err(SystemError::EROFS); + } + } + } + + fn read_at( + &self, + offset: usize, + len: usize, + buf: &mut [u8], + data: SpinLockGuard, + ) -> Result { + let len = core::cmp::min(len, buf.len()); + let buf = &mut buf[0..len]; + + let page_cache = self.0.lock().page_cache.clone(); + if let Some(page_cache) = page_cache { + let r = page_cache.lock_irqsave().read(offset, &mut buf[0..len]); + // self.0.lock_irqsave().update_metadata(); + return r; + } else { + return self.read_direct(offset, len, buf, data); + } + } + fn write_at( + &self, + offset: usize, + len: usize, + buf: &[u8], + data: SpinLockGuard, + ) -> Result { + let len = core::cmp::min(len, buf.len()); + let buf = &buf[0..len]; + + let page_cache = self.0.lock().page_cache.clone(); + if let Some(page_cache) = page_cache { + let write_len = page_cache.lock_irqsave().write(offset, buf)?; + let mut guard = self.0.lock(); + let old_size = guard.metadata.size; + guard.update_metadata(Some(core::cmp::max(old_size, (offset + write_len) as i64))); + return Ok(write_len); + } else { + return self.write_direct(offset, len, buf, data); + } + } + + fn read_direct( + &self, + offset: usize, + len: usize, + buf: &mut [u8], + _data: SpinLockGuard, + ) -> Result { + let len = core::cmp::min(len, buf.len()); + let r = self.read_sync(offset, &mut buf[0..len]); + // self.0.lock_irqsave().update_metadata(); + return r; + } + + fn write_direct( &self, offset: usize, len: usize, buf: &[u8], _data: SpinLockGuard, ) -> Result { - let mut guard: SpinLockGuard = self.0.lock(); - let fs: &Arc = &guard.fs.upgrade().unwrap(); - - match &mut guard.inode_type { - FATDirEntry::File(f) | FATDirEntry::VolId(f) => { - let r = f.write(fs, &buf[0..len], offset as u64); - guard.update_metadata(); - return r; - } - FATDirEntry::Dir(_) => { - return Err(SystemError::EISDIR); - } - FATDirEntry::UnInit => { - error!("FATFS: param: Inode_type uninitialized."); - return Err(SystemError::EROFS); - } - } + let len = core::cmp::min(len, buf.len()); + let r = self.write_sync(offset, &buf[0..len]); + // self.0.lock_irqsave().update_metadata(); + return r; } fn create( @@ -1496,6 +1561,10 @@ impl IndexNode for LockedFATInode { Ok(()) } fn resize(&self, len: usize) -> Result<(), SystemError> { + if let Some(page_cache) = self.page_cache() { + return page_cache.lock_irqsave().resize(len); + } + let mut guard: SpinLockGuard = self.0.lock(); let fs: &Arc = &guard.fs.upgrade().unwrap(); let old_size = guard.metadata.size as usize; @@ -1527,7 +1596,7 @@ impl IndexNode for LockedFATInode { file.truncate(fs, len as u64)?; } } - guard.update_metadata(); + guard.synchronize_metadata(); return Ok(()); } FATDirEntry::Dir(_) => return Err(SystemError::ENOSYS), diff --git a/kernel/src/filesystem/mod.rs b/kernel/src/filesystem/mod.rs index 772d21f4..59bda2a1 100644 --- a/kernel/src/filesystem/mod.rs +++ b/kernel/src/filesystem/mod.rs @@ -5,6 +5,7 @@ pub mod fat; pub mod kernfs; pub mod mbr; pub mod overlayfs; +pub mod page_cache; pub mod procfs; pub mod ramfs; pub mod sysfs; diff --git a/kernel/src/filesystem/page_cache.rs b/kernel/src/filesystem/page_cache.rs new file mode 100644 index 00000000..7f595933 --- /dev/null +++ b/kernel/src/filesystem/page_cache.rs @@ -0,0 +1,346 @@ +use core::cmp::min; + +use alloc::{ + sync::{Arc, Weak}, + vec::Vec, +}; +use hashbrown::HashMap; +use system_error::SystemError; + +use super::vfs::IndexNode; +use crate::libs::spinlock::SpinLockGuard; +use crate::mm::page::FileMapInfo; +use crate::{arch::mm::LockedFrameAllocator, libs::lazy_init::Lazy}; +use crate::{ + arch::MMArch, + libs::spinlock::SpinLock, + mm::{ + page::{page_manager_lock_irqsave, page_reclaimer_lock_irqsave, Page, PageFlags}, + MemoryManagementArch, + }, +}; +use crate::{libs::align::page_align_up, mm::page::PageType}; + +/// 页面缓存 +#[derive(Debug)] +pub struct PageCache { + inner: SpinLock, + inode: Lazy>, +} + +#[derive(Debug)] +pub struct InnerPageCache { + pages: HashMap>, + page_cache_ref: Weak, +} + +impl InnerPageCache { + pub fn new(page_cache_ref: Weak) -> InnerPageCache { + Self { + pages: HashMap::new(), + page_cache_ref, + } + } + + pub fn add_page(&mut self, offset: usize, page: &Arc) { + self.pages.insert(offset, page.clone()); + } + + pub fn get_page(&self, offset: usize) -> Option> { + self.pages.get(&offset).cloned() + } + + pub fn remove_page(&mut self, offset: usize) -> Option> { + self.pages.remove(&offset) + } + + fn create_pages(&mut self, start_page_index: usize, buf: &[u8]) -> Result<(), SystemError> { + assert!(buf.len() % MMArch::PAGE_SIZE == 0); + + let page_num = buf.len() / MMArch::PAGE_SIZE; + + let len = buf.len(); + if len == 0 { + return Ok(()); + } + + let mut page_manager_guard = page_manager_lock_irqsave(); + + for i in 0..page_num { + let buf_offset = i * MMArch::PAGE_SIZE; + let page_index = start_page_index + i; + + let page = page_manager_guard.create_one_page( + PageType::File(FileMapInfo { + page_cache: self + .page_cache_ref + .upgrade() + .expect("failed to get self_arc of pagecache"), + index: page_index, + }), + PageFlags::PG_LRU, + &mut LockedFrameAllocator, + )?; + + let mut page_guard = page.write_irqsave(); + unsafe { + page_guard.copy_from_slice(&buf[buf_offset..buf_offset + MMArch::PAGE_SIZE]); + } + + self.add_page(page_index, &page); + } + + Ok(()) + } + + /// 从PageCache中读取数据。 + /// + /// ## 参数 + /// + /// - `offset` 偏移量 + /// - `buf` 缓冲区 + /// + /// ## 返回值 + /// + /// - `Ok(usize)` 成功读取的长度 + /// - `Err(SystemError)` 失败返回错误码 + pub fn read(&mut self, offset: usize, buf: &mut [u8]) -> Result { + let inode = self + .page_cache_ref + .upgrade() + .unwrap() + .inode + .upgrade() + .unwrap(); + let file_size = inode.metadata().unwrap().size; + + let len = if offset < file_size as usize { + core::cmp::min(file_size as usize, offset + buf.len()) - offset + } else { + 0 + }; + + if len == 0 { + return Ok(0); + } + + let mut not_exist = Vec::new(); + + let start_page_index = offset >> MMArch::PAGE_SHIFT; + let page_num = (page_align_up(offset + len) >> MMArch::PAGE_SHIFT) - start_page_index; + + let mut buf_offset = 0; + let mut ret = 0; + for i in 0..page_num { + let page_index = start_page_index + i; + + // 第一个页可能需要计算页内偏移 + let page_offset = if i == 0 { + offset % MMArch::PAGE_SIZE + } else { + 0 + }; + + // 第一个页和最后一个页可能不满 + let sub_len = if i == 0 { + min(len, MMArch::PAGE_SIZE - page_offset) + } else if i == page_num - 1 { + (offset + len - 1) % MMArch::PAGE_SIZE + 1 + } else { + MMArch::PAGE_SIZE + }; + + if let Some(page) = self.get_page(page_index) { + let sub_buf = &mut buf[buf_offset..(buf_offset + sub_len)]; + unsafe { + sub_buf.copy_from_slice( + &page.read_irqsave().as_slice()[page_offset..page_offset + sub_len], + ); + } + ret += sub_len; + } else if let Some((index, count)) = not_exist.last_mut() { + if *index + *count == page_index { + *count += 1; + } else { + not_exist.push((page_index, 1)); + } + } else { + not_exist.push((page_index, 1)); + } + + buf_offset += sub_len; + } + + for (page_index, count) in not_exist { + // TODO 这里使用buffer避免多次读取磁盘,将来引入异步IO直接写入页面,减少内存开销和拷贝 + let mut page_buf = vec![0u8; MMArch::PAGE_SIZE * count]; + inode.read_sync(page_index * MMArch::PAGE_SIZE, page_buf.as_mut())?; + + self.create_pages(page_index, page_buf.as_mut())?; + + // 实际要拷贝的内容在文件中的偏移量 + let copy_offset = core::cmp::max(page_index * MMArch::PAGE_SIZE, offset); + // 实际要拷贝的内容的长度 + let copy_len = core::cmp::min((page_index + count) * MMArch::PAGE_SIZE, offset + len) + - copy_offset; + + let page_buf_offset = if page_index * MMArch::PAGE_SIZE < copy_offset { + copy_offset - page_index * MMArch::PAGE_SIZE + } else { + 0 + }; + + let buf_offset = copy_offset.saturating_sub(offset); + + buf[buf_offset..buf_offset + copy_len] + .copy_from_slice(&page_buf[page_buf_offset..page_buf_offset + copy_len]); + + ret += copy_len; + + // log::debug!("page_offset:{page_offset}, count:{count}"); + // log::debug!("copy_offset:{copy_offset}, copy_len:{copy_len}"); + // log::debug!("buf_offset:{buf_offset}, page_buf_offset:{page_buf_offset}"); + } + + Ok(ret) + } + + /// 向PageCache中写入数据。 + /// + /// ## 参数 + /// + /// - `offset` 偏移量 + /// - `buf` 缓冲区 + /// + /// ## 返回值 + /// + /// - `Ok(usize)` 成功读取的长度 + /// - `Err(SystemError)` 失败返回错误码 + pub fn write(&mut self, offset: usize, buf: &[u8]) -> Result { + let len = buf.len(); + if len == 0 { + return Ok(0); + } + + // log::debug!("offset:{offset}, len:{len}"); + + let start_page_index = offset >> MMArch::PAGE_SHIFT; + let page_num = (page_align_up(offset + len) >> MMArch::PAGE_SHIFT) - start_page_index; + + let mut buf_offset = 0; + let mut ret = 0; + + for i in 0..page_num { + let page_index = start_page_index + i; + + // 第一个页可能需要计算页内偏移 + let page_offset = if i == 0 { + offset % MMArch::PAGE_SIZE + } else { + 0 + }; + + // 第一个页和最后一个页可能不满 + let sub_len = if i == 0 { + min(len, MMArch::PAGE_SIZE - page_offset) + } else if i == page_num - 1 { + (offset + len - 1) % MMArch::PAGE_SIZE + 1 + } else { + MMArch::PAGE_SIZE + }; + + let mut page = self.get_page(page_index); + + if page.is_none() { + let page_buf = vec![0u8; MMArch::PAGE_SIZE]; + self.create_pages(page_index, &page_buf)?; + page = self.get_page(page_index); + } + + if let Some(page) = page { + let sub_buf = &buf[buf_offset..(buf_offset + sub_len)]; + let mut page_guard = page.write_irqsave(); + unsafe { + page_guard.as_slice_mut()[page_offset..page_offset + sub_len] + .copy_from_slice(sub_buf); + } + page_guard.add_flags(PageFlags::PG_DIRTY); + + ret += sub_len; + + // log::debug!( + // "page_offset:{page_offset}, buf_offset:{buf_offset}, sub_len:{sub_len}" + // ); + } else { + return Err(SystemError::EIO); + }; + + buf_offset += sub_len; + } + Ok(ret) + } + + pub fn resize(&mut self, len: usize) -> Result<(), SystemError> { + let page_num = page_align_up(len) / MMArch::PAGE_SIZE; + + let mut reclaimer = page_reclaimer_lock_irqsave(); + for (_i, page) in self.pages.drain_filter(|index, _page| *index >= page_num) { + let _ = reclaimer.remove_page(&page.phys_address()); + } + + if page_num > 0 { + let last_page_index = page_num - 1; + let last_len = len - last_page_index * MMArch::PAGE_SIZE; + if let Some(page) = self.get_page(last_page_index) { + unsafe { + page.write_irqsave().truncate(last_len); + }; + } else { + return Err(SystemError::EIO); + } + } + + Ok(()) + } +} + +impl Drop for InnerPageCache { + fn drop(&mut self) { + log::debug!("page cache drop"); + let mut page_manager = page_manager_lock_irqsave(); + for page in self.pages.values() { + page_manager.remove_page(&page.phys_address()); + } + } +} + +impl PageCache { + pub fn new(inode: Option>) -> Arc { + Arc::new_cyclic(|weak| Self { + inner: SpinLock::new(InnerPageCache::new(weak.clone())), + inode: { + let v: Lazy> = Lazy::new(); + if let Some(inode) = inode { + v.init(inode); + } + v + }, + }) + } + + pub fn inode(&self) -> Option> { + self.inode.try_get().cloned() + } + + pub fn set_inode(&self, inode: Weak) -> Result<(), SystemError> { + if self.inode.initialized() { + return Err(SystemError::EINVAL); + } + self.inode.init(inode); + Ok(()) + } + + pub fn lock_irqsave(&self) -> SpinLockGuard { + self.inner.lock_irqsave() + } +} diff --git a/kernel/src/filesystem/procfs/mod.rs b/kernel/src/filesystem/procfs/mod.rs index 0defc3e0..3c131032 100644 --- a/kernel/src/filesystem/procfs/mod.rs +++ b/kernel/src/filesystem/procfs/mod.rs @@ -394,7 +394,31 @@ impl ProcFS { } else { panic!("create ksmg error"); } - + // 这个文件是用来欺骗Aya框架识别内核版本 + /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, + * but Ubuntu provides /proc/version_signature file, as described at + * https://ubuntu.com/kernel, with an example contents below, which we + * can use to get a proper LINUX_VERSION_CODE. + * + * Ubuntu 5.4.0-12.15-generic 5.4.8 + * + * In the above, 5.4.8 is what kernel is actually expecting, while + * uname() call will return 5.4.0 in info.release. + */ + let binding = inode.create("version_signature", FileType::File, ModeType::S_IRUGO); + if let Ok(version_signature) = binding { + let version_signature = version_signature + .as_any_ref() + .downcast_ref::() + .unwrap(); + version_signature.0.lock().fdata.ftype = ProcFileType::Default; + version_signature.0.lock().data = "DragonOS 6.0.0-generic 6.0.0\n" + .to_string() + .as_bytes() + .to_vec(); + } else { + panic!("create version_signature error"); + } return result; } @@ -466,6 +490,7 @@ impl IndexNode for LockedProcFSInode { let file_size = match inode.fdata.ftype { ProcFileType::ProcStatus => inode.open_status(&mut private_data)?, ProcFileType::ProcMeminfo => inode.open_meminfo(&mut private_data)?, + ProcFileType::Default => inode.data.len() as i64, _ => { todo!() } diff --git a/kernel/src/filesystem/vfs/file.rs b/kernel/src/filesystem/vfs/file.rs index 753d10b4..0833ab00 100644 --- a/kernel/src/filesystem/vfs/file.rs +++ b/kernel/src/filesystem/vfs/file.rs @@ -5,16 +5,13 @@ use alloc::{ sync::{Arc, Weak}, vec::Vec, }; -use kdepends::xarray::XArray; use log::error; use system_error::SystemError; use super::{Dirent, FileType, IndexNode, InodeId, Metadata, SpecialNodeData}; use crate::filesystem::eventfd::EventFdInode; -use crate::libs::lazy_init::Lazy; use crate::perf::PerfEventInode; use crate::{ - arch::MMArch, driver::{ base::{block::SeekFrom, device::DevicePrivateData}, tty::tty_device::TtyFilePrivateData, @@ -22,7 +19,6 @@ use crate::{ filesystem::procfs::ProcfsFilePrivateData, ipc::pipe::{LockedPipeInode, PipeFsPrivateData}, libs::{rwlock::RwLock, spinlock::SpinLock}, - mm::{page::Page, MemoryManagementArch}, net::{ event_poll::{EPollItem, EPollPrivateData, EventPoll}, socket::Inode as SocketInode, @@ -124,75 +120,6 @@ impl FileMode { } } -/// 页面缓存 -pub struct PageCache { - xarray: SpinLock>>, - inode: Lazy>, -} - -impl core::fmt::Debug for PageCache { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_struct("PageCache") - .field( - "xarray", - &self - .xarray - .lock() - .range(0..((MMArch::PAGE_ADDRESS_SIZE >> MMArch::PAGE_SHIFT) as u64)) - .map(|(_, r)| (*r).clone()) - .collect::>>(), - ) - .finish() - } -} - -impl PageCache { - pub fn new(inode: Option>) -> Arc { - let page_cache = Self { - xarray: SpinLock::new(XArray::new()), - inode: { - let v: Lazy> = Lazy::new(); - if let Some(inode) = inode { - v.init(inode); - } - v - }, - }; - Arc::new(page_cache) - } - - pub fn inode(&self) -> Option> { - self.inode.try_get().cloned() - } - - pub fn add_page(&self, offset: usize, page: &Arc) { - let mut guard = self.xarray.lock(); - let mut cursor = guard.cursor_mut(offset as u64); - cursor.store(page.clone()); - } - - pub fn get_page(&self, offset: usize) -> Option> { - let mut guard = self.xarray.lock(); - let mut cursor = guard.cursor_mut(offset as u64); - let page = cursor.load().map(|r| (*r).clone()); - page - } - - pub fn remove_page(&self, offset: usize) { - let mut guard = self.xarray.lock(); - let mut cursor = guard.cursor_mut(offset as u64); - cursor.remove(); - } - - pub fn set_inode(&self, inode: Weak) -> Result<(), SystemError> { - if self.inode.initialized() { - return Err(SystemError::EINVAL); - } - self.inode.init(inode); - Ok(()) - } -} - /// @brief 抽象文件结构体 #[derive(Debug)] pub struct File { @@ -238,13 +165,16 @@ impl File { return Ok(f); } - /// @brief 从文件中读取指定的字节数到buffer中 + /// ## 从文件中读取指定的字节数到buffer中 /// - /// @param len 要读取的字节数 - /// @param buf 目标buffer + /// ### 参数 + /// - `len`: 要读取的字节数 + /// - `buf`: 缓冲区 + /// - `read_direct`: 忽略缓存,直接读取磁盘 /// - /// @return Ok(usize) 成功读取的字节数 - /// @return Err(SystemError) 错误码 + /// ### 返回值 + /// - `Ok(usize)`: 成功读取的字节数 + /// - `Err(SystemError)`: 错误码 pub fn read(&self, len: usize, buf: &mut [u8]) -> Result { self.do_read( self.offset.load(core::sync::atomic::Ordering::SeqCst), @@ -254,13 +184,16 @@ impl File { ) } - /// @brief 从buffer向文件写入指定的字节数的数据 + /// ## 从buffer向文件写入指定的字节数的数据 /// - /// @param len 要写入的字节数 - /// @param buf 源数据buffer + /// ### 参数 + /// - `offset`: 文件偏移量 + /// - `len`: 要写入的字节数 + /// - `buf`: 写入缓冲区 /// - /// @return Ok(usize) 成功写入的字节数 - /// @return Err(SystemError) 错误码 + /// ### 返回值 + /// - `Ok(usize)`: 成功写入的字节数 + /// - `Err(SystemError)`: 错误码 pub fn write(&self, len: usize, buf: &[u8]) -> Result { self.do_write( self.offset.load(core::sync::atomic::Ordering::SeqCst), @@ -309,16 +242,13 @@ impl File { return Err(SystemError::ENOBUFS); } - let len = self - .inode - .read_at(offset, len, buf, self.private_data.lock()) - .map_err(|e| { - if e == SystemError::ERESTARTSYS { - SystemError::EINTR - } else { - e - } - })?; + let len = if self.mode().contains(FileMode::O_DIRECT) { + self.inode + .read_direct(offset, len, buf, self.private_data.lock()) + } else { + self.inode + .read_at(offset, len, buf, self.private_data.lock()) + }?; if update_offset { self.offset @@ -343,24 +273,11 @@ impl File { // 如果文件指针已经超过了文件大小,则需要扩展文件大小 if offset > self.inode.metadata()?.size as usize { - self.inode.resize(offset).map_err(|e| { - if e == SystemError::ERESTARTSYS { - SystemError::EINTR - } else { - e - } - })?; + self.inode.resize(offset)?; } let len = self .inode - .write_at(offset, len, buf, self.private_data.lock()) - .map_err(|e| { - if e == SystemError::ERESTARTSYS { - SystemError::EINTR - } else { - e - } - })?; + .write_at(offset, len, buf, self.private_data.lock())?; if update_offset { self.offset diff --git a/kernel/src/filesystem/vfs/mod.rs b/kernel/src/filesystem/vfs/mod.rs index 67b7643a..50f61619 100644 --- a/kernel/src/filesystem/vfs/mod.rs +++ b/kernel/src/filesystem/vfs/mod.rs @@ -24,14 +24,11 @@ use crate::{ time::PosixTimeSpec, }; -use self::{ - core::generate_inode_id, - file::{FileMode, PageCache}, - syscall::ModeType, - utils::DName, -}; +use self::{core::generate_inode_id, file::FileMode, syscall::ModeType, utils::DName}; pub use self::{core::ROOT_INODE, file::FilePrivateData, mount::MountFS}; +use super::page_cache::PageCache; + /// vfs容许的最大的路径名称长度 pub const MAX_PATHLEN: usize = 1024; @@ -128,6 +125,15 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync { fn mmap(&self, _start: usize, _len: usize, _offset: usize) -> Result<(), SystemError> { return Err(SystemError::ENOSYS); } + + fn read_sync(&self, _offset: usize, _buf: &mut [u8]) -> Result { + return Err(SystemError::ENOSYS); + } + + fn write_sync(&self, _offset: usize, _buf: &[u8]) -> Result { + return Err(SystemError::ENOSYS); + } + /// @brief 打开文件 /// /// @return 成功:Ok() @@ -184,6 +190,52 @@ pub trait IndexNode: Any + Sync + Send + Debug + CastFromSync { _data: SpinLockGuard, ) -> Result; + /// # 在inode的指定偏移量开始,读取指定大小的数据,忽略PageCache + /// + /// ## 参数 + /// + /// - `offset`: 起始位置在Inode中的偏移量 + /// - `len`: 要读取的字节数 + /// - `buf`: 缓冲区 + /// - `data`: 各文件系统系统所需私有信息 + /// + /// ## 返回值 + /// + /// - `Ok(usize)``: Ok(读取的字节数) + /// - `Err(SystemError)``: Err(Posix错误码) + fn read_direct( + &self, + _offset: usize, + _len: usize, + _buf: &mut [u8], + _data: SpinLockGuard, + ) -> Result { + return Err(SystemError::ENOSYS); + } + + /// # 在inode的指定偏移量开始,写入指定大小的数据,忽略PageCache + /// + /// ## 参数 + /// + /// - `offset`: 起始位置在Inode中的偏移量 + /// - `len`: 要读取的字节数 + /// - `buf`: 缓冲区 + /// - `data`: 各文件系统系统所需私有信息 + /// + /// ## 返回值 + /// + /// - `Ok(usize)``: Ok(读取的字节数) + /// - `Err(SystemError)``: Err(Posix错误码) + fn write_direct( + &self, + _offset: usize, + _len: usize, + _buf: &[u8], + _data: SpinLockGuard, + ) -> Result { + return Err(SystemError::ENOSYS); + } + /// @brief 获取当前inode的状态。 /// /// @return PollStatus结构体 diff --git a/kernel/src/filesystem/vfs/mount.rs b/kernel/src/filesystem/vfs/mount.rs index 12f122b8..35d221d8 100644 --- a/kernel/src/filesystem/vfs/mount.rs +++ b/kernel/src/filesystem/vfs/mount.rs @@ -14,7 +14,7 @@ use system_error::SystemError; use crate::{ driver::base::device::device_number::DeviceNumber, - filesystem::vfs::ROOT_INODE, + filesystem::{page_cache::PageCache, vfs::ROOT_INODE}, libs::{ casting::DowncastArc, rwlock::RwLock, @@ -24,10 +24,8 @@ use crate::{ }; use super::{ - file::{FileMode, PageCache}, - syscall::ModeType, - utils::DName, - FilePrivateData, FileSystem, FileType, IndexNode, InodeId, Magic, SuperBlock, + file::FileMode, syscall::ModeType, utils::DName, FilePrivateData, FileSystem, FileType, + IndexNode, InodeId, Magic, SuperBlock, }; const MOUNTFS_BLOCK_SIZE: u64 = 512; @@ -296,6 +294,26 @@ impl IndexNode for MountFSInode { return self.inner_inode.write_at(offset, len, buf, data); } + fn read_direct( + &self, + offset: usize, + len: usize, + buf: &mut [u8], + data: SpinLockGuard, + ) -> Result { + self.inner_inode.read_direct(offset, len, buf, data) + } + + fn write_direct( + &self, + offset: usize, + len: usize, + buf: &[u8], + data: SpinLockGuard, + ) -> Result { + self.inner_inode.write_direct(offset, len, buf, data) + } + #[inline] fn fs(&self) -> Arc { return self.mount_fs.clone(); diff --git a/kernel/src/filesystem/vfs/syscall.rs b/kernel/src/filesystem/vfs/syscall.rs index 7ee5d8e1..5eb3e0c3 100644 --- a/kernel/src/filesystem/vfs/syscall.rs +++ b/kernel/src/filesystem/vfs/syscall.rs @@ -739,6 +739,22 @@ impl Syscall { } } + pub fn fchdir(fd: i32) -> Result { + let pcb = ProcessManager::current_pcb(); + let file = pcb + .fd_table() + .read() + .get_file_by_fd(fd) + .ok_or(SystemError::EBADF)?; + let inode = file.inode(); + if inode.metadata()?.file_type != FileType::Dir { + return Err(SystemError::ENOTDIR); + } + let path = inode.absolute_path()?; + pcb.basic_mut().set_cwd(path); + return Ok(0); + } + /// @brief 获取当前进程的工作目录路径 /// /// @param buf 指向缓冲区的指针 diff --git a/kernel/src/ipc/pipe.rs b/kernel/src/ipc/pipe.rs index 288a3662..32d57494 100644 --- a/kernel/src/ipc/pipe.rs +++ b/kernel/src/ipc/pipe.rs @@ -11,7 +11,7 @@ use crate::{ wait_queue::WaitQueue, }, net::event_poll::{EPollEventType, EPollItem, EventPoll}, - process::{ProcessManager, ProcessState}, + process::{ProcessFlags, ProcessManager, ProcessState}, sched::SchedMode, time::PosixTimeSpec, }; @@ -232,6 +232,9 @@ impl IndexNode for LockedPipeInode { drop(inode); let r = wq_wait_event_interruptible!(self.read_wait_queue, self.readable(), {}); if r.is_err() { + ProcessManager::current_pcb() + .flags() + .insert(ProcessFlags::HAS_PENDING_SIGNAL); return Err(SystemError::ERESTARTSYS); } diff --git a/kernel/src/ipc/shm.rs b/kernel/src/ipc/shm.rs index 56ecdc67..e2c0187f 100644 --- a/kernel/src/ipc/shm.rs +++ b/kernel/src/ipc/shm.rs @@ -7,16 +7,15 @@ use crate::{ }, mm::{ allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame}, - page::{page_manager_lock_irqsave, Page}, + page::{page_manager_lock_irqsave, PageFlags, PageType}, PhysAddr, }, process::{Pid, ProcessManager}, syscall::user_access::{UserBufferReader, UserBufferWriter}, time::PosixTimeSpec, }; -use alloc::{sync::Arc, vec::Vec}; use core::sync::atomic::{compiler_fence, Ordering}; -use hashbrown::{HashMap, HashSet}; +use hashbrown::HashMap; use ida::IdAllocator; use log::info; use num::ToPrimitive; @@ -159,21 +158,16 @@ impl ShmManager { // 分配共享内存页面 let page_count = PageFrameCount::from_bytes(page_align_up(size)).unwrap(); - let phys_page = - unsafe { LockedFrameAllocator.allocate(page_count) }.ok_or(SystemError::EINVAL)?; // 创建共享内存page,并添加到PAGE_MANAGER中 let mut page_manager_guard = page_manager_lock_irqsave(); - let mut cur_phys = PhysPageFrame::new(phys_page.0); - for _ in 0..page_count.data() { - let page = Arc::new(Page::new(true, cur_phys.phys_address())); - page.write_irqsave().set_shm_id(shm_id); - let paddr = cur_phys.phys_address(); - page_manager_guard.insert(paddr, &page); - cur_phys = cur_phys.next(); - } + let (paddr, _page) = page_manager_guard.create_pages( + PageType::Shm(shm_id), + PageFlags::PG_UNEVICTABLE, + &mut LockedFrameAllocator, + page_count, + )?; // 创建共享内存信息结构体 - let paddr = phys_page.0; let kern_ipc_perm = KernIpcPerm { id: shm_id, key, @@ -323,9 +317,10 @@ impl ShmManager { let mut page_manager_guard = page_manager_lock_irqsave(); if map_count > 0 { // 设置共享内存物理页当映射计数等于0时可被回收 + // TODO 后续需要加入到lru中 for _ in 0..count.data() { let page = page_manager_guard.get_unwrap(&cur_phys.phys_address()); - page.write_irqsave().set_dealloc_when_zero(true); + page.write_irqsave().remove_flags(PageFlags::PG_UNEVICTABLE); cur_phys = cur_phys.next(); } @@ -375,6 +370,8 @@ pub struct KernelShm { shm_start_paddr: PhysAddr, /// 共享内存大小(bytes),注意是用户指定的大小(未经过页面对齐) shm_size: usize, + /// 映射计数 + map_count: usize, /// 最后一次连接的时间 shm_atim: PosixTimeSpec, /// 最后一次断开连接的时间 @@ -394,6 +391,7 @@ impl KernelShm { kern_ipc_perm, shm_start_paddr, shm_size, + map_count: 0, shm_atim: PosixTimeSpec::new(0, 0), shm_dtim: PosixTimeSpec::new(0, 0), shm_ctim: PosixTimeSpec::now(), @@ -436,26 +434,7 @@ impl KernelShm { /// 共享内存段的映射计数(有多少个不同的VMA映射) pub fn map_count(&self) -> usize { - let mut page_manager_guard = page_manager_lock_irqsave(); - let mut id_set: HashSet = HashSet::new(); - let mut cur_phys = PhysPageFrame::new(self.shm_start_paddr); - let page_count = PageFrameCount::from_bytes(page_align_up(self.shm_size)).unwrap(); - - for _ in 0..page_count.data() { - let page = page_manager_guard.get(&cur_phys.phys_address()).unwrap(); - id_set.extend( - page.read_irqsave() - .anon_vma() - .iter() - .map(|vma| vma.id()) - .collect::>(), - ); - - cur_phys = cur_phys.next(); - } - - // 由于LockedVMA的id是独一无二的,因此有多少个不同的id,就代表着有多少个不同的VMA映射到共享内存段 - return id_set.len(); + self.map_count } pub fn copy_from(&mut self, shm_id_ds: PosixShmIdDs) { @@ -474,6 +453,19 @@ impl KernelShm { self.update_ctim(); } + + pub fn mode(&self) -> &ShmFlags { + &self.kern_ipc_perm.mode + } + + pub fn increase_count(&mut self) { + self.map_count += 1; + } + + pub fn decrease_count(&mut self) { + assert!(self.map_count > 0, "map_count is zero"); + self.map_count -= 1; + } } /// 共享内存权限信息 diff --git a/kernel/src/ipc/signal.rs b/kernel/src/ipc/signal.rs index fe91bd68..8c85182d 100644 --- a/kernel/src/ipc/signal.rs +++ b/kernel/src/ipc/signal.rs @@ -8,7 +8,9 @@ use crate::{ arch::ipc::signal::{SigCode, SigFlags, SigSet, Signal}, ipc::signal_types::SigactionType, libs::spinlock::SpinLockGuard, - process::{pid::PidType, Pid, ProcessControlBlock, ProcessFlags, ProcessManager}, + process::{ + pid::PidType, Pid, ProcessControlBlock, ProcessFlags, ProcessManager, ProcessSignalInfo, + }, }; use super::signal_types::{ @@ -25,7 +27,7 @@ impl Signal { return false; } - if !pcb.has_pending_signal() { + if !pcb.has_pending_signal_fast() { return false; } @@ -112,7 +114,7 @@ impl Signal { } if !self.prepare_sianal(pcb.clone(), force_send) { - return Err(SystemError::EINVAL); + return Ok(0); } // debug!("force send={}", force_send); let pcb_info = pcb.sig_info_irqsave(); @@ -213,13 +215,18 @@ impl Signal { } } - /// @brief 本函数用于检测指定的进程是否想要接收SIG这个信号。 + /// 本函数用于检测指定的进程是否想要接收SIG这个信号。 + /// /// 当我们对于进程组中的所有进程都运行了这个检查之后,我们将可以找到组内愿意接收信号的进程。 /// 这么做是为了防止我们把信号发送给了一个正在或已经退出的进程,或者是不响应该信号的进程。 #[inline] fn wants_signal(&self, pcb: Arc) -> bool { // 如果改进程屏蔽了这个signal,则不能接收 - if pcb.sig_info_irqsave().sig_block().contains((*self).into()) { + if pcb + .sig_info_irqsave() + .sig_blocked() + .contains((*self).into()) + { return false; } @@ -291,7 +298,7 @@ impl Signal { // 一个被阻塞了的信号肯定是要被处理的 if pcb .sig_info_irqsave() - .sig_block() + .sig_blocked() .contains(self.into_sigset()) { return true; @@ -316,6 +323,7 @@ fn signal_wake_up(pcb: Arc, _guard: SpinLockGuard, _guard: SpinLockGuard bool { + sigset.bits() & (!blocked.bits()) != 0 } -/// @brief 刷新指定进程的sighand的sigaction,将满足条件的sigaction恢复为Default -/// 除非某个信号被设置为ignore且force_default为false,否则都不会将其恢复 +impl ProcessControlBlock { + /// 重新计算线程的flag中的TIF_SIGPENDING位 + /// 参考: https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/signal.c?r=&mo=4806&fi=182#182 + pub fn recalc_sigpending(&self, siginfo_guard: Option<&ProcessSignalInfo>) { + if !self.recalc_sigpending_tsk(siginfo_guard) { + self.flags().remove(ProcessFlags::HAS_PENDING_SIGNAL); + } + } + + fn recalc_sigpending_tsk(&self, siginfo_guard: Option<&ProcessSignalInfo>) -> bool { + let mut _siginfo_tmp_guard = None; + let siginfo = if let Some(siginfo_guard) = siginfo_guard { + siginfo_guard + } else { + _siginfo_tmp_guard = Some(self.sig_info_irqsave()); + _siginfo_tmp_guard.as_ref().unwrap() + }; + return siginfo.do_recalc_sigpending_tsk(self); + } +} + +impl ProcessSignalInfo { + fn do_recalc_sigpending_tsk(&self, pcb: &ProcessControlBlock) -> bool { + if has_pending_signals(&self.sig_pending().signal(), self.sig_blocked()) + || has_pending_signals(&self.sig_shared_pending().signal(), self.sig_blocked()) + { + pcb.flags().insert(ProcessFlags::HAS_PENDING_SIGNAL); + return true; + } + /* + * We must never clear the flag in another thread, or in current + * when it's possible the current syscall is returning -ERESTART*. + * So we don't clear it here, and only callers who know they should do. + */ + return false; + } +} +/// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/sched/signal.h?fi=restore_saved_sigmask#547 +pub fn restore_saved_sigmask() { + if ProcessManager::current_pcb() + .flags() + .test_and_clear(ProcessFlags::RESTORE_SIG_MASK) + { + let saved = *ProcessManager::current_pcb() + .sig_info_irqsave() + .saved_sigmask(); + __set_current_blocked(&saved); + } +} + +/// 刷新指定进程的sighand的sigaction,将满足条件的sigaction恢复为默认状态。 +/// 除非某个信号被设置为忽略且 `force_default` 为 `false`,否则都不会将其恢复。 /// -/// @param pcb 要被刷新的pcb -/// @param force_default 是否强制将sigaction恢复成默认状态 +/// # 参数 +/// +/// - `pcb`: 要被刷新的pcb。 +/// - `force_default`: 是否强制将sigaction恢复成默认状态。 pub fn flush_signal_handlers(pcb: Arc, force_default: bool) { compiler_fence(core::sync::atomic::Ordering::SeqCst); // debug!("hand=0x{:018x}", hand as *const sighand_struct as usize); @@ -441,31 +500,144 @@ pub(super) fn do_sigaction( return Ok(()); } -/// 设置当前进程的屏蔽信号 (sig_block),待引入 [sigprocmask](https://man7.org/linux/man-pages/man2/sigprocmask.2.html) 系统调用后要删除这个散装函数 -/// -/// ## 参数 -/// -/// - `new_set` 新的屏蔽信号bitmap的值 -pub fn set_current_sig_blocked(new_set: &mut SigSet) { - let to_remove: SigSet = - >::into(Signal::SIGKILL) | Signal::SIGSTOP.into(); - new_set.remove(to_remove); - //TODO 把这个散装函数用 sigsetops 替换掉 - let pcb = ProcessManager::current_pcb(); +/// https://code.dragonos.org.cn/xref/linux-6.6.21/include/uapi/asm-generic/signal-defs.h#72 +/// 对应SIG_BLOCK,SIG_UNBLOCK,SIG_SETMASK +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SigHow { + Block = 0, + Unblock = 1, + SetMask = 2, +} +impl TryFrom for SigHow { + type Error = SystemError; + fn try_from(value: i32) -> Result { + match value { + 0 => Ok(SigHow::Block), + 1 => Ok(SigHow::Unblock), + 2 => Ok(SigHow::SetMask), + _ => Err(SystemError::EINVAL), + } + } +} + +fn __set_task_blocked(pcb: &Arc, new_set: &SigSet) { + //todo 还有一个对线程组是否为空的判断,进程组、线程组实现之后,需要更改这里。 + if pcb.has_pending_signal() { + let mut newblocked = *new_set; + let guard = pcb.sig_info_irqsave(); + newblocked.remove(*guard.sig_blocked()); + drop(guard); + + // 从主线程开始去遍历 + if let Some(group_leader) = pcb.threads_read_irqsave().group_leader() { + retarget_shared_pending(group_leader, newblocked); + } + } + *pcb.sig_info_mut().sig_block_mut() = *new_set; + pcb.recalc_sigpending(None); +} + +fn __set_current_blocked(new_set: &SigSet) { + let pcb = ProcessManager::current_pcb(); /* 如果当前pcb的sig_blocked和新的相等,那么就不用改变它。 请注意,一个进程的sig_blocked字段不能被其他进程修改! */ - if pcb.sig_info_irqsave().sig_block().eq(new_set) { + if pcb.sig_info_irqsave().sig_blocked().eq(new_set) { + return; + } + let guard: SpinLockGuard<'_, SignalStruct> = pcb.sig_struct_irqsave(); + + __set_task_blocked(&pcb, new_set); + + drop(guard); +} + +fn retarget_shared_pending(pcb: Arc, which: SigSet) { + let retarget = pcb.sig_info_irqsave().sig_shared_pending().signal(); + retarget.intersects(which); + if retarget.is_empty() { return; } - let guard = pcb.sig_struct_irqsave(); - // todo: 当一个进程有多个线程后,在这里需要设置每个线程的block字段,并且 retarget_shared_pending(虽然我还没搞明白linux这部分是干啥的) + // 对于线程组中的每一个线程都要执行的函数 + let thread_handling_function = |pcb: Arc, retarget: &SigSet| { + if retarget.is_empty() { + return; + } - // 设置当前进程的sig blocked - *pcb.sig_info_mut().sig_block_mut() = *new_set; - recalc_sigpending(); - drop(guard); + if pcb.flags().contains(ProcessFlags::EXITING) { + return; + } + + let blocked = pcb.sig_info_irqsave().sig_shared_pending().signal(); + if retarget.difference(blocked).is_empty() { + return; + } + + retarget.intersects(blocked); + if !pcb.has_pending_signal() { + let guard = pcb.sig_struct_irqsave(); + signal_wake_up(pcb.clone(), guard, false); + } + // 之前的对retarget的判断移动到最前面,因为对于当前线程的线程的处理已经结束,对于后面的线程在一开始判断retarget为空即可结束处理 + + // debug!("handle done"); + }; + + // 暴力遍历每一个线程,找到相同的tgid + let tgid = pcb.tgid(); + for &pid in pcb.children_read_irqsave().iter() { + if let Some(child) = ProcessManager::find(pid) { + if child.tgid() == tgid { + thread_handling_function(child, &retarget); + } + } + } + // debug!("retarget_shared_pending done!"); +} + +/// 设置当前进程的屏蔽信号 (sig_block) +/// +/// ## 参数 +/// +/// - `new_set` 新的屏蔽信号bitmap的值 +pub fn set_current_blocked(new_set: &mut SigSet) { + let to_remove: SigSet = + >::into(Signal::SIGKILL) | Signal::SIGSTOP.into(); + new_set.remove(to_remove); + __set_current_blocked(new_set); +} + +/// 设置当前进程的屏蔽信号 (sig_block) +/// +/// ## 参数 +/// +/// - `how` 设置方式 +/// - `new_set` 新的屏蔽信号bitmap的值 +pub fn set_sigprocmask(how: SigHow, set: SigSet) -> Result { + let pcb: Arc = ProcessManager::current_pcb(); + let guard = pcb.sig_info_irqsave(); + let oset = *guard.sig_blocked(); + + let mut res_set = oset; + drop(guard); + + match how { + SigHow::Block => { + // debug!("SIG_BLOCK\tGoing to insert is: {}", set.bits()); + res_set.insert(set); + } + SigHow::Unblock => { + res_set.remove(set); + } + SigHow::SetMask => { + // debug!("SIG_SETMASK\tGoing to set is: {}", set.bits()); + res_set = set; + } + } + + __set_current_blocked(&res_set); + Ok(oset) } diff --git a/kernel/src/ipc/signal_types.rs b/kernel/src/ipc/signal_types.rs index d8d7b167..befeda02 100644 --- a/kernel/src/ipc/signal_types.rs +++ b/kernel/src/ipc/signal_types.rs @@ -75,9 +75,15 @@ pub struct InnerSignalStruct { impl SignalStruct { #[inline(never)] pub fn new() -> Self { - Self { + let mut r = Self { inner: Box::::default(), - } + }; + let sig_ign = Sigaction::default(); + r.inner.handlers[Signal::SIGCHLD as usize - 1] = sig_ign; + r.inner.handlers[Signal::SIGURG as usize - 1] = sig_ign; + r.inner.handlers[Signal::SIGWINCH as usize - 1] = sig_ign; + + r } } @@ -447,8 +453,6 @@ impl SigPending { None }; - // 当一个进程具有多个线程之后,在这里需要重新计算线程的flag中的TIF_SIGPENDING位 - // recalc_sigpending(); return (sig, info); } /// @brief 从sigpending中删除mask中被置位的信号。也就是说,比如mask的第1位被置为1,那么就从sigqueue中删除所有signum为2的信号的信息。 @@ -539,10 +543,12 @@ impl SigQueue { pub trait SignalArch { /// 信号处理函数 /// + /// 处理信号或重启系统调用 + /// /// ## 参数 /// /// - `frame` 中断栈帧 - unsafe fn do_signal(frame: &mut TrapFrame); + unsafe fn do_signal_or_restart(frame: &mut TrapFrame); fn sys_rt_sigreturn(trap_frame: &mut TrapFrame) -> u64; } diff --git a/kernel/src/ipc/syscall.rs b/kernel/src/ipc/syscall.rs index 2d0b9b6b..c0d7305c 100644 --- a/kernel/src/ipc/syscall.rs +++ b/kernel/src/ipc/syscall.rs @@ -16,8 +16,7 @@ use crate::{ FilePrivateData, }, ipc::shm::{shm_manager_lock, IPC_PRIVATE}, - libs::align::page_align_up, - libs::spinlock::SpinLock, + libs::{align::page_align_up, spinlock::SpinLock}, mm::{ allocator::page_frame::{PageFrameCount, PhysPageFrame, VirtPageFrame}, page::{page_manager_lock_irqsave, EntryFlags, PageFlushAll}, @@ -35,6 +34,7 @@ use crate::{ use super::{ pipe::{LockedPipeInode, PipeFsPrivateData}, shm::{ShmCtlCmd, ShmFlags, ShmId, ShmKey}, + signal::{set_sigprocmask, SigHow}, signal_types::{ SaHandlerType, SigInfo, SigType, Sigaction, SigactionType, UserSigaction, USER_SIG_DFL, USER_SIG_ERR, USER_SIG_IGN, @@ -404,6 +404,9 @@ impl Syscall { // 更新最后一次连接时间 kernel_shm.update_atim(); + // 映射计数增加 + kernel_shm.increase_count(); + Ok(r) } @@ -432,29 +435,6 @@ impl Syscall { return Err(SystemError::EINVAL); } - // 获取映射的物理地址 - let paddr = address_write_guard - .user_mapper - .utable - .translate(vaddr) - .ok_or(SystemError::EINVAL)? - .0; - - // 如果物理页的shm_id为None,代表不是共享页 - let mut page_manager_guard = page_manager_lock_irqsave(); - let page = page_manager_guard.get(&paddr).ok_or(SystemError::EINVAL)?; - let shm_id = page.read_irqsave().shm_id().ok_or(SystemError::EINVAL)?; - drop(page_manager_guard); - - // 获取对应共享页管理信息 - let mut shm_manager_guard = shm_manager_lock(); - let kernel_shm = shm_manager_guard - .get_mut(&shm_id) - .ok_or(SystemError::EINVAL)?; - // 更新最后一次断开连接时间 - kernel_shm.update_dtim(); - drop(shm_manager_guard); - // 取消映射 let flusher: PageFlushAll = PageFlushAll::new(); vma.unmap(&mut address_write_guard.user_mapper.utable, flusher); @@ -504,4 +484,72 @@ impl Syscall { ShmCtlCmd::Default => Err(SystemError::EINVAL), } } + + /// # SYS_SIGPROCMASK系统调用函数,用于设置或查询当前进程的信号屏蔽字 + /// + /// ## 参数 + /// + /// - `how`: 指示如何修改信号屏蔽字 + /// - `nset`: 新的信号屏蔽字 + /// - `oset`: 旧的信号屏蔽字的指针,由于可以是NULL,所以用Option包装 + /// - `sigsetsize`: 信号集的大小 + /// + /// ## 返回值 + /// + /// 成功:0 + /// 失败:错误码 + /// + /// ## 说明 + /// 根据 https://man7.org/linux/man-pages/man2/sigprocmask.2.html ,传进来的oldset和newset都是指针类型,这里选择传入usize然后转换为u64的指针类型 + pub fn rt_sigprocmask( + how: i32, + newset: usize, + oldset: usize, + sigsetsize: usize, + ) -> Result { + // 对应oset传进来一个NULL的情况 + let oset = if oldset == 0 { None } else { Some(oldset) }; + let nset = if newset == 0 { None } else { Some(newset) }; + + if sigsetsize != size_of::() { + return Err(SystemError::EFAULT); + } + + let sighow = SigHow::try_from(how)?; + + let mut new_set = SigSet::default(); + if let Some(nset) = nset { + let reader = UserBufferReader::new( + VirtAddr::new(nset).as_ptr::(), + core::mem::size_of::(), + true, + )?; + + let nset = reader.read_one_from_user::(0)?; + new_set = SigSet::from_bits_truncate(*nset); + // debug!("Get Newset: {}", &new_set.bits()); + let to_remove: SigSet = + >::into(Signal::SIGKILL) | Signal::SIGSTOP.into(); + new_set.remove(to_remove); + } + + let oldset_to_return = set_sigprocmask(sighow, new_set)?; + if let Some(oldset) = oset { + // debug!("Get Oldset to return: {}", &oldset_to_return.bits()); + let mut writer = UserBufferWriter::new( + VirtAddr::new(oldset).as_ptr::(), + core::mem::size_of::(), + true, + )?; + writer.copy_one_to_user::(&oldset_to_return.bits(), 0)?; + } + + Ok(0) + } + + pub fn restart_syscall() -> Result { + // todo: https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/signal.c#2998 + unimplemented!("restart_syscall with restart block"); + // Err(SystemError::ENOSYS) + } } diff --git a/kernel/src/libs/semaphore.rs b/kernel/src/libs/semaphore.rs index 5cf600d3..00003a02 100644 --- a/kernel/src/libs/semaphore.rs +++ b/kernel/src/libs/semaphore.rs @@ -37,7 +37,7 @@ impl Semaphore { fn down(&self) { if self.counter.fetch_sub(1, Ordering::Release) <= 0 { self.counter.fetch_add(1, Ordering::Relaxed); - self.wait_queue.sleep(); + self.wait_queue.sleep().ok(); //资源不充足,信号量<=0, 此时进程睡眠 } } diff --git a/kernel/src/libs/wait_queue.rs b/kernel/src/libs/wait_queue.rs index efb3606d..5fee2105 100644 --- a/kernel/src/libs/wait_queue.rs +++ b/kernel/src/libs/wait_queue.rs @@ -1,7 +1,7 @@ // #![allow(dead_code)] use core::intrinsics::unlikely; -use alloc::{collections::LinkedList, sync::Arc, vec::Vec}; +use alloc::{collections::VecDeque, sync::Arc, vec::Vec}; use log::{error, warn}; use system_error::SystemError; @@ -19,23 +19,40 @@ use super::{ #[derive(Debug)] struct InnerWaitQueue { + /// 等待队列是否已经死亡, 如果已经死亡, 则不能再添加新的等待进程 + dead: bool, /// 等待队列的链表 - wait_list: LinkedList>, + wait_list: VecDeque>, } /// 被自旋锁保护的等待队列 #[derive(Debug)] -pub struct WaitQueue(SpinLock); +pub struct WaitQueue { + inner: SpinLock, +} #[allow(dead_code)] impl WaitQueue { pub const fn default() -> Self { - WaitQueue(SpinLock::new(InnerWaitQueue::INIT)) + WaitQueue { + inner: SpinLock::new(InnerWaitQueue::INIT), + } + } + + fn inner_irqsave(&self) -> SpinLockGuard { + self.inner.lock_irqsave() + } + + fn inner(&self) -> SpinLockGuard { + self.inner.lock() } pub fn prepare_to_wait_event(&self, interruptible: bool) -> Result<(), SystemError> { - let mut guard: SpinLockGuard = self.0.lock_irqsave(); + let mut guard: SpinLockGuard = self.inner_irqsave(); let pcb = ProcessManager::current_pcb(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } if Signal::signal_pending_state(interruptible, false, &pcb) { return Err(SystemError::ERESTARTSYS); } else { @@ -51,7 +68,7 @@ impl WaitQueue { pub fn finish_wait(&self) { let pcb = ProcessManager::current_pcb(); let mut writer = pcb.sched_info().inner_lock_write_irqsave(); - let mut guard: SpinLockGuard = self.0.lock_irqsave(); + let mut guard: SpinLockGuard = self.inner_irqsave(); writer.set_state(ProcessState::Runnable); writer.set_wakeup(); @@ -62,34 +79,49 @@ impl WaitQueue { } /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断 - pub fn sleep(&self) { + pub fn sleep(&self) -> Result<(), SystemError> { before_sleep_check(0); - let mut guard: SpinLockGuard = self.0.lock_irqsave(); + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(true).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(guard); schedule(SchedMode::SM_NONE); + Ok(()) + } + + /// 标记等待队列已经死亡,不能再添加新的等待进程 + pub fn mark_dead(&self) { + let mut guard: SpinLockGuard = self.inner_irqsave(); + guard.dead = true; + drop(guard); } /// @brief 让当前进程在等待队列上进行等待,并且,在释放waitqueue的锁之前,执行f函数闭包 - pub fn sleep_with_func(&self, f: F) + pub fn sleep_with_func(&self, f: F) -> Result<(), SystemError> where F: FnOnce(), { before_sleep_check(0); - let mut guard: SpinLockGuard = self.0.lock_irqsave(); - let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } + ProcessManager::mark_sleep(true).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); - drop(irq_guard); guard.wait_list.push_back(ProcessManager::current_pcb()); f(); drop(guard); schedule(SchedMode::SM_NONE); + + Ok(()) } /// @brief 让当前进程在等待队列上进行等待. 但是,在释放waitqueue的锁之后,不会调用调度函数。 @@ -106,80 +138,95 @@ impl WaitQueue { /// /// 由于sleep_without_schedule不会调用调度函数,因此,如果开发者忘记在执行本函数之后,手动调用调度函数, /// 由于时钟中断到来或者‘其他cpu kick了当前cpu’,可能会导致一些未定义的行为。 - pub unsafe fn sleep_without_schedule(&self) { + pub unsafe fn sleep_without_schedule(&self) -> Result<(), SystemError> { before_sleep_check(1); // 安全检查:确保当前处于中断禁止状态 assert!(!CurrentIrqArch::is_irq_enabled()); - let mut guard: SpinLockGuard = self.0.lock(); + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(true).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(guard); + Ok(()) } - pub unsafe fn sleep_without_schedule_uninterruptible(&self) { + pub unsafe fn sleep_without_schedule_uninterruptible(&self) -> Result<(), SystemError> { before_sleep_check(1); // 安全检查:确保当前处于中断禁止状态 assert!(!CurrentIrqArch::is_irq_enabled()); - let mut guard: SpinLockGuard = self.0.lock(); + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(false).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(guard); + Ok(()) } /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断 - pub fn sleep_uninterruptible(&self) { + pub fn sleep_uninterruptible(&self) -> Result<(), SystemError> { before_sleep_check(0); - let mut guard: SpinLockGuard = self.0.lock(); - let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(false).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); - drop(irq_guard); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(guard); schedule(SchedMode::SM_NONE); + Ok(()) } /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。 /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。 - pub fn sleep_unlock_spinlock(&self, to_unlock: SpinLockGuard) { + pub fn sleep_unlock_spinlock(&self, to_unlock: SpinLockGuard) -> Result<(), SystemError> { before_sleep_check(1); - let mut guard: SpinLockGuard = self.0.lock(); - let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + let mut guard: SpinLockGuard = self.inner_irqsave(); + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(true).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); - drop(irq_guard); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(to_unlock); drop(guard); schedule(SchedMode::SM_NONE); + Ok(()) } /// @brief 让当前进程在等待队列上进行等待,并且,允许被信号打断。 /// 在当前进程的pcb加入队列后,解锁指定的Mutex。 - pub fn sleep_unlock_mutex(&self, to_unlock: MutexGuard) { + pub fn sleep_unlock_mutex(&self, to_unlock: MutexGuard) -> Result<(), SystemError> { before_sleep_check(1); - let mut guard: SpinLockGuard = self.0.lock(); - let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + let mut guard: SpinLockGuard = self.inner_irqsave(); + + if !guard.can_sleep() { + return Err(SystemError::ESRCH); + } ProcessManager::mark_sleep(true).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); }); - drop(irq_guard); guard.wait_list.push_back(ProcessManager::current_pcb()); drop(to_unlock); drop(guard); schedule(SchedMode::SM_NONE); + Ok(()) } /// @brief 让当前进程在等待队列上进行等待,并且,不允许被信号打断。 /// 在当前进程的pcb加入队列后,解锁指定的自旋锁。 pub fn sleep_uninterruptible_unlock_spinlock(&self, to_unlock: SpinLockGuard) { before_sleep_check(1); - let mut guard: SpinLockGuard = self.0.lock(); + let mut guard: SpinLockGuard = self.inner_irqsave(); let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; ProcessManager::mark_sleep(false).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); @@ -195,7 +242,7 @@ impl WaitQueue { /// 在当前进程的pcb加入队列后,解锁指定的Mutex。 pub fn sleep_uninterruptible_unlock_mutex(&self, to_unlock: MutexGuard) { before_sleep_check(1); - let mut guard: SpinLockGuard = self.0.lock(); + let mut guard: SpinLockGuard = self.inner_irqsave(); let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; ProcessManager::mark_sleep(false).unwrap_or_else(|e| { panic!("sleep error: {:?}", e); @@ -217,7 +264,7 @@ impl WaitQueue { /// @return true 成功唤醒进程 /// @return false 没有唤醒进程 pub fn wakeup(&self, state: Option) -> bool { - let mut guard: SpinLockGuard = self.0.lock_irqsave(); + let mut guard: SpinLockGuard = self.inner_irqsave(); // 如果队列为空,则返回 if guard.wait_list.is_empty() { return false; @@ -246,7 +293,7 @@ impl WaitQueue { /// /// @param state 用于判断的state,如果一个进程与这个state相同,或者为None(表示不进行这个判断),则唤醒这个进程。 pub fn wakeup_all(&self, state: Option) { - let mut guard: SpinLockGuard = self.0.lock_irqsave(); + let mut guard: SpinLockGuard = self.inner_irqsave(); // 如果队列为空,则返回 if guard.wait_list.is_empty() { return; @@ -281,14 +328,19 @@ impl WaitQueue { /// @brief 获得当前等待队列的大小 pub fn len(&self) -> usize { - return self.0.lock().wait_list.len(); + return self.inner_irqsave().wait_list.len(); } } impl InnerWaitQueue { pub const INIT: InnerWaitQueue = InnerWaitQueue { - wait_list: LinkedList::new(), + wait_list: VecDeque::new(), + dead: false, }; + + pub fn can_sleep(&self) -> bool { + return !self.dead; + } } fn before_sleep_check(max_preempt: usize) { diff --git a/kernel/src/misc/events/kprobe/device.rs b/kernel/src/misc/events/kprobe/device.rs new file mode 100644 index 00000000..dcb6d66c --- /dev/null +++ b/kernel/src/misc/events/kprobe/device.rs @@ -0,0 +1,189 @@ +use crate::driver::base::class::Class; +use crate::driver::base::device::bus::Bus; +use crate::driver::base::device::driver::Driver; +use crate::driver::base::device::{Device, DeviceCommonData, DeviceType, IdTable}; +use crate::driver::base::kobject::{ + KObjType, KObject, KObjectCommonData, KObjectState, LockedKObjectState, +}; +use crate::driver::base::kset::KSet; +use crate::filesystem::kernfs::KernFSInode; +use crate::filesystem::sysfs::{Attribute, SysFSOpsSupport}; +use crate::filesystem::vfs::syscall::ModeType; +use crate::libs::rwlock::{RwLockReadGuard, RwLockWriteGuard}; +use crate::libs::spinlock::{SpinLock, SpinLockGuard}; +use alloc::string::{String, ToString}; +use alloc::sync::{Arc, Weak}; +use core::fmt::Debug; +use system_error::SystemError; + +#[derive(Debug)] +#[cast_to([sync] Device)] +pub struct KprobeDevice { + inner: SpinLock, + kobj_state: LockedKObjectState, + name: String, +} + +#[derive(Debug)] +struct InnerKprobeDevice { + kobject_common: KObjectCommonData, + device_common: DeviceCommonData, +} + +impl KprobeDevice { + pub fn new(parent: Option>) -> Arc { + let bus_device = Self { + inner: SpinLock::new(InnerKprobeDevice { + kobject_common: KObjectCommonData::default(), + device_common: DeviceCommonData::default(), + }), + kobj_state: LockedKObjectState::new(None), + name: "kprobe".to_string(), + }; + bus_device.set_parent(parent); + return Arc::new(bus_device); + } + + fn inner(&self) -> SpinLockGuard { + self.inner.lock() + } +} + +impl KObject for KprobeDevice { + fn as_any_ref(&self) -> &dyn core::any::Any { + self + } + + fn set_inode(&self, inode: Option>) { + self.inner().kobject_common.kern_inode = inode; + } + + fn inode(&self) -> Option> { + self.inner().kobject_common.kern_inode.clone() + } + + fn parent(&self) -> Option> { + self.inner().kobject_common.parent.clone() + } + + fn set_parent(&self, parent: Option>) { + self.inner().kobject_common.parent = parent; + } + + fn kset(&self) -> Option> { + self.inner().kobject_common.kset.clone() + } + + fn set_kset(&self, kset: Option>) { + self.inner().kobject_common.kset = kset; + } + + fn kobj_type(&self) -> Option<&'static dyn KObjType> { + self.inner().kobject_common.kobj_type + } + + fn set_kobj_type(&self, ktype: Option<&'static dyn KObjType>) { + self.inner().kobject_common.kobj_type = ktype; + } + + fn name(&self) -> String { + self.name.clone() + } + + fn set_name(&self, _name: String) {} + + fn kobj_state(&self) -> RwLockReadGuard { + self.kobj_state.read() + } + + fn kobj_state_mut(&self) -> RwLockWriteGuard { + self.kobj_state.write() + } + + fn set_kobj_state(&self, state: KObjectState) { + *self.kobj_state.write() = state; + } +} + +impl Device for KprobeDevice { + #[inline] + #[allow(dead_code)] + fn dev_type(&self) -> DeviceType { + return DeviceType::Other; + } + + #[inline] + fn id_table(&self) -> IdTable { + IdTable::new("kprobe".to_string(), None) + } + + fn bus(&self) -> Option> { + self.inner().device_common.bus.clone() + } + + fn set_bus(&self, bus: Option>) { + self.inner().device_common.bus = bus; + } + + fn set_class(&self, class: Option>) { + self.inner().device_common.class = class; + } + + fn driver(&self) -> Option> { + self.inner().device_common.driver.clone()?.upgrade() + } + + fn set_driver(&self, driver: Option>) { + self.inner().device_common.driver = driver; + } + + #[inline] + fn is_dead(&self) -> bool { + false + } + + fn can_match(&self) -> bool { + todo!() + } + + fn set_can_match(&self, _can_match: bool) { + todo!() + } + + fn state_synced(&self) -> bool { + todo!() + } + + fn dev_parent(&self) -> Option> { + self.inner().device_common.get_parent_weak_or_clear() + } + + fn set_dev_parent(&self, dev_parent: Option>) { + self.inner().device_common.parent = dev_parent; + } +} + +#[derive(Debug)] +pub struct KprobeAttr; + +impl Attribute for KprobeAttr { + fn name(&self) -> &str { + "type" + } + + fn mode(&self) -> ModeType { + ModeType::S_IRUGO + } + + fn support(&self) -> SysFSOpsSupport { + SysFSOpsSupport::ATTR_SHOW + } + fn show(&self, _kobj: Arc, buf: &mut [u8]) -> Result { + if buf.is_empty() { + return Err(SystemError::EINVAL); + } + // perf_type_id::PERF_TYPE_MAX + buf[0] = b'6'; + Ok(1) + } +} diff --git a/kernel/src/misc/events/kprobe/mod.rs b/kernel/src/misc/events/kprobe/mod.rs new file mode 100644 index 00000000..90bf874c --- /dev/null +++ b/kernel/src/misc/events/kprobe/mod.rs @@ -0,0 +1,31 @@ +use crate::driver::base::device::bus::Bus; +use crate::driver::base::device::{device_manager, device_register, sys_devices_kset, Device}; +use crate::driver::base::kobject::KObject; +use crate::init::initcall::INITCALL_DEVICE; +use crate::misc::events::get_event_source_bus; +use crate::misc::events::kprobe::device::{KprobeAttr, KprobeDevice}; +use alloc::sync::Arc; +use system_error::SystemError; +use unified_init::macros::unified_init; + +pub mod device; +static mut KPROBE_DEVICE: Option> = None; + +#[unified_init(INITCALL_DEVICE)] +pub fn kprobe_subsys_init() -> Result<(), SystemError> { + let kprobe_device = KprobeDevice::new(Some(Arc::downgrade( + &(sys_devices_kset() as Arc), + ))); + + let event_source_bus = get_event_source_bus().ok_or(SystemError::EINVAL)?; + kprobe_device.set_bus(Some(Arc::downgrade(&(event_source_bus as Arc)))); + + // 注册到/sys/devices下 + device_register(kprobe_device.clone())?; + unsafe { + KPROBE_DEVICE = Some(kprobe_device.clone()); + } + + device_manager().create_file(&(kprobe_device as Arc), &KprobeAttr)?; + Ok(()) +} diff --git a/kernel/src/misc/events/mod.rs b/kernel/src/misc/events/mod.rs new file mode 100644 index 00000000..8590e32e --- /dev/null +++ b/kernel/src/misc/events/mod.rs @@ -0,0 +1,28 @@ +use crate::driver::base::device::bus::{bus_register, Bus}; +use crate::init::initcall::INITCALL_SUBSYS; +use crate::misc::events::subsys::EventSourceBus; +use alloc::sync::Arc; +use system_error::SystemError; +use unified_init::macros::unified_init; + +mod kprobe; +mod subsys; + +static mut EVENT_SOURCE_BUS: Option> = None; + +fn get_event_source_bus() -> Option> { + unsafe { EVENT_SOURCE_BUS.clone() } +} + +#[unified_init(INITCALL_SUBSYS)] +pub fn init_event_source_bus() -> Result<(), SystemError> { + let event_source_bus = EventSourceBus::new(); + let r = bus_register(event_source_bus.clone() as Arc); + if r.is_err() { + unsafe { EVENT_SOURCE_BUS = None }; + return r; + } + unsafe { EVENT_SOURCE_BUS = Some(event_source_bus.clone()) }; + // kprobe::kprobe_subsys_init()?; + Ok(()) +} diff --git a/kernel/src/misc/events/subsys.rs b/kernel/src/misc/events/subsys.rs new file mode 100644 index 00000000..d581f345 --- /dev/null +++ b/kernel/src/misc/events/subsys.rs @@ -0,0 +1,52 @@ +use crate::driver::base::device::bus::Bus; +use crate::driver::base::device::Device; +use crate::driver::base::subsys::SubSysPrivate; +use alloc::string::{String, ToString}; +use alloc::sync::{Arc, Weak}; +use system_error::SystemError; + +#[derive(Debug)] +pub struct EventSourceBus { + private: SubSysPrivate, +} + +impl EventSourceBus { + pub fn new() -> Arc { + let w: Weak = Weak::new(); + let private = SubSysPrivate::new("event_source".to_string(), Some(w), None, &[]); + let bus = Arc::new(Self { private }); + bus.subsystem() + .set_bus(Some(Arc::downgrade(&(bus.clone() as Arc)))); + return bus; + } +} + +impl Bus for EventSourceBus { + fn name(&self) -> String { + "event_source".to_string() + } + + fn dev_name(&self) -> String { + self.name() + } + + fn root_device(&self) -> Option> { + None + } + + fn remove(&self, _device: &Arc) -> Result<(), SystemError> { + todo!() + } + + fn shutdown(&self, _device: &Arc) { + todo!() + } + + fn resume(&self, _device: &Arc) -> Result<(), SystemError> { + todo!() + } + + fn subsystem(&self) -> &SubSysPrivate { + &self.private + } +} diff --git a/kernel/src/misc/mod.rs b/kernel/src/misc/mod.rs index 9d8a9774..541e1db7 100644 --- a/kernel/src/misc/mod.rs +++ b/kernel/src/misc/mod.rs @@ -1 +1,2 @@ +pub mod events; pub mod ksysfs; diff --git a/kernel/src/mm/allocator/page_frame.rs b/kernel/src/mm/allocator/page_frame.rs index 180a2ac2..a7e9c8ba 100644 --- a/kernel/src/mm/allocator/page_frame.rs +++ b/kernel/src/mm/allocator/page_frame.rs @@ -5,8 +5,6 @@ use core::{ use crate::{ arch::{mm::LockedFrameAllocator, MMArch}, - ipc::shm::shm_manager_lock, - libs::spinlock::SpinLockGuard, mm::{MemoryManagementArch, PhysAddr, VirtAddr}, }; @@ -173,6 +171,8 @@ impl Iterator for VirtPageFrameIter { pub struct PageFrameCount(usize); impl PageFrameCount { + pub const ONE: PageFrameCount = PageFrameCount(1); + // @brief 初始化PageFrameCount pub const fn new(count: usize) -> Self { return Self(count); @@ -355,30 +355,8 @@ pub unsafe fn allocate_page_frames(count: PageFrameCount) -> Option<(PhysAddr, P /// /// @param frame 要释放的第一个页帧 /// @param count 要释放的页帧数量 (必须是2的n次幂) -pub unsafe fn deallocate_page_frames( - frame: PhysPageFrame, - count: PageFrameCount, - page_manager_guard: &mut SpinLockGuard<'_, crate::mm::page::PageManager>, -) { +pub unsafe fn deallocate_page_frames(frame: PhysPageFrame, count: PageFrameCount) { unsafe { LockedFrameAllocator.free(frame.phys_address(), count); - } - - let mut frame = frame; - for _ in 0..count.data() { - let paddr = frame.phys_address(); - let page = page_manager_guard.get(&paddr); - - if let Some(page) = page { - // 如果page是共享页,将其共享页信息从SHM_MANAGER中删去 - let page_guard = page.read_irqsave(); - if page_guard.shared() { - shm_manager_lock().free_id(&page_guard.shm_id().unwrap()); - } - } - - // 将已回收的物理页面对应的Page从PAGE_MANAGER中删去 - page_manager_guard.remove_page(&paddr); - frame = frame.next(); - } + }; } diff --git a/kernel/src/mm/fault.rs b/kernel/src/mm/fault.rs index e0aa1622..0fa8e6b0 100644 --- a/kernel/src/mm/fault.rs +++ b/kernel/src/mm/fault.rs @@ -22,7 +22,7 @@ use crate::mm::MemoryManagementArch; use super::{ allocator::page_frame::FrameAllocator, - page::{page_reclaimer_lock_irqsave, Page, PageFlags}, + page::{FileMapInfo, Page, PageFlags, PageType}, }; bitflags! { @@ -55,7 +55,7 @@ pub struct PageFaultMessage<'a> { flags: FaultFlags, /// 页表映射器 mapper: &'a mut PageMapper, - /// 缺页的文件页在文件中的偏移量 + /// 缺页的文件页在文件中的偏移页号 file_pgoff: Option, /// 缺页对应PageCache中的文件页 page: Option>, @@ -308,32 +308,14 @@ impl PageFaultHandler { let cache_page = pfm.page.clone().unwrap(); let mapper = &mut pfm.mapper; - let cow_page_phys = mapper.allocator_mut().allocate_one(); - if cow_page_phys.is_none() { + let mut page_manager_guard = page_manager_lock_irqsave(); + if let Ok(page) = + page_manager_guard.copy_page(&cache_page.phys_address(), mapper.allocator_mut()) + { + pfm.cow_page = Some(page.clone()); + } else { return VmFaultReason::VM_FAULT_OOM; } - let cow_page_phys = cow_page_phys.unwrap(); - - let cow_page = Arc::new(Page::new(false, cow_page_phys)); - pfm.cow_page = Some(cow_page.clone()); - - //复制PageCache内容到新的页内 - let new_frame = MMArch::phys_2_virt(cow_page_phys).unwrap(); - (new_frame.data() as *mut u8).copy_from_nonoverlapping( - MMArch::phys_2_virt(cache_page.read_irqsave().phys_address()) - .unwrap() - .data() as *mut u8, - MMArch::PAGE_SIZE, - ); - - let mut page_manager_guard = page_manager_lock_irqsave(); - - // 新页加入页管理器中 - page_manager_guard.insert(cow_page_phys, &cow_page); - cow_page.write_irqsave().set_page_cache_index( - cache_page.read_irqsave().page_cache(), - cache_page.read_irqsave().index(), - ); ret = ret.union(Self::finish_fault(pfm)); @@ -608,10 +590,10 @@ impl PageFaultHandler { << MMArch::PAGE_SHIFT); for pgoff in start_pgoff..=end_pgoff { - if let Some(page) = page_cache.get_page(pgoff) { + if let Some(page) = page_cache.lock_irqsave().get_page(pgoff) { let page_guard = page.read_irqsave(); if page_guard.flags().contains(PageFlags::PG_UPTODATE) { - let phys = page_guard.phys_address(); + let phys = page.phys_address(); let address = VirtAddr::new(addr.data() + ((pgoff - start_pgoff) << MMArch::PAGE_SHIFT)); @@ -642,7 +624,7 @@ impl PageFaultHandler { let mapper = &mut pfm.mapper; let mut ret = VmFaultReason::empty(); - if let Some(page) = page_cache.get_page(file_pgoff) { + if let Some(page) = page_cache.lock_irqsave().get_page(file_pgoff) { // TODO 异步从磁盘中预读页面进PageCache // 直接将PageCache中的页面作为要映射的页面 @@ -669,16 +651,19 @@ impl PageFaultHandler { ) .expect("failed to read file to create pagecache page"); - let page = Arc::new(Page::new(true, new_cache_page)); + let page = page_manager_lock_irqsave() + .create_one_page( + PageType::File(FileMapInfo { + page_cache: page_cache.clone(), + index: file_pgoff, + }), + PageFlags::PG_LRU, + allocator, + ) + .expect("failed to create page"); pfm.page = Some(page.clone()); - page.write_irqsave().add_flags(PageFlags::PG_LRU); - page_manager_lock_irqsave().insert(new_cache_page, &page); - page_reclaimer_lock_irqsave().insert_page(new_cache_page, &page); - page_cache.add_page(file_pgoff, &page); - - page.write_irqsave() - .set_page_cache_index(Some(page_cache), Some(file_pgoff)); + page_cache.lock_irqsave().add_page(file_pgoff, &page); } ret } @@ -710,7 +695,7 @@ impl PageFaultHandler { cache_page.expect("no cache_page in PageFaultMessage") }; - let page_phys = page_to_map.read_irqsave().phys_address(); + let page_phys = page_to_map.phys_address(); mapper.map_phys(address, page_phys, vma_guard.flags()); page_to_map.write_irqsave().insert_vma(pfm.vma()); diff --git a/kernel/src/mm/page.rs b/kernel/src/mm/page.rs index f3d90069..3ec68547 100644 --- a/kernel/src/mm/page.rs +++ b/kernel/src/mm/page.rs @@ -1,4 +1,4 @@ -use alloc::string::ToString; +use alloc::{string::ToString, vec::Vec}; use core::{ fmt::{self, Debug, Error, Formatter}, marker::PhantomData, @@ -17,7 +17,7 @@ use lru::LruCache; use crate::{ arch::{interrupt::ipi::send_ipi, mm::LockedFrameAllocator, MMArch}, exception::ipi::{IpiKind, IpiTarget}, - filesystem::vfs::{file::PageCache, FilePrivateData}, + filesystem::{page_cache::PageCache, vfs::FilePrivateData}, init::initcall::INITCALL_CORE, ipc::shm::ShmId, libs::{ @@ -29,7 +29,9 @@ use crate::{ }; use super::{ - allocator::page_frame::{FrameAllocator, PageFrameCount}, + allocator::page_frame::{ + deallocate_page_frames, FrameAllocator, PageFrameCount, PhysPageFrame, + }, syscall::ProtFlags, ucontext::LockedVMA, MemoryManagementArch, PageTableKind, PhysAddr, VirtAddr, @@ -74,6 +76,7 @@ impl PageManager { } } + #[allow(dead_code)] pub fn contains(&self, paddr: &PhysAddr) -> bool { self.phys2page.contains_key(paddr) } @@ -91,13 +94,121 @@ impl PageManager { .clone() } - pub fn insert(&mut self, paddr: PhysAddr, page: &Arc) { - self.phys2page.insert(paddr, page.clone()); + fn insert(&mut self, page: &Arc) -> Result, SystemError> { + let phys = page.phys_address(); + if !self.phys2page.contains_key(&phys) { + self.phys2page.insert(phys, page.clone()); + Ok(page.clone()) + } else { + log::error!("phys page: {phys:?} already exists."); + Err(SystemError::EINVAL) + } } pub fn remove_page(&mut self, paddr: &PhysAddr) { self.phys2page.remove(paddr); } + + /// # 创建一个新页面并加入管理器 + /// + /// ## 参数 + /// + /// - `shared`: 是否共享 + /// - `page_type`: 页面类型 + /// - `flags`: 页面标志 + /// - `allocator`: 物理页帧分配器 + /// + /// ## 返回值 + /// + /// - `Ok(Arc)`: 新页面 + /// - `Err(SystemError)`: 错误码 + pub fn create_one_page( + &mut self, + page_type: PageType, + flags: PageFlags, + allocator: &mut dyn FrameAllocator, + ) -> Result, SystemError> { + self.create_pages(page_type, flags, allocator, PageFrameCount::ONE)? + .1 + .first() + .ok_or(SystemError::ENOMEM) + .cloned() + } + + /// # 创建新页面并加入管理器 + /// + /// ## 参数 + /// + /// - `shared`: 是否共享 + /// - `page_type`: 页面类型 + /// - `flags`: 页面标志 + /// - `allocator`: 物理页帧分配器 + /// - `count`: 页面数量 + /// + /// ## 返回值 + /// + /// - `Ok((PhysAddr, Vec>))`: 页面起始物理地址,新页面集合 + /// - `Err(SystemError)`: 错误码 + pub fn create_pages( + &mut self, + page_type: PageType, + flags: PageFlags, + allocator: &mut dyn FrameAllocator, + count: PageFrameCount, + ) -> Result<(PhysAddr, Vec>), SystemError> { + compiler_fence(Ordering::SeqCst); + let (start_paddr, count) = unsafe { allocator.allocate(count).ok_or(SystemError::ENOMEM)? }; + compiler_fence(Ordering::SeqCst); + + unsafe { + let vaddr = MMArch::phys_2_virt(start_paddr).unwrap(); + MMArch::write_bytes(vaddr, 0, MMArch::PAGE_SIZE * count.data()); + } + + let mut cur_phys = PhysPageFrame::new(start_paddr); + let mut ret: Vec> = Vec::new(); + for _ in 0..count.data() { + let page = Page::new(cur_phys.phys_address(), page_type.clone(), flags); + if let Err(e) = self.insert(&page) { + for insert_page in ret { + self.remove_page(&insert_page.read_irqsave().phys_addr); + } + return Err(e); + } + ret.push(page); + cur_phys = cur_phys.next(); + } + Ok((start_paddr, ret)) + } + + /// # 拷贝管理器中原有页面并加入管理器,同时拷贝原页面内容 + /// + /// ## 参数 + /// + /// - `old_phys`: 原页面的物理地址 + /// - `allocator`: 物理页帧分配器 + /// + /// ## 返回值 + /// + /// - `Ok(Arc)`: 新页面 + /// - `Err(SystemError)`: 错误码 + pub fn copy_page( + &mut self, + old_phys: &PhysAddr, + allocator: &mut dyn FrameAllocator, + ) -> Result, SystemError> { + let old_page = self.get(old_phys).ok_or(SystemError::EINVAL)?; + let paddr = unsafe { allocator.allocate_one().ok_or(SystemError::ENOMEM)? }; + + assert!(!self.contains(&paddr), "phys page: {paddr:?} already exist"); + + let page = Page::copy(old_page.read_irqsave(), paddr) + .inspect_err(|_| unsafe { allocator.free_one(paddr) })?; + + self.insert(&page)?; + + Ok(page) + } } pub static mut PAGE_RECLAIMER: Option> = None; @@ -150,7 +261,7 @@ fn page_reclaim_thread() -> i32 { page_reclaimer_lock_irqsave().flush_dirty_pages(); // 休眠5秒 // log::info!("sleep"); - let _ = nanosleep(PosixTimeSpec::new(5, 0)); + let _ = nanosleep(PosixTimeSpec::new(0, 500_000_000)); } } } @@ -180,28 +291,31 @@ impl PageReclaimer { self.lru.put(paddr, page.clone()); } + pub fn remove_page(&mut self, paddr: &PhysAddr) -> Option> { + self.lru.pop(paddr) + } + /// lru链表缩减 /// ## 参数 /// /// - `count`: 需要缩减的页面数量 pub fn shrink_list(&mut self, count: PageFrameCount) { for _ in 0..count.data() { - let (paddr, page) = self.lru.pop_lru().expect("pagecache is empty"); - let page_cache = page.read_irqsave().page_cache().unwrap(); - for vma in page.read_irqsave().anon_vma() { - let address_space = vma.lock_irqsave().address_space().unwrap(); - let address_space = address_space.upgrade().unwrap(); - let mut guard = address_space.write(); - let mapper = &mut guard.user_mapper.utable; - let virt = vma.lock_irqsave().page_address(&page).unwrap(); - unsafe { - mapper.unmap(virt, false).unwrap().flush(); + let (_, page) = self.lru.pop_lru().expect("pagecache is empty"); + let mut guard = page.write_irqsave(); + if let PageType::File(info) = guard.page_type().clone() { + let page_cache = &info.page_cache; + let page_index = info.index; + let paddr = guard.phys_address(); + if guard.flags().contains(PageFlags::PG_DIRTY) { + // 先回写脏页 + Self::page_writeback(&mut guard, true); } - } - page_cache.remove_page(page.read_irqsave().index().unwrap()); - page_manager_lock_irqsave().remove_page(&paddr); - if page.read_irqsave().flags.contains(PageFlags::PG_DIRTY) { - Self::page_writeback(&page, true); + + // 删除页面 + page_cache.lock_irqsave().remove_page(page_index); + page_manager_lock_irqsave().remove_page(&paddr); + self.remove_page(&paddr); } } } @@ -215,24 +329,33 @@ impl PageReclaimer { /// 脏页回写函数 /// ## 参数 /// - /// - `page`: 需要回写的脏页 + /// - `guard`: 需要回写的脏页 /// - `unmap`: 是否取消映射 /// /// ## 返回值 /// - VmFaultReason: 页面错误处理信息标志 - pub fn page_writeback(page: &Arc, unmap: bool) { - if !unmap { - page.write_irqsave().remove_flags(PageFlags::PG_DIRTY); - } + pub fn page_writeback(guard: &mut RwLockWriteGuard, unmap: bool) { + // log::debug!("page writeback: {:?}", guard.phys_addr); - for vma in page.read_irqsave().anon_vma() { + let (page_cache, page_index) = match guard.page_type() { + PageType::File(info) => (info.page_cache.clone(), info.index), + _ => { + log::warn!("try to writeback a non-file page"); + return; + } + }; + let paddr = guard.phys_address(); + let inode = page_cache.inode().clone().unwrap().upgrade().unwrap(); + + for vma in guard.vma_set() { let address_space = vma.lock_irqsave().address_space().unwrap(); let address_space = address_space.upgrade().unwrap(); let mut guard = address_space.write(); let mapper = &mut guard.user_mapper.utable; - let virt = vma.lock_irqsave().page_address(page).unwrap(); + let virt = vma.lock_irqsave().page_address(page_index).unwrap(); if unmap { unsafe { + // 取消页表映射 mapper.unmap(virt, false).unwrap().flush(); } } else { @@ -245,40 +368,44 @@ impl PageReclaimer { }; } } - let inode = page - .read_irqsave() - .page_cache - .clone() - .unwrap() - .inode() - .clone() - .unwrap() - .upgrade() - .unwrap(); + + let len = if let Ok(metadata) = inode.metadata() { + let size = metadata.size as usize; + if size < page_index * MMArch::PAGE_SIZE { + 0 + } else { + size - page_index * MMArch::PAGE_SIZE + } + } else { + MMArch::PAGE_SIZE + }; + inode - .write_at( - page.read_irqsave().index().unwrap(), - MMArch::PAGE_SIZE, + .write_direct( + page_index * MMArch::PAGE_SIZE, + len, unsafe { core::slice::from_raw_parts( - MMArch::phys_2_virt(page.read_irqsave().phys_addr) - .unwrap() - .data() as *mut u8, - MMArch::PAGE_SIZE, + MMArch::phys_2_virt(paddr).unwrap().data() as *mut u8, + len, ) }, SpinLock::new(FilePrivateData::Unused).lock(), ) .unwrap(); + + // 清除标记 + guard.remove_flags(PageFlags::PG_DIRTY); } /// lru脏页刷新 - pub fn flush_dirty_pages(&self) { + pub fn flush_dirty_pages(&mut self) { // log::info!("flush_dirty_pages"); let iter = self.lru.iter(); - for (_, page) in iter { - if page.read_irqsave().flags().contains(PageFlags::PG_DIRTY) { - Self::page_writeback(page, false); + for (_paddr, page) in iter { + let mut guard = page.write_irqsave(); + if guard.flags().contains(PageFlags::PG_DIRTY) { + Self::page_writeback(&mut guard, false); } } } @@ -302,20 +429,76 @@ bitflags! { const PG_PRIVATE = 1 << 15; const PG_RECLAIM = 1 << 18; const PG_SWAPBACKED = 1 << 19; + const PG_UNEVICTABLE = 1 << 20; } } #[derive(Debug)] pub struct Page { inner: RwLock, + /// 页面所在物理地址 + phys_addr: PhysAddr, } impl Page { - pub fn new(shared: bool, phys_addr: PhysAddr) -> Self { - let inner = InnerPage::new(shared, phys_addr); - Self { + /// # 创建新页面 + /// + /// ## 参数 + /// + /// - `shared`: 是否共享 + /// - `phys_addr`: 物理地址 + /// - `page_type`: 页面类型 + /// - `flags`: 页面标志 + /// + /// ## 返回值 + /// + /// - `Arc`: 新页面 + fn new(phys_addr: PhysAddr, page_type: PageType, flags: PageFlags) -> Arc { + let inner = InnerPage::new(phys_addr, page_type, flags); + let page = Arc::new(Self { inner: RwLock::new(inner), + phys_addr, + }); + if page.read_irqsave().flags == PageFlags::PG_LRU { + page_reclaimer_lock_irqsave().insert_page(phys_addr, &page); + }; + page + } + + /// # 拷贝页面及内容 + /// + /// ## 参数 + /// + /// - `old_guard`: 源页面的读守卫 + /// - `new_phys`: 新页面的物理地址 + /// + /// ## 返回值 + /// + /// - `Ok(Arc)`: 新页面 + /// - `Err(SystemError)`: 错误码 + fn copy( + old_guard: RwLockReadGuard, + new_phys: PhysAddr, + ) -> Result, SystemError> { + let page_type = old_guard.page_type().clone(); + let flags = *old_guard.flags(); + let inner = InnerPage::new(new_phys, page_type, flags); + unsafe { + let old_vaddr = + MMArch::phys_2_virt(old_guard.phys_address()).ok_or(SystemError::EFAULT)?; + let new_vaddr = MMArch::phys_2_virt(new_phys).ok_or(SystemError::EFAULT)?; + (new_vaddr.data() as *mut u8) + .copy_from_nonoverlapping(old_vaddr.data() as *mut u8, MMArch::PAGE_SIZE); } + Ok(Arc::new(Self { + inner: RwLock::new(inner), + phys_addr: new_phys, + })) + } + + #[inline(always)] + pub fn phys_address(&self) -> PhysAddr { + self.phys_addr } pub fn read_irqsave(&self) -> RwLockReadGuard { @@ -330,107 +513,68 @@ impl Page { #[derive(Debug)] /// 物理页面信息 pub struct InnerPage { - /// 映射计数 - map_count: usize, - /// 是否为共享页 - shared: bool, - /// 映射计数为0时,是否可回收 - free_when_zero: bool, - /// 共享页id(如果是共享页) - shm_id: Option, /// 映射到当前page的VMA - anon_vma: HashSet>, + vma_set: HashSet>, /// 标志 flags: PageFlags, - /// 页所在的物理页帧号 + /// 页面所在物理地址 phys_addr: PhysAddr, - /// 在pagecache中的偏移 - index: Option, - page_cache: Option>, + /// 页面类型 + page_type: PageType, } impl InnerPage { - pub fn new(shared: bool, phys_addr: PhysAddr) -> Self { - let dealloc_when_zero = !shared; + pub fn new(phys_addr: PhysAddr, page_type: PageType, flags: PageFlags) -> Self { Self { - map_count: 0, - shared, - free_when_zero: dealloc_when_zero, - shm_id: None, - anon_vma: HashSet::new(), - flags: PageFlags::empty(), + vma_set: HashSet::new(), + flags, phys_addr, - index: None, - page_cache: None, + page_type, } } /// 将vma加入anon_vma pub fn insert_vma(&mut self, vma: Arc) { - self.anon_vma.insert(vma); - self.map_count += 1; + self.vma_set.insert(vma); } /// 将vma从anon_vma中删去 pub fn remove_vma(&mut self, vma: &LockedVMA) { - self.anon_vma.remove(vma); - self.map_count -= 1; + self.vma_set.remove(vma); } /// 判断当前物理页是否能被回 pub fn can_deallocate(&self) -> bool { - self.map_count == 0 && self.free_when_zero + self.map_count() == 0 && !self.flags.contains(PageFlags::PG_UNEVICTABLE) } pub fn shared(&self) -> bool { - self.shared - } - - pub fn shm_id(&self) -> Option { - self.shm_id - } - - pub fn index(&self) -> Option { - self.index + self.map_count() > 1 } pub fn page_cache(&self) -> Option> { - self.page_cache.clone() + match &self.page_type { + PageType::File(info) => Some(info.page_cache.clone()), + _ => None, + } } - pub fn set_page_cache(&mut self, page_cache: Option>) { - self.page_cache = page_cache; + pub fn page_type(&self) -> &PageType { + &self.page_type } - pub fn set_index(&mut self, index: Option) { - self.index = index; - } - - pub fn set_page_cache_index( - &mut self, - page_cache: Option>, - index: Option, - ) { - self.page_cache = page_cache; - self.index = index; - } - - pub fn set_shm_id(&mut self, shm_id: ShmId) { - self.shm_id = Some(shm_id); - } - - pub fn set_dealloc_when_zero(&mut self, dealloc_when_zero: bool) { - self.free_when_zero = dealloc_when_zero; + pub fn set_page_type(&mut self, page_type: PageType) { + self.page_type = page_type; } #[inline(always)] - pub fn anon_vma(&self) -> &HashSet> { - &self.anon_vma + pub fn vma_set(&self) -> &HashSet> { + &self.vma_set } #[inline(always)] pub fn map_count(&self) -> usize { - self.map_count + self.vma_set.len() } #[inline(always)] @@ -454,9 +598,83 @@ impl InnerPage { } #[inline(always)] - pub fn phys_address(&self) -> PhysAddr { + fn phys_address(&self) -> PhysAddr { self.phys_addr } + + pub unsafe fn as_slice(&self) -> &[u8] { + core::slice::from_raw_parts( + MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *const u8, + MMArch::PAGE_SIZE, + ) + } + + pub unsafe fn as_slice_mut(&mut self) -> &mut [u8] { + core::slice::from_raw_parts_mut( + MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *mut u8, + MMArch::PAGE_SIZE, + ) + } + + pub unsafe fn copy_from_slice(&mut self, slice: &[u8]) { + assert_eq!( + slice.len(), + MMArch::PAGE_SIZE, + "length of slice not match PAGE_SIZE" + ); + core::slice::from_raw_parts_mut( + MMArch::phys_2_virt(self.phys_addr).unwrap().data() as *mut u8, + MMArch::PAGE_SIZE, + ) + .copy_from_slice(slice); + } + + pub unsafe fn truncate(&mut self, len: usize) { + if len > MMArch::PAGE_SIZE { + return; + } + + let vaddr = unsafe { MMArch::phys_2_virt(self.phys_addr).unwrap() }; + + unsafe { + core::slice::from_raw_parts_mut( + (vaddr.data() + len) as *mut u8, + MMArch::PAGE_SIZE - len, + ) + .fill(0) + }; + } +} + +impl Drop for InnerPage { + fn drop(&mut self) { + assert!( + self.map_count() == 0, + "page drop when map count is non-zero" + ); + + unsafe { + deallocate_page_frames(PhysPageFrame::new(self.phys_addr), PageFrameCount::new(1)) + }; + } +} + +/// 页面类型,包含额外的页面信息 +#[derive(Debug, Clone)] +pub enum PageType { + /// 普通页面,不含额外信息 + Normal, + /// 文件映射页,含文件映射相关信息 + File(FileMapInfo), + /// 共享内存页,记录ShmId + Shm(ShmId), +} + +#[derive(Debug, Clone)] +pub struct FileMapInfo { + pub page_cache: Arc, + /// 在pagecache中的偏移 + pub index: usize, } #[derive(Debug)] @@ -634,23 +852,7 @@ impl PageTable { let phys = allocator.allocate_one()?; let mut page_manager_guard = page_manager_lock_irqsave(); let old_phys = entry.address().unwrap(); - let old_page = page_manager_guard.get_unwrap(&old_phys); - let new_page = - Arc::new(Page::new(old_page.read_irqsave().shared(), phys)); - if let Some(ref page_cache) = old_page.read_irqsave().page_cache() { - new_page.write_irqsave().set_page_cache_index( - Some(page_cache.clone()), - old_page.read_irqsave().index(), - ); - } - - page_manager_guard.insert(phys, &new_page); - let old_phys = entry.address().unwrap(); - let frame = MMArch::phys_2_virt(phys).unwrap().data() as *mut u8; - frame.copy_from_nonoverlapping( - MMArch::phys_2_virt(old_phys).unwrap().data() as *mut u8, - MMArch::PAGE_SIZE, - ); + page_manager_guard.copy_page(&old_phys, allocator).ok()?; new_table.set_entry(i, PageEntry::new(phys, entry.flags())); } } @@ -1180,21 +1382,17 @@ impl PageMapper { virt: VirtAddr, flags: EntryFlags, ) -> Option> { - compiler_fence(Ordering::SeqCst); - let phys: PhysAddr = self.frame_allocator.allocate_one()?; - compiler_fence(Ordering::SeqCst); - - unsafe { - let vaddr = MMArch::phys_2_virt(phys).unwrap(); - MMArch::write_bytes(vaddr, 0, MMArch::PAGE_SIZE); - } - let mut page_manager_guard: SpinLockGuard<'static, PageManager> = page_manager_lock_irqsave(); - if !page_manager_guard.contains(&phys) { - page_manager_guard.insert(phys, &Arc::new(Page::new(false, phys))) - } + let page = page_manager_guard + .create_one_page( + PageType::Normal, + PageFlags::empty(), + &mut self.frame_allocator, + ) + .ok()?; drop(page_manager_guard); + let phys = page.phys_address(); return self.map_phys(virt, phys, flags); } diff --git a/kernel/src/mm/ucontext.rs b/kernel/src/mm/ucontext.rs index 81abd4b0..04d953b2 100644 --- a/kernel/src/mm/ucontext.rs +++ b/kernel/src/mm/ucontext.rs @@ -21,6 +21,7 @@ use crate::{ arch::{mm::PageMapper, CurrentIrqArch, MMArch}, exception::InterruptArch, filesystem::vfs::file::File, + ipc::shm::{shm_manager_lock, ShmFlags}, libs::{ align::page_align_up, rwlock::RwLock, @@ -35,7 +36,7 @@ use super::{ allocator::page_frame::{ deallocate_page_frames, PageFrameCount, PhysPageFrame, VirtPageFrame, VirtPageFrameIter, }, - page::{EntryFlags, Flusher, InactiveFlusher, Page, PageFlushAll}, + page::{EntryFlags, Flusher, InactiveFlusher, PageFlushAll, PageType}, syscall::{MadvFlags, MapFlags, MremapFlags, ProtFlags}, MemoryManagementArch, PageTableKind, VirtAddr, VirtRegion, VmFlags, }; @@ -841,7 +842,6 @@ impl Drop for UserMapper { deallocate_page_frames( PhysPageFrame::new(self.utable.table().phys()), PageFrameCount::new(1), - &mut page_manager_lock_irqsave(), ) }; } @@ -1152,12 +1152,35 @@ impl LockedVMA { pub fn unmap(&self, mapper: &mut PageMapper, mut flusher: impl Flusher) { // todo: 如果当前vma与文件相关,完善文件相关的逻辑 - let mut guard = self.lock_irqsave(); // 获取物理页的anon_vma的守卫 let mut page_manager_guard: SpinLockGuard<'_, crate::mm::page::PageManager> = page_manager_lock_irqsave(); + + // 获取映射的物理地址 + if let Some((paddr, _flags)) = mapper.translate(guard.region().start()) { + // 如果是共享页,执行释放操作 + let page = page_manager_guard.get(&paddr).unwrap(); + let page_guard = page.read_irqsave(); + if let PageType::Shm(shm_id) = page_guard.page_type() { + let mut shm_manager_guard = shm_manager_lock(); + if let Some(kernel_shm) = shm_manager_guard.get_mut(shm_id) { + // 更新最后一次断开连接时间 + kernel_shm.update_dtim(); + + // 映射计数减少 + kernel_shm.decrease_count(); + + // 释放shm_id + if kernel_shm.map_count() == 0 && kernel_shm.mode().contains(ShmFlags::SHM_DEST) + { + shm_manager_guard.free_id(shm_id); + } + } + } + } + for page in guard.region.pages() { if mapper.translate(page.virt_address()).is_none() { continue; @@ -1167,18 +1190,13 @@ impl LockedVMA { // 从anon_vma中删除当前VMA let page = page_manager_guard.get_unwrap(&paddr); - page.write_irqsave().remove_vma(self); + let mut page_guard = page.write_irqsave(); + page_guard.remove_vma(self); - // 如果物理页的anon_vma链表长度为0并且不是共享页,则释放物理页. - if page.read_irqsave().can_deallocate() { - unsafe { - drop(page); - deallocate_page_frames( - PhysPageFrame::new(paddr), - PageFrameCount::new(1), - &mut page_manager_guard, - ) - }; + // 如果物理页的vma链表长度为0并且未标记为不可回收,则释放物理页. + // TODO 后续由lru释放物理页面 + if page_guard.can_deallocate() { + page_manager_guard.remove_page(&paddr); } flusher.consume(flush); @@ -1659,9 +1677,7 @@ impl VMA { return Ok(r); } - pub fn page_address(&self, page: &Arc) -> Result { - let page_guard = page.read_irqsave(); - let index = page_guard.index().unwrap(); + pub fn page_address(&self, index: usize) -> Result { if index >= self.file_pgoff.unwrap() { let address = self.region.start + ((index - self.file_pgoff.unwrap()) << MMArch::PAGE_SHIFT); diff --git a/kernel/src/net/event_poll/mod.rs b/kernel/src/net/event_poll/mod.rs index f6a9ad61..2c80cc72 100644 --- a/kernel/src/net/event_poll/mod.rs +++ b/kernel/src/net/event_poll/mod.rs @@ -476,8 +476,8 @@ impl EventPoll { } // 如果有未处理的信号则返回错误 - if current_pcb.sig_info_irqsave().sig_pending().signal().bits() != 0 { - return Err(SystemError::EINTR); + if current_pcb.has_pending_signal_fast() { + return Err(SystemError::ERESTARTSYS); } // 还未等待到事件发生,则睡眠 @@ -488,12 +488,18 @@ impl EventPoll { let jiffies = next_n_us_timer_jiffies( (timespec.tv_sec * 1000000 + timespec.tv_nsec / 1000) as u64, ); - let inner = Timer::new(handle, jiffies); + let inner: Arc = Timer::new(handle, jiffies); inner.activate(); timer = Some(inner); } let guard = epoll.0.lock_irqsave(); - unsafe { guard.epoll_wq.sleep_without_schedule() }; + // 睡眠,等待事件发生 + // 如果wq已经dead,则直接返回错误 + unsafe { guard.epoll_wq.sleep_without_schedule() }.inspect_err(|_| { + if let Some(timer) = timer.as_ref() { + timer.cancel(); + } + })?; drop(guard); schedule(SchedMode::SM_NONE); // 被唤醒后,检查是否有事件可读 diff --git a/kernel/src/net/event_poll/syscall.rs b/kernel/src/net/event_poll/syscall.rs index 02d61eed..6fd0dc94 100644 --- a/kernel/src/net/event_poll/syscall.rs +++ b/kernel/src/net/event_poll/syscall.rs @@ -3,7 +3,7 @@ use system_error::SystemError; use crate::{ arch::ipc::signal::SigSet, filesystem::vfs::file::FileMode, - ipc::signal::set_current_sig_blocked, + ipc::signal::set_current_blocked, mm::VirtAddr, syscall::{ user_access::{UserBufferReader, UserBufferWriter}, @@ -96,7 +96,7 @@ impl Syscall { sigmask: &mut SigSet, ) -> Result { // 设置屏蔽的信号 - set_current_sig_blocked(sigmask); + set_current_blocked(sigmask); let wait_ret = Self::epoll_wait(epfd, epoll_event, max_events, timespec); diff --git a/kernel/src/perf/bpf.rs b/kernel/src/perf/bpf.rs index b283b2e5..ec31b170 100644 --- a/kernel/src/perf/bpf.rs +++ b/kernel/src/perf/bpf.rs @@ -1,14 +1,15 @@ use super::{PerfEventOps, Result}; use crate::arch::mm::LockedFrameAllocator; use crate::arch::MMArch; -use crate::filesystem::vfs::file::PageCache; +use crate::filesystem::page_cache::PageCache; use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode}; use crate::include::bindings::linux_bpf::{ perf_event_header, perf_event_mmap_page, perf_event_type, }; +use crate::libs::align::page_align_up; use crate::libs::spinlock::{SpinLock, SpinLockGuard}; -use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame}; -use crate::mm::page::{page_manager_lock_irqsave, Page}; +use crate::mm::allocator::page_frame::{PageFrameCount, PhysPageFrame}; +use crate::mm::page::{page_manager_lock_irqsave, PageFlags, PageType}; use crate::mm::{MemoryManagementArch, PhysAddr}; use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader}; use alloc::string::String; @@ -79,54 +80,28 @@ impl RingPage { } } + #[inline] fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool { - if (data_head + 1) % self.data_region_size == data_tail { - // The buffer is full - return false; - } - let capacity = if data_head >= data_tail { - self.data_region_size - data_head + data_tail - } else { - data_tail - data_head - }; + let capacity = self.data_region_size - data_head + data_tail; data_size <= capacity } pub fn write_event(&mut self, data: &[u8]) -> Result<()> { let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail }; let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head }; - // data_tail..data_head is the region that can be written - // check if there is enough space to write the event - let sample_size = PerfSample::calculate_size(data.len()); - let can_write_sample = - self.can_write(sample_size, *data_tail as usize, *data_head as usize); - // log::error!( - // "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}", - // can_write_sample, - // *data_tail, - // *data_head, - // data.len(), - // self.data_region_size - // ); - if !can_write_sample { - //we need record it to the lost record - self.lost += 1; - // log::error!( - // "Lost record: {}, data_tail: {}, data_head: {}", - // self.lost, - // *data_tail, - // *data_head - // ); - Ok(()) - } else { - // we can write the sample to the page - // If the lost record is not zero, we need to write the lost record first. + // user lib will update the tail after read the data,but it will not % data_region_size + let perf_header_size = size_of::(); + let can_write_perf_header = + self.can_write(perf_header_size, *data_tail as usize, *data_head as usize); + + if can_write_perf_header { let can_write_lost_record = self.can_write( size_of::(), *data_tail as usize, *data_head as usize, ); + // if there is lost record, we need to write the lost record first if self.lost > 0 && can_write_lost_record { let new_data_head = self.write_lost(*data_head as usize)?; *data_head = new_data_head as u64; @@ -137,8 +112,21 @@ impl RingPage { // *data_head // ); self.lost = 0; - self.write_event(data) - } else { + // try to write the event again + return self.write_event(data); + } + let sample_size = PerfSample::calculate_size(data.len()); + let can_write_sample = + self.can_write(sample_size, *data_tail as usize, *data_head as usize); + // log::error!( + // "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}", + // can_write_sample, + // *data_tail, + // *data_head, + // data.len(), + // self.data_region_size + // ); + if can_write_sample { let new_data_head = self.write_sample(data, *data_head as usize)?; *data_head = new_data_head as u64; // log::info!( @@ -146,20 +134,24 @@ impl RingPage { // *data_tail, // *data_head // ); - Ok(()) + } else { + self.lost += 1; } + } else { + self.lost += 1; } + Ok(()) } /// Write any data to the page. /// /// Return the new data_head - fn write_any(&mut self, data: &[u8], data_head: usize) -> Result { + fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<()> { let data_region_len = self.data_region_size; let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut(); let data_len = data.len(); + let start = data_head % data_region_len; let end = (data_head + data_len) % data_region_len; - let start = data_head; if start < end { data_region[start..end].copy_from_slice(data); } else { @@ -167,40 +159,57 @@ impl RingPage { data_region[start..start + first_len].copy_from_slice(&data[..first_len]); data_region[0..end].copy_from_slice(&data[first_len..]); } - Ok(end) + Ok(()) + } + #[inline] + fn fill_size(&self, data_head_mod: usize) -> usize { + if self.data_region_size - data_head_mod < size_of::() { + // The remaining space is not enough to write the perf_event_header + // We need to fill the remaining space with 0 + self.data_region_size - data_head_mod + } else { + 0 + } } /// Write a sample to the page. fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result { + let sample_size = PerfSample::calculate_size(data.len()); + let maybe_end = (data_head + sample_size) % self.data_region_size; + let fill_size = self.fill_size(maybe_end); let perf_sample = PerfSample { s_hdr: SampleHeader { header: perf_event_header { type_: perf_event_type::PERF_RECORD_SAMPLE as u32, misc: 0, - size: size_of::() as u16 + data.len() as u16, + size: size_of::() as u16 + data.len() as u16 + fill_size as u16, }, size: data.len() as u32, }, value: data, }; - let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?; - self.write_any(perf_sample.value, new_head) + self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?; + self.write_any(perf_sample.value, data_head + size_of::())?; + Ok(data_head + sample_size + fill_size) } /// Write a lost record to the page. /// /// Return the new data_head fn write_lost(&mut self, data_head: usize) -> Result { + let maybe_end = (data_head + size_of::()) % self.data_region_size; + let fill_size = self.fill_size(maybe_end); let lost = LostSamples { header: perf_event_header { type_: perf_event_type::PERF_RECORD_LOST as u32, misc: 0, - size: size_of::() as u16, + size: size_of::() as u16 + fill_size as u16, }, id: 0, count: self.lost as u64, }; - self.write_any(lost.as_bytes(), data_head) + self.write_any(lost.as_bytes(), data_head)?; + Ok(data_head + size_of::() + fill_size) } pub fn readable(&self) -> bool { @@ -232,18 +241,17 @@ impl BpfPerfEvent { } pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> { let mut data = self.data.lock(); - // alloc page frame - let (phy_addr, page_count) = - unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) } - .ok_or(SystemError::ENOSPC)?; let mut page_manager_guard = page_manager_lock_irqsave(); - let mut cur_phys = PhysPageFrame::new(phy_addr); - for i in 0..page_count.data() { - let page = Arc::new(Page::new(true, cur_phys.phys_address())); - let paddr = cur_phys.phys_address(); - page_manager_guard.insert(paddr, &page); - data.page_cache.add_page(i, &page); - cur_phys = cur_phys.next(); + let (phy_addr, pages) = page_manager_guard.create_pages( + PageType::Normal, + PageFlags::PG_UNEVICTABLE, + &mut LockedFrameAllocator, + PageFrameCount::new(page_align_up(len) / PAGE_SIZE), + )?; + for i in 0..pages.len() { + data.page_cache + .lock_irqsave() + .add_page(i, pages.get(i).unwrap()); } let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?; // create mmap page diff --git a/kernel/src/perf/kprobe.rs b/kernel/src/perf/kprobe.rs index 54ac829b..28a70620 100644 --- a/kernel/src/perf/kprobe.rs +++ b/kernel/src/perf/kprobe.rs @@ -5,7 +5,8 @@ use crate::bpf::helper::BPF_HELPER_FUN_SET; use crate::bpf::prog::BpfProg; use crate::debug::kprobe::args::KprobeInfo; use crate::debug::kprobe::{register_kprobe, unregister_kprobe, LockKprobe}; -use crate::filesystem::vfs::file::{File, PageCache}; +use crate::filesystem::page_cache::PageCache; +use crate::filesystem::vfs::file::File; use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode}; use crate::libs::casting::DowncastArc; use crate::libs::spinlock::SpinLockGuard; @@ -39,8 +40,10 @@ impl KprobePerfEvent { .downcast_arc::() .ok_or(SystemError::EINVAL)?; let prog_slice = file.insns(); - let mut vm = - EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|_| SystemError::EINVAL)?; + let mut vm = EbpfVmRawOwned::new(Some(prog_slice.to_vec())).map_err(|e| { + log::error!("create ebpf vm failed: {:?}", e); + SystemError::EINVAL + })?; vm.register_helper_set(BPF_HELPER_FUN_SET.get()) .map_err(|_| SystemError::EINVAL)?; // create a callback to execute the ebpf prog @@ -75,10 +78,10 @@ impl CallBackFunc for KprobePerfCallBack { size_of::(), ) }; - let _res = self - .vm - .execute_program(probe_context) - .map_err(|_| SystemError::EINVAL); + let res = self.vm.execute_program(probe_context); + if res.is_err() { + log::error!("kprobe callback error: {:?}", res); + } } } diff --git a/kernel/src/perf/mod.rs b/kernel/src/perf/mod.rs index 0b6de2ad..99d2e25a 100644 --- a/kernel/src/perf/mod.rs +++ b/kernel/src/perf/mod.rs @@ -2,7 +2,8 @@ mod bpf; mod kprobe; mod util; -use crate::filesystem::vfs::file::{File, FileMode, PageCache}; +use crate::filesystem::page_cache::PageCache; +use crate::filesystem::vfs::file::{File, FileMode}; use crate::filesystem::vfs::syscall::ModeType; use crate::filesystem::vfs::{ FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock, diff --git a/kernel/src/process/exit.rs b/kernel/src/process/exit.rs index 9420c0fd..66d5a80d 100644 --- a/kernel/src/process/exit.rs +++ b/kernel/src/process/exit.rs @@ -5,13 +5,10 @@ use log::warn; use system_error::SystemError; use crate::{ - arch::{ - ipc::signal::{SigChildCode, Signal}, - CurrentIrqArch, - }, - exception::InterruptArch, + arch::ipc::signal::{SigChildCode, Signal}, sched::{schedule, SchedMode}, syscall::user_access::UserBufferWriter, + time::{sleep::nanosleep, Duration}, }; use super::{ @@ -108,33 +105,37 @@ pub fn kernel_wait4( /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/exit.c#1573 fn do_wait(kwo: &mut KernelWaitOption) -> Result { let mut retval: Result; - // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。 - - loop { - kwo.no_task_error = Some(SystemError::ECHILD); - let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD); - if kwo.pid_type != PidType::MAX && child_pcb.is_err() { + let mut tmp_child_pcb: Option> = None; + macro_rules! notask { + ($outer: lifetime) => { if let Some(err) = &kwo.no_task_error { retval = Err(err.clone()); } else { retval = Ok(0); } - if !kwo.options.contains(WaitOption::WNOHANG) { + if retval.is_err() && !kwo.options.contains(WaitOption::WNOHANG) { retval = Err(SystemError::ERESTARTSYS); - if !ProcessManager::current_pcb() - .sig_info_irqsave() - .sig_pending() - .has_pending() - { + if !ProcessManager::current_pcb().has_pending_signal_fast() { + schedule(SchedMode::SM_PREEMPT); // todo: 增加子进程退出的回调后,这里可以直接等待在自身的child_wait等待队列上。 continue; } else { - break; + break $outer; } } else { - break; + break $outer; } + }; + } + // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。 + + 'outer: loop { + kwo.no_task_error = Some(SystemError::ECHILD); + let child_pcb = ProcessManager::find(kwo.pid).ok_or(SystemError::ECHILD); + + if kwo.pid_type != PidType::MAX && child_pcb.is_err() { + notask!('outer); } if kwo.pid_type == PidType::PID { @@ -143,37 +144,62 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { let child_weak = Arc::downgrade(&child_pcb); let r = do_waitpid(child_pcb, kwo); if let Some(r) = r { - return r; - } else { - child_weak.upgrade().unwrap().wait_queue.sleep(); + retval = r; + break 'outer; + } else if let Err(SystemError::ESRCH) = child_weak.upgrade().unwrap().wait_queue.sleep() + { + // log::debug!("do_wait: child_pcb sleep failed"); + continue; } } else if kwo.pid_type == PidType::MAX { // 等待任意子进程 - // todo: 这里有问题!如果正在for循环的过程中,子进程退出了,可能会导致父进程永远等待。 + // todo: 这里有问题!应当让当前进程sleep到自身的child_wait等待队列上,这样才高效。(还没实现) let current_pcb = ProcessManager::current_pcb(); - let rd_childen = current_pcb.children.read(); - let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; - for pid in rd_childen.iter() { - let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?; - let state = pcb.sched_info().inner_lock_read_irqsave().state(); - if state.is_exited() { - kwo.ret_status = state.exit_code().unwrap() as i32; - drop(pcb); - unsafe { ProcessManager::release(*pid) }; - return Ok((*pid).into()); - } else { - unsafe { pcb.wait_queue.sleep_without_schedule() }; + loop { + let rd_childen = current_pcb.children.read(); + if rd_childen.is_empty() { + break; } + for pid in rd_childen.iter() { + let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?; + let sched_guard = pcb.sched_info().inner_lock_read_irqsave(); + let state = sched_guard.state(); + if state.is_exited() { + kwo.ret_status = state.exit_code().unwrap() as i32; + kwo.no_task_error = None; + // 由于pcb的drop方法里面要获取父进程的children字段的写锁,所以这里不能直接drop pcb, + // 而是要先break到外层循环,以便释放父进程的children字段的锁,才能drop pcb。 + // 否则会死锁。 + tmp_child_pcb = Some(pcb.clone()); + unsafe { ProcessManager::release(*pid) }; + retval = Ok((*pid).into()); + break 'outer; + } + } + nanosleep(Duration::from_millis(100).into())?; } - drop(irq_guard); - schedule(SchedMode::SM_NONE); } else { // todo: 对于pgid的处理 warn!("kernel_wait4: currently not support {:?}", kwo.pid_type); return Err(SystemError::EINVAL); } + + notask!('outer); } + drop(tmp_child_pcb); + ProcessManager::current_pcb() + .sched_info + .inner_lock_write_irqsave() + .set_state(ProcessState::Runnable); + + // log::debug!( + // "do_wait, kwo.pid: {}, retval = {:?}, kwo: {:?}", + // kwo.pid, + // retval, + // kwo.no_task_error + // ); + return retval; } diff --git a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index 4722cb7c..013aceb4 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -388,55 +388,68 @@ impl ProcessManager { /// - `exit_code` : 进程的退出码 pub fn exit(exit_code: usize) -> ! { // 关中断 - let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; - let pcb = ProcessManager::current_pcb(); - let pid = pcb.pid(); - pcb.sched_info - .inner_lock_write_irqsave() - .set_state(ProcessState::Exited(exit_code)); - pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true))); + let _irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() }; + let pid: Pid; + { + let pcb = ProcessManager::current_pcb(); + pid = pcb.pid(); + pcb.sched_info + .inner_lock_write_irqsave() + .set_state(ProcessState::Exited(exit_code)); + pcb.wait_queue.mark_dead(); + pcb.wait_queue.wakeup_all(Some(ProcessState::Blocked(true))); - let rq = cpu_rq(smp_get_processor_id().data() as usize); - let (rq, guard) = rq.self_lock(); - rq.deactivate_task( - pcb.clone(), - DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK, - ); - drop(guard); + let rq = cpu_rq(smp_get_processor_id().data() as usize); + let (rq, guard) = rq.self_lock(); + rq.deactivate_task( + pcb.clone(), + DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK, + ); + drop(guard); - // 进行进程退出后的工作 - let thread = pcb.thread.write_irqsave(); - if let Some(addr) = thread.set_child_tid { - unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; - } - - if let Some(addr) = thread.clear_child_tid { - if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 { - let _ = - Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY); + // 进行进程退出后的工作 + let thread = pcb.thread.write_irqsave(); + if let Some(addr) = thread.set_child_tid { + unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; } - unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; + + if let Some(addr) = thread.clear_child_tid { + if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 { + let _ = Futex::futex_wake( + addr, + FutexFlag::FLAGS_MATCH_NONE, + 1, + FUTEX_BITSET_MATCH_ANY, + ); + } + unsafe { clear_user(addr, core::mem::size_of::()).expect("clear tid failed") }; + } + + RobustListHead::exit_robust_list(pcb.clone()); + + // 如果是vfork出来的进程,则需要处理completion + if thread.vfork_done.is_some() { + thread.vfork_done.as_ref().unwrap().complete_all(); + } + drop(thread); + unsafe { pcb.basic_mut().set_user_vm(None) }; + pcb.exit_files(); + + // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空 + // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现 + if let Some(tty) = pcb.sig_info_irqsave().tty() { + // 临时解决方案!!! 临时解决方案!!! 引入进程组之后,要重写这个更新前台进程组的逻辑 + let mut g = tty.core().contorl_info_irqsave(); + if g.pgid == Some(pid) { + g.pgid = None; + } + } + pcb.sig_info_mut().set_tty(None); + + drop(pcb); + ProcessManager::exit_notify(); } - RobustListHead::exit_robust_list(pcb.clone()); - - // 如果是vfork出来的进程,则需要处理completion - if thread.vfork_done.is_some() { - thread.vfork_done.as_ref().unwrap().complete_all(); - } - drop(thread); - unsafe { pcb.basic_mut().set_user_vm(None) }; - - // TODO 由于未实现进程组,tty记录的前台进程组等于当前进程,故退出前要置空 - // 后续相关逻辑需要在SYS_EXIT_GROUP系统调用中实现 - if let Some(tty) = pcb.sig_info_irqsave().tty() { - tty.core().contorl_info_irqsave().pgid = None; - } - pcb.sig_info_mut().set_tty(None); - - drop(pcb); - ProcessManager::exit_notify(); - // unsafe { CurrentIrqArch::interrupt_enable() }; __schedule(SchedMode::SM_NONE); error!("pid {pid:?} exited but sched again!"); #[allow(clippy::empty_loop)] @@ -611,6 +624,32 @@ bitflags! { const NEED_MIGRATE = 1 << 7; /// 随机化的虚拟地址空间,主要用于动态链接器的加载 const RANDOMIZE = 1 << 8; + /// 进程有未处理的信号(这是一个用于快速判断的标志位) + /// 相当于Linux的TIF_SIGPENDING + const HAS_PENDING_SIGNAL = 1 << 9; + /// 进程需要恢复之前保存的信号掩码 + const RESTORE_SIG_MASK = 1 << 10; + } +} + +impl ProcessFlags { + pub const fn exit_to_user_mode_work(&self) -> Self { + Self::from_bits_truncate(self.bits & (Self::HAS_PENDING_SIGNAL.bits)) + } + + /// 测试并清除标志位 + /// + /// ## 参数 + /// + /// - `rhs` : 需要测试并清除的标志位 + /// + /// ## 返回值 + /// + /// 如果标志位在清除前是置位的,则返回 `true`,否则返回 `false` + pub const fn test_and_clear(&mut self, rhs: Self) -> bool { + let r = (self.bits & rhs.bits) != 0; + self.bits &= !rhs.bits; + r } } #[derive(Debug)] @@ -672,6 +711,7 @@ pub struct ProcessControlBlock { /// 进程作为主体的凭证集 cred: SpinLock, + self_ref: Weak, } impl ProcessControlBlock { @@ -733,7 +773,7 @@ impl ProcessControlBlock { let ppcb: Weak = ProcessManager::find(ppid) .map(|p| Arc::downgrade(&p)) .unwrap_or_default(); - let pcb = Self { + let mut pcb = Self { pid, tgid: pid, thread_pid: Arc::new(RwLock::new(PidStrcut::new())), @@ -758,6 +798,7 @@ impl ProcessControlBlock { robust_list: RwLock::new(None), nsproxy: Arc::new(RwLock::new(NsProxy::new())), cred: SpinLock::new(cred), + self_ref: Weak::new(), }; pcb.sig_info.write().set_tty(tty); @@ -768,7 +809,10 @@ impl ProcessControlBlock { .lock() .init_syscall_stack(&pcb.syscall_stack.read()); - let pcb = Arc::new(pcb); + let pcb = Arc::new_cyclic(|weak| { + pcb.self_ref = weak.clone(); + pcb + }); pcb.sched_info() .sched_entity() @@ -1016,6 +1060,11 @@ impl ProcessControlBlock { return has_pending; } + /// 根据 pcb 的 flags 判断当前进程是否有未处理的信号 + pub fn has_pending_signal_fast(&self) -> bool { + self.flags.get().contains(ProcessFlags::HAS_PENDING_SIGNAL) + } + pub fn sig_struct(&self) -> SpinLockGuard { self.sig_struct.lock_irqsave() } @@ -1055,6 +1104,19 @@ impl ProcessControlBlock { pub fn set_nsproxy(&self, nsprsy: NsProxy) { *self.nsproxy.write() = nsprsy; } + + /// Exit fd table when process exit + fn exit_files(&self) { + self.basic.write_irqsave().set_fd_table(None); + } + + pub fn children_read_irqsave(&self) -> RwLockReadGuard> { + self.children.read_irqsave() + } + + pub fn threads_read_irqsave(&self) -> RwLockReadGuard { + self.thread.read_irqsave() + } } impl Drop for ProcessControlBlock { @@ -1086,6 +1148,12 @@ pub struct ThreadInfo { group_leader: Weak, } +impl Default for ThreadInfo { + fn default() -> Self { + Self::new() + } +} + impl ThreadInfo { pub fn new() -> Self { Self { @@ -1526,8 +1594,9 @@ pub fn process_init() { #[derive(Debug)] pub struct ProcessSignalInfo { - // 当前进程 - sig_block: SigSet, + // 当前进程被屏蔽的信号 + sig_blocked: SigSet, + saved_sigmask: SigSet, // sig_pending 中存储当前线程要处理的信号 sig_pending: SigPending, // sig_shared_pending 中存储当前线程所属进程要处理的信号 @@ -1537,8 +1606,8 @@ pub struct ProcessSignalInfo { } impl ProcessSignalInfo { - pub fn sig_block(&self) -> &SigSet { - &self.sig_block + pub fn sig_blocked(&self) -> &SigSet { + &self.sig_blocked } pub fn sig_pending(&self) -> &SigPending { @@ -1550,7 +1619,15 @@ impl ProcessSignalInfo { } pub fn sig_block_mut(&mut self) -> &mut SigSet { - &mut self.sig_block + &mut self.sig_blocked + } + + pub fn saved_sigmask(&self) -> &SigSet { + &self.saved_sigmask + } + + pub fn saved_sigmask_mut(&mut self) -> &mut SigSet { + &mut self.saved_sigmask } pub fn sig_shared_pending_mut(&mut self) -> &mut SigPending { @@ -1575,12 +1652,19 @@ impl ProcessSignalInfo { /// /// - `sig_mask` 被忽略掉的信号 /// - pub fn dequeue_signal(&mut self, sig_mask: &SigSet) -> (Signal, Option) { + pub fn dequeue_signal( + &mut self, + sig_mask: &SigSet, + pcb: &Arc, + ) -> (Signal, Option) { let res = self.sig_pending.dequeue_signal(sig_mask); + pcb.recalc_sigpending(Some(self)); if res.0 != Signal::INVALID { return res; } else { - return self.sig_shared_pending.dequeue_signal(sig_mask); + let res = self.sig_shared_pending.dequeue_signal(sig_mask); + pcb.recalc_sigpending(Some(self)); + return res; } } } @@ -1588,7 +1672,8 @@ impl ProcessSignalInfo { impl Default for ProcessSignalInfo { fn default() -> Self { Self { - sig_block: SigSet::empty(), + sig_blocked: SigSet::empty(), + saved_sigmask: SigSet::empty(), sig_pending: SigPending::default(), sig_shared_pending: SigPending::default(), tty: None, diff --git a/kernel/src/sched/completion.rs b/kernel/src/sched/completion.rs index 61488c25..bad4de7f 100644 --- a/kernel/src/sched/completion.rs +++ b/kernel/src/sched/completion.rs @@ -4,6 +4,7 @@ use system_error::SystemError; use crate::{ libs::{spinlock::SpinLock, wait_queue::WaitQueue}, + process::ProcessManager, time::timer::schedule_timeout, }; @@ -29,20 +30,22 @@ impl Completion { /// @return 返回剩余时间或者SystemError fn do_wait_for_common(&self, mut timeout: i64, interuptible: bool) -> Result { let mut inner = self.inner.lock_irqsave(); - + let pcb = ProcessManager::current_pcb(); if inner.done == 0 { //loop break 类似 do while 保证进行一次信号检测 loop { //检查当前线程是否有未处理的信号 - // if (signal_pending_state(state, current)) { - // timeout = -ERESTARTSYS; - // break; - //} + if pcb.sig_info_irqsave().sig_pending().has_pending() { + return Err(SystemError::ERESTARTSYS); + } - if interuptible { - unsafe { inner.wait_queue.sleep_without_schedule() }; + let e = if interuptible { + unsafe { inner.wait_queue.sleep_without_schedule() } } else { - unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() }; + unsafe { inner.wait_queue.sleep_without_schedule_uninterruptible() } + }; + if e.is_err() { + break; } drop(inner); timeout = schedule_timeout(timeout)?; diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs index 89a48558..03087e15 100644 --- a/kernel/src/syscall/mod.rs +++ b/kernel/src/syscall/mod.rs @@ -238,6 +238,10 @@ impl Syscall { let r = args[0] as *const u8; Self::chdir(r) } + SYS_FCHDIR => { + let fd = args[0] as i32; + Self::fchdir(fd) + } #[allow(unreachable_patterns)] SYS_GETDENTS64 | SYS_GETDENTS => { @@ -879,8 +883,11 @@ impl Syscall { } SYS_RT_SIGPROCMASK => { - warn!("SYS_RT_SIGPROCMASK has not yet been implemented"); - Ok(0) + let how = args[0] as i32; + let nset = args[1]; + let oset = args[2]; + let sigsetsize = args[3]; + Self::rt_sigprocmask(how, nset, oset, sigsetsize) } SYS_TKILL => { @@ -1213,6 +1220,8 @@ impl Syscall { let flags = args[4] as u32; Self::sys_perf_event_open(attr, pid, cpu, group_fd, flags) } + SYS_SETRLIMIT => Ok(0), + SYS_RESTART_SYSCALL => Self::restart_syscall(), _ => panic!("Unsupported syscall ID: {}", syscall_num), }; diff --git a/tools/BUILD_CONTAINER_VERSION b/tools/BUILD_CONTAINER_VERSION index 72970ee2..8a9f2d79 100644 --- a/tools/BUILD_CONTAINER_VERSION +++ b/tools/BUILD_CONTAINER_VERSION @@ -1 +1 @@ -v1.7 \ No newline at end of file +v1.8 \ No newline at end of file diff --git a/tools/bootstrap.sh b/tools/bootstrap.sh index b859c3ae..ff6a5d96 100644 --- a/tools/bootstrap.sh +++ b/tools/bootstrap.sh @@ -24,6 +24,7 @@ DEFAULT_INSTALL="false" export RUSTUP_DIST_SERVER=${RUSTUP_DIST_SERVER:-https://rsproxy.cn} export RUSTUP_UPDATE_ROOT=${RUSTUP_UPDATE_ROOT:-https://rsproxy.cn/rustup} export RUST_VERSION="${RUST_VERSION:-nightly-2024-11-05}" +export RUST_VERSION_OLD="${RUST_VERSION:-nightly-2024-07-23}" banner() { @@ -65,7 +66,7 @@ install_ubuntu_debian_pkg() lsb-release \ llvm-dev libclang-dev clang gcc-multilib \ gcc build-essential fdisk dosfstools dnsmasq bridge-utils iptables libssl-dev pkg-config \ - sphinx make git + python3-sphinx make git # 必须分开安装,否则会出现错误 sudo "$1" install -y \ gcc-riscv64-unknown-elf gcc-riscv64-linux-gnu gdb-multiarch @@ -233,21 +234,21 @@ rustInstall() { echo "正在安装DragonOS所需的rust组件...首次安装需要一些时间来更新索引,请耐心等待..." cargo install cargo-binutils cargo install bpf-linker - rustup toolchain install nightly-2024-11-05-x86_64-unknown-linux-gnu rustup toolchain install $RUST_VERSION-x86_64-unknown-linux-gnu + rustup toolchain install $RUST_VERSION_OLD-x86_64-unknown-linux-gnu rustup component add rust-src --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu - rustup component add rust-src --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu + rustup component add rust-src --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu rustup target add x86_64-unknown-none --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu - rustup target add x86_64-unknown-none --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu - rustup target add x86_64-unknown-linux-musl --toolchain nightly-2024-11-05-x86_64-unknown-linux-gnu + rustup target add x86_64-unknown-none --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu rustup target add x86_64-unknown-linux-musl --toolchain $RUST_VERSION-x86_64-unknown-linux-gnu + rustup target add x86_64-unknown-linux-musl --toolchain $RUST_VERSION_OLD-x86_64-unknown-linux-gnu rustup toolchain install $RUST_VERSION-riscv64gc-unknown-linux-gnu --force-non-host - rustup toolchain install nightly-2024-11-05-riscv64gc-unknown-linux-gnu --force-non-host + rustup toolchain install $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu --force-non-host rustup target add riscv64gc-unknown-none-elf --toolchain $RUST_VERSION-riscv64gc-unknown-linux-gnu rustup target add riscv64imac-unknown-none-elf --toolchain $RUST_VERSION-riscv64gc-unknown-linux-gnu - rustup target add riscv64gc-unknown-none-elf --toolchain nightly-2024-11-05-riscv64gc-unknown-linux-gnu - rustup target add riscv64imac-unknown-none-elf --toolchain nightly-2024-11-05-riscv64gc-unknown-linux-gnu + rustup target add riscv64gc-unknown-none-elf --toolchain $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu + rustup target add riscv64imac-unknown-none-elf --toolchain $RUST_VERSION_OLD-riscv64gc-unknown-linux-gnu rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu rustup component add rust-src @@ -338,9 +339,9 @@ cargo install dadk || exit 1 bashpath=$(cd `dirname $0`; pwd) # 编译安装musl交叉编译工具链 -bash ${bashpath}/install_musl_gcc.sh || (echo "musl交叉编译工具链安装失败" && exit 1) +$SHELL ${bashpath}/install_musl_gcc.sh || (echo "musl交叉编译工具链安装失败" && exit 1) # 编译安装grub -bash ${bashpath}/grub_auto_install.sh || (echo "grub安装失败" && exit 1) +$SHELL ${bashpath}/grub_auto_install.sh || (echo "grub安装失败" && exit 1) # 解决kvm权限问题 USR=$USER diff --git a/tools/build_in_docker.sh b/tools/build_in_docker.sh index 8098bdc1..206f1797 100644 --- a/tools/build_in_docker.sh +++ b/tools/build_in_docker.sh @@ -1,6 +1,6 @@ docker rm -f dragonos-build || echo "No existed container" cpu_count=$(cat /proc/cpuinfo |grep "processor"|wc -l) -docker run --rm --privileged=true --cap-add SYS_ADMIN --cap-add MKNOD -v $(pwd):/data -v /dev:/dev -v dragonos-build-cargo:/root/.cargo/registry --name dragonos-build -i dragonos/dragonos-dev:v1.2 bash << EOF +docker run --rm --privileged=true --cap-add SYS_ADMIN --cap-add MKNOD -v $(pwd):/data -v /dev:/dev -v dragonos-build-cargo:/root/.cargo/registry --name dragonos-build -i dragonos/dragonos-dev:v1.8 bash << EOF source ~/.cargo/env source ~/.bashrc cd /data diff --git a/tools/change_rust_src.sh b/tools/change_rust_src.sh index 5d3ac7f2..6f522541 100644 --- a/tools/change_rust_src.sh +++ b/tools/change_rust_src.sh @@ -2,7 +2,8 @@ echo "正在为rust换源" sparse="false" -CONFIG_FILE=~/.cargo/config.toml +CARGO_HOME=${CARGO_HOME:-~/.cargo} +CONFIG_FILE=$CARGO_HOME/config.toml # 创建父目录 if [ ! -d ~/.cargo ]; then mkdir -p ~/.cargo diff --git a/tools/run-qemu.sh b/tools/run-qemu.sh index e470d518..7b48644c 100644 --- a/tools/run-qemu.sh +++ b/tools/run-qemu.sh @@ -78,7 +78,8 @@ QEMU_MONITOR="-monitor stdio" QEMU_TRACE="${qemu_trace_std}" QEMU_CPU_FEATURES="" QEMU_RTC_CLOCK="" -QEMU_SERIAL="-serial file:../serial_opt.txt" +QEMU_SERIAL_LOG_FILE="../serial_opt.txt" +QEMU_SERIAL="-serial file:${QEMU_SERIAL_LOG_FILE}" QEMU_DRIVE="id=disk,file=${QEMU_DISK_IMAGE},if=none" QEMU_ACCELARATE="" QEMU_ARGUMENT="" @@ -137,7 +138,7 @@ while true;do window) ;; nographic) - QEMU_SERIAL=" -serial mon:stdio " + QEMU_SERIAL=" -serial chardev:mux -monitor chardev:mux -chardev stdio,id=mux,mux=on,signal=off,logfile=${QEMU_SERIAL_LOG_FILE} " QEMU_MONITOR="" QEMU_ARGUMENT+=" --nographic " QEMU_ARGUMENT+=" -kernel ../bin/kernel/kernel.elf " diff --git a/user/apps/test_ebpf/syscall_ebpf/.gitignore b/user/apps/syscall_ebpf/.gitignore similarity index 95% rename from user/apps/test_ebpf/syscall_ebpf/.gitignore rename to user/apps/syscall_ebpf/.gitignore index 9db7029f..57e36ada 100644 --- a/user/apps/test_ebpf/syscall_ebpf/.gitignore +++ b/user/apps/syscall_ebpf/.gitignore @@ -7,3 +7,4 @@ target/ # These are backup files generated by rustfmt **/*.rs.bk +/install/ \ No newline at end of file diff --git a/user/apps/syscall_ebpf/Cargo.toml b/user/apps/syscall_ebpf/Cargo.toml new file mode 100644 index 00000000..7437b807 --- /dev/null +++ b/user/apps/syscall_ebpf/Cargo.toml @@ -0,0 +1,31 @@ +[workspace] +resolver = "2" +members = ["syscall_ebpf", "syscall_ebpf-common", "syscall_ebpf-ebpf"] +default-members = ["syscall_ebpf", "syscall_ebpf-common"] + +[workspace.dependencies] +aya = { version = "0.13.0", default-features = false } +aya-ebpf = { version = "0.1.1", default-features = false } +aya-log = { version = "0.2.1", default-features = false } +aya-log-ebpf = { version = "0.1.1", default-features = false } +anyhow = { version = "1", default-features = false } +cargo_metadata = { version = "0.18.0", default-features = false } +# `std` feature is currently required to build `clap`. +# +# See https://github.com/clap-rs/clap/blob/61f5ee5/clap_builder/src/lib.rs#L15. +clap = { version = "4.5.20", default-features = false, features = ["std"] } +env_logger = { version = "0.11.5", default-features = false } +libc = { version = "0.2.159", default-features = false } +log = { version = "0.4.22", default-features = false } +tokio = { version = "1.40.0", default-features = false } +which = { version = "6.0.0", default-features = false } + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" + +[profile.release.package.syscall_ebpf-ebpf] +debug = 2 +codegen-units = 1 diff --git a/user/apps/test_ebpf/Makefile b/user/apps/syscall_ebpf/Makefile similarity index 84% rename from user/apps/test_ebpf/Makefile rename to user/apps/syscall_ebpf/Makefile index 0b5d9e43..c8dadc36 100644 --- a/user/apps/test_ebpf/Makefile +++ b/user/apps/syscall_ebpf/Makefile @@ -21,10 +21,10 @@ endif run: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) -build:build-ebpf +build: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) -clean:clean-ebpf +clean: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) test: @@ -42,20 +42,16 @@ fmt-check: run-release: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release -build-release:build-ebpf +build-release: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release -clean-release:clean-ebpf +clean-release: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release test-release: RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release -build-ebpf: - cd ./syscall_ebpf && RUST_LOG=debug cargo xtask build --release -clean-ebpf: - cd ./syscall_ebpf && cargo clean .PHONY: install -install:build-ebpf - RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force +install: + RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path ./syscall_ebpf --no-track --root $(INSTALL_DIR) --force diff --git a/user/apps/syscall_ebpf/README.md b/user/apps/syscall_ebpf/README.md new file mode 100644 index 00000000..635b37bb --- /dev/null +++ b/user/apps/syscall_ebpf/README.md @@ -0,0 +1,33 @@ +# syscall_ebpf + +## Prerequisites + +1. stable rust toolchains: `rustup toolchain install stable` +1. nightly rust toolchains: `rustup toolchain install nightly --component rust-src` +1. (if cross-compiling) rustup target: `rustup target add ${ARCH}-unknown-linux-musl` +1. (if cross-compiling) LLVM: (e.g.) `brew install llvm` (on macOS) +1. (if cross-compiling) C toolchain: (e.g.) [`brew install filosottile/musl-cross/musl-cross`](https://github.com/FiloSottile/homebrew-musl-cross) (on macOS) +1. bpf-linker: `cargo install bpf-linker` (`--no-default-features` on macOS) + +## Build & Run + +Use `cargo build`, `cargo check`, etc. as normal. Run your program with: + +```shell +cargo run --release --config 'target."cfg(all())".runner="sudo -E"' +``` + +Cargo build scripts are used to automatically build the eBPF correctly and include it in the +program. + +## Cross-compiling on macOS + +Cross compilation should work on both Intel and Apple Silicon Macs. + +```shell +CC=${ARCH}-linux-musl-gcc cargo build --package syscall_ebpf --release \ + --target=${ARCH}-unknown-linux-musl \ + --config=target.${ARCH}-unknown-linux-musl.linker=\"${ARCH}-linux-musl-gcc\" +``` +The cross-compiled program `target/${ARCH}-unknown-linux-musl/release/syscall_ebpf` can be +copied to a Linux server or VM and run there. diff --git a/user/apps/syscall_ebpf/rustfmt.toml b/user/apps/syscall_ebpf/rustfmt.toml new file mode 100644 index 00000000..53f7b6d7 --- /dev/null +++ b/user/apps/syscall_ebpf/rustfmt.toml @@ -0,0 +1,4 @@ +group_imports = "StdExternalCrate" +imports_granularity = "Crate" +reorder_imports = true +unstable_features = true diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/Cargo.toml b/user/apps/syscall_ebpf/syscall_ebpf-common/Cargo.toml similarity index 56% rename from user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/Cargo.toml rename to user/apps/syscall_ebpf/syscall_ebpf-common/Cargo.toml index 7acc25d4..b453f61b 100644 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/Cargo.toml +++ b/user/apps/syscall_ebpf/syscall_ebpf-common/Cargo.toml @@ -8,7 +8,7 @@ default = [] user = ["aya"] [dependencies] -aya = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13", optional = true } +aya = { workspace = true, optional = true } [lib] path = "src/lib.rs" diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/src/lib.rs b/user/apps/syscall_ebpf/syscall_ebpf-common/src/lib.rs similarity index 100% rename from user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-common/src/lib.rs rename to user/apps/syscall_ebpf/syscall_ebpf-common/src/lib.rs diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml new file mode 100644 index 00000000..d8d7a20c --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml @@ -0,0 +1,12 @@ +# We have this so that one doesn't need to manually pass +# --target=bpfel-unknown-none -Z build-std=core when running cargo +# check/build/doc etc. +# +# NB: this file gets loaded only if you run cargo from this directory, it's +# ignored if you run from the workspace root. See +# https://doc.rust-lang.org/cargo/reference/config.html#hierarchical-structure +[build] +target = ["bpfeb-unknown-none", "bpfel-unknown-none"] + +[unstable] +build-std = ["core"] diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml new file mode 100644 index 00000000..fd061ea9 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "syscall_ebpf-ebpf" +version = "0.1.0" +edition = "2021" + +[dependencies] +syscall_ebpf-common = { path = "../syscall_ebpf-common" } + +aya-ebpf = { workspace = true } +aya-log-ebpf = { workspace = true } + +[build-dependencies] +which = { workspace = true } + +[[bin]] +name = "syscall_ebpf" +path = "src/main.rs" diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/build.rs b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/build.rs new file mode 100644 index 00000000..f83c317a --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/build.rs @@ -0,0 +1,17 @@ +use which::which; + +/// Building this crate has an undeclared dependency on the `bpf-linker` binary. This would be +/// better expressed by [artifact-dependencies][bindeps] but issues such as +/// https://github.com/rust-lang/cargo/issues/12385 make their use impractical for the time being. +/// +/// This file implements an imperfect solution: it causes cargo to rebuild the crate whenever the +/// mtime of `which bpf-linker` changes. Note that possibility that a new bpf-linker is added to +/// $PATH ahead of the one used as the cache key still exists. Solving this in the general case +/// would require rebuild-if-changed-env=PATH *and* rebuild-if-changed={every-directory-in-PATH} +/// which would likely mean far too much cache invalidation. +/// +/// [bindeps]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html?highlight=feature#artifact-dependencies +fn main() { + let bpf_linker = which("bpf-linker").unwrap(); + println!("cargo:rerun-if-changed={}", bpf_linker.to_str().unwrap()); +} diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml new file mode 100644 index 00000000..f70d2254 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly" +components = ["rust-src"] diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/lib.rs b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/lib.rs new file mode 100644 index 00000000..3ac3e595 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/lib.rs @@ -0,0 +1,3 @@ +#![no_std] + +// This file exists to enable the library target. diff --git a/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs new file mode 100644 index 00000000..c7581017 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs @@ -0,0 +1,50 @@ +#![no_std] +#![no_main] + +use aya_ebpf::{ + helpers::bpf_ktime_get_ns, + macros::{kprobe, map}, + maps::HashMap, + programs::ProbeContext, +}; +use aya_log_ebpf::info; + +#[kprobe] +pub fn syscall_ebpf(ctx: ProbeContext) -> u32 { + try_syscall_ebpf(ctx).unwrap_or_else(|ret| ret) +} + +fn try_syscall_ebpf(ctx: ProbeContext) -> Result { + let pt_regs = unsafe { &*ctx.regs }; + // first arg -> rdi + // second arg -> rsi + // third arg -> rdx + // four arg -> rcx + let syscall_num = pt_regs.rsi as usize; + if syscall_num != 1 { + unsafe { + if let Some(v) = SYSCALL_LIST.get(&(syscall_num as u32)) { + let new_v = *v + 1; + SYSCALL_LIST + .insert(&(syscall_num as u32), &new_v, 0) + .unwrap(); + } else { + SYSCALL_LIST.insert(&(syscall_num as u32), &1, 0).unwrap(); + } + } + let time = unsafe { bpf_ktime_get_ns() }; + info!(&ctx, "[{}] invoke syscall {}", time, syscall_num); + } + Ok(0) +} + +#[map] +static SYSCALL_LIST: HashMap = HashMap::::with_max_entries(1024, 0); + +#[cfg(not(test))] +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + // we need use this because the verifier will forbid loop + unsafe { core::hint::unreachable_unchecked() } + // loop{} +} diff --git a/user/apps/syscall_ebpf/syscall_ebpf/Cargo.toml b/user/apps/syscall_ebpf/syscall_ebpf/Cargo.toml new file mode 100644 index 00000000..6191f049 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "syscall_ebpf" +version = "0.1.0" +edition = "2021" + +[dependencies] +syscall_ebpf-common = { path = "../syscall_ebpf-common", features = ["user"] } + +anyhow = { workspace = true, default-features = true } +aya = { workspace = true } +aya-log = { workspace = true } +env_logger = { workspace = true } +libc = { workspace = true } +log = { workspace = true } +tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "net", "signal", "time"] } + +[build-dependencies] +cargo_metadata = { workspace = true } +# TODO(https://github.com/rust-lang/cargo/issues/12375): this should be an artifact dependency, but +# it's not possible to tell cargo to use `-Z build-std` to build it. We cargo-in-cargo in the build +# script to build this, but we want to teach cargo about the dependecy so that cache invalidation +# works properly. +# +# Note also that https://github.com/rust-lang/cargo/issues/10593 occurs when `target = ...` is added +# to an artifact dependency; it seems possible to work around that by setting `resolver = "1"` in +# Cargo.toml in the workspace root. +# +# Finally note that *any* usage of `artifact = ...` in *any* Cargo.toml in the workspace breaks +# workflows with stable cargo; stable cargo outright refuses to load manifests that use unstable +# features. +syscall_ebpf-ebpf = { path = "../syscall_ebpf-ebpf" } + +[[bin]] +name = "syscall_ebpf" +path = "src/main.rs" diff --git a/user/apps/syscall_ebpf/syscall_ebpf/build.rs b/user/apps/syscall_ebpf/syscall_ebpf/build.rs new file mode 100644 index 00000000..52b2ec29 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf/build.rs @@ -0,0 +1,150 @@ +use std::{ + env, fs, + io::{BufRead as _, BufReader}, + path::PathBuf, + process::{Child, Command, Stdio}, +}; + +use cargo_metadata::{ + Artifact, CompilerMessage, Message, Metadata, MetadataCommand, Package, Target, +}; + +/// This crate has a runtime dependency on artifacts produced by the `syscall_ebpf-ebpf` crate. +/// This would be better expressed as one or more [artifact-dependencies][bindeps] but issues such +/// as: +/// +/// * https://github.com/rust-lang/cargo/issues/12374 +/// * https://github.com/rust-lang/cargo/issues/12375 +/// * https://github.com/rust-lang/cargo/issues/12385 +/// +/// prevent their use for the time being. +/// +/// [bindeps]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html?highlight=feature#artifact-dependencies +fn main() { + let Metadata { packages, .. } = MetadataCommand::new().no_deps().exec().unwrap(); + let ebpf_package = packages + .into_iter() + .find(|Package { name, .. }| name == "syscall_ebpf-ebpf") + .unwrap(); + + let out_dir = env::var_os("OUT_DIR").unwrap(); + let out_dir = PathBuf::from(out_dir); + + let endian = env::var_os("CARGO_CFG_TARGET_ENDIAN").unwrap(); + let target = if endian == "big" { + "bpfeb" + } else if endian == "little" { + "bpfel" + } else { + panic!("unsupported endian={:?}", endian) + }; + + // TODO(https://github.com/rust-lang/cargo/issues/4001): Make this `false` if we can determine + // we're in a check build. + let build_ebpf = true; + if build_ebpf { + let arch = env::var_os("CARGO_CFG_TARGET_ARCH").unwrap(); + + let target = format!("{target}-unknown-none"); + + let Package { manifest_path, .. } = ebpf_package; + let ebpf_dir = manifest_path.parent().unwrap(); + + // We have a build-dependency on `syscall_ebpf-ebpf`, so cargo will automatically rebuild us + // if `syscall_ebpf-ebpf`'s *library* target or any of its dependencies change. Since we + // depend on `syscall_ebpf-ebpf`'s *binary* targets, that only gets us half of the way. This + // stanza ensures cargo will rebuild us on changes to the binaries too, which gets us the + // rest of the way. + println!("cargo:rerun-if-changed={}", ebpf_dir.as_str()); + + let mut cmd = Command::new("cargo"); + cmd.args([ + "build", + "-Z", + "build-std=core", + "--bins", + "--message-format=json", + "--release", + "--target", + &target, + ]); + + cmd.env("CARGO_CFG_BPF_TARGET_ARCH", arch); + + // Workaround to make sure that the rust-toolchain.toml is respected. + for key in ["RUSTUP_TOOLCHAIN", "RUSTC", "RUSTC_WORKSPACE_WRAPPER"] { + cmd.env_remove(key); + } + cmd.current_dir(ebpf_dir); + + // Workaround for https://github.com/rust-lang/cargo/issues/6412 where cargo flocks itself. + let ebpf_target_dir = out_dir.join("../syscall_ebpf-ebpf"); + cmd.arg("--target-dir").arg(&ebpf_target_dir); + + let mut child = cmd + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap_or_else(|err| panic!("failed to spawn {cmd:?}: {err}")); + let Child { stdout, stderr, .. } = &mut child; + + // Trampoline stdout to cargo warnings. + let stderr = stderr.take().unwrap(); + let stderr = BufReader::new(stderr); + let stderr = std::thread::spawn(move || { + for line in stderr.lines() { + let line = line.unwrap(); + println!("cargo:warning={line}"); + } + }); + + let stdout = stdout.take().unwrap(); + let stdout = BufReader::new(stdout); + let mut executables = Vec::new(); + for message in Message::parse_stream(stdout) { + #[allow(clippy::collapsible_match)] + match message.expect("valid JSON") { + Message::CompilerArtifact(Artifact { + executable, + target: Target { name, .. }, + .. + }) => { + if let Some(executable) = executable { + executables.push((name, executable.into_std_path_buf())); + } + } + Message::CompilerMessage(CompilerMessage { message, .. }) => { + for line in message.rendered.unwrap_or_default().split('\n') { + println!("cargo:warning={line}"); + } + } + Message::TextLine(line) => { + println!("cargo:warning={line}"); + } + _ => {} + } + } + + let status = child + .wait() + .unwrap_or_else(|err| panic!("failed to wait for {cmd:?}: {err}")); + assert_eq!(status.code(), Some(0), "{cmd:?} failed: {status:?}"); + + stderr.join().map_err(std::panic::resume_unwind).unwrap(); + + for (name, binary) in executables { + let dst = out_dir.join(name); + let _: u64 = fs::copy(&binary, &dst) + .unwrap_or_else(|err| panic!("failed to copy {binary:?} to {dst:?}: {err}")); + } + } else { + let Package { targets, .. } = ebpf_package; + for Target { name, kind, .. } in targets { + if *kind != ["bin"] { + continue; + } + let dst = out_dir.join(name); + fs::write(&dst, []).unwrap_or_else(|err| panic!("failed to create {dst:?}: {err}")); + } + } +} diff --git a/user/apps/syscall_ebpf/syscall_ebpf/src/main.rs b/user/apps/syscall_ebpf/syscall_ebpf/src/main.rs new file mode 100644 index 00000000..a65e1803 --- /dev/null +++ b/user/apps/syscall_ebpf/syscall_ebpf/src/main.rs @@ -0,0 +1,74 @@ +use aya::{maps::HashMap, programs::KProbe}; +#[rustfmt::skip] +use log::{debug, warn}; +use tokio::{signal, task::yield_now, time}; + +extern crate libc; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> anyhow::Result<()> { + // env_logger::init(); + env_logger::builder() + .filter_level(log::LevelFilter::Warn) + .format_timestamp(None) + .init(); + + // Bump the memlock rlimit. This is needed for older kernels that don't use the + // new memcg based accounting, see https://lwn.net/Articles/837122/ + let rlim = libc::rlimit { + rlim_cur: libc::RLIM_INFINITY, + rlim_max: libc::RLIM_INFINITY, + }; + let ret = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlim) }; + if ret != 0 { + debug!("remove limit on locked memory failed, ret is: {}", ret); + } + + // This will include your eBPF object file as raw bytes at compile-time and load it at + // runtime. This approach is recommended for most real-world use cases. If you would + // like to specify the eBPF program at runtime rather than at compile-time, you can + // reach for `Bpf::load_file` instead. + let mut ebpf = aya::Ebpf::load(aya::include_bytes_aligned!(concat!( + env!("OUT_DIR"), + "/syscall_ebpf" + )))?; + if let Err(e) = aya_log::EbpfLogger::init(&mut ebpf) { + // This can happen if you remove all log statements from your eBPF program. + warn!("failed to initialize eBPF logger: {}", e); + } + + let program: &mut KProbe = ebpf.program_mut("syscall_ebpf").unwrap().try_into()?; + program.load()?; + program.attach("dragonos_kernel::syscall::Syscall::handle", 0)?; + // println!("attacch the kprobe to dragonos_kernel::syscall::Syscall::handle"); + + // print the value of the blocklist per 5 seconds + tokio::spawn(async move { + let blocklist: HashMap<_, u32, u32> = + HashMap::try_from(ebpf.map("SYSCALL_LIST").unwrap()).unwrap(); + let mut now = time::Instant::now(); + loop { + let new_now = time::Instant::now(); + let duration = new_now.duration_since(now); + if duration.as_secs() >= 5 { + println!("------------SYSCALL_LIST----------------"); + let iter = blocklist.iter(); + for item in iter { + if let Ok((key, value)) = item { + println!("syscall: {:?}, count: {:?}", key, value); + } + } + println!("----------------------------------------"); + now = new_now; + } + yield_now().await; + } + }); + + let ctrl_c = signal::ctrl_c(); + println!("Waiting for Ctrl-C..."); + ctrl_c.await?; + println!("Exiting..."); + + Ok(()) +} diff --git a/user/apps/test-sigprocmask/.gitignore b/user/apps/test-sigprocmask/.gitignore new file mode 100644 index 00000000..cf1f429a --- /dev/null +++ b/user/apps/test-sigprocmask/.gitignore @@ -0,0 +1 @@ +test-sigprocmask \ No newline at end of file diff --git a/user/apps/test-sigprocmask/Makefile b/user/apps/test-sigprocmask/Makefile new file mode 100644 index 00000000..d0e4c2e3 --- /dev/null +++ b/user/apps/test-sigprocmask/Makefile @@ -0,0 +1,20 @@ +ifeq ($(ARCH), x86_64) + CROSS_COMPILE=x86_64-linux-musl- +else ifeq ($(ARCH), riscv64) + CROSS_COMPILE=riscv64-linux-musl- +endif + +CC=$(CROSS_COMPILE)gcc + +.PHONY: all +all: main.c + $(CC) -static -o test-sigprocmask main.c + +.PHONY: install clean +install: all + mv test-sigprocmask $(DADK_CURRENT_BUILD_DIR)/test-sigprocmask + +clean: + rm test-sigprocmask *.o + +fmt: diff --git a/user/apps/test-sigprocmask/main.c b/user/apps/test-sigprocmask/main.c new file mode 100644 index 00000000..870d86d4 --- /dev/null +++ b/user/apps/test-sigprocmask/main.c @@ -0,0 +1,132 @@ +#include +#include +#include +#include + +#define TEST_ASSERT(left, right, success_msg, fail_msg) \ + do { \ + if ((left) == (right)) { \ + printf("[PASS] %s\n", success_msg); \ + } else { \ + printf("[FAIL] %s: Expected 0x%lx, but got 0x%lx\n", \ + fail_msg, \ + (unsigned long)(right), \ + (unsigned long)(left)); \ + } \ + } while (0) + + +static int signal_received = 0; + +void signal_handler(int signo) { + if (signo == SIGINT) { + printf("\nReceived SIGINT (Ctrl+C)\n"); + signal_received = 1; + } +} + +void print_signal_mask(const char *msg, const sigset_t *mask) { + printf("%s: ", msg); + for (int signo = 1; signo < NSIG; ++signo) { + if (sigismember(mask, signo)) { + printf("%d ", signo); + } + } + printf("\n"); +} + +// 获取当前屏蔽字的函数 +unsigned long get_signal_mask() { + sigset_t sigset; + if (sigprocmask(SIG_BLOCK, NULL, &sigset) == -1) { + perror("sigprocmask"); + return -1; // 返回错误标记 + } + + // 将信号集编码为位掩码 + unsigned long mask = 0; + for (int i = 1; i < NSIG; i++) { + if (sigismember(&sigset, i)) { + mask |= 1UL << (i - 1); + } + } + return mask; +} + +int main() { + sigset_t new_mask, old_mask; + sigemptyset(&old_mask); + + // 注册 SIGINT 的信号处理函数 + if (signal(SIGINT, signal_handler) == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } + printf("Signal handler for SIGINT is registered.\n"); + signal_received = 0; + kill(getpid(), SIGINT); + sleep(5); + + TEST_ASSERT(signal_received, 1, "SIGINT was received", "SIGINT was not received"); + signal_received = 0; + + // 初始化新的信号集,并将 SIGINT 添加到其中 + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGINT); + + // 打印 new_mask 的值 + print_signal_mask("new_mask", &new_mask); + + // 屏蔽 SIGINT + if (sigprocmask(SIG_BLOCK, &new_mask, &old_mask) < 0) { + perror("sigprocmask - SIG_BLOCK"); + exit(EXIT_FAILURE); + } + + // 打印 old_mask 的值 + print_signal_mask("old_mask", &old_mask); + + // 检查 SIGINT 是否被屏蔽 + unsigned long actual_mask = get_signal_mask(); + unsigned long expected_mask = (1UL << (SIGINT - 1)); + TEST_ASSERT(actual_mask, + expected_mask, + "Signal mask is as expected", + "Signal mask mismatch"); + + printf("SIGINT is now blocked.\n"); + signal_received = 0; + // 向当前进程发送 SIGINT + kill(getpid(), SIGINT); + + // 等待 5 秒,以便测试 SIGINT 是否被屏蔽 + sleep(5); + TEST_ASSERT(signal_received, 0, "SIGINT was blocked", "SIGINT was not blocked"); + signal_received = 0; + // 恢复原来的信号屏蔽字 + if (sigprocmask(SIG_SETMASK, &old_mask, &old_mask) < 0) { + perror("sigprocmask - SIG_SETMASK"); + exit(EXIT_FAILURE); + } + print_signal_mask("old_mask returned", &old_mask); + + // 检查 SIGINT 是否被解除屏蔽 + actual_mask = get_signal_mask(); + expected_mask = 0; + TEST_ASSERT(actual_mask, + expected_mask, + "Signal mask is as expected", + "Signal mask mismatch"); + + printf("SIGINT is now unblocked.\n"); + + signal_received = 0; + kill(getpid(), SIGINT); + + // 等待 5 秒,以便测试 SIGINT 是否解除屏蔽 + sleep(5); + TEST_ASSERT(signal_received, 1, "SIGINT was received", "SIGINT was not received"); + + printf("Exiting program.\n"); + return 0; +} diff --git a/user/apps/test_ebpf/.gitignore b/user/apps/test_ebpf/.gitignore deleted file mode 100644 index 1ac35461..00000000 --- a/user/apps/test_ebpf/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target -Cargo.lock -/install/ \ No newline at end of file diff --git a/user/apps/test_ebpf/Cargo.toml b/user/apps/test_ebpf/Cargo.toml deleted file mode 100644 index ab4a3b7f..00000000 --- a/user/apps/test_ebpf/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "test_ebpf" -version = "0.1.0" -edition = "2021" - -[dependencies] -aya = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13" } -aya-log = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/tiny-aya.git", rev = "0689f13" } - -log = "0.4.22" -env_logger = "0.11.5" -tokio = { version = "1.25", features = ["macros", "rt", "rt-multi-thread", "net", "signal", "time"] } - -[profile.release] -lto = true -strip = true diff --git a/user/apps/test_ebpf/src/main.rs b/user/apps/test_ebpf/src/main.rs deleted file mode 100644 index 1909aeeb..00000000 --- a/user/apps/test_ebpf/src/main.rs +++ /dev/null @@ -1,60 +0,0 @@ -use aya::maps::HashMap; -use aya::programs::KProbe; -use aya::{include_bytes_aligned, Ebpf}; -use aya_log::EbpfLogger; -use log::{info, warn}; -use std::error::Error; -use tokio::task::yield_now; -use tokio::{signal, time}; - -#[tokio::main(flavor = "current_thread")] -async fn main() -> Result<(), Box> { - env_logger::builder() - .filter_level(log::LevelFilter::Warn) - .format_timestamp(None) - .init(); - - let mut bpf = Ebpf::load(include_bytes_aligned!( - "../syscall_ebpf/target/bpfel-unknown-none/release/syscall_ebpf" - ))?; - - // create a async task to read the log - if let Err(e) = EbpfLogger::init(&mut bpf) { - // This can happen if you remove all log statements from your eBPF program. - warn!("failed to initialize eBPF logger: {}", e); - } - - let program: &mut KProbe = bpf.program_mut("syscall_ebpf").unwrap().try_into()?; - program.load()?; - program.attach("dragonos_kernel::syscall::Syscall::handle", 0)?; - - info!("attacch the kprobe to dragonos_kernel::syscall::Syscall::handle"); - - // print the value of the blocklist per 5 seconds - tokio::spawn(async move { - let blocklist: HashMap<_, u32, u32> = - HashMap::try_from(bpf.map("SYSCALL_LIST").unwrap()).unwrap(); - let mut now = time::Instant::now(); - loop { - let new_now = time::Instant::now(); - let duration = new_now.duration_since(now); - if duration.as_secs() >= 5 { - println!("------------SYSCALL_LIST----------------"); - let iter = blocklist.iter(); - for item in iter { - if let Ok((key, value)) = item { - println!("syscall: {:?}, count: {:?}", key, value); - } - } - println!("----------------------------------------"); - now = new_now; - } - yield_now().await; - } - }); - - info!("Waiting for Ctrl-C..."); - signal::ctrl_c().await?; - info!("Exiting..."); - Ok(()) -} diff --git a/user/apps/test_ebpf/syscall_ebpf/.cargo/config.toml b/user/apps/test_ebpf/syscall_ebpf/.cargo/config.toml deleted file mode 100644 index 35049cbc..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/.cargo/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[alias] -xtask = "run --package xtask --" diff --git a/user/apps/test_ebpf/syscall_ebpf/.vscode/settings.json b/user/apps/test_ebpf/syscall_ebpf/.vscode/settings.json deleted file mode 100644 index 0c82ac97..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "rust-analyzer.linkedProjects": ["Cargo.toml", "syscall_ebpf-ebpf/Cargo.toml"] -} diff --git a/user/apps/test_ebpf/syscall_ebpf/Cargo.toml b/user/apps/test_ebpf/syscall_ebpf/Cargo.toml deleted file mode 100644 index 6eb4e632..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/Cargo.toml +++ /dev/null @@ -1,3 +0,0 @@ -[workspace] -resolver = "2" -members = ["xtask", "syscall_ebpf-common"] diff --git a/user/apps/test_ebpf/syscall_ebpf/README.md b/user/apps/test_ebpf/syscall_ebpf/README.md deleted file mode 100644 index fe5ed32d..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# syscall_ebpf - -## Prerequisites - -1. Install bpf-linker: `cargo install bpf-linker` - -## Build eBPF - -```bash -cargo xtask build-ebpf -``` - -To perform a release build you can use the `--release` flag. -You may also change the target architecture with the `--target` flag. - -## Build Userspace - -```bash -cargo build -``` - -## Build eBPF and Userspace - -```bash -cargo xtask build -``` - -## Run - -```bash -RUST_LOG=info cargo xtask run -``` diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml deleted file mode 100644 index 4302a7f1..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.cargo/config.toml +++ /dev/null @@ -1,6 +0,0 @@ -[build] -target-dir = "../target" -target = "bpfel-unknown-none" - -[unstable] -build-std = ["core"] diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.helix/config.toml b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.helix/config.toml deleted file mode 100644 index da5424f1..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.helix/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[editor] -workspace-lsp-roots = [] diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vim/coc-settings.json b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vim/coc-settings.json deleted file mode 100644 index e2211a64..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vim/coc-settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "rust-analyzer.cargo.target": "bpfel-unknown-none", - "rust-analyzer.checkOnSave.allTargets": false -} diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vscode/settings.json b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vscode/settings.json deleted file mode 100644 index e2211a64..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "rust-analyzer.cargo.target": "bpfel-unknown-none", - "rust-analyzer.checkOnSave.allTargets": false -} diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml deleted file mode 100644 index 1911fa43..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/Cargo.toml +++ /dev/null @@ -1,33 +0,0 @@ -[package] -name = "syscall_ebpf-ebpf" -version = "0.1.0" -edition = "2021" - -[dependencies] -aya-ebpf = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/aya.git", rev = "3d57d35" } -aya-log-ebpf = { git = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/aya.git", rev = "3d57d35" } - -syscall_ebpf-common = { path = "../syscall_ebpf-common" } - -[[bin]] -name = "syscall_ebpf" -path = "src/main.rs" - -[profile.dev] -opt-level = 3 -debug = false -debug-assertions = false -overflow-checks = false -lto = true -panic = "abort" -incremental = false -codegen-units = 1 -rpath = false - -[profile.release] -lto = true -panic = "abort" -codegen-units = 1 - -[workspace] -members = [] diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml deleted file mode 100644 index fda4ec82..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/rust-toolchain.toml +++ /dev/null @@ -1,13 +0,0 @@ -[toolchain] -channel = "nightly-2024-11-05" -# The source code of rustc, provided by the rust-src component, is needed for -# building eBPF programs. -components = [ - "cargo", - "clippy", - "rust-docs", - "rust-src", - "rust-std", - "rustc", - "rustfmt", -] diff --git a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs b/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs deleted file mode 100644 index 7f9b79b6..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/syscall_ebpf-ebpf/src/main.rs +++ /dev/null @@ -1,44 +0,0 @@ -#![no_std] -#![no_main] - -use aya_ebpf::{macros::kprobe, programs::ProbeContext}; -use aya_ebpf::macros::map; -use aya_ebpf::maps::HashMap; -use aya_log_ebpf::info; - -#[kprobe] -pub fn syscall_ebpf(ctx: ProbeContext) -> u32 { - try_syscall_ebpf(ctx).unwrap_or_else(|ret| ret) -} - -fn try_syscall_ebpf(ctx: ProbeContext) -> Result { - let pt_regs = unsafe { - &*ctx.regs - }; - // first arg -> rdi - // second arg -> rsi - // third arg -> rdx - // four arg -> rcx - let syscall_num = pt_regs.rsi as usize; - if syscall_num != 1 { - unsafe { - if let Some(v) = SYSCALL_LIST.get(&(syscall_num as u32)){ - let new_v = *v + 1; - SYSCALL_LIST.insert(&(syscall_num as u32), &new_v,0).unwrap(); - }else { - SYSCALL_LIST.insert(&(syscall_num as u32), &1,0).unwrap(); - } - } - info!(&ctx, "invoke syscall {}", syscall_num); - } - Ok(0) -} - -#[map] // -static SYSCALL_LIST: HashMap = - HashMap::::with_max_entries(1024, 0); - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - unsafe { core::hint::unreachable_unchecked() } -} diff --git a/user/apps/test_ebpf/syscall_ebpf/xtask/Cargo.toml b/user/apps/test_ebpf/syscall_ebpf/xtask/Cargo.toml deleted file mode 100644 index c4dea5d1..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/xtask/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "xtask" -version = "0.1.0" -edition = "2021" - -[dependencies] -anyhow = "1" -clap = { version = "4.1", features = ["derive"] } diff --git a/user/apps/test_ebpf/syscall_ebpf/xtask/src/build.rs b/user/apps/test_ebpf/syscall_ebpf/xtask/src/build.rs deleted file mode 100644 index ddeee449..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/xtask/src/build.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::process::Command; - -use anyhow::Context as _; -use clap::Parser; - -use crate::build_ebpf::{build_ebpf, Architecture, Options as BuildOptions}; - -#[derive(Debug, Parser)] -pub struct Options { - /// Set the endianness of the BPF target - #[clap(default_value = "bpfel-unknown-none", long)] - pub bpf_target: Architecture, - /// Build and run the release target - #[clap(long)] - pub release: bool, -} - -/// Build the project -fn build_project(opts: &Options) -> Result<(), anyhow::Error> { - let mut args = vec!["build"]; - if opts.release { - args.push("--release") - } - let status = Command::new("cargo") - .args(&args) - .status() - .expect("failed to build userspace"); - assert!(status.success()); - Ok(()) -} - -/// Build our ebpf program and the project -pub fn build(opts: Options) -> Result<(), anyhow::Error> { - // build our ebpf program followed by our application - build_ebpf(BuildOptions { - target: opts.bpf_target, - release: opts.release, - }) - .context("Error while building eBPF program")?; - build_project(&opts).context("Error while building userspace application")?; - Ok(()) -} \ No newline at end of file diff --git a/user/apps/test_ebpf/syscall_ebpf/xtask/src/build_ebpf.rs b/user/apps/test_ebpf/syscall_ebpf/xtask/src/build_ebpf.rs deleted file mode 100644 index 8c6e323f..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/xtask/src/build_ebpf.rs +++ /dev/null @@ -1,67 +0,0 @@ -use std::{path::PathBuf, process::Command}; - -use clap::Parser; - -#[derive(Debug, Copy, Clone)] -pub enum Architecture { - BpfEl, - BpfEb, -} - -impl std::str::FromStr for Architecture { - type Err = String; - - fn from_str(s: &str) -> Result { - Ok(match s { - "bpfel-unknown-none" => Architecture::BpfEl, - "bpfeb-unknown-none" => Architecture::BpfEb, - _ => return Err("invalid target".to_owned()), - }) - } -} - -impl std::fmt::Display for Architecture { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - Architecture::BpfEl => "bpfel-unknown-none", - Architecture::BpfEb => "bpfeb-unknown-none", - }) - } -} - -#[derive(Debug, Parser)] -pub struct Options { - /// Set the endianness of the BPF target - #[clap(default_value = "bpfel-unknown-none", long)] - pub target: Architecture, - /// Build the release target - #[clap(long)] - pub release: bool, -} - -pub fn build_ebpf(opts: Options) -> Result<(), anyhow::Error> { - let dir = PathBuf::from("syscall_ebpf-ebpf"); - let target = format!("--target={}", opts.target); - let mut args = vec![ - "build", - target.as_str(), - "-Z", - "build-std=core", - ]; - if opts.release { - args.push("--release") - } - - // Command::new creates a child process which inherits all env variables. This means env - // vars set by the cargo xtask command are also inherited. RUSTUP_TOOLCHAIN is removed - // so the rust-toolchain.toml file in the -ebpf folder is honored. - - let status = Command::new("cargo") - .current_dir(dir) - .env_remove("RUSTUP_TOOLCHAIN") - .args(&args) - .status() - .expect("failed to build bpf program"); - assert!(status.success()); - Ok(()) -} diff --git a/user/apps/test_ebpf/syscall_ebpf/xtask/src/main.rs b/user/apps/test_ebpf/syscall_ebpf/xtask/src/main.rs deleted file mode 100644 index 50794589..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/xtask/src/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -mod build_ebpf; -mod build; -mod run; - -use std::process::exit; - -use clap::Parser; - -#[derive(Debug, Parser)] -pub struct Options { - #[clap(subcommand)] - command: Command, -} - -#[derive(Debug, Parser)] -enum Command { - BuildEbpf(build_ebpf::Options), - Build(build::Options), - Run(run::Options), -} - -fn main() { - let opts = Options::parse(); - - use Command::*; - let ret = match opts.command { - BuildEbpf(opts) => build_ebpf::build_ebpf(opts), - Run(opts) => run::run(opts), - Build(opts) => build::build(opts), - }; - - if let Err(e) = ret { - eprintln!("{e:#}"); - exit(1); - } -} diff --git a/user/apps/test_ebpf/syscall_ebpf/xtask/src/run.rs b/user/apps/test_ebpf/syscall_ebpf/xtask/src/run.rs deleted file mode 100644 index 19af11c4..00000000 --- a/user/apps/test_ebpf/syscall_ebpf/xtask/src/run.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::process::Command; - -use anyhow::Context as _; -use clap::Parser; - -use crate::{build::{build, Options as BuildOptions}, build_ebpf::Architecture}; - -#[derive(Debug, Parser)] -pub struct Options { - /// Set the endianness of the BPF target - #[clap(default_value = "bpfel-unknown-none", long)] - pub bpf_target: Architecture, - /// Build and run the release target - #[clap(long)] - pub release: bool, - /// The command used to wrap your application - #[clap(short, long, default_value = "sudo -E")] - pub runner: String, - /// Arguments to pass to your application - #[clap(name = "args", last = true)] - pub run_args: Vec, -} - - -/// Build and run the project -pub fn run(opts: Options) -> Result<(), anyhow::Error> { - // Build our ebpf program and the project - build(BuildOptions{ - bpf_target: opts.bpf_target, - release: opts.release, - }).context("Error while building project")?; - - // profile we are building (release or debug) - let profile = if opts.release { "release" } else { "debug" }; - let bin_path = format!("target/{profile}/syscall_ebpf"); - - // arguments to pass to the application - let mut run_args: Vec<_> = opts.run_args.iter().map(String::as_str).collect(); - - // configure args - let mut args: Vec<_> = opts.runner.trim().split_terminator(' ').collect(); - args.push(bin_path.as_str()); - args.append(&mut run_args); - - // run the command - let status = Command::new(args.first().expect("No first argument")) - .args(args.iter().skip(1)) - .status() - .expect("failed to run the command"); - - if !status.success() { - anyhow::bail!("Failed to run `{}`", args.join(" ")); - } - Ok(()) -} diff --git a/user/apps/test_signal_restart/.gitignore b/user/apps/test_signal_restart/.gitignore new file mode 100644 index 00000000..802b4439 --- /dev/null +++ b/user/apps/test_signal_restart/.gitignore @@ -0,0 +1 @@ +test_signal diff --git a/user/apps/test_signal_restart/Makefile b/user/apps/test_signal_restart/Makefile new file mode 100644 index 00000000..2a9e25aa --- /dev/null +++ b/user/apps/test_signal_restart/Makefile @@ -0,0 +1,20 @@ +ifeq ($(ARCH), x86_64) + CROSS_COMPILE=x86_64-linux-musl- +else ifeq ($(ARCH), riscv64) + CROSS_COMPILE=riscv64-linux-musl- +endif + +CC=$(CROSS_COMPILE)gcc + +.PHONY: all +all: main.c + $(CC) -static -o test_signal_restart main.c + +.PHONY: install clean +install: all + mv test_signal_restart $(DADK_CURRENT_BUILD_DIR)/test_signal_restart + +clean: + rm test_signal_restart *.o + +fmt: diff --git a/user/apps/test_signal_restart/main.c b/user/apps/test_signal_restart/main.c new file mode 100644 index 00000000..e0c90756 --- /dev/null +++ b/user/apps/test_signal_restart/main.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include + +#define BUFFER_SIZE 1024 + +#define MSG "Hello from parent!\n" + +static int handled_signal = 0; +// 子进程的信号处理函数 +void child_signal_handler(int sig) { + printf("Child received signal %d\n", sig); + handled_signal = 1; +} + +// 父进程的信号处理函数 +void parent_signal_handler(int sig) { + printf("Parent received signal %d\n", sig); +} + +int main() { + int pipefd[2]; + pid_t pid; + char buffer[BUFFER_SIZE]; + + // 创建管道 + if (pipe(pipefd) == -1) { + perror("pipe"); + exit(EXIT_FAILURE); + } + + // 创建子进程 + pid = fork(); + if (pid == -1) { + perror("fork"); + exit(EXIT_FAILURE); + } + + if (pid == 0) { + // 子进程 + close(pipefd[1]); // 关闭写端 + + // 设置子进程的信号处理函数 + signal(SIGUSR1, child_signal_handler); + + printf("Child: Waiting for data...\n"); + + // 尝试从管道中读取数据 + ssize_t bytes_read = read(pipefd[0], buffer, BUFFER_SIZE - 1); + if (bytes_read == -1) { + printf("[FAILED]: Child: read error, errno=%d\n", errno); + exit(EXIT_FAILURE); + } else if (bytes_read == 0) { + printf("Child: End of file\n"); + } + + if (bytes_read != sizeof(MSG) - 1) { + printf("[FAILED]: Child: read error: got %ld bytes, expected %ld\n", + bytes_read, sizeof(MSG) - 1); + } else { + printf("[PASS]: Child: read success: got %ld bytes, expected %ld\n", + bytes_read, sizeof(MSG) - 1); + } + + buffer[bytes_read] = '\0'; + printf("Child: Received message: %s", buffer); + + close(pipefd[0]); + + if (!handled_signal) + printf("[FAILED]: Parent: child did not handle signal\n"); + else + printf("[PASS]: Parent: child handled signal\n"); + exit(EXIT_SUCCESS); + } else { + // 父进程 + close(pipefd[0]); // 关闭读端 + + // 设置父进程的信号处理函数 + signal(SIGCHLD, parent_signal_handler); + + // 发送信号给子进程,中断它的读操作 + sleep(1); // 确保子进程已经开始读取 + // printf("Parent: Sending SIGCHLD to child...\n"); + // kill(pid, SIGCHLD); + printf("Parent: Sending SIGUSR1 to child...\n"); + kill(pid, SIGUSR1); + sleep(1); // 确保子进程已经处理了信号 + + write(pipefd[1], MSG, strlen(MSG)); + + printf("Parent: Sent message: %s", MSG); + + // 等待子进程结束 + waitpid(pid, NULL, 0); + + printf("Parent: Child process finished.\n"); + + close(pipefd[1]); + exit(EXIT_SUCCESS); + } +} \ No newline at end of file diff --git a/user/dadk/config/nova_shell-0.1.0.toml b/user/dadk/config/nova_shell-0.1.0.toml index 190de5fd..e5778ddf 100644 --- a/user/dadk/config/nova_shell-0.1.0.toml +++ b/user/dadk/config/nova_shell-0.1.0.toml @@ -24,7 +24,7 @@ source = "git" source-path = "https://git.mirrors.dragonos.org.cn/DragonOS-Community/NovaShell.git" # git标签或分支 # 注意: branch和revision只能二选一,且source要设置为"git" -revision = "cb835e03e4" +revision = "feaebefaef" # 构建相关信息 [build] # (可选)构建命令 diff --git a/user/dadk/config/test_ebpf_0_1_0.toml b/user/dadk/config/syscall_ebpf_0_1_0.toml similarity index 91% rename from user/dadk/config/test_ebpf_0_1_0.toml rename to user/dadk/config/syscall_ebpf_0_1_0.toml index 6937a23e..3df5f625 100644 --- a/user/dadk/config/test_ebpf_0_1_0.toml +++ b/user/dadk/config/syscall_ebpf_0_1_0.toml @@ -1,5 +1,5 @@ # 用户程序名称 -name = "test_ebpf" +name = "test_ebpf_new" # 版本号 version = "0.1.0" # 用户程序描述信息 @@ -21,7 +21,7 @@ type = "build-from-source" # "install_from_prebuilt" 可选值:"local", "archive" source = "local" # 路径或URL -source-path = "user/apps/test_ebpf" +source-path = "user/apps/syscall_ebpf" # 构建相关信息 [build] # (可选)构建命令 diff --git a/user/dadk/config/test_signal_restart-0.1.0.toml b/user/dadk/config/test_signal_restart-0.1.0.toml new file mode 100644 index 00000000..0b389625 --- /dev/null +++ b/user/dadk/config/test_signal_restart-0.1.0.toml @@ -0,0 +1,41 @@ +# 用户程序名称 +name = "test_signal_restart" +# 版本号 +version = "0.1.0" +# 用户程序描述信息 +description = "一个用来测试signal能够正常运行的app" +# (可选)默认: false 是否只构建一次,如果为true,DADK会在构建成功后,将构建结果缓存起来,下次构建时,直接使用缓存的构建结果 +build-once = false +# (可选) 默认: false 是否只安装一次,如果为true,DADK会在安装成功后,不再重复安装 +install-once = false +# 目标架构 +# 可选值:"x86_64", "aarch64", "riscv64" +target-arch = ["x86_64"] +# 任务源 +[task-source] +# 构建类型 +# 可选值:"build-from_source", "install-from-prebuilt" +type = "build-from-source" +# 构建来源 +# "build_from_source" 可选值:"git", "local", "archive" +# "install_from_prebuilt" 可选值:"local", "archive" +source = "local" +# 路径或URL +source-path = "user/apps/test_signal_restart" +# 构建相关信息 +[build] +# (可选)构建命令 +build-command = "make install" +# 安装相关信息 +[install] +# (可选)安装到DragonOS的路径 +in-dragonos-path = "/bin" +# 清除相关信息 +[clean] +# (可选)清除命令 +clean-command = "make clean" +# (可选)依赖项 +# 注意:如果没有依赖项,忽略此项,不允许只留一个[[depends]] +# 由于原文件中依赖项为空,此处省略[[depends]]部分 +# (可选)环境变量 +# 由于原文件中环境变量为空,此处省略[[envs]]部分 diff --git a/user/dadk/config/test_sigprocmask_0_1_0.toml b/user/dadk/config/test_sigprocmask_0_1_0.toml new file mode 100644 index 00000000..015e1203 --- /dev/null +++ b/user/dadk/config/test_sigprocmask_0_1_0.toml @@ -0,0 +1,41 @@ +# 用户程序名称 +name = "test_sigprocmask" +# 版本号 +version = "0.1.0" +# 用户程序描述信息 +description = "一个用来测试sigprocmask能够正常运行的app" +# (可选)默认: false 是否只构建一次,如果为true,DADK会在构建成功后,将构建结果缓存起来,下次构建时,直接使用缓存的构建结果 +build-once = false +# (可选) 默认: false 是否只安装一次,如果为true,DADK会在安装成功后,不再重复安装 +install-once = false +# 目标架构 +# 可选值:"x86_64", "aarch64", "riscv64" +target-arch = ["x86_64"] +# 任务源 +[task-source] +# 构建类型 +# 可选值:"build-from_source", "install-from-prebuilt" +type = "build-from-source" +# 构建来源 +# "build_from_source" 可选值:"git", "local", "archive" +# "install_from_prebuilt" 可选值:"local", "archive" +source = "local" +# 路径或URL +source-path = "user/apps/test-sigprocmask" +# 构建相关信息 +[build] +# (可选)构建命令 +build-command = "make install" +# 安装相关信息 +[install] +# (可选)安装到DragonOS的路径 +in-dragonos-path = "/bin" +# 清除相关信息 +[clean] +# (可选)清除命令 +clean-command = "make clean" +# (可选)依赖项 +# 注意:如果没有依赖项,忽略此项,不允许只留一个[[depends]] +# 由于原文件中依赖项为空,此处省略[[depends]]部分 +# (可选)环境变量 +# 由于原文件中环境变量为空,此处省略[[envs]]部分