添加thread和futex机制 (#411)

* 初步实现clone系统调用

* 实现了线程,初步实现futex机制,添加了几个小的系统调用

* 更改pcb引用计数问题

* 解决死锁bug

---------

Co-authored-by: LoGin <longjin@DragonOS.org>
This commit is contained in:
GnoCiYeH 2023-11-01 20:55:57 +08:00 committed by GitHub
parent 665f4a7707
commit 971462be94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 1643 additions and 149 deletions

View File

@ -5,7 +5,11 @@ use core::{
sync::atomic::{compiler_fence, Ordering},
};
use alloc::{string::String, sync::Arc, vec::Vec};
use alloc::{
string::String,
sync::{Arc, Weak},
vec::Vec,
};
use memoffset::offset_of;
use x86::{controlregs::Cr4, segmentation::SegmentSelector};
@ -20,14 +24,16 @@ use crate::{
VirtAddr,
},
process::{
fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager,
SwitchResult, SWITCH_RESULT,
fork::{CloneFlags, KernelCloneArgs},
KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult,
SWITCH_RESULT,
},
syscall::{Syscall, SystemError},
};
use self::{
kthread::kernel_thread_bootstrap_stage1,
syscall::ARCH_SET_FS,
table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
};
@ -38,6 +44,9 @@ pub mod kthread;
pub mod syscall;
pub mod table;
pub const IA32_FS_BASE: u32 = 0xC000_0100;
pub const IA32_GS_BASE: u32 = 0xC000_0101;
extern "C" {
/// 从中断返回
fn ret_from_intr();
@ -175,7 +184,7 @@ impl ArchPCBInfo {
if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
self.fsbase = x86::current::segmentation::rdfsbase() as usize;
} else {
self.fsbase = 0;
self.fsbase = x86::msr::rdmsr(IA32_FS_BASE) as usize;
}
}
@ -183,19 +192,23 @@ impl ArchPCBInfo {
if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
self.gsbase = x86::current::segmentation::rdgsbase() as usize;
} else {
self.gsbase = 0;
self.gsbase = x86::msr::rdmsr(IA32_GS_BASE) as usize;
}
}
pub unsafe fn restore_fsbase(&mut self) {
if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
x86::current::segmentation::wrfsbase(self.fsbase as u64);
} else {
x86::msr::wrmsr(IA32_FS_BASE, self.fsbase as u64);
}
}
pub unsafe fn restore_gsbase(&mut self) {
if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
x86::current::segmentation::wrgsbase(self.gsbase as u64);
} else {
x86::msr::wrmsr(IA32_GS_BASE, self.gsbase as u64);
}
}
@ -228,11 +241,11 @@ impl ProcessControlBlock {
panic!("current_pcb is null");
}
unsafe {
// 为了防止内核栈的pcb指针被释放,这里需要将其包装一下使得Arc的drop不会被调用
let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
ManuallyDrop::new(Arc::from_raw(*p));
// 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下
let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
ManuallyDrop::new(Weak::from_raw(*p));
let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap();
return new_arc;
}
}
@ -255,11 +268,12 @@ impl ProcessManager {
///
/// 由于这个过程与具体的架构相关,所以放在这里
pub fn copy_thread(
_clone_flags: &CloneFlags,
current_pcb: &Arc<ProcessControlBlock>,
new_pcb: &Arc<ProcessControlBlock>,
clone_args: KernelCloneArgs,
current_trapframe: &TrapFrame,
) -> Result<(), SystemError> {
let clone_flags = clone_args.flags;
let mut child_trapframe = current_trapframe.clone();
// 子进程的返回值为0
@ -278,6 +292,10 @@ impl ProcessManager {
// 拷贝栈帧
unsafe {
let usp = clone_args.stack;
if usp != 0 {
child_trapframe.rsp = usp as u64;
}
let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
*trap_frame_ptr = child_trapframe;
}
@ -296,15 +314,19 @@ impl ProcessManager {
drop(current_arch_guard);
// 设置返回地址(子进程开始执行的指令地址)
if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
} else {
new_arch_guard.rip = ret_from_intr as usize;
}
// 设置tls
if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
drop(new_arch_guard);
Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?;
}
return Ok(());
}
@ -335,28 +357,28 @@ impl ProcessManager {
compiler_fence(Ordering::SeqCst);
next_addr_space.read().user_mapper.utable.make_current();
drop(next_addr_space);
compiler_fence(Ordering::SeqCst);
// 切换内核栈
// 获取arch info的锁并强制泄露其守卫切换上下文后在switch_finish_hook中会释放锁
let next_arch = SpinLockGuard::leak(next.arch_info());
let prev_arch = SpinLockGuard::leak(prev.arch_info());
let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo;
let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo;
prev_arch.rip = switch_back as usize;
(*prev_arch).rip = switch_back as usize;
// 恢复当前的 preempt count*2
ProcessManager::current_pcb().preempt_enable();
ProcessManager::current_pcb().preempt_enable();
SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone());
SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone());
// 切换tss
TSSManager::current_tss().set_rsp(
x86::Ring::Ring0,
next.kernel_stack().stack_max_address().data() as u64,
);
SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev);
SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next);
// kdebug!("switch tss ok");
compiler_fence(Ordering::SeqCst);
// 正式切换上下文
switch_to_inner(prev_arch, next_arch);
@ -365,7 +387,7 @@ impl ProcessManager {
/// 保存上下文然后切换进程接着jmp到`switch_finish_hook`钩子函数
#[naked]
unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) {
unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) {
asm!(
// As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"):
//
@ -394,6 +416,9 @@ unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut Arc
mov [rdi + {off_fs}], fs
mov [rdi + {off_gs}], gs
// mov fs, [rsi + {off_fs}]
// mov gs, [rsi + {off_gs}]
push rbp
push rax

View File

@ -1,4 +1,4 @@
use alloc::{string::String, vec::Vec};
use alloc::{string::String, sync::Arc, vec::Vec};
use crate::{
arch::{
@ -10,9 +10,9 @@ use crate::{
mm::ucontext::AddressSpace,
process::{
exec::{load_binary_file, ExecParam, ExecParamFlags},
ProcessManager,
ProcessControlBlock, ProcessManager,
},
syscall::{Syscall, SystemError},
syscall::{user_access::UserBufferWriter, Syscall, SystemError},
};
impl Syscall {
@ -105,6 +105,8 @@ impl Syscall {
regs.rflags = 0x200;
regs.rax = 1;
drop(param);
// kdebug!("regs: {:?}\n", regs);
// kdebug!(
@ -114,4 +116,70 @@ impl Syscall {
return Ok(());
}
/// ## 用于控制和查询与体系结构相关的进程特定选项
pub fn arch_prctl(option: usize, arg2: usize) -> Result<usize, SystemError> {
let pcb = ProcessManager::current_pcb();
if let Err(SystemError::EINVAL) = Self::do_arch_prctl_64(&pcb, option, arg2, true) {
Self::do_arch_prctl_common(option, arg2)?;
}
Ok(0)
}
/// ## 64位下控制fs/gs base寄存器的方法
pub fn do_arch_prctl_64(
pcb: &Arc<ProcessControlBlock>,
option: usize,
arg2: usize,
from_user: bool,
) -> Result<usize, SystemError> {
let mut arch_info = pcb.arch_info_irqsave();
match option {
ARCH_GET_FS => {
unsafe { arch_info.save_fsbase() };
let mut writer = UserBufferWriter::new(
arg2 as *mut usize,
core::mem::size_of::<usize>(),
from_user,
)?;
writer.copy_one_to_user(&arch_info.fsbase, 0)?;
}
ARCH_GET_GS => {
unsafe { arch_info.save_gsbase() };
let mut writer = UserBufferWriter::new(
arg2 as *mut usize,
core::mem::size_of::<usize>(),
from_user,
)?;
writer.copy_one_to_user(&arch_info.gsbase, 0)?;
}
ARCH_SET_FS => {
arch_info.fsbase = arg2;
// 如果是当前进程则直接写入寄存器
if pcb.pid() == ProcessManager::current_pcb().pid() {
unsafe { arch_info.restore_fsbase() }
}
}
ARCH_SET_GS => {
arch_info.gsbase = arg2;
if pcb.pid() == ProcessManager::current_pcb().pid() {
unsafe { arch_info.restore_gsbase() }
}
}
_ => {
return Err(SystemError::EINVAL);
}
}
Ok(0)
}
#[allow(dead_code)]
pub fn do_arch_prctl_common(_option: usize, _arg2: usize) -> Result<usize, SystemError> {
todo!("do_arch_prctl_common not unimplemented");
}
}
pub const ARCH_SET_GS: usize = 0x1001;
pub const ARCH_SET_FS: usize = 0x1002;
pub const ARCH_GET_FS: usize = 0x1003;
pub const ARCH_GET_GS: usize = 0x1004;

View File

@ -1,5 +1,38 @@
use core::arch::x86_64::_rdtsc;
use alloc::vec::Vec;
use crate::{
libs::rand::GRandFlags,
syscall::{user_access::UserBufferWriter, Syscall, SystemError},
};
pub fn rand() -> usize {
return unsafe { (_rdtsc() * _rdtsc() + 998244353_u64 * _rdtsc()) as usize };
}
impl Syscall {
/// ## 将随机字节填入buf
///
/// ### 该系统调用与linux不一致因为目前没有其他随机源
pub fn get_random(buf: *mut u8, len: usize, flags: GRandFlags) -> Result<usize, SystemError> {
if flags.bits() == (GRandFlags::GRND_INSECURE.bits() | GRandFlags::GRND_RANDOM.bits()) {
return Err(SystemError::EINVAL);
}
let mut writer = UserBufferWriter::new(buf, len, true)?;
let mut ret = Vec::new();
let mut count = 0;
while count < len {
let rand = rand();
for offset in 0..4 {
ret.push((rand >> offset * 2) as u8);
count += 1;
}
}
writer.copy_to_user(&ret, 0)?;
Ok(len)
}
}

View File

@ -16,7 +16,7 @@ use crate::driver::pci::pci::{
};
use crate::driver::pci::pci_irq::{IrqCommonMsg, IrqMsg, IrqSpecificMsg, PciInterrupt, IRQ};
use crate::include::bindings::bindings::pt_regs;
use crate::libs::volatile::{ReadOnly, Volatile, VolatileReadable, VolatileWritable, WriteOnly};
use crate::libs::volatile::{ReadOnly, Volatile, WriteOnly};
use crate::net::net_core::poll_ifaces_try_lock_onetime;
use crate::{kdebug, kinfo};
@ -45,8 +45,10 @@ const E1000E_DEVICE_ID: [u16; 14] = [
// BAR0空间大小(128KB)
const E1000E_BAR_REG_SIZE: u32 = 128 * 1024;
// BAR0空间对齐(64bit)
#[allow(dead_code)]
const E1000E_BAR_REG_ALIGN: u8 = 64;
// 单个寄存器大小(32bit, 4字节)
#[allow(dead_code)]
const E1000E_REG_SIZE: u8 = 4;
// TxBuffer和RxBuffer的大小(DMA页)
@ -112,11 +114,13 @@ impl E1000EBuffer {
}
}
#[allow(dead_code)]
pub fn as_addr(&self) -> NonNull<u8> {
assert!(self.length != 0);
return self.buffer;
}
#[allow(dead_code)]
pub fn as_addr_u64(&self) -> u64 {
assert!(self.length != 0);
return self.buffer.as_ptr() as u64;
@ -127,6 +131,7 @@ impl E1000EBuffer {
return self.paddr;
}
#[allow(dead_code)]
pub fn as_slice(&self) -> &[u8] {
assert!(self.length != 0);
return unsafe { from_raw_parts(self.buffer.as_ptr(), self.length) };
@ -154,10 +159,11 @@ impl E1000EBuffer {
// 中断处理函数, 调用协议栈的poll函数未来可能会用napi来替换这里
// Interrupt handler
unsafe extern "C" fn e1000e_irq_handler(irq_num: u64, irq_paramer: u64, regs: *mut pt_regs) {
unsafe extern "C" fn e1000e_irq_handler(_irq_num: u64, _irq_paramer: u64, _regs: *mut pt_regs) {
poll_ifaces_try_lock_onetime().ok();
}
#[allow(dead_code)]
pub struct E1000EDevice {
// 设备寄存器
// device registers
@ -194,6 +200,7 @@ pub struct E1000EDevice {
impl E1000EDevice {
// 从PCI标准设备进行驱动初始化
// init the device for PCI standard device struct
#[allow(unused_assignments)]
pub fn new(device: &mut PciDeviceStructureGeneralDevice) -> Result<Self, E1000EPciError> {
// 从BAR0获取我们需要的寄存器
// Build registers sturcts from BAR0
@ -479,6 +486,7 @@ impl E1000EDevice {
// 切换是否接受分组到达的中断
// change whether the receive timer interrupt is enabled
// Note: this method is not completely implemented and not used in the current version
#[allow(dead_code)]
pub fn e1000e_intr_set(&mut self, state: bool) {
let mut ims = unsafe { volread!(self.interrupt_regs, ims) };
match state {
@ -491,6 +499,7 @@ impl E1000EDevice {
// 实现了一部分napi机制的收包函数, 现在还没有投入使用
// This method is a partial implementation of napi (New API) techniques
// Note: this method is not completely implemented and not used in the current version
#[allow(dead_code)]
pub fn e1000e_receive2(&mut self) -> Option<E1000EBuffer> {
// 向设备表明我们已经接受到了之前的中断
// Tell e1000e we have received the interrupt
@ -585,8 +594,8 @@ pub extern "C" fn rs_e1000e_init() {
pub fn e1000e_init() -> () {
match e1000e_probe() {
Ok(code) => kinfo!("Successfully init e1000e device!"),
Err(error) => kinfo!("Error occurred!"),
Ok(_code) => kinfo!("Successfully init e1000e device!"),
Err(_error) => kinfo!("Error occurred!"),
}
}
@ -618,6 +627,7 @@ pub fn e1000e_probe() -> Result<u64, E1000EPciError> {
// 用到的e1000e寄存器结构体
// pp.275, Table 13-3
// 设备通用寄存器
#[allow(dead_code)]
struct GeneralRegs {
ctrl: Volatile<u32>, //0x00000
ctrl_alias: Volatile<u32>, //0x00004
@ -630,6 +640,7 @@ struct GeneralRegs {
mdic: Volatile<u32>, //0x00020
}
// 中断控制
#[allow(dead_code)]
struct InterruptRegs {
icr: Volatile<u32>, //0x000c0 ICR寄存器应当为只读寄存器但我们需要向其中写入来清除对应位
itr: Volatile<u32>, //0x000c4
@ -644,6 +655,7 @@ struct ReceiveCtrlRegs {
rctl: Volatile<u32>, //0x00100
}
// 发包功能控制
#[allow(dead_code)]
struct TransmitCtrlRegs {
tctl: Volatile<u32>, //0x00400
tctl_ext: Volatile<u32>, //0x00404
@ -652,6 +664,7 @@ struct TransmitCtrlRegs {
tipg: Volatile<u32>, //0x00410
}
// 收包功能相关
#[allow(dead_code)]
struct ReceiveRegs {
rdbal0: Volatile<u32>, //0x02800
rdbah0: Volatile<u32>, //0x02804
@ -666,6 +679,7 @@ struct ReceiveRegs {
rxdctl: Volatile<u32>, //0x2828
}
// 发包功能相关
#[allow(dead_code)]
struct TransimitRegs {
tdbal0: Volatile<u32>, //0x03800
tdbah0: Volatile<u32>, //0x03804
@ -689,6 +703,7 @@ struct ReceiveAddressRegs {
struct PCIeRegs {
gcr: Volatile<u32>, //0x05b00
}
#[allow(dead_code)]
struct StatisticsRegs {}
// 0x05200-0x053fc
@ -714,15 +729,19 @@ const E1000E_CTRL_SLU: u32 = 1 << 6;
const E1000E_CTRL_FRCSPD: u32 = 1 << 11;
const E1000E_CTRL_FRCDPLX: u32 = 1 << 12;
const E1000E_CTRL_RST: u32 = 1 << 26;
#[allow(dead_code)]
const E1000E_CTRL_RFCE: u32 = 1 << 27;
#[allow(dead_code)]
const E1000E_CTRL_TFCE: u32 = 1 << 28;
const E1000E_CTRL_PHY_RST: u32 = 1 << 31;
// IMS
const E1000E_IMS_LSC: u32 = 1 << 2;
const E1000E_IMS_RXDMT0: u32 = 1 << 4;
#[allow(dead_code)]
const E1000E_IMS_RXO: u32 = 1 << 6;
const E1000E_IMS_RXT0: u32 = 1 << 7;
#[allow(dead_code)]
const E1000E_IMS_RXQ0: u32 = 1 << 20;
const E1000E_IMS_OTHER: u32 = 1 << 24; // qemu use this bit to set msi-x interrupt

View File

@ -3,16 +3,12 @@
use crate::{
driver::{
base::{
device::{
bus::Bus,
driver::{Driver, DriverError},
Device, DevicePrivateData, IdTable,
},
device::{bus::Bus, driver::Driver, Device, IdTable},
kobject::{KObjType, KObject, KObjectState},
},
net::NetDriver,
},
kdebug, kinfo,
kinfo,
libs::spinlock::SpinLock,
net::{generate_iface_id, NET_DRIVERS},
syscall::SystemError,
@ -84,7 +80,7 @@ impl phy::RxToken for E1000ERxToken {
}
impl phy::TxToken for E1000ETxToken {
fn consume<R, F>(self, len: usize, f: F) -> R
fn consume<R, F>(self, _len: usize, f: F) -> R
where
F: FnOnce(&mut [u8]) -> R,
{

View File

@ -310,7 +310,7 @@ impl<T: Transport + 'static> NetDriver for VirtioInterface<T> {
let mut guard = self.iface.lock();
let poll_res = guard.poll(timestamp, self.driver.force_get_mut(), sockets);
// todo: notify!!!
kdebug!("Virtio Interface poll:{poll_res}");
// kdebug!("Virtio Interface poll:{poll_res}");
if poll_res {
return Ok(());
}

View File

@ -151,7 +151,6 @@ impl Syscall {
/// @return 文件描述符编号,或者是错误码
pub fn open(path: &str, mode: FileMode) -> Result<usize, SystemError> {
// kdebug!("open: path: {}, mode: {:?}", path, mode);
// 文件名过长
if path.len() > MAX_PATHLEN as usize {
return Err(SystemError::ENAMETOOLONG);
@ -727,6 +726,13 @@ impl Syscall {
return Ok(kstat);
}
fn do_stat(path: &str) -> Result<PosixKstat, SystemError> {
let fd = Self::open(path, FileMode::O_RDONLY)?;
let ret = Self::do_fstat(fd as i32);
Self::close(fd)?;
ret
}
pub fn fstat(fd: i32, usr_kstat: *mut PosixKstat) -> Result<usize, SystemError> {
let kstat = Self::do_fstat(fd)?;
if usr_kstat.is_null() {
@ -738,6 +744,14 @@ impl Syscall {
return Ok(0);
}
pub fn stat(path: &str, user_kstat: *mut PosixKstat) -> Result<usize, SystemError> {
let fd = Self::open(path, FileMode::O_RDONLY)?;
Self::fstat(fd as i32, user_kstat).map_err(|e| {
Self::close(fd).ok();
e
})
}
pub fn mknod(
path_ptr: *const i8,
mode: ModeType,
@ -771,6 +785,15 @@ impl Syscall {
return Ok(0);
}
pub fn writev(fd: i32, iov: usize, count: usize) -> Result<usize, SystemError> {
// IoVecs会进行用户态检验
let iovecs = unsafe { IoVecs::from_user(iov as *const IoVec, count, false) }?;
let data = iovecs.gather();
Self::write(fd, &data)
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]

View File

@ -20,6 +20,7 @@
#![feature(ptr_to_from_bits)]
#![feature(concat_idents)]
#![cfg_attr(target_os = "none", no_std)]
#![feature(atomic_mut_ptr)]
#[cfg(test)]
#[macro_use]

View File

@ -0,0 +1,54 @@
bitflags! {
pub struct FutexArg: u32 {
const FUTEX_WAIT = 0;
const FUTEX_WAKE = 1;
const FUTEX_FD = 2;
const FUTEX_REQUEUE = 3;
const FUTEX_CMP_REQUEUE = 4;
const FUTEX_WAKE_OP = 5;
const FUTEX_LOCK_PI = 6;
const FUTEX_UNLOCK_PI = 7;
const FUTEX_TRYLOCK_PI = 8;
const FUTEX_WAIT_BITSET = 9;
const FUTEX_WAKE_BITSET = 10;
const FUTEX_WAIT_REQUEUE_PI = 11;
const FUTEX_CMP_REQUEUE_PI = 12;
const FUTEX_LOCK_PI2 = 13;
}
pub struct FutexFlag: u32 {
const FLAGS_MATCH_NONE = 0x01;
const FLAGS_SHARED = 0x01;
const FLAGS_CLOCKRT = 0x02;
const FLAGS_HAS_TIMEOUT = 0x04;
const FUTEX_PRIVATE_FLAG = 128;
const FUTEX_CLOCK_REALTIME = 256;
const FUTEX_CMD_MASK = !(Self::FUTEX_PRIVATE_FLAG.bits() | Self::FUTEX_CLOCK_REALTIME.bits());
}
pub struct FutexOP: u32 {
const FUTEX_OP_SET = 0;
const FUTEX_OP_ADD = 1;
const FUTEX_OP_OR = 2;
const FUTEX_OP_ANDN = 3;
const FUTEX_OP_XOR = 4;
const FUTEX_OP_OPARG_SHIFT = 8;
}
pub struct FutexOpCMP: u32 {
const FUTEX_OP_CMP_EQ = 0;
const FUTEX_OP_CMP_NE = 1;
const FUTEX_OP_CMP_LT = 2;
const FUTEX_OP_CMP_LE = 3;
const FUTEX_OP_CMP_GT = 4;
const FUTEX_OP_CMP_GE = 5;
}
}
#[allow(dead_code)]
pub const FUTEX_WAITERS: u32 = 0x80000000;
#[allow(dead_code)]
pub const FUTEX_OWNER_DIED: u32 = 0x40000000;
#[allow(dead_code)]
pub const FUTEX_TID_MASK: u32 = 0x3fffffff;
pub const FUTEX_BITSET_MATCH_ANY: u32 = 0xffffffff;

View File

@ -0,0 +1,647 @@
use alloc::{
collections::LinkedList,
sync::{Arc, Weak},
};
use core::hash::{Hash, Hasher};
use core::{intrinsics::likely, sync::atomic::AtomicU64};
use hashbrown::HashMap;
use crate::{
arch::{sched::sched, CurrentIrqArch, MMArch},
exception::InterruptArch,
libs::spinlock::{SpinLock, SpinLockGuard},
mm::{ucontext::AddressSpace, MemoryManagementArch, VirtAddr},
process::{ProcessControlBlock, ProcessManager},
syscall::{user_access::UserBufferReader, SystemError},
time::{
timer::{next_n_us_timer_jiffies, Timer, WakeUpHelper},
TimeSpec,
},
};
use super::constant::*;
static mut FUTEX_DATA: Option<FutexData> = None;
pub struct FutexData {
data: SpinLock<HashMap<FutexKey, FutexHashBucket>>,
}
impl FutexData {
pub fn futex_map() -> SpinLockGuard<'static, HashMap<FutexKey, FutexHashBucket>> {
unsafe { FUTEX_DATA.as_ref().unwrap().data.lock() }
}
pub fn try_remove(key: &FutexKey) -> Option<FutexHashBucket> {
unsafe {
let mut guard = FUTEX_DATA.as_ref().unwrap().data.lock();
if let Some(futex) = guard.get(key) {
if futex.chain.is_empty() {
return guard.remove(key);
}
}
}
None
}
}
pub struct Futex;
// 对于同一个futex的进程或线程将会在这个bucket等待
pub struct FutexHashBucket {
// 该futex维护的等待队列
chain: LinkedList<Arc<FutexObj>>,
}
impl FutexHashBucket {
/// ## 判断是否在bucket里
pub fn contains(&self, futex_q: &FutexObj) -> bool {
self.chain
.iter()
.filter(|x| futex_q.pcb.ptr_eq(&x.pcb) && x.key == futex_q.key)
.count()
!= 0
}
/// 让futex_q在该bucket上挂起
///
/// 进入该函数前,需要关中断
#[inline(always)]
pub fn sleep_no_sched(&mut self, futex_q: Arc<FutexObj>) -> Result<(), SystemError> {
assert!(CurrentIrqArch::is_irq_enabled() == false);
self.chain.push_back(futex_q);
ProcessManager::mark_sleep(true)?;
Ok(())
}
/// ## 唤醒队列中的最多nr_wake个进程
///
/// return: 唤醒的进程数
#[inline(always)]
pub fn wake_up(
&mut self,
key: FutexKey,
bitset: Option<u32>,
nr_wake: u32,
) -> Result<usize, SystemError> {
let mut count = 0;
let mut pop_count = 0;
while let Some(futex_q) = self.chain.pop_front() {
if futex_q.key == key {
// TODO: 考虑优先级继承的机制
if let Some(bitset) = bitset {
if futex_q.bitset != bitset {
self.chain.push_back(futex_q);
continue;
}
}
// 唤醒
if futex_q.pcb.upgrade().is_some() {
self.remove(futex_q.clone());
ProcessManager::wakeup(&futex_q.pcb.upgrade().unwrap())?;
}
// 判断唤醒数
count += 1;
if count >= nr_wake {
break;
}
} else {
self.chain.push_back(futex_q);
}
// 判断是否循环完队列了
pop_count += 1;
if pop_count >= self.chain.len() {
break;
}
}
Ok(count as usize)
}
/// 将FutexObj从bucket中删除
pub fn remove(&mut self, futex: Arc<FutexObj>) {
self.chain.drain_filter(|x| Arc::ptr_eq(x, &futex));
}
}
#[derive(Debug)]
pub struct FutexObj {
pcb: Weak<ProcessControlBlock>,
key: FutexKey,
bitset: u32,
// TODO: 优先级继承
}
pub enum FutexAccess {
FutexRead,
FutexWrite,
}
#[allow(dead_code)]
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
/// ### 用于定位内核唯一的futex
pub enum InnerFutexKey {
Shared(SharedKey),
Private(PrivateKey),
}
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
pub struct FutexKey {
ptr: u64,
word: u64,
offset: u32,
key: InnerFutexKey,
}
/// 不同进程间通过文件共享futex变量表明该变量在文件中的位置
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
pub struct SharedKey {
i_seq: u64,
page_offset: u64,
}
/// 同一进程的不同线程共享futex变量表明该变量在进程地址空间中的位置
#[derive(Clone, Debug)]
pub struct PrivateKey {
// 所在的地址空间
address_space: Option<Weak<AddressSpace>>,
// 表示所在页面的初始地址
address: u64,
}
impl Hash for PrivateKey {
fn hash<H: ~const Hasher>(&self, state: &mut H) {
self.address.hash(state);
}
}
impl Eq for PrivateKey {}
impl PartialEq for PrivateKey {
fn eq(&self, other: &Self) -> bool {
if self.address_space.is_none() && other.address_space.is_none() {
return self.address == other.address;
} else {
return self
.address_space
.as_ref()
.unwrap_or(&Weak::default())
.ptr_eq(&other.address_space.as_ref().unwrap_or(&Weak::default()))
&& self.address == other.address;
}
}
}
impl Futex {
/// ### 初始化FUTEX_DATA
pub fn init() {
unsafe {
FUTEX_DATA = Some(FutexData {
data: SpinLock::new(HashMap::new()),
})
};
}
/// ### 让当前进程在指定futex上等待直到futex_wake显式唤醒
pub fn futex_wait(
uaddr: VirtAddr,
flags: FutexFlag,
val: u32,
abs_time: Option<TimeSpec>,
bitset: u32,
) -> Result<usize, SystemError> {
if bitset == 0 {
return Err(SystemError::EINVAL);
}
// 获取全局hash表的key值
let key = Self::get_futex_key(
uaddr,
flags.contains(FutexFlag::FLAGS_SHARED),
FutexAccess::FutexRead,
)?;
let mut futex_map_guard = FutexData::futex_map();
let bucket = futex_map_guard.get_mut(&key);
let bucket_mut = match bucket {
Some(bucket) => bucket,
None => {
let bucket = FutexHashBucket {
chain: LinkedList::new(),
};
futex_map_guard.insert(key.clone(), bucket);
futex_map_guard.get_mut(&key).unwrap()
}
};
// 使用UserBuffer读取futex
let user_reader =
UserBufferReader::new(uaddr.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
// 从用户空间读取到futex的val
let mut uval = 0;
// 读取
// 这里只尝试一种方式去读取用户空间与linux不太一致
// 对于linux如果bucket被锁住时读取失败将会将bucket解锁后重新读取
user_reader.copy_one_from_user::<u32>(&mut uval, 0)?;
// 不满足wait条件返回错误
if uval != val {
return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
}
let pcb = ProcessManager::current_pcb();
// 创建超时计时器任务
let mut timer = None;
if !abs_time.is_none() {
let time = abs_time.unwrap();
let wakeup_helper = WakeUpHelper::new(pcb.clone());
let sec = time.tv_sec;
let nsec = time.tv_nsec;
let jiffies = next_n_us_timer_jiffies((nsec / 1000 + sec * 1_000_000) as u64);
let wake_up = Timer::new(wakeup_helper, jiffies);
wake_up.activate();
timer = Some(wake_up);
}
let futex_q = Arc::new(FutexObj {
pcb: Arc::downgrade(&pcb),
key: key.clone(),
bitset,
});
let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
// 满足条件则将当前进程在该bucket上挂起
bucket_mut.sleep_no_sched(futex_q.clone()).map_err(|e| {
kwarn!("error:{e:?}");
e
})?;
drop(bucket_mut);
drop(futex_map_guard);
drop(irq_guard);
sched();
// 被唤醒后的检查
let mut futex_map_guard = FutexData::futex_map();
let bucket = futex_map_guard.get_mut(&key);
let bucket_mut = match bucket {
// 如果该pcb不在链表里面了或者该链表已经被释放就证明是正常的Wake操作
Some(bucket_mut) => {
if !bucket_mut.contains(&futex_q) {
// 取消定时器任务
if timer.is_some() {
timer.unwrap().cancel();
}
return Ok(0);
}
// 非正常唤醒,返回交给下层
bucket_mut
}
None => {
// 取消定时器任务
if timer.is_some() {
timer.unwrap().cancel();
}
return Ok(0);
}
};
// 如果是超时唤醒,则返回错误
if timer.is_some() {
if timer.clone().unwrap().timeout() {
bucket_mut.remove(futex_q);
return Err(SystemError::ETIMEDOUT);
}
}
// TODO: 如果没有挂起的信号则重新判断是否满足wait要求重新进入wait
// 经过前面的几个判断,到这里之后,
// 当前进程被唤醒大概率是其他进程更改了uval,需要重新去判断当前进程是否满足wait
// 到这里之后,前面的唤醒条件都不满足,则是被信号唤醒
// 需要处理信号然后重启futex系统调用
// 取消定时器任务
if timer.is_some() {
let timer = timer.unwrap();
if !timer.timeout() {
timer.cancel();
}
}
Ok(0)
}
// ### 唤醒指定futex上挂起的最多nr_wake个进程
pub fn futex_wake(
uaddr: VirtAddr,
flags: FutexFlag,
nr_wake: u32,
bitset: u32,
) -> Result<usize, SystemError> {
if bitset == 0 {
return Err(SystemError::EINVAL);
}
// 获取futex_key,并且判断地址空间合法性
let key = Self::get_futex_key(
uaddr,
flags.contains(FutexFlag::FLAGS_SHARED),
FutexAccess::FutexRead,
)?;
let mut binding = FutexData::futex_map();
let bucket_mut = binding.get_mut(&key).ok_or(SystemError::EINVAL)?;
// 确保后面的唤醒操作是有意义的
if bucket_mut.chain.is_empty() {
return Ok(0);
}
// 从队列中唤醒
let count = bucket_mut.wake_up(key.clone(), Some(bitset), nr_wake)?;
drop(bucket_mut);
drop(binding);
FutexData::try_remove(&key);
Ok(count)
}
/// ### 唤醒制定uaddr1上的最多nr_wake个进程然后将uaddr1最多nr_requeue个进程移动到uaddr2绑定的futex上
pub fn futex_requeue(
uaddr1: VirtAddr,
flags: FutexFlag,
uaddr2: VirtAddr,
nr_wake: i32,
nr_requeue: i32,
cmpval: Option<u32>,
requeue_pi: bool,
) -> Result<usize, SystemError> {
if nr_requeue < 0 || nr_wake < 0 {
return Err(SystemError::EINVAL);
}
// 暂时不支持优先级继承
if requeue_pi {
return Err(SystemError::ENOSYS);
}
let key1 = Self::get_futex_key(
uaddr1,
flags.contains(FutexFlag::FLAGS_SHARED),
FutexAccess::FutexRead,
)?;
let key2 = Self::get_futex_key(uaddr2, flags.contains(FutexFlag::FLAGS_SHARED), {
match requeue_pi {
true => FutexAccess::FutexWrite,
false => FutexAccess::FutexRead,
}
})?;
if requeue_pi && key1 == key2 {
return Err(SystemError::EINVAL);
}
if likely(!cmpval.is_none()) {
let uval_reader =
UserBufferReader::new(uaddr1.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
let curval = uval_reader.read_one_from_user::<u32>(0)?;
// 判断是否满足条件
if *curval != cmpval.unwrap() {
return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
}
}
let mut futex_data_guard = FutexData::futex_map();
if !requeue_pi {
// 唤醒nr_wake个进程
let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
let ret = bucket_1_mut.wake_up(key1.clone(), None, nr_wake as u32)?;
drop(bucket_1_mut);
// 将bucket1中最多nr_requeue个任务转移到bucket2
for _ in 0..nr_requeue {
let bucket_1_mut = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
let futex_q = bucket_1_mut.chain.pop_front();
match futex_q {
Some(futex_q) => {
let bucket_2_mut =
futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?;
bucket_2_mut.chain.push_back(futex_q);
}
None => {
break;
}
}
}
return Ok(ret);
} else {
// 暂时不支持优先级继承
todo!()
}
}
/// ### 唤醒futex上的进程的同时进行一些操作
pub fn futex_wake_op(
uaddr1: VirtAddr,
flags: FutexFlag,
uaddr2: VirtAddr,
nr_wake: i32,
nr_wake2: i32,
op: i32,
) -> Result<usize, SystemError> {
let key1 = Futex::get_futex_key(
uaddr1,
flags.contains(FutexFlag::FLAGS_SHARED),
FutexAccess::FutexRead,
)?;
let key2 = Futex::get_futex_key(
uaddr2,
flags.contains(FutexFlag::FLAGS_SHARED),
FutexAccess::FutexWrite,
)?;
let mut futex_data_guard = FutexData::futex_map();
let bucket1 = futex_data_guard.get_mut(&key1).ok_or(SystemError::EINVAL)?;
let mut wake_count = 0;
// 唤醒uaddr1中的进程
wake_count += bucket1.wake_up(key1, None, nr_wake as u32)?;
match Self::futex_atomic_op_inuser(op as u32, uaddr2) {
Ok(ret) => {
// 操作成功则唤醒uaddr2中的进程
if ret {
let bucket2 = futex_data_guard.get_mut(&key2).ok_or(SystemError::EINVAL)?;
wake_count += bucket2.wake_up(key2, None, nr_wake2 as u32)?;
}
}
Err(e) => {
// TODO:retry?
return Err(e);
}
}
Ok(wake_count)
}
fn get_futex_key(
uaddr: VirtAddr,
fshared: bool,
_access: FutexAccess,
) -> Result<FutexKey, SystemError> {
let mut address = uaddr.data();
// 计算相对页的偏移量
let offset = address & (MMArch::PAGE_SIZE - 1);
// 判断内存对齐
if !(uaddr.data() & (core::mem::size_of::<u32>() - 1) == 0) {
return Err(SystemError::EINVAL);
}
// 目前address指向所在页面的起始地址
address -= offset;
// 若不是进程间共享的futex则返回Private
if !fshared {
return Ok(FutexKey {
ptr: 0,
word: 0,
offset: offset as u32,
key: InnerFutexKey::Private(PrivateKey {
address: address as u64,
address_space: None,
}),
});
}
// 获取到地址所在地址空间
let address_space = AddressSpace::current()?;
// TODO 判断是否为匿名映射是匿名映射才返回PrivateKey
return Ok(FutexKey {
ptr: 0,
word: 0,
offset: offset as u32,
key: InnerFutexKey::Private(PrivateKey {
address: address as u64,
address_space: Some(Arc::downgrade(&address_space)),
}),
});
// 未实现共享内存机制,贡献内存部分应该通过inode构建SharedKey
// todo!("Shared memory not implemented");
}
pub fn futex_atomic_op_inuser(encoded_op: u32, uaddr: VirtAddr) -> Result<bool, SystemError> {
let op = FutexOP::from_bits((encoded_op & 0x70000000) >> 28).ok_or(SystemError::ENOSYS)?;
let cmp =
FutexOpCMP::from_bits((encoded_op & 0x0f000000) >> 24).ok_or(SystemError::ENOSYS)?;
let sign_extend32 = |value: u32, index: i32| {
let shift = (31 - index) as u8;
return (value << shift) >> shift;
};
let mut oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
let cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
if encoded_op & (FutexOP::FUTEX_OP_OPARG_SHIFT.bits() << 28) != 0 {
if oparg > 31 {
kwarn!(
"futex_wake_op: pid:{} tries to shift op by {}; fix this program",
ProcessManager::current_pcb().pid().data(),
oparg
);
oparg &= 31;
}
}
// TODO: 这个汇编似乎是有问题的,目前不好测试
let old_val = Self::arch_futex_atomic_op_inuser(op, oparg, uaddr)?;
match cmp {
FutexOpCMP::FUTEX_OP_CMP_EQ => {
return Ok(cmparg == old_val);
}
FutexOpCMP::FUTEX_OP_CMP_NE => {
return Ok(cmparg != old_val);
}
FutexOpCMP::FUTEX_OP_CMP_LT => {
return Ok(cmparg < old_val);
}
FutexOpCMP::FUTEX_OP_CMP_LE => {
return Ok(cmparg <= old_val);
}
FutexOpCMP::FUTEX_OP_CMP_GE => {
return Ok(cmparg >= old_val);
}
FutexOpCMP::FUTEX_OP_CMP_GT => {
return Ok(cmparg > old_val);
}
_ => {
return Err(SystemError::ENOSYS);
}
}
}
/// ### 对futex进行操作
///
/// 进入该方法会关闭中断保证修改的原子性,所以进入该方法前应确保中断锁已释放
///
/// ### return uaddr原来的值
#[allow(unused_assignments)]
pub fn arch_futex_atomic_op_inuser(
op: FutexOP,
oparg: u32,
uaddr: VirtAddr,
) -> Result<u32, SystemError> {
let guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
let reader =
UserBufferReader::new(uaddr.as_ptr::<u32>(), core::mem::size_of::<u32>(), true)?;
let oldval = reader.read_one_from_user::<u32>(0)?;
let atomic_addr = AtomicU64::new(uaddr.data() as u64);
// 这个指针是指向指针的指针
let ptr = atomic_addr.as_mut_ptr();
match op {
FutexOP::FUTEX_OP_SET => unsafe {
*((*ptr) as *mut u32) = oparg;
},
FutexOP::FUTEX_OP_ADD => unsafe {
*((*ptr) as *mut u32) += oparg;
},
FutexOP::FUTEX_OP_OR => unsafe {
*((*ptr) as *mut u32) |= oparg;
},
FutexOP::FUTEX_OP_ANDN => unsafe {
*((*ptr) as *mut u32) &= oparg;
},
FutexOP::FUTEX_OP_XOR => unsafe {
*((*ptr) as *mut u32) ^= oparg;
},
_ => return Err(SystemError::ENOSYS),
}
drop(guard);
Ok(*oldval)
}
}
#[no_mangle]
unsafe extern "C" fn rs_futex_init() {
Futex::init();
}

View File

@ -0,0 +1,3 @@
pub mod constant;
pub mod futex;
pub mod syscall;

View File

@ -0,0 +1,106 @@
use crate::{
mm::VirtAddr,
syscall::{Syscall, SystemError},
time::TimeSpec,
};
use super::{constant::*, futex::Futex};
impl Syscall {
pub fn do_futex(
uaddr: VirtAddr,
operation: FutexFlag,
val: u32,
timeout: Option<TimeSpec>,
uaddr2: VirtAddr,
val2: u32,
val3: u32,
) -> Result<usize, SystemError> {
let cmd = FutexArg::from_bits(operation.bits() & FutexFlag::FUTEX_CMD_MASK.bits())
.ok_or(SystemError::ENOSYS)?;
let mut flags = FutexFlag::FLAGS_MATCH_NONE;
if !operation.contains(FutexFlag::FUTEX_PRIVATE_FLAG) {
flags.insert(FutexFlag::FLAGS_SHARED);
}
if operation.contains(FutexFlag::FUTEX_CLOCK_REALTIME) {
flags.insert(FutexFlag::FLAGS_CLOCKRT);
if cmd != FutexArg::FUTEX_WAIT_BITSET
&& cmd != FutexArg::FUTEX_WAIT_REQUEUE_PI
&& cmd != FutexArg::FUTEX_LOCK_PI2
{
return Err(SystemError::ENOSYS);
}
}
match cmd {
FutexArg::FUTEX_WAIT => {
return Futex::futex_wait(uaddr, flags, val, timeout, FUTEX_BITSET_MATCH_ANY);
}
FutexArg::FUTEX_WAIT_BITSET => {
return Futex::futex_wait(uaddr, flags, val, timeout, val3);
}
FutexArg::FUTEX_WAKE => {
return Futex::futex_wake(uaddr, flags, val, FUTEX_BITSET_MATCH_ANY);
}
FutexArg::FUTEX_WAKE_BITSET => {
return Futex::futex_wake(uaddr, flags, val, val3);
}
FutexArg::FUTEX_REQUEUE => {
return Futex::futex_requeue(
uaddr,
flags,
uaddr2,
val as i32,
val2 as i32,
None,
false,
);
}
FutexArg::FUTEX_CMP_REQUEUE => {
return Futex::futex_requeue(
uaddr,
flags,
uaddr2,
val as i32,
val2 as i32,
Some(val3),
false,
);
}
FutexArg::FUTEX_WAKE_OP => {
return Futex::futex_wake_op(
uaddr,
flags,
uaddr2,
val as i32,
val2 as i32,
val3 as i32,
);
}
FutexArg::FUTEX_LOCK_PI => {
todo!()
}
FutexArg::FUTEX_LOCK_PI2 => {
todo!()
}
FutexArg::FUTEX_UNLOCK_PI => {
todo!()
}
FutexArg::FUTEX_TRYLOCK_PI => {
todo!()
}
FutexArg::FUTEX_WAIT_REQUEUE_PI => {
todo!()
}
FutexArg::FUTEX_CMP_REQUEUE_PI => {
todo!()
}
_ => {
return Err(SystemError::ENOSYS);
}
}
}
}

View File

@ -21,4 +21,6 @@ pub mod spinlock;
pub mod vec_cursor;
#[macro_use]
pub mod volatile;
pub mod futex;
pub mod rand;
pub mod wait_queue;

7
kernel/src/libs/rand.rs Normal file
View File

@ -0,0 +1,7 @@
bitflags! {
pub struct GRandFlags: u8{
const GRND_NONBLOCK = 0x0001;
const GRND_RANDOM = 0x0002;
const GRND_INSECURE = 0x0004;
}
}

View File

@ -38,6 +38,7 @@ extern void rs_kthread_init();
extern void rs_init_intertrait();
extern void rs_init_before_mem_init();
extern int rs_setup_arch();
extern void rs_futex_init();
extern int rs_hpet_init();
extern int rs_hpet_enable();
extern int rs_tsc_init();
@ -152,6 +153,7 @@ void system_initialize()
smp_init();
io_mfence();
rs_futex_init();
cli();
rs_hpet_init();
rs_hpet_enable();

View File

@ -706,7 +706,7 @@ impl UserMappings {
/// 请注意在调用本函数之前必须先确定region所在范围内没有VMA。
fn reserve_hole(&mut self, region: &VirtRegion) {
let prev_hole: Option<(&VirtAddr, &mut usize)> =
self.vm_holes.range_mut(..region.start()).next_back();
self.vm_holes.range_mut(..=region.start()).next_back();
if let Some((prev_hole_vaddr, prev_hole_size)) = prev_hole {
let prev_hole_end = prev_hole_vaddr.add(*prev_hole_size);
@ -946,6 +946,8 @@ pub struct VMA {
/// VMA所属的用户地址空间
user_address_space: Option<Weak<AddressSpace>>,
self_ref: Weak<LockedVMA>,
provider: Provider,
}
impl core::hash::Hash for VMA {
@ -956,6 +958,12 @@ impl core::hash::Hash for VMA {
}
}
/// 描述不同类型的内存提供者或资源
#[derive(Debug)]
pub enum Provider {
Allocated, // TODO:其他
}
#[allow(dead_code)]
impl VMA {
pub fn region(&self) -> &VirtRegion {
@ -974,6 +982,7 @@ impl VMA {
mapped: self.mapped,
user_address_space: self.user_address_space.clone(),
self_ref: self.self_ref.clone(),
provider: Provider::Allocated,
};
}
@ -1019,8 +1028,15 @@ impl VMA {
///
/// - `prot_flags` 要检查的标志位
pub fn can_have_flags(&self, prot_flags: ProtFlags) -> bool {
return (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE))
let is_downgrade = (self.flags.has_write() || !prot_flags.contains(ProtFlags::PROT_WRITE))
&& (self.flags.has_execute() || !prot_flags.contains(ProtFlags::PROT_EXEC));
match self.provider {
Provider::Allocated { .. } => true,
#[allow(unreachable_patterns)]
_ => is_downgrade,
}
}
/// 把物理地址映射到虚拟地址
@ -1070,6 +1086,7 @@ impl VMA {
mapped: true,
user_address_space: None,
self_ref: Weak::default(),
provider: Provider::Allocated,
});
return Ok(r);
}
@ -1118,6 +1135,7 @@ impl VMA {
mapped: true,
user_address_space: None,
self_ref: Weak::default(),
provider: Provider::Allocated,
});
drop(flusher);
// kdebug!("VMA::zeroed: flusher dropped");

View File

@ -36,16 +36,25 @@ pub extern "C" fn rs_current_pcb_pid() -> u32 {
#[no_mangle]
pub extern "C" fn rs_current_pcb_preempt_count() -> u32 {
if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
return 0;
}
return ProcessManager::current_pcb().preempt_count() as u32;
}
#[no_mangle]
pub extern "C" fn rs_current_pcb_flags() -> u32 {
if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
return 0;
}
return ProcessManager::current_pcb().flags().bits() as u32;
}
#[no_mangle]
pub extern "C" fn rs_current_pcb_thread_rbp() -> u64 {
if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
return 0;
}
return ProcessManager::current_pcb().arch_info_irqsave().rbp() as u64;
}
@ -61,5 +70,8 @@ pub extern "C" fn rs_preempt_enable() {
#[no_mangle]
pub extern "C" fn rs_process_do_exit(exit_code: usize) -> usize {
if unsafe { !__PROCESS_MANAGEMENT_INIT_DONE } {
return 0;
}
ProcessManager::exit(exit_code);
}

View File

@ -1,9 +1,13 @@
use alloc::{string::ToString, sync::Arc};
use crate::{
arch::interrupt::TrapFrame, filesystem::procfs::procfs_register_pid,
ipc::signal::flush_signal_handlers, libs::rwlock::RwLock, process::ProcessFlags,
syscall::SystemError,
arch::interrupt::TrapFrame,
filesystem::procfs::procfs_register_pid,
ipc::signal::flush_signal_handlers,
libs::rwlock::RwLock,
mm::VirtAddr,
process::ProcessFlags,
syscall::{user_access::UserBufferWriter, SystemError},
};
use super::{
@ -13,21 +17,116 @@ use super::{
bitflags! {
/// 进程克隆标志
pub struct CloneFlags: u32 {
/// 在进程间共享文件系统信息
const CLONE_FS = (1 << 0);
/// 克隆时,与父进程共享信号结构体
const CLONE_SIGNAL = (1 << 1);
/// 克隆时,与父进程共享信号处理结构体
const CLONE_SIGHAND = (1 << 2);
/// 克隆时将原本被设置为SIG_IGNORE的信号设置回SIG_DEFAULT
const CLONE_CLEAR_SIGHAND = (1 << 3);
pub struct CloneFlags: u64 {
/// 在进程间共享虚拟内存空间
const CLONE_VM = (1 << 4);
/// 拷贝线程
const CLONE_THREAD = (1 << 5);
const CLONE_VM = 0x00000100;
/// 在进程间共享文件系统信息
const CLONE_FS = 0x00000200;
/// 共享打开的文件
const CLONE_FILES = (1 << 6);
const CLONE_FILES = 0x00000400;
/// 克隆时,与父进程共享信号处理结构体
const CLONE_SIGHAND = 0x00000800;
/// 返回进程的文件描述符
const CLONE_PIDFD = 0x00001000;
/// 使克隆对象成为父进程的跟踪对象
const CLONE_PTRACE = 0x00002000;
/// 在执行 exec() 或 _exit() 之前挂起父进程的执行
const CLONE_VFORK = 0x00004000;
/// 使克隆对象的父进程为调用进程的父进程
const CLONE_PARENT = 0x00008000;
/// 拷贝线程
const CLONE_THREAD = 0x00010000;
/// 创建一个新的命名空间,其中包含独立的文件系统挂载点层次结构。
const CLONE_NEWNS = 0x00020000;
/// 与父进程共享 System V 信号量。
const CLONE_SYSVSEM = 0x00040000;
/// 设置其线程本地存储
const CLONE_SETTLS = 0x00080000;
/// 设置partent_tid地址为子进程线程 ID
const CLONE_PARENT_SETTID = 0x00100000;
/// 在子进程中设置一个清除线程 ID 的用户空间地址
const CLONE_CHILD_CLEARTID = 0x00200000;
/// 创建一个新线程,将其设置为分离状态
const CLONE_DETACHED = 0x00400000;
/// 使其在创建者进程或线程视角下成为无法跟踪的。
const CLONE_UNTRACED = 0x00800000;
/// 设置其子进程线程 ID
const CLONE_CHILD_SETTID = 0x01000000;
/// 将其放置在一个新的 cgroup 命名空间中
const CLONE_NEWCGROUP = 0x02000000;
/// 将其放置在一个新的 UTS 命名空间中
const CLONE_NEWUTS = 0x04000000;
/// 将其放置在一个新的 IPC 命名空间中
const CLONE_NEWIPC = 0x08000000;
/// 将其放置在一个新的用户命名空间中
const CLONE_NEWUSER = 0x10000000;
/// 将其放置在一个新的 PID 命名空间中
const CLONE_NEWPID = 0x20000000;
/// 将其放置在一个新的网络命名空间中
const CLONE_NEWNET = 0x40000000;
/// 在新的 I/O 上下文中运行它
const CLONE_IO = 0x80000000;
/// 克隆时,与父进程共享信号结构体
const CLONE_SIGNAL = 0x00010000 | 0x00000800;
/// 克隆时将原本被设置为SIG_IGNORE的信号设置回SIG_DEFAULT
const CLONE_CLEAR_SIGHAND = 0x100000000;
}
}
/// ## clone与clone3系统调用的参数载体
///
/// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活
///
/// 仅仅作为参数传递
#[derive(Debug, Clone, Copy)]
pub struct KernelCloneArgs {
pub flags: CloneFlags,
// 下列属性均来自用户空间
pub pidfd: VirtAddr,
pub child_tid: VirtAddr,
pub parent_tid: VirtAddr,
pub set_tid: VirtAddr,
pub exit_signal: i32,
pub stack: usize,
// clone3用到
pub stack_size: usize,
pub tls: usize,
pub set_tid_size: usize,
pub cgroup: i32,
pub io_thread: bool,
pub kthread: bool,
pub idle: bool,
pub func: VirtAddr,
pub fn_arg: VirtAddr,
// cgrp 和 cset?
}
impl KernelCloneArgs {
pub fn new() -> Self {
let null_addr = VirtAddr::new(0);
Self {
flags: unsafe { CloneFlags::from_bits_unchecked(0) },
pidfd: null_addr,
child_tid: null_addr,
parent_tid: null_addr,
set_tid: null_addr,
exit_signal: 0,
stack: 0,
stack_size: 0,
tls: 0,
set_tid_size: 0,
cgroup: 0,
io_thread: false,
kthread: false,
idle: false,
func: null_addr,
fn_arg: null_addr,
}
}
}
@ -56,53 +155,9 @@ impl ProcessManager {
let name = current_pcb.basic().name().to_string();
let pcb = ProcessControlBlock::new(name, new_kstack);
// 克隆架构相关信息
*pcb.arch_info() = current_pcb.arch_info_irqsave().clone();
// 为内核线程设置worker private字段。也许由内核线程机制去做会更好
if current_pcb.flags().contains(ProcessFlags::KTHREAD) {
*pcb.worker_private() = Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new()))
}
// 拷贝标志位
ProcessManager::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// 拷贝用户地址空间
ProcessManager::copy_mm(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// 拷贝文件描述符表
ProcessManager::copy_files(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
//拷贝信号相关数据
ProcessManager::copy_sighand(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy sighands from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// 拷贝线程
ProcessManager::copy_thread(&clone_flags, &current_pcb, &pcb, &current_trapframe).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
let mut args = KernelCloneArgs::new();
args.flags = clone_flags;
Self::copy_process(&current_pcb, &pcb, args, current_trapframe)?;
ProcessManager::add_pcb(pcb.clone());
@ -168,7 +223,6 @@ impl ProcessManager {
unsafe { new_pcb.basic_mut().set_user_vm(Some(old_address_space)) };
return Ok(());
}
let new_address_space = old_address_space.write().try_clone().unwrap_or_else(|e| {
panic!(
"copy_mm: Failed to clone address space of current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
@ -215,4 +269,139 @@ impl ProcessManager {
}
return Ok(());
}
/// 拷贝进程信息
///
/// ## panic:
/// 某一步拷贝失败时会引发panic
/// 例如copy_mm等失败时会触发panic
///
/// ## 参数
///
/// - clone_flags 标志位
/// - des_pcb 目标pcb
/// - src_pcb 拷贝源pcb
///
/// ## return
/// - 发生错误时返回Err(SystemError)
pub fn copy_process(
current_pcb: &Arc<ProcessControlBlock>,
pcb: &Arc<ProcessControlBlock>,
clone_args: KernelCloneArgs,
current_trapframe: &mut TrapFrame,
) -> Result<(), SystemError> {
let clone_flags = clone_args.flags;
// 不允许与不同namespace的进程共享根目录
if (clone_flags == (CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_FS))
|| clone_flags == (CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_FS)
{
return Err(SystemError::EINVAL);
}
// 线程组必须共享信号,分离线程只能在线程组内启动。
if clone_flags.contains(CloneFlags::CLONE_THREAD)
&& !clone_flags.contains(CloneFlags::CLONE_SIGHAND)
{
return Err(SystemError::EINVAL);
}
// 共享信号处理器意味着共享vm。
// 线程组也意味着共享vm。阻止这种情况可以简化其他代码。
if clone_flags.contains(CloneFlags::CLONE_SIGHAND)
&& !clone_flags.contains(CloneFlags::CLONE_VM)
{
return Err(SystemError::EINVAL);
}
// TODO: 处理CLONE_PARENT 与 SIGNAL_UNKILLABLE的情况
// 如果新进程使用不同的 pid 或 namespace
// 则不允许它与分叉任务共享线程组。
if clone_flags.contains(CloneFlags::CLONE_THREAD) {
if clone_flags.contains(CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID) {
return Err(SystemError::EINVAL);
}
// TODO: 判断新进程与当前进程namespace是否相同不同则返回错误
}
// 如果新进程将处于不同的time namespace
// 则不能让它共享vm或线程组。
if clone_flags.contains(CloneFlags::CLONE_THREAD | CloneFlags::CLONE_VM) {
// TODO: 判断time namespace不同则返回错误
}
if clone_flags.contains(CloneFlags::CLONE_PIDFD)
&& clone_flags.contains(CloneFlags::CLONE_DETACHED | CloneFlags::CLONE_THREAD)
{
return Err(SystemError::EINVAL);
}
// TODO: 克隆前应该锁信号处理,等待克隆完成后再处理
// 克隆架构相关
*pcb.arch_info() = current_pcb.arch_info_irqsave().clone();
// 为内核线程设置WorkerPrivate
if current_pcb.flags().contains(ProcessFlags::KTHREAD) {
*pcb.worker_private() =
Some(WorkerPrivate::KernelThread(KernelThreadPcbPrivate::new()));
}
// 设置clear_child_tid在线程结束时将其置0以通知父进程
if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
pcb.thread.write().clear_child_tid = Some(clone_args.child_tid);
}
// 设置child_tid意味着子线程能够知道自己的id
if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
pcb.thread.write().set_child_tid = Some(clone_args.child_tid);
}
// 将子进程/线程的id存储在用户态传进的地址中
if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
let mut writer = UserBufferWriter::new(
clone_args.parent_tid.data() as *mut i32,
core::mem::size_of::<i32>(),
true,
)?;
writer.copy_one_to_user(&(pcb.pid().0 as i32), 0)?;
}
// 拷贝标志位
Self::copy_flags(&clone_flags, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy flags from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// 拷贝用户地址空间
Self::copy_mm(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy mm from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// 拷贝文件描述符表
Self::copy_files(&clone_flags, &current_pcb, &pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy files from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
// todo: 拷贝信号相关数据
// 拷贝线程
Self::copy_thread(&current_pcb, &pcb, clone_args,&current_trapframe).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
Ok(())
}
}

View File

@ -282,8 +282,10 @@ impl KernelThreadMechanism {
// 初始化kthreadd
let closure = KernelThreadClosure::EmptyClosure((Box::new(Self::kthread_daemon), ()));
let info = KernelThreadCreateInfo::new(closure, "kthreadd".to_string());
let kthreadd_pid: Pid =
Self::__inner_create(&info, CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL)
let kthreadd_pid: Pid = Self::__inner_create(
&info,
CloneFlags::CLONE_VM | CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL,
)
.expect("Failed to create kthread daemon");
let pcb = ProcessManager::find(kthreadd_pid).unwrap();

View File

@ -1,5 +1,6 @@
use core::{
hash::{Hash, Hasher},
hint::spin_loop,
intrinsics::{likely, unlikely},
mem::ManuallyDrop,
sync::atomic::{compiler_fence, AtomicBool, AtomicI32, AtomicIsize, AtomicUsize, Ordering},
@ -29,6 +30,10 @@ use crate::{
libs::{
align::AlignedBox,
casting::DowncastArc,
futex::{
constant::{FutexFlag, FUTEX_BITSET_MATCH_ANY},
futex::Futex,
},
rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard},
spinlock::{SpinLock, SpinLockGuard},
wait_queue::WaitQueue,
@ -36,11 +41,12 @@ use crate::{
mm::{percpu::PerCpuVar, set_INITIAL_PROCESS_ADDRESS_SPACE, ucontext::AddressSpace, VirtAddr},
net::socket::SocketInode,
sched::{
completion::Completion,
core::{sched_enqueue, CPU_EXECUTING},
SchedPolicy, SchedPriority,
},
smp::kick_cpu,
syscall::{Syscall, SystemError},
syscall::{user_access::clear_user, Syscall, SystemError},
};
use self::kthread::WorkerPrivate;
@ -116,6 +122,12 @@ impl ProcessManager {
/// 获取当前进程的pcb
pub fn current_pcb() -> Arc<ProcessControlBlock> {
if unlikely(unsafe { !__PROCESS_MANAGEMENT_INIT_DONE }) {
kerror!("unsafe__PROCESS_MANAGEMENT_INIT_DONE == false");
loop {
spin_loop();
}
}
return ProcessControlBlock::arch_current_pcb();
}
@ -252,7 +264,7 @@ impl ProcessManager {
assert_eq!(
CurrentIrqArch::is_irq_enabled(),
false,
"interrupt must be disabled before enter ProcessManager::mark_sleep()"
"interrupt must be disabled before enter ProcessManager::mark_stop()"
);
let pcb = ProcessManager::current_pcb();
@ -306,9 +318,31 @@ impl ProcessManager {
.write()
.set_state(ProcessState::Exited(exit_code));
pcb.wait_queue.wakeup(Some(ProcessState::Blocked(true)));
// 进行进程退出后的工作
let thread = pcb.thread.write();
if let Some(addr) = thread.set_child_tid {
unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
}
if let Some(addr) = thread.clear_child_tid {
if Arc::strong_count(&pcb.basic().user_vm().expect("User VM Not found")) > 1 {
let _ =
Futex::futex_wake(addr, FutexFlag::FLAGS_MATCH_NONE, 1, FUTEX_BITSET_MATCH_ANY);
}
unsafe { clear_user(addr, core::mem::size_of::<i32>()).expect("clear tid failed") };
}
// 如果是vfork出来的进程则需要处理completion
if thread.vfork_done.is_some() {
thread.vfork_done.as_ref().unwrap().complete_all();
}
drop(thread);
unsafe { pcb.basic_mut().set_user_vm(None) };
drop(pcb);
ProcessManager::exit_notify();
drop(irq_guard);
sched();
loop {}
}
@ -316,15 +350,21 @@ impl ProcessManager {
pub unsafe fn release(pid: Pid) {
let pcb = ProcessManager::find(pid);
if !pcb.is_none() {
let pcb = pcb.unwrap();
// let pcb = pcb.unwrap();
// 判断该pcb是否在全局没有任何引用
if Arc::strong_count(&pcb) <= 1 {
drop(pcb);
// TODO: 当前pcb的Arc指针存在泄露问题引用计数不正确打算在接下来实现debug专用的Arc方便调试然后解决这个bug。
// 因此目前暂时注释掉,使得能跑
// if Arc::strong_count(&pcb) <= 2 {
// drop(pcb);
// ALL_PROCESS.lock().as_mut().unwrap().remove(&pid);
// } else {
// // 如果不为1就panic
// let msg = format!("pcb '{:?}' is still referenced, strong count={}",pcb.pid(), Arc::strong_count(&pcb));
// kerror!("{}", msg);
// panic!()
// }
ALL_PROCESS.lock().as_mut().unwrap().remove(&pid);
} else {
// 如果不为1就panic
panic!("pcb is still referenced");
}
}
}
@ -482,10 +522,13 @@ pub struct ProcessControlBlock {
parent_pcb: RwLock<Weak<ProcessControlBlock>>,
/// 子进程链表
children: RwLock<HashMap<Pid, Arc<ProcessControlBlock>>>,
children: RwLock<Vec<Pid>>,
/// 等待队列
wait_queue: WaitQueue,
/// 线程信息
thread: RwLock<ThreadInfo>,
}
impl ProcessControlBlock {
@ -545,20 +588,26 @@ impl ProcessControlBlock {
sig_info: RwLock::new(ProcessSignalInfo::default()),
sig_struct: SpinLock::new(SignalStruct::default()),
parent_pcb: RwLock::new(ppcb),
children: RwLock::new(HashMap::new()),
children: RwLock::new(Vec::new()),
wait_queue: WaitQueue::INIT,
thread: RwLock::new(ThreadInfo::new()),
};
let pcb = Arc::new(pcb);
// 设置进程的arc指针到内核栈的最低地址处
unsafe { pcb.kernel_stack.write().set_pcb(Arc::clone(&pcb)).unwrap() };
unsafe {
pcb.kernel_stack
.write()
.set_pcb(Arc::downgrade(&pcb))
.unwrap()
};
// 将当前pcb加入父进程的子进程哈希表中
if pcb.pid() > Pid(1) {
if let Some(ppcb_arc) = pcb.parent_pcb.read().upgrade() {
let mut children = ppcb_arc.children.write();
children.insert(pcb.pid(), pcb.clone());
children.push(pcb.pid());
} else {
panic!("parent pcb is None");
}
@ -700,11 +749,11 @@ impl ProcessControlBlock {
unsafe fn adopt_childen(&self) -> Result<(), SystemError> {
match ProcessManager::find(Pid(1)) {
Some(init_pcb) => {
let mut childen_guard = self.children.write();
let childen_guard = self.children.write();
let mut init_childen_guard = init_pcb.children.write();
childen_guard.drain().for_each(|(pid, child)| {
init_childen_guard.insert(pid, child);
childen_guard.iter().for_each(|pid| {
init_childen_guard.push(*pid);
});
return Ok(());
@ -747,12 +796,31 @@ impl Drop for ProcessControlBlock {
.unwrap_or_else(|e| panic!("procfs_unregister_pid failed: error: {e:?}"));
if let Some(ppcb) = self.parent_pcb.read().upgrade() {
ppcb.children.write().remove(&self.pid());
ppcb.children.write().drain_filter(|pid| *pid == self.pid());
}
}
}
unsafe { ProcessManager::release(self.pid()) };
/// 线程信息
#[derive(Debug)]
pub struct ThreadInfo {
// 来自用户空间记录用户线程id的地址在该线程结束时将该地址置0以通知父进程
clear_child_tid: Option<VirtAddr>,
set_child_tid: Option<VirtAddr>,
vfork_done: Option<Arc<Completion>>,
}
impl ThreadInfo {
pub fn new() -> Self {
Self {
clear_child_tid: None,
set_child_tid: None,
vfork_done: None,
}
}
}
/// 进程的基本信息
///
/// 这个结构体保存进程的基本信息,主要是那些不会随着进程的运行而经常改变的信息。
@ -994,9 +1062,9 @@ impl KernelStack {
return VirtAddr::new(self.stack.as_ref().unwrap().as_ptr() as usize + Self::SIZE);
}
pub unsafe fn set_pcb(&mut self, pcb: Arc<ProcessControlBlock>) -> Result<(), SystemError> {
// 将一个Arc<ProcessControlBlock>放到内核栈的最低地址处
let p: *const ProcessControlBlock = Arc::into_raw(pcb);
pub unsafe fn set_pcb(&mut self, pcb: Weak<ProcessControlBlock>) -> Result<(), SystemError> {
// 将一个Weak<ProcessControlBlock>放到内核栈的最低地址处
let p: *const ProcessControlBlock = Weak::into_raw(pcb);
let stack_bottom_ptr = self.start_address().data() as *mut *const ProcessControlBlock;
// 如果内核栈的最低地址处已经有了一个pcb那么这里就不再设置,直接返回错误
@ -1021,10 +1089,10 @@ impl KernelStack {
}
// 为了防止内核栈的pcb指针被释放这里需要将其包装一下使得Arc的drop不会被调用
let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
ManuallyDrop::new(Arc::from_raw(p));
let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
ManuallyDrop::new(Weak::from_raw(p));
let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade()?;
return Some(new_arc);
}
}
@ -1032,8 +1100,8 @@ impl KernelStack {
impl Drop for KernelStack {
fn drop(&mut self) {
if !self.stack.is_none() {
let pcb_ptr: Arc<ProcessControlBlock> = unsafe {
Arc::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock)
let pcb_ptr: Weak<ProcessControlBlock> = unsafe {
Weak::from_raw(self.stack.as_ref().unwrap().as_ptr() as *const ProcessControlBlock)
};
drop(pcb_ptr);
}

View File

@ -1,13 +1,24 @@
use core::ffi::c_void;
use alloc::{string::String, vec::Vec};
use alloc::{
string::{String, ToString},
sync::Arc,
vec::Vec,
};
use super::{abi::WaitOption, fork::CloneFlags, Pid, ProcessManager, ProcessState};
use super::{
abi::WaitOption,
fork::{CloneFlags, KernelCloneArgs},
KernelStack, Pid, ProcessManager, ProcessState,
};
use crate::{
arch::{interrupt::TrapFrame, sched::sched, CurrentIrqArch},
exception::InterruptArch,
filesystem::vfs::MAX_PATHLEN,
filesystem::{procfs::procfs_register_pid, vfs::MAX_PATHLEN},
include::bindings::bindings::verify_area,
mm::VirtAddr,
process::ProcessControlBlock,
sched::completion::Completion,
syscall::{
user_access::{
check_and_clone_cstr, check_and_clone_cstr_array, UserBufferReader, UserBufferWriter,
@ -42,6 +53,11 @@ impl Syscall {
// argv,
// envp
// );
// kdebug!(
// "before execve: strong count: {}",
// Arc::strong_count(&ProcessManager::current_pcb())
// );
if path.is_null() {
return Err(SystemError::EINVAL);
}
@ -66,6 +82,10 @@ impl Syscall {
// 关闭设置了O_CLOEXEC的文件描述符
let fd_table = ProcessManager::current_pcb().fd_table();
fd_table.write().close_on_exec();
// kdebug!(
// "after execve: strong count: {}",
// Arc::strong_count(&ProcessManager::current_pcb())
// );
return Ok(());
}
@ -95,12 +115,13 @@ impl Syscall {
if pid > 0 {
let pid = Pid(pid as usize);
let child_pcb = rd_childen.get(&pid).ok_or(SystemError::ECHILD)?.clone();
let child_pcb = ProcessManager::find(pid).ok_or(SystemError::ECHILD)?;
drop(rd_childen);
loop {
let state = child_pcb.sched_info().state();
// 获取退出码
match child_pcb.sched_info().state() {
match state {
ProcessState::Runnable => {
if options.contains(WaitOption::WNOHANG)
|| options.contains(WaitOption::WNOWAIT)
@ -123,12 +144,16 @@ impl Syscall {
}
}
ProcessState::Exited(status) => {
// kdebug!("wait4: child exited, pid: {:?}, status: {status}\n", pid);
if !wstatus.is_null() {
wstatus_buf.copy_one_to_user(
&(status | WaitOption::WEXITED.bits() as usize),
&(status as u32 | WaitOption::WEXITED.bits()),
0,
)?;
}
drop(child_pcb);
// kdebug!("wait4: to release {pid:?}");
unsafe { ProcessManager::release(pid) };
return Ok(pid.into());
}
};
@ -147,7 +172,8 @@ impl Syscall {
} else {
// 等待任意子进程(这两)
let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
for (pid, pcb) in rd_childen.iter() {
for pid in rd_childen.iter() {
let pcb = ProcessManager::find(*pid).ok_or(SystemError::ECHILD)?;
if pcb.sched_info().state().is_exited() {
if !wstatus.is_null() {
wstatus_buf.copy_one_to_user(&0, 0)?;
@ -200,4 +226,73 @@ impl Syscall {
let current_pcb = ProcessManager::current_pcb();
return Ok(current_pcb.basic().ppid());
}
pub fn clone(
current_trapframe: &mut TrapFrame,
clone_args: KernelCloneArgs,
) -> Result<usize, SystemError> {
let flags = clone_args.flags;
let vfork = Arc::new(Completion::new());
if flags.contains(CloneFlags::CLONE_PIDFD)
&& flags.contains(CloneFlags::CLONE_PARENT_SETTID)
{
return Err(SystemError::EINVAL);
}
let current_pcb = ProcessManager::current_pcb();
let new_kstack = KernelStack::new()?;
let name = current_pcb.basic().name().to_string();
let pcb = ProcessControlBlock::new(name, new_kstack);
// 克隆pcb
ProcessManager::copy_process(&current_pcb, &pcb, clone_args, current_trapframe)?;
ProcessManager::add_pcb(pcb.clone());
// 向procfs注册进程
procfs_register_pid(pcb.pid()).unwrap_or_else(|e| {
panic!(
"fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}",
pcb.pid(),
e
)
});
if flags.contains(CloneFlags::CLONE_VFORK) {
pcb.thread.write().vfork_done = Some(vfork.clone());
}
if pcb.thread.read().set_child_tid.is_some() {
let addr = pcb.thread.read().set_child_tid.unwrap();
let mut writer =
UserBufferWriter::new(addr.as_ptr::<i32>(), core::mem::size_of::<i32>(), true)?;
writer.copy_one_to_user(&(pcb.pid().data() as i32), 0)?;
}
ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
panic!(
"fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}",
pcb.pid(),
e
)
});
if flags.contains(CloneFlags::CLONE_VFORK) {
// 等待子进程结束或者exec;
vfork.wait_for_completion_interruptible()?;
}
return Ok(pcb.pid().0);
}
/// 设置线程地址
pub fn set_tid_address(ptr: usize) -> Result<usize, SystemError> {
if !unsafe { verify_area(ptr as u64, core::mem::size_of::<i32>() as u64) } {
return Err(SystemError::EFAULT);
}
let pcb = ProcessManager::current_pcb();
pcb.thread.write().clear_child_tid = Some(VirtAddr::new(ptr));
Ok(pcb.pid.0)
}
}

View File

@ -97,7 +97,7 @@ impl Completion {
}
/// @brief 永久标记done为Complete_All并从wait_queue中删除所有节点
pub fn complete_all(&mut self) {
pub fn complete_all(&self) {
let mut inner = self.inner.lock_irqsave();
inner.done = COMPLETE_ALL;
inner.wait_queue.wakeup_all(None);

View File

@ -3,7 +3,10 @@ use core::{
sync::atomic::{AtomicBool, Ordering},
};
use crate::kdebug;
use crate::{
libs::{futex::constant::FutexFlag, rand::GRandFlags},
process::fork::KernelCloneArgs,
};
use num_traits::{FromPrimitive, ToPrimitive};
@ -21,7 +24,7 @@ use crate::{
libs::align::page_align_up,
mm::{verify_area, MemoryManagementArch, VirtAddr},
net::syscall::SockAddr,
process::Pid,
process::{fork::CloneFlags, Pid},
time::{
syscall::{PosixTimeZone, PosixTimeval},
TimeSpec,
@ -372,7 +375,7 @@ pub const SYS_BIND: usize = 49;
pub const SYS_LISTEN: usize = 50;
pub const SYS_GETSOCKNAME: usize = 51;
pub const SYS_GETPEERNAME: usize = 52;
pub const SYS_SOCKET_PAIR: usize = 53;
pub const SYS_SETSOCKOPT: usize = 54;
pub const SYS_GETSOCKOPT: usize = 55;
@ -536,7 +539,6 @@ impl Syscall {
Self::lseek(fd, w)
}
SYS_IOCTL => {
kdebug!("SYS_IOCTL");
let fd = args[0];
let cmd = args[1];
let data = args[2];
@ -943,7 +945,7 @@ impl Syscall {
SYS_MUNMAP => {
let addr = args[0];
let len = page_align_up(args[1]);
if addr & MMArch::PAGE_SIZE != 0 {
if addr & (MMArch::PAGE_SIZE - 1) != 0 {
// The addr argument is not a multiple of the page size
Err(SystemError::EINVAL)
} else {
@ -953,7 +955,7 @@ impl Syscall {
SYS_MPROTECT => {
let addr = args[0];
let len = page_align_up(args[1]);
if addr & MMArch::PAGE_SIZE != 0 {
if addr & (MMArch::PAGE_SIZE - 1) != 0 {
// The addr argument is not a multiple of the page size
Err(SystemError::EINVAL)
} else {
@ -1023,6 +1025,102 @@ impl Syscall {
Self::mknod(path as *const i8, flags, DeviceNumber::from(dev_t))
}
SYS_CLONE => {
let parent_tid = VirtAddr::new(args[2]);
let child_tid = VirtAddr::new(args[3]);
// 地址校验
verify_area(parent_tid, core::mem::size_of::<i32>())?;
verify_area(child_tid, core::mem::size_of::<i32>())?;
let mut clone_args = KernelCloneArgs::new();
clone_args.flags = CloneFlags::from_bits_truncate(args[0] as u64);
clone_args.stack = args[1];
clone_args.parent_tid = parent_tid;
clone_args.child_tid = child_tid;
clone_args.tls = args[4];
Self::clone(frame, clone_args)
}
SYS_FUTEX => {
let uaddr = VirtAddr::new(args[0]);
let operation = FutexFlag::from_bits(args[1] as u32).ok_or(SystemError::ENOSYS)?;
let val = args[2] as u32;
let utime = args[3];
let uaddr2 = VirtAddr::new(args[4]);
let val3 = args[5] as u32;
verify_area(uaddr, core::mem::size_of::<u32>())?;
verify_area(uaddr2, core::mem::size_of::<u32>())?;
let mut timespec = None;
if utime != 0 && operation.contains(FutexFlag::FLAGS_HAS_TIMEOUT) {
let reader = UserBufferReader::new(
utime as *const TimeSpec,
core::mem::size_of::<TimeSpec>(),
true,
)?;
timespec = Some(reader.read_one_from_user::<TimeSpec>(0)?.clone());
}
Self::do_futex(uaddr, operation, val, timespec, uaddr2, utime as u32, val3)
}
SYS_WRITEV => Self::writev(args[0] as i32, args[1], args[2]),
SYS_ARCH_PRCTL => Self::arch_prctl(args[0], args[1]),
SYS_SET_TID_ADDR => Self::set_tid_address(args[0]),
SYS_STAT => {
let path: &CStr = unsafe { CStr::from_ptr(args[0] as *const c_char) };
let path: Result<&str, core::str::Utf8Error> = path.to_str();
let res = if path.is_err() {
Err(SystemError::EINVAL)
} else {
let path: &str = path.unwrap();
let kstat = args[1] as *mut PosixKstat;
let vaddr = VirtAddr::new(kstat as usize);
match verify_area(vaddr, core::mem::size_of::<PosixKstat>()) {
Ok(_) => Self::stat(path, kstat),
Err(e) => Err(e),
}
};
res
}
// 目前为了适配musl-libc,以下系统调用先这样写着
SYS_GET_RANDOM => {
let flags = GRandFlags::from_bits(args[2] as u8).ok_or(SystemError::EINVAL)?;
Self::get_random(args[0] as *mut u8, args[1], flags)
}
SYS_SOCKET_PAIR => {
unimplemented!()
}
SYS_POLL => {
kwarn!("SYS_POLL has not yet been implemented");
Ok(0)
}
SYS_RT_SIGPROCMASK => {
kwarn!("SYS_RT_SIGPROCMASK has not yet been implemented");
Ok(0)
}
SYS_TKILL => {
kwarn!("SYS_TKILL has not yet been implemented");
Ok(0)
}
SYS_SIGALTSTACK => {
kwarn!("SYS_SIGALTSTACK has not yet been implemented");
Ok(0)
}
_ => panic!("Unsupported syscall ID: {}", syscall_num),
};
return r;

View File

@ -172,6 +172,10 @@ impl<'a> UserBufferReader<'a> {
});
}
pub fn size(&self) -> usize {
return self.buffer.len();
}
/// 从用户空间读取数据(到变量中)
///
/// @param offset 字节偏移量
@ -266,6 +270,10 @@ impl<'a> UserBufferWriter<'a> {
});
}
pub fn size(&self) -> usize {
return self.buffer.len();
}
/// 从指定地址写入数据到用户空间
///
/// @param data 要写入的数据地址
@ -274,7 +282,7 @@ impl<'a> UserBufferWriter<'a> {
///
pub fn copy_to_user<T: core::marker::Copy>(
&'a mut self,
src: &'a [T],
src: &[T],
offset: usize,
) -> Result<usize, SystemError> {
let dst = Self::convert_with_offset(self.buffer, offset)?;
@ -290,7 +298,7 @@ impl<'a> UserBufferWriter<'a> {
///
pub fn copy_one_to_user<T: core::marker::Copy>(
&'a mut self,
src: &'a T,
src: &T,
offset: usize,
) -> Result<(), SystemError> {
let dst = Self::convert_one_with_offset::<T>(self.buffer, offset)?;

View File

@ -72,6 +72,7 @@ impl Timer {
expire_jiffies,
timer_func,
self_ref: Weak::default(),
triggered: false,
})));
result.0.lock().self_ref = Arc::downgrade(&result);
@ -112,7 +113,9 @@ impl Timer {
#[inline]
fn run(&self) {
let r = self.0.lock().timer_func.run();
let mut timer = self.0.lock();
timer.triggered = true;
let r = timer.timer_func.run();
if unlikely(r.is_err()) {
kerror!(
"Failed to run timer function: {self:?} {:?}",
@ -120,6 +123,19 @@ impl Timer {
);
}
}
/// ## 判断定时器是否已经触发
pub fn timeout(&self) -> bool {
self.0.lock().triggered
}
/// ## 取消定时器任务
pub fn cancel(&self) -> bool {
TIMER_LIST
.lock()
.drain_filter(|x| Arc::<Timer>::as_ptr(&x) == self as *const Timer);
true
}
}
/// 定时器类型
@ -131,6 +147,8 @@ pub struct InnerTimer {
pub timer_func: Box<dyn TimerFunction>,
/// self_ref
self_ref: Weak<Timer>,
/// 判断该计时器是否触发
triggered: bool,
}
#[derive(Debug)]