Remove the shim kernel crate

Zhang Junyang
2024-08-19 19:15:22 +08:00
committed by Tate, Hongliang Tian
parent d76c7a5b1e
commit dafd16075f
416 changed files with 231 additions and 273 deletions

443
kernel/src/process/clone.rs Normal file

@@ -0,0 +1,443 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use ostd::{
cpu::UserContext,
user::{UserContextApi, UserSpace},
};
use super::{
posix_thread::{PosixThread, PosixThreadBuilder, PosixThreadExt, ThreadName},
process_table,
process_vm::ProcessVm,
signal::sig_disposition::SigDispositions,
Credentials, Process, ProcessBuilder,
};
use crate::{
cpu::LinuxAbi,
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
thread::{allocate_tid, thread_table, Thread, Tid},
};
bitflags! {
pub struct CloneFlags: u32 {
const CLONE_VM = 0x00000100; /* Set if VM shared between processes. */
const CLONE_FS = 0x00000200; /* Set if fs info shared between processes. */
const CLONE_FILES = 0x00000400; /* Set if open files shared between processes. */
const CLONE_SIGHAND = 0x00000800; /* Set if signal handlers shared. */
const CLONE_PIDFD = 0x00001000; /* Set if a pidfd should be placed in parent. */
const CLONE_PTRACE = 0x00002000; /* Set if tracing continues on the child. */
const CLONE_VFORK = 0x00004000; /* Set if the parent wants the child to wake it up on mm_release. */
const CLONE_PARENT = 0x00008000; /* Set if we want to have the same parent as the cloner. */
const CLONE_THREAD = 0x00010000; /* Set to add to same thread group. */
const CLONE_NEWNS = 0x00020000; /* Set to create new namespace. */
const CLONE_SYSVSEM = 0x00040000; /* Set to shared SVID SEM_UNDO semantics. */
const CLONE_SETTLS = 0x00080000; /* Set TLS info. */
const CLONE_PARENT_SETTID = 0x00100000; /* Store TID in userlevel buffer before MM copy. */
const CLONE_CHILD_CLEARTID = 0x00200000;/* Register exit futex and memory location to clear. */
const CLONE_DETACHED = 0x00400000; /* Create clone detached. */
const CLONE_UNTRACED = 0x00800000; /* Set if the tracing process can't force CLONE_PTRACE on this clone. */
const CLONE_CHILD_SETTID = 0x01000000; /* Store TID in userlevel buffer in the child. */
const CLONE_NEWCGROUP = 0x02000000; /* New cgroup namespace. */
const CLONE_NEWUTS = 0x04000000; /* New utsname group. */
const CLONE_NEWIPC = 0x08000000; /* New ipcs. */
const CLONE_NEWUSER = 0x10000000; /* New user namespace. */
const CLONE_NEWPID = 0x20000000; /* New pid namespace. */
const CLONE_NEWNET = 0x40000000; /* New network namespace. */
const CLONE_IO = 0x80000000; /* Clone I/O context. */
}
}
#[derive(Debug, Clone, Copy)]
pub struct CloneArgs {
new_sp: u64,
stack_size: usize,
parent_tidptr: Vaddr,
child_tidptr: Vaddr,
tls: u64,
clone_flags: CloneFlags,
}
impl CloneArgs {
/// Clone arguments for the `fork` syscall.
/// TODO: set the correct values
pub const fn for_fork() -> Self {
CloneArgs {
new_sp: 0,
stack_size: 0,
parent_tidptr: 0,
child_tidptr: 0,
tls: 0,
clone_flags: CloneFlags::empty(),
}
}
pub const fn new(
new_sp: u64,
stack_size: usize,
parent_tidptr: Vaddr,
child_tidptr: Vaddr,
tls: u64,
clone_flags: CloneFlags,
) -> Self {
CloneArgs {
new_sp,
stack_size,
parent_tidptr,
child_tidptr,
tls,
clone_flags,
}
}
}
impl From<u64> for CloneFlags {
fn from(flags: u64) -> Self {
// We use the lower 32 bits
let clone_flags = (flags & 0xffff_ffff) as u32;
CloneFlags::from_bits_truncate(clone_flags)
}
}
impl CloneFlags {
fn check_unsupported_flags(&self) -> Result<()> {
let supported_flags = CloneFlags::CLONE_VM
| CloneFlags::CLONE_FS
| CloneFlags::CLONE_FILES
| CloneFlags::CLONE_SIGHAND
| CloneFlags::CLONE_THREAD
| CloneFlags::CLONE_SYSVSEM
| CloneFlags::CLONE_SETTLS
| CloneFlags::CLONE_PARENT_SETTID
| CloneFlags::CLONE_CHILD_SETTID
| CloneFlags::CLONE_CHILD_CLEARTID;
let unsupported_flags = *self - supported_flags;
if !unsupported_flags.is_empty() {
panic!("contains unsupported clone flags: {:?}", unsupported_flags);
}
Ok(())
}
}
/// Clone a child thread or child process.
///
/// FIXME: currently, the child process or thread will be scheduled to run at once,
/// but this may not be the expected behavior.
pub fn clone_child(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Tid> {
clone_args.clone_flags.check_unsupported_flags()?;
if clone_args.clone_flags.contains(CloneFlags::CLONE_THREAD) {
let child_thread = clone_child_thread(ctx, parent_context, clone_args)?;
child_thread.run();
let child_tid = child_thread.tid();
Ok(child_tid)
} else {
let child_process = clone_child_process(ctx, parent_context, clone_args)?;
child_process.run();
let child_pid = child_process.pid();
Ok(child_pid)
}
}
fn clone_child_thread(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Arc<Thread>> {
let Context {
process,
posix_thread,
thread: _,
task: _,
} = ctx;
let clone_flags = clone_args.clone_flags;
debug_assert!(clone_flags.contains(CloneFlags::CLONE_VM));
debug_assert!(clone_flags.contains(CloneFlags::CLONE_FILES));
debug_assert!(clone_flags.contains(CloneFlags::CLONE_SIGHAND));
let child_root_vmar = process.root_vmar();
let child_user_space = {
let child_vm_space = child_root_vmar.vm_space().clone();
let child_cpu_context = clone_cpu_context(
parent_context,
clone_args.new_sp,
clone_args.stack_size,
clone_args.tls,
clone_flags,
);
Arc::new(UserSpace::new(child_vm_space, child_cpu_context))
};
clone_sysvsem(clone_flags)?;
// Inherit sigmask from current thread
let sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into();
let child_tid = allocate_tid();
let child_thread = {
let credentials = {
let credentials = ctx.posix_thread.credentials();
Credentials::new_from(&credentials)
};
let thread_builder = PosixThreadBuilder::new(child_tid, child_user_space, credentials)
.process(posix_thread.weak_process())
.sig_mask(sig_mask);
thread_builder.build()
};
process.threads().lock().push(child_thread.clone());
let child_posix_thread = child_thread.as_posix_thread().unwrap();
clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
clone_child_settid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
Ok(child_thread)
}
fn clone_child_process(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Arc<Process>> {
let Context {
process,
posix_thread,
thread: _,
task: _,
} = ctx;
let clone_flags = clone_args.clone_flags;
// clone vm
let child_process_vm = {
let parent_process_vm = process.vm();
clone_vm(parent_process_vm, clone_flags)?
};
// clone user space
let child_user_space = {
let child_cpu_context = clone_cpu_context(
parent_context,
clone_args.new_sp,
clone_args.stack_size,
clone_args.tls,
clone_flags,
);
let child_vm_space = {
let child_root_vmar = child_process_vm.root_vmar();
child_root_vmar.vm_space().clone()
};
Arc::new(UserSpace::new(child_vm_space, child_cpu_context))
};
// clone file table
let child_file_table = clone_files(process.file_table(), clone_flags);
// clone fs
let child_fs = clone_fs(process.fs(), clone_flags);
// clone umask
let child_umask = {
let parent_umask = process.umask().read().get();
Arc::new(RwLock::new(FileCreationMask::new(parent_umask)))
};
// clone sig dispositions
let child_sig_dispositions = clone_sighand(process.sig_dispositions(), clone_flags);
// clone system V semaphore
clone_sysvsem(clone_flags)?;
// inherit parent's sig mask
let child_sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into();
// inherit parent's nice value
let child_nice = process.nice().load(Ordering::Relaxed);
let child_tid = allocate_tid();
let child = {
let child_elf_path = process.executable_path();
let child_thread_builder = {
let child_thread_name = ThreadName::new_from_executable_path(&child_elf_path)?;
let credentials = {
let credentials = ctx.posix_thread.credentials();
Credentials::new_from(&credentials)
};
PosixThreadBuilder::new(child_tid, child_user_space, credentials)
.thread_name(Some(child_thread_name))
.sig_mask(child_sig_mask)
};
let mut process_builder =
ProcessBuilder::new(child_tid, &child_elf_path, posix_thread.weak_process());
process_builder
.main_thread_builder(child_thread_builder)
.process_vm(child_process_vm)
.file_table(child_file_table)
.fs(child_fs)
.umask(child_umask)
.sig_dispositions(child_sig_dispositions)
.nice(child_nice);
process_builder.build()?
};
// Deals with clone flags
let child_thread = thread_table::get_thread(child_tid).unwrap();
let child_posix_thread = child_thread.as_posix_thread().unwrap();
clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
clone_child_settid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
// Sets parent process and group for child process.
set_parent_and_group(process, &child);
Ok(child)
}
fn clone_child_cleartid(
child_posix_thread: &PosixThread,
child_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
*child_posix_thread.clear_child_tid().lock() = child_tidptr;
}
Ok(())
}
fn clone_child_settid(
child_posix_thread: &PosixThread,
child_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
*child_posix_thread.set_child_tid().lock() = child_tidptr;
}
Ok(())
}
fn clone_parent_settid(
child_tid: Tid,
parent_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
CurrentUserSpace::get().write_val(parent_tidptr, &child_tid)?;
}
Ok(())
}
/// Clone child process vm. If CLONE_VM is set, both threads share the same root vmar.
/// Otherwise, fork a new copy-on-write vmar.
fn clone_vm(parent_process_vm: &ProcessVm, clone_flags: CloneFlags) -> Result<ProcessVm> {
if clone_flags.contains(CloneFlags::CLONE_VM) {
Ok(parent_process_vm.clone())
} else {
ProcessVm::fork_from(parent_process_vm)
}
}
fn clone_cpu_context(
parent_context: &UserContext,
new_sp: u64,
stack_size: usize,
tls: u64,
clone_flags: CloneFlags,
) -> UserContext {
let mut child_context = *parent_context;
// The return value of child thread is zero
child_context.set_syscall_ret(0);
if clone_flags.contains(CloneFlags::CLONE_VM) {
// If the parent and the child share the same address space, a new stack must be specified.
debug_assert!(new_sp != 0);
}
if new_sp != 0 {
// If the stack size is not 0, `new_sp` points to the BOTTOMMOST byte of the stack.
if stack_size != 0 {
child_context.set_stack_pointer(new_sp as usize + stack_size);
}
// If the stack size is 0, `new_sp` points to the TOPMOST byte of the stack.
else {
child_context.set_stack_pointer(new_sp as usize);
}
}
if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
child_context.set_tls_pointer(tls as usize);
}
child_context
}
fn clone_fs(
parent_fs: &Arc<RwMutex<FsResolver>>,
clone_flags: CloneFlags,
) -> Arc<RwMutex<FsResolver>> {
if clone_flags.contains(CloneFlags::CLONE_FS) {
parent_fs.clone()
} else {
Arc::new(RwMutex::new(parent_fs.read().clone()))
}
}
fn clone_files(
parent_file_table: &Arc<Mutex<FileTable>>,
clone_flags: CloneFlags,
) -> Arc<Mutex<FileTable>> {
// If CLONE_FILES is set, the child and the parent share the same file table.
// Otherwise, the child gets a deep copy of the file table.
// FIXME: the clone may not be a deep copy.
if clone_flags.contains(CloneFlags::CLONE_FILES) {
parent_file_table.clone()
} else {
Arc::new(Mutex::new(parent_file_table.lock().clone()))
}
}
fn clone_sighand(
parent_sig_dispositions: &Arc<Mutex<SigDispositions>>,
clone_flags: CloneFlags,
) -> Arc<Mutex<SigDispositions>> {
// similar to CLONE_FILES
if clone_flags.contains(CloneFlags::CLONE_SIGHAND) {
parent_sig_dispositions.clone()
} else {
Arc::new(Mutex::new(*parent_sig_dispositions.lock()))
}
}
fn clone_sysvsem(clone_flags: CloneFlags) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_SYSVSEM) {
warn!("CLONE_SYSVSEM is not supported now");
}
Ok(())
}
fn set_parent_and_group(parent: &Process, child: &Arc<Process>) {
let process_group = parent.process_group().unwrap();
let mut process_table_mut = process_table::process_table_mut();
let mut group_inner = process_group.inner.lock();
let mut child_group_mut = child.process_group.lock();
let mut children_mut = parent.children().lock();
children_mut.insert(child.pid(), child.clone());
group_inner.processes.insert(child.pid(), child.clone());
*child_group_mut = Arc::downgrade(&process_group);
process_table_mut.insert(child.pid(), child.clone());
}

21
kernel/src/process/credentials/c_types.rs Normal file

@@ -0,0 +1,21 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use crate::{prelude::*, process::Pid};
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct cap_user_header_t {
pub version: u32,
pub pid: Pid,
}
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct cap_user_data_t {
pub effective: u32,
pub permitted: u32,
pub inheritable: u32,
}
pub const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
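
For illustration only (a hypothetical capget-style helper, assuming `CapSet` from capabilities.rs is in scope): the three u32 fields would be filled from 64-bit capability sets. Note that `CapSet::as_u32` keeps only the low 32 bits, so the v3 ABI's upper halves are not represented in this sketch.

fn fill_cap_data(effective: CapSet, permitted: CapSet, inheritable: CapSet) -> cap_user_data_t {
    // Hypothetical helper, not part of the commit: truncates each 64-bit
    // capability set to its low 32 bits.
    cap_user_data_t {
        effective: effective.as_u32(),
        permitted: permitted.as_u32(),
        inheritable: inheritable.as_u32(),
    }
}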

94
kernel/src/process/credentials/capabilities.rs Normal file

@@ -0,0 +1,94 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU64, Ordering};
use bitflags::bitflags;
bitflags! {
/// Represents a set of Linux capabilities.
pub struct CapSet: u64 {
const CHOWN = 1 << 0;
const DAC_OVERRIDE = 1 << 1;
const DAC_READ_SEARCH = 1 << 2;
const FOWNER = 1 << 3;
const FSETID = 1 << 4;
const KILL = 1 << 5;
const SETGID = 1 << 6;
const SETUID = 1 << 7;
const SETPCAP = 1 << 8;
const LINUX_IMMUTABLE = 1 << 9;
const NET_BIND_SERVICE = 1 << 10;
const NET_BROADCAST = 1 << 11;
const NET_ADMIN = 1 << 12;
const NET_RAW = 1 << 13;
const IPC_LOCK = 1 << 14;
const IPC_OWNER = 1 << 15;
const SYS_MODULE = 1 << 16;
const SYS_RAWIO = 1 << 17;
const SYS_CHROOT = 1 << 18;
const SYS_PTRACE = 1 << 19;
const SYS_PACCT = 1 << 20;
const SYS_ADMIN = 1 << 21;
const SYS_BOOT = 1 << 22;
const SYS_NICE = 1 << 23;
const SYS_RESOURCE = 1 << 24;
const SYS_TIME = 1 << 25;
const SYS_TTY_CONFIG = 1 << 26;
const MKNOD = 1 << 27;
const LEASE = 1 << 28;
const AUDIT_WRITE = 1 << 29;
const AUDIT_CONTROL = 1 << 30;
const SETFCAP = 1 << 31;
const MAC_OVERRIDE = 1 << 32;
const MAC_ADMIN = 1 << 33;
const SYSLOG = 1 << 34;
const WAKE_ALARM = 1 << 35;
const BLOCK_SUSPEND = 1 << 36;
const AUDIT_READ = 1 << 37;
const PERFMON = 1 << 38;
const BPF = 1 << 39;
const CHECKPOINT_RESTORE = 1u64 << 40;
// ... include other capabilities as needed
}
}
impl CapSet {
/// Converts the capability set to a `u32`. The higher bits are truncated.
pub fn as_u32(&self) -> u32 {
self.bits() as u32
}
/// Creates a new `CapSet` with the `SYS_ADMIN` capability set, typically for a root user.
pub const fn new_root() -> Self {
CapSet::SYS_ADMIN
}
/// The most significant bit in a 64-bit `CapSet` that may be set to represent a Linux capability.
pub fn most_significant_bit() -> u8 {
// CHECKPOINT_RESTORE is the Linux capability with the largest numerical value
40
}
}
#[derive(Debug)]
pub(super) struct AtomicCapSet(AtomicU64);
impl AtomicCapSet {
pub const fn new(capset: CapSet) -> Self {
Self(AtomicU64::new(capset.bits))
}
pub fn set(&self, capset: CapSet) {
self.0.store(capset.bits(), Ordering::Relaxed);
}
pub fn get(&self) -> CapSet {
CapSet::from_bits_truncate(self.0.load(Ordering::Relaxed))
}
}
impl Clone for AtomicCapSet {
fn clone(&self) -> Self {
Self::new(self.get())
}
}
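
A small illustrative sketch (not part of the commit): `CapSet` behaves like a plain bit set, and `as_u32` silently drops capabilities numbered 32 and above.

fn demo_capset() {
    let caps = CapSet::NET_BIND_SERVICE | CapSet::SYSLOG;
    assert!(caps.contains(CapSet::NET_BIND_SERVICE));
    // SYSLOG is bit 34, so it is lost when round-tripping through u32.
    assert_eq!(
        CapSet::from_bits_truncate(caps.as_u32() as u64),
        CapSet::NET_BIND_SERVICE
    );
}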

440
kernel/src/process/credentials/credentials_.rs Normal file

@@ -0,0 +1,440 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::sync::{RwLockReadGuard, RwLockWriteGuard};
use super::{group::AtomicGid, user::AtomicUid, Gid, Uid};
use crate::{
prelude::*,
process::credentials::capabilities::{AtomicCapSet, CapSet},
};
#[derive(Debug)]
pub(super) struct Credentials_ {
/// Real user id. The user to which the process belongs.
ruid: AtomicUid,
/// Effective user id. Used to determine the permissions granted to a process when it tries to perform various operations (i.e., system calls)
euid: AtomicUid,
/// Saved set-user-id. When executing a set-uid ELF, the saved set-user-id is set if the ELF has the setuid bit.
suid: AtomicUid,
/// User id used for filesystem checks.
fsuid: AtomicUid,
/// Real group id. The group to which the process belongs
rgid: AtomicGid,
/// Effective group id.
egid: AtomicGid,
/// Saved set-group-id. When executing a set-gid ELF, the saved set-group-id is set if the ELF has the setgid bit.
sgid: AtomicGid,
/// Group id used for file system checks.
fsgid: AtomicGid,
/// A set of additional groups to which a process belongs.
supplementary_gids: RwLock<BTreeSet<Gid>>,
/// The Linux capabilities.
/// This is not the capability (in static_cap.rs) enforced on Rust objects.
/// Capabilities that child processes can inherit.
inheritable_capset: AtomicCapSet,
/// Capabilities that a process can potentially be granted.
/// It defines the maximum set of privileges that the process could possibly have.
/// Even if the process is not currently using these privileges, it has the potential ability to enable them.
permitted_capset: AtomicCapSet,
/// Capabilities that the process can actually use.
effective_capset: AtomicCapSet,
}
impl Credentials_ {
/// Creates new credentials. `ruid`, `euid`, `suid`, and `fsuid` are all set to `uid`; likewise for the group ids.
pub fn new(uid: Uid, gid: Gid, capset: CapSet) -> Self {
let mut supplementary_gids = BTreeSet::new();
supplementary_gids.insert(gid);
Self {
ruid: AtomicUid::new(uid),
euid: AtomicUid::new(uid),
suid: AtomicUid::new(uid),
fsuid: AtomicUid::new(uid),
rgid: AtomicGid::new(gid),
egid: AtomicGid::new(gid),
sgid: AtomicGid::new(gid),
fsgid: AtomicGid::new(gid),
supplementary_gids: RwLock::new(supplementary_gids),
inheritable_capset: AtomicCapSet::new(capset),
permitted_capset: AtomicCapSet::new(capset),
effective_capset: AtomicCapSet::new(capset),
}
}
fn is_privileged(&self) -> bool {
self.euid.is_root()
}
// ******* Uid methods *******
pub(super) fn ruid(&self) -> Uid {
self.ruid.get()
}
pub(super) fn euid(&self) -> Uid {
self.euid.get()
}
pub(super) fn suid(&self) -> Uid {
self.suid.get()
}
pub(super) fn fsuid(&self) -> Uid {
self.fsuid.get()
}
pub(super) fn set_uid(&self, uid: Uid) {
if self.is_privileged() {
self.ruid.set(uid);
self.euid.set(uid);
self.suid.set(uid);
} else {
self.euid.set(uid);
}
}
pub(super) fn set_reuid(&self, ruid: Option<Uid>, euid: Option<Uid>) -> Result<()> {
self.check_uid_perm(ruid.as_ref(), euid.as_ref(), None, false)?;
let should_set_suid = ruid.is_some() || euid.is_some_and(|euid| euid != self.ruid());
self.set_resuid_unchecked(ruid, euid, None);
if should_set_suid {
self.suid.set(self.euid());
}
// FIXME: should we set fsuid here? The Linux document for syscall `setfsuid` contradicts
// the document of syscall `setreuid`. The `setfsuid` document says the `fsuid` is always
// the same as `euid`, but `setreuid` does not mention that the `fsuid` should be set.
self.fsuid.set(self.euid());
Ok(())
}
pub(super) fn set_resuid(
&self,
ruid: Option<Uid>,
euid: Option<Uid>,
suid: Option<Uid>,
) -> Result<()> {
self.check_uid_perm(ruid.as_ref(), euid.as_ref(), suid.as_ref(), true)?;
self.set_resuid_unchecked(ruid, euid, suid);
self.fsuid.set(self.euid());
Ok(())
}
pub(super) fn set_fsuid(&self, fsuid: Option<Uid>) -> Result<Uid> {
let old_fsuid = self.fsuid();
let Some(fsuid) = fsuid else {
return Ok(old_fsuid);
};
if self.is_privileged() {
self.fsuid.set(fsuid);
return Ok(old_fsuid);
}
if fsuid != self.ruid() && fsuid != self.euid() && fsuid != self.suid() {
return_errno_with_message!(
Errno::EPERM,
"fsuid can only be one of old ruid, old euid and old suid."
)
}
self.fsuid.set(fsuid);
Ok(old_fsuid)
}
pub(super) fn set_euid(&self, euid: Uid) {
self.euid.set(euid);
}
pub(super) fn set_suid(&self, suid: Uid) {
self.suid.set(suid);
}
// For `setreuid`, ruid can *NOT* be set to old suid,
// while for `setresuid`, ruid can be set to old suid.
fn check_uid_perm(
&self,
ruid: Option<&Uid>,
euid: Option<&Uid>,
suid: Option<&Uid>,
ruid_may_be_old_suid: bool,
) -> Result<()> {
if self.is_privileged() {
return Ok(());
}
if let Some(ruid) = ruid
&& *ruid != self.ruid()
&& *ruid != self.euid()
&& (!ruid_may_be_old_suid || *ruid != self.suid())
{
return_errno_with_message!(
Errno::EPERM,
"ruid can only be one of old ruid, old euid (and old suid)."
);
}
if let Some(euid) = euid
&& *euid != self.ruid()
&& *euid != self.euid()
&& *euid != self.suid()
{
return_errno_with_message!(
Errno::EPERM,
"euid can only be one of old ruid, old euid and old suid."
)
}
if let Some(suid) = suid
&& *suid != self.ruid()
&& *suid != self.euid()
&& *suid != self.suid()
{
return_errno_with_message!(
Errno::EPERM,
"suid can only be one of old ruid, old euid and old suid."
)
}
Ok(())
}
fn set_resuid_unchecked(&self, ruid: Option<Uid>, euid: Option<Uid>, suid: Option<Uid>) {
if let Some(ruid) = ruid {
self.ruid.set(ruid);
}
if let Some(euid) = euid {
self.euid.set(euid);
}
if let Some(suid) = suid {
self.suid.set(suid);
}
}
// ******* Gid methods *******
pub(super) fn rgid(&self) -> Gid {
self.rgid.get()
}
pub(super) fn egid(&self) -> Gid {
self.egid.get()
}
pub(super) fn sgid(&self) -> Gid {
self.sgid.get()
}
pub(super) fn fsgid(&self) -> Gid {
self.fsgid.get()
}
pub(super) fn set_gid(&self, gid: Gid) {
if self.is_privileged() {
self.rgid.set(gid);
self.egid.set(gid);
self.sgid.set(gid);
} else {
self.egid.set(gid);
}
}
pub(super) fn set_regid(&self, rgid: Option<Gid>, egid: Option<Gid>) -> Result<()> {
self.check_gid_perm(rgid.as_ref(), egid.as_ref(), None, false)?;
let should_set_sgid = rgid.is_some() || egid.is_some_and(|egid| egid != self.rgid());
self.set_resgid_unchecked(rgid, egid, None);
if should_set_sgid {
self.sgid.set(self.egid());
}
self.fsgid.set(self.egid());
Ok(())
}
pub(super) fn set_resgid(
&self,
rgid: Option<Gid>,
egid: Option<Gid>,
sgid: Option<Gid>,
) -> Result<()> {
self.check_gid_perm(rgid.as_ref(), egid.as_ref(), sgid.as_ref(), true)?;
self.set_resgid_unchecked(rgid, egid, sgid);
self.fsgid.set(self.egid());
Ok(())
}
pub(super) fn set_fsgid(&self, fsgid: Option<Gid>) -> Result<Gid> {
let old_fsgid = self.fsgid();
let Some(fsgid) = fsgid else {
return Ok(old_fsgid);
};
if self.is_privileged() {
self.fsgid.set(fsgid);
return Ok(old_fsgid);
}
if fsgid != self.rgid() && fsgid != self.egid() && fsgid != self.sgid() {
return_errno_with_message!(
Errno::EPERM,
"fsuid can only be one of old ruid, old euid and old suid."
)
}
self.fsgid.set(fsgid);
Ok(old_fsgid)
}
pub(super) fn set_egid(&self, egid: Gid) {
self.egid.set(egid);
}
pub(super) fn set_sgid(&self, sgid: Gid) {
self.sgid.set(sgid);
}
// For `setregid`, rgid can *NOT* be set to old sgid,
// while for `setresgid`, rgid can be set to old sgid.
fn check_gid_perm(
&self,
rgid: Option<&Gid>,
egid: Option<&Gid>,
sgid: Option<&Gid>,
rgid_may_be_old_sgid: bool,
) -> Result<()> {
if self.is_privileged() {
return Ok(());
}
if let Some(rgid) = rgid
&& *rgid != self.rgid()
&& *rgid != self.egid()
&& (!rgid_may_be_old_sgid || *rgid != self.sgid())
{
return_errno_with_message!(
Errno::EPERM,
"rgid can only be one of old rgid, old egid (and old sgid)."
);
}
if let Some(egid) = egid
&& *egid != self.rgid()
&& *egid != self.egid()
&& *egid != self.sgid()
{
return_errno_with_message!(
Errno::EPERM,
"egid can only be one of old rgid, old egid and old sgid."
)
}
if let Some(sgid) = sgid
&& *sgid != self.rgid()
&& *sgid != self.egid()
&& *sgid != self.sgid()
{
return_errno_with_message!(
Errno::EPERM,
"sgid can only be one of old rgid, old egid and old sgid."
)
}
Ok(())
}
fn set_resgid_unchecked(&self, rgid: Option<Gid>, egid: Option<Gid>, sgid: Option<Gid>) {
if let Some(rgid) = rgid {
self.rgid.set(rgid);
}
if let Some(egid) = egid {
self.egid.set(egid);
}
if let Some(sgid) = sgid {
self.sgid.set(sgid);
}
}
// ******* Supplementary groups methods *******
pub(super) fn groups(&self) -> RwLockReadGuard<BTreeSet<Gid>> {
self.supplementary_gids.read()
}
pub(super) fn groups_mut(&self) -> RwLockWriteGuard<BTreeSet<Gid>> {
self.supplementary_gids.write()
}
// ******* Linux Capability methods *******
pub(super) fn inheritable_capset(&self) -> CapSet {
self.inheritable_capset.get()
}
pub(super) fn permitted_capset(&self) -> CapSet {
self.permitted_capset.get()
}
pub(super) fn effective_capset(&self) -> CapSet {
self.effective_capset.get()
}
pub(super) fn set_inheritable_capset(&self, inheritable_capset: CapSet) {
self.inheritable_capset.set(inheritable_capset);
}
pub(super) fn set_permitted_capset(&self, permitted_capset: CapSet) {
self.permitted_capset.set(permitted_capset);
}
pub(super) fn set_effective_capset(&self, effective_capset: CapSet) {
self.effective_capset.set(effective_capset);
}
}
impl Clone for Credentials_ {
fn clone(&self) -> Self {
Self {
ruid: self.ruid.clone(),
euid: self.euid.clone(),
suid: self.suid.clone(),
fsuid: self.fsuid.clone(),
rgid: self.rgid.clone(),
egid: self.egid.clone(),
sgid: self.sgid.clone(),
fsgid: self.fsgid.clone(),
supplementary_gids: RwLock::new(self.supplementary_gids.read().clone()),
inheritable_capset: self.inheritable_capset.clone(),
permitted_capset: self.permitted_capset.clone(),
effective_capset: self.effective_capset.clone(),
}
}
}
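
To make the unprivileged rule concrete, here is a hypothetical in-module sketch (not part of the commit): once all uids belong to an ordinary user, `fsuid` may only be set to one of the old ruid, euid, or suid.

fn demo_fsuid_rule() {
    let creds = Credentials_::new(Uid::new(1000), Gid::new(1000), CapSet::empty());
    // Allowed: 1000 equals the current ruid/euid/suid.
    assert!(creds.set_fsuid(Some(Uid::new(1000))).is_ok());
    // Rejected with EPERM: 0 matches none of ruid, euid, suid.
    assert!(creds.set_fsuid(Some(Uid::new(0))).is_err());
}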

52
kernel/src/process/credentials/group.rs Normal file

@@ -0,0 +1,52 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU32, Ordering};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, Pod, Default, PartialEq, Eq, PartialOrd, Ord)]
#[repr(C)]
pub struct Gid(u32);
impl Gid {
pub const fn new(gid: u32) -> Self {
Self(gid)
}
pub const fn new_root() -> Self {
Self(ROOT_GID)
}
pub const fn as_u32(&self) -> u32 {
self.0
}
pub const fn is_root(&self) -> bool {
self.0 == ROOT_GID
}
}
const ROOT_GID: u32 = 0;
#[derive(Debug)]
pub(super) struct AtomicGid(AtomicU32);
impl AtomicGid {
pub const fn new(gid: Gid) -> Self {
Self(AtomicU32::new(gid.as_u32()))
}
pub fn set(&self, gid: Gid) {
self.0.store(gid.as_u32(), Ordering::Relaxed)
}
pub fn get(&self) -> Gid {
Gid(self.0.load(Ordering::Relaxed))
}
}
impl Clone for AtomicGid {
fn clone(&self) -> Self {
Self(AtomicU32::new(self.0.load(Ordering::Relaxed)))
}
}

26
kernel/src/process/credentials/mod.rs Normal file

@@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
pub mod c_types;
pub mod capabilities;
mod credentials_;
mod group;
mod static_cap;
mod user;
use aster_rights::FullOp;
use credentials_::Credentials_;
pub use group::Gid;
pub use user::Uid;
use crate::prelude::*;
/// `Credentials` represents a set of associated numeric user ids (UIDs) and group identifiers (GIDs)
/// for a process.
/// These identifiers are as follows:
/// - real user ID and group ID;
/// - effective user ID and group ID;
/// - saved-set user ID and saved-set group ID;
/// - file system user ID and group ID (Linux-specific);
/// - supplementary group IDs;
/// - Linux capabilities.
pub struct Credentials<R = FullOp>(Arc<Credentials_>, R);

303
kernel/src/process/credentials/static_cap.rs Normal file

@@ -0,0 +1,303 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use aster_rights::{Dup, Read, TRights, Write};
use aster_rights_proc::require;
use ostd::sync::{RwLockReadGuard, RwLockWriteGuard};
use super::{capabilities::CapSet, credentials_::Credentials_, Credentials, Gid, Uid};
use crate::prelude::*;
impl<R: TRights> Credentials<R> {
/// Creates a root `Credentials`. This method can only be used when creating the first process
pub fn new_root() -> Self {
let uid = Uid::new_root();
let gid = Gid::new_root();
let cap = CapSet::new_root();
let credentials_ = Arc::new(Credentials_::new(uid, gid, cap));
Self(credentials_, R::new())
}
/// Clones a new `Credentials` from an existing `Credentials`.
///
/// This method requires the `Read` right.
#[require(R1 > Read)]
pub fn new_from<R1: TRights>(credentials: &Credentials<R1>) -> Self {
let credentials_ = Arc::new(credentials.0.as_ref().clone());
Self(credentials_, R::new())
}
/// Duplicates the capabilities.
///
/// This method requires the `Dup` right.
#[require(R > Dup)]
pub fn dup(&self) -> Self {
Self(self.0.clone(), self.1)
}
/// Restricts capabilities to a smaller set.
#[require(R > R1)]
pub fn restrict<R1: TRights>(self) -> Credentials<R1> {
let Credentials(credentials_, _) = self;
Credentials(credentials_, R1::new())
}
// *********** Uid methods **********
/// Gets real user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn ruid(&self) -> Uid {
self.0.ruid()
}
/// Gets effective user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn euid(&self) -> Uid {
self.0.euid()
}
/// Gets saved-set user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn suid(&self) -> Uid {
self.0.suid()
}
/// Gets file system user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn fsuid(&self) -> Uid {
self.0.fsuid()
}
/// Sets the uid. If self is privileged, sets the real, effective, and saved-set user ids as `uid`.
/// Otherwise, sets only the effective user id as `uid`.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_uid(&self, uid: Uid) {
self.0.set_uid(uid);
}
/// Sets the real and effective user ids as `ruid` and `euid` respectively. If `ruid` or `euid`
/// is `None`, the corresponding user id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_reuid(&self, ruid: Option<Uid>, euid: Option<Uid>) -> Result<()> {
self.0.set_reuid(ruid, euid)
}
/// Sets the real, effective, and saved-set user ids as `ruid`, `euid`, and `suid` respectively. If
/// `ruid`, `euid` or `suid` is `None`, the corresponding user id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_resuid(
&self,
ruid: Option<Uid>,
euid: Option<Uid>,
suid: Option<Uid>,
) -> Result<()> {
self.0.set_resuid(ruid, euid, suid)
}
/// Sets file system user id as `fsuid`. Returns the original file system user id.
/// If `fsuid` is None, leaves file system user id unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_fsuid(&self, fsuid: Option<Uid>) -> Result<Uid> {
self.0.set_fsuid(fsuid)
}
/// Sets effective user id as `euid`. This method should only be used when executing a file
/// whose `setuid` bit is set.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_euid(&self, euid: Uid) {
self.0.set_euid(euid);
}
/// Sets the saved-set user id to the effective user id. This method should only be used when
/// executing a new executable file.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn reset_suid(&self) {
let euid = self.0.euid();
self.0.set_suid(euid);
}
// *********** Gid methods **********
/// Gets real group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn rgid(&self) -> Gid {
self.0.rgid()
}
/// Gets effective group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn egid(&self) -> Gid {
self.0.egid()
}
/// Gets saved-set group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn sgid(&self) -> Gid {
self.0.sgid()
}
/// Gets file system group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn fsgid(&self) -> Gid {
self.0.fsgid()
}
/// Sets the gid. If self is privileged, sets the real, effective, and saved-set group ids as `gid`.
/// Otherwise, sets only the effective group id as `gid`.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_gid(&self, gid: Gid) {
self.0.set_gid(gid);
}
/// Sets the real and effective group ids as `rgid` and `egid` respectively. If `rgid` or `egid`
/// is `None`, the corresponding group id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_regid(&self, rgid: Option<Gid>, egid: Option<Gid>) -> Result<()> {
self.0.set_regid(rgid, egid)
}
/// Sets the real, effective, and saved-set group ids as `rgid`, `egid`, and `sgid` respectively. If
/// `rgid`, `egid` or `sgid` is `None`, the corresponding group id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_resgid(
&self,
rgid: Option<Gid>,
egid: Option<Gid>,
sgid: Option<Gid>,
) -> Result<()> {
self.0.set_resgid(rgid, egid, sgid)
}
/// Sets file system group id as `fsgid`. Returns the original file system group id.
/// If `fsgid` is None, leaves file system group id unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_fsgid(&self, fsgid: Option<Gid>) -> Result<Gid> {
self.0.set_fsgid(fsgid)
}
/// Sets effective group id as `egid`. This method should only be used when executing a file
/// whose `setgid` bit is set.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_egid(&self, egid: Gid) {
self.0.set_egid(egid);
}
/// Sets the saved-set group id to the effective group id. This method should only be used when
/// executing a new executable file.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn reset_sgid(&self) {
let egid = self.0.egid();
self.0.set_sgid(egid);
}
// *********** Supplementary group methods **********
/// Acquires the read lock of supplementary group ids.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn groups(&self) -> RwLockReadGuard<BTreeSet<Gid>> {
self.0.groups()
}
/// Acquires the write lock of supplementary group ids.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn groups_mut(&self) -> RwLockWriteGuard<BTreeSet<Gid>> {
self.0.groups_mut()
}
// *********** Linux Capability methods **********
/// Gets the capabilities that child process can inherit.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn inheritable_capset(&self) -> CapSet {
self.0.inheritable_capset()
}
/// Gets the capabilities that are permitted.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn permitted_capset(&self) -> CapSet {
self.0.permitted_capset()
}
/// Gets the capabilities that are actually in effect.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn effective_capset(&self) -> CapSet {
self.0.effective_capset()
}
/// Sets the capabilities that child process can inherit.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_inheritable_capset(&self, inheritable_capset: CapSet) {
self.0.set_inheritable_capset(inheritable_capset);
}
/// Sets the capabilities that are permitted.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_permitted_capset(&self, permitted_capset: CapSet) {
self.0.set_permitted_capset(permitted_capset);
}
/// Sets the capabilities that are actually in effect.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_effective_capset(&self, effective_capset: CapSet) {
self.0.set_effective_capset(effective_capset);
}
}
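
A sketch of how the rights typestate is meant to be used (illustrative, assuming `aster_rights::ReadOp` is imported): a full-rights credential can be duplicated and restricted, after which `Write`-gated methods no longer compile for the restricted handle.

fn read_only_view(full: &Credentials) -> Credentials<ReadOp> {
    // `dup` requires the `Dup` right; `restrict` narrows FullOp down to ReadOp.
    // The returned view can call `euid()` but `set_uid()` would not compile.
    full.dup().restrict()
}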

56
kernel/src/process/credentials/user.rs Normal file

@@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU32, Ordering};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Pod)]
#[repr(C)]
pub struct Uid(u32);
const ROOT_UID: u32 = 0;
impl Uid {
pub const fn new_root() -> Self {
Self(ROOT_UID)
}
pub const fn new(uid: u32) -> Self {
Self(uid)
}
pub const fn is_root(&self) -> bool {
self.0 == ROOT_UID
}
pub const fn as_u32(&self) -> u32 {
self.0
}
}
#[derive(Debug)]
pub(super) struct AtomicUid(AtomicU32);
impl AtomicUid {
pub const fn new(uid: Uid) -> Self {
Self(AtomicU32::new(uid.as_u32()))
}
pub fn set(&self, uid: Uid) {
self.0.store(uid.as_u32(), Ordering::Release)
}
pub fn get(&self) -> Uid {
Uid(self.0.load(Ordering::Acquire))
}
pub fn is_root(&self) -> bool {
self.get().is_root()
}
}
impl Clone for AtomicUid {
fn clone(&self) -> Self {
Self(AtomicU32::new(self.0.load(Ordering::Acquire)))
}
}

74
kernel/src/process/exit.rs Normal file

@@ -0,0 +1,74 @@
// SPDX-License-Identifier: MPL-2.0
use super::{process_table, Pid, Process, TermStatus};
use crate::{
prelude::*,
process::{
posix_thread::do_exit,
signal::{constants::SIGCHLD, signals::kernel::KernelSignal},
},
};
pub fn do_exit_group(term_status: TermStatus) {
let current = current!();
debug!("exit group was called");
if current.is_zombie() {
return;
}
current.set_zombie(term_status);
// Exit all threads
let threads = current.threads().lock().clone();
for thread in threads {
if let Err(e) = do_exit(thread, term_status) {
debug!("Ignore error when call exit: {:?}", e);
}
}
// Sends parent-death signal
// FIXME: according to the Linux spec, the signal should be sent when the POSIX thread
// that created the child process exits, not when the whole process exits the group.
for (_, child) in current.children().lock().iter() {
let Some(signum) = child.parent_death_signal() else {
continue;
};
// FIXME: set pid of the signal
let signal = KernelSignal::new(signum);
child.enqueue_signal(signal);
}
// Close all files then exit the process
let files = current.file_table().lock().close_all();
drop(files);
// Move children to the init process
if !is_init_process(&current) {
if let Some(init_process) = get_init_process() {
let mut init_children = init_process.children().lock();
for (_, child_process) in current.children().lock().extract_if(|_, _| true) {
let mut parent = child_process.parent.lock();
init_children.insert(child_process.pid(), child_process.clone());
*parent = Arc::downgrade(&init_process);
}
}
}
if let Some(parent) = current.parent() {
// Notify parent
let signal = KernelSignal::new(SIGCHLD);
parent.enqueue_signal(signal);
parent.children_pauser().resume_all();
}
}
const INIT_PROCESS_PID: Pid = 1;
/// Gets the init process
fn get_init_process() -> Option<Arc<Process>> {
process_table::get_process(INIT_PROCESS_PID)
}
fn is_init_process(process: &Process) -> bool {
process.pid() == INIT_PROCESS_PID
}

178
kernel/src/process/kill.rs Normal file

@@ -0,0 +1,178 @@
// SPDX-License-Identifier: MPL-2.0
use super::{
posix_thread::PosixThreadExt,
process_table,
signal::signals::{user::UserSignal, Signal},
Pgid, Pid, Process, Sid, Uid,
};
use crate::{
prelude::*,
thread::{thread_table, Tid},
};
/// Sends a signal to a process, using the current process as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to this particular target process.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn kill(pid: Pid, signal: Option<UserSignal>) -> Result<()> {
let process = process_table::get_process(pid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "the target process does not exist"))?;
kill_process(&process, signal)
}
/// Sends a signal to all processes in a group, using the current process
/// as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to the target group.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn kill_group(pgid: Pgid, signal: Option<UserSignal>) -> Result<()> {
let process_group = process_table::get_process_group(&pgid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "target group does not exist"))?;
let inner = process_group.inner.lock();
for process in inner.processes.values() {
kill_process(process, signal)?;
}
Ok(())
}
/// Sends a signal to a target thread, using the current process
/// as the sender.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn tgkill(tid: Tid, tgid: Pid, signal: Option<UserSignal>) -> Result<()> {
let thread = thread_table::get_thread(tid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "target thread does not exist"))?;
if thread.status().is_exited() {
return Ok(());
}
let posix_thread = thread.as_posix_thread().unwrap();
// Check tgid
let pid = posix_thread.process().pid();
if pid != tgid {
return_errno_with_message!(
Errno::EINVAL,
"the combination of tgid and pid is not valid"
);
}
// Check permission
let signum = signal.map(|signal| signal.num());
let sender = current_thread_sender_ids();
posix_thread.check_signal_perm(signum.as_ref(), &sender)?;
if let Some(signal) = signal {
posix_thread.enqueue_signal(Box::new(signal));
}
Ok(())
}
/// Sends a signal to all processes except the current process and the init process, using
/// the current process as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to the target group.
pub fn kill_all(signal: Option<UserSignal>) -> Result<()> {
let current = current!();
for process in process_table::process_table().iter() {
if Arc::ptr_eq(&current, process) || process.is_init_process() {
continue;
}
kill_process(process, signal)?;
}
Ok(())
}
fn kill_process(process: &Process, signal: Option<UserSignal>) -> Result<()> {
let threads = process.threads().lock();
let posix_threads = threads
.iter()
.map(|thread| thread.as_posix_thread().unwrap());
// First check permission
let signum = signal.map(|signal| signal.num());
let sender_ids = current_thread_sender_ids();
let mut permitted_threads = {
posix_threads.clone().filter(|posix_thread| {
posix_thread
.check_signal_perm(signum.as_ref(), &sender_ids)
.is_ok()
})
};
if permitted_threads.clone().count() == 0 {
return_errno_with_message!(Errno::EPERM, "cannot send signal to the target process");
}
let Some(signal) = signal else { return Ok(()) };
// Send the signal to any thread that does not block the signal.
for thread in permitted_threads.clone() {
if !thread.has_signal_blocked(&signal) {
thread.enqueue_signal(Box::new(signal));
return Ok(());
}
}
// If all threads block the signal, send the signal to the first thread.
let first_thread = permitted_threads.next().unwrap();
first_thread.enqueue_signal(Box::new(signal));
Ok(())
}
fn current_thread_sender_ids() -> SignalSenderIds {
let current_thread = current_thread!();
let current_posix_thread = current_thread.as_posix_thread().unwrap();
let current_process = current_posix_thread.process();
let credentials = current_posix_thread.credentials();
let ruid = credentials.ruid();
let euid = credentials.euid();
let sid = current_process.session().unwrap().sid();
SignalSenderIds::new(ruid, euid, sid)
}
/// The ids of the signal sender process.
///
/// This struct now includes effective user id, real user id and session id.
pub(super) struct SignalSenderIds {
ruid: Uid,
euid: Uid,
sid: Sid,
}
impl SignalSenderIds {
fn new(ruid: Uid, euid: Uid, sid: Sid) -> Self {
Self { ruid, euid, sid }
}
pub(super) fn ruid(&self) -> Uid {
self.ruid
}
pub(super) fn euid(&self) -> Uid {
self.euid
}
pub(super) fn sid(&self) -> Sid {
self.sid
}
}
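
Usage sketch (hypothetical helper, not part of the commit): passing `None` as the signal mirrors Linux's `kill(pid, 0)`, performing only the existence and permission checks without delivering anything.

fn can_signal(pid: Pid) -> bool {
    // Ok(()) means the target exists and the caller may signal it;
    // Err(ESRCH) or Err(EPERM) otherwise.
    kill(pid, None).is_ok()
}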

38
kernel/src/process/mod.rs Normal file

@@ -0,0 +1,38 @@
// SPDX-License-Identifier: MPL-2.0
mod clone;
pub mod credentials;
mod exit;
mod kill;
pub mod posix_thread;
#[allow(clippy::module_inception)]
mod process;
mod process_filter;
pub mod process_table;
mod process_vm;
mod program_loader;
mod rlimit;
pub mod signal;
mod status;
pub mod sync;
mod term_status;
mod wait;
pub use clone::{clone_child, CloneArgs, CloneFlags};
pub use credentials::{Credentials, Gid, Uid};
pub use exit::do_exit_group;
pub use kill::{kill, kill_all, kill_group, tgkill};
pub use process::{
ExitCode, JobControl, Pgid, Pid, Process, ProcessBuilder, ProcessGroup, Session, Sid, Terminal,
};
pub use process_filter::ProcessFilter;
pub use process_vm::{MAX_ARGV_NUMBER, MAX_ARG_LEN, MAX_ENVP_NUMBER, MAX_ENV_LEN};
pub use program_loader::{check_executable_file, load_program_to_vm};
pub use rlimit::ResourceType;
pub use term_status::TermStatus;
pub use wait::{wait_child_exit, WaitOptions};
pub(super) fn init() {
process::init();
posix_thread::futex::init();
}

117
kernel/src/process/posix_thread/builder.rs Normal file

@@ -0,0 +1,117 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use ostd::user::UserSpace;
use super::PosixThread;
use crate::{
prelude::*,
process::{
posix_thread::name::ThreadName,
signal::{sig_mask::AtomicSigMask, sig_queues::SigQueues},
Credentials, Process,
},
thread::{status::ThreadStatus, task, thread_table, Thread, Tid},
time::{clocks::ProfClock, TimerManager},
};
/// The builder to build a posix thread
pub struct PosixThreadBuilder {
// The essential part
tid: Tid,
user_space: Arc<UserSpace>,
process: Weak<Process>,
credentials: Credentials,
// Optional part
thread_name: Option<ThreadName>,
set_child_tid: Vaddr,
clear_child_tid: Vaddr,
sig_mask: AtomicSigMask,
sig_queues: SigQueues,
}
impl PosixThreadBuilder {
pub fn new(tid: Tid, user_space: Arc<UserSpace>, credentials: Credentials) -> Self {
Self {
tid,
user_space,
process: Weak::new(),
credentials,
thread_name: None,
set_child_tid: 0,
clear_child_tid: 0,
sig_mask: AtomicSigMask::new_empty(),
sig_queues: SigQueues::new(),
}
}
pub fn process(mut self, process: Weak<Process>) -> Self {
self.process = process;
self
}
pub fn thread_name(mut self, thread_name: Option<ThreadName>) -> Self {
self.thread_name = thread_name;
self
}
pub fn set_child_tid(mut self, set_child_tid: Vaddr) -> Self {
self.set_child_tid = set_child_tid;
self
}
pub fn clear_child_tid(mut self, clear_child_tid: Vaddr) -> Self {
self.clear_child_tid = clear_child_tid;
self
}
pub fn sig_mask(mut self, sig_mask: AtomicSigMask) -> Self {
self.sig_mask = sig_mask;
self
}
pub fn build(self) -> Arc<Thread> {
let Self {
tid,
user_space,
process,
credentials,
thread_name,
set_child_tid,
clear_child_tid,
sig_mask,
sig_queues,
} = self;
let thread = Arc::new_cyclic(|thread_ref| {
let task = task::create_new_user_task(user_space, thread_ref.clone());
let status = ThreadStatus::Init;
let prof_clock = ProfClock::new();
let virtual_timer_manager = TimerManager::new(prof_clock.user_clock().clone());
let prof_timer_manager = TimerManager::new(prof_clock.clone());
let posix_thread = PosixThread {
process,
name: Mutex::new(thread_name),
set_child_tid: Mutex::new(set_child_tid),
clear_child_tid: Mutex::new(clear_child_tid),
credentials,
sig_mask,
sig_queues,
sig_context: Mutex::new(None),
sig_stack: Mutex::new(None),
robust_list: Mutex::new(None),
prof_clock,
virtual_timer_manager,
prof_timer_manager,
};
Thread::new(tid, task, posix_thread, status)
});
thread_table::add_thread(thread.clone());
thread
}
}
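
A minimal usage sketch (hypothetical; it mirrors how clone.rs drives the builder): the essential parts go through `new`, everything else through the chained setters, and `build` registers the thread in the thread table.

fn build_plain_thread(tid: Tid, user_space: Arc<UserSpace>) -> Arc<Thread> {
    // Illustrative only: a thread with root credentials, no name,
    // and default signal state.
    let credentials = Credentials::new_root();
    PosixThreadBuilder::new(tid, user_space, credentials).build()
}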

66
kernel/src/process/posix_thread/exit.rs Normal file

@@ -0,0 +1,66 @@
// SPDX-License-Identifier: MPL-2.0
use super::{futex::futex_wake, robust_list::wake_robust_futex, PosixThread, PosixThreadExt};
use crate::{
prelude::*,
process::{do_exit_group, TermStatus},
thread::{thread_table, Thread, Tid},
};
/// Exits the thread if the thread is a POSIX thread.
///
/// # Panics
///
/// If the thread is not a POSIX thread, this method will panic.
pub fn do_exit(thread: Arc<Thread>, term_status: TermStatus) -> Result<()> {
if thread.status().is_exited() {
return Ok(());
}
thread.exit();
let tid = thread.tid();
let posix_thread = thread.as_posix_thread().unwrap();
let mut clear_ctid = posix_thread.clear_child_tid().lock();
// If clear_ctid != 0, do a futex wake and write zero to the clear_ctid address.
if *clear_ctid != 0 {
futex_wake(*clear_ctid, 1)?;
// FIXME: the correct write length?
CurrentUserSpace::get()
.write_val(*clear_ctid, &0u32)
.unwrap();
*clear_ctid = 0;
}
// Exit the robust list: walk the robust list, mark futex words as dead, and do futex wakes.
wake_robust_list(posix_thread, tid);
if tid != posix_thread.process().pid() {
// We don't remove main thread.
// The main thread is removed when the process is reaped.
thread_table::remove_thread(tid);
}
if posix_thread.is_main_thread(tid) || posix_thread.is_last_thread() {
// exit current process.
do_exit_group(term_status);
}
futex_wake(Arc::as_ptr(&posix_thread.process()) as Vaddr, 1)?;
Ok(())
}
/// Walks the robust futex list, marking futexes dead and waking waiters.
/// It corresponds to Linux's exit_robust_list(); errors are silently ignored.
fn wake_robust_list(thread: &PosixThread, tid: Tid) {
let mut robust_list = thread.robust_list.lock();
let list_head = match *robust_list {
Some(robust_list_head) => robust_list_head,
None => return,
};
trace!("wake the rubust_list: {:?}", list_head);
for futex_addr in list_head.futexes() {
wake_robust_futex(futex_addr, tid).unwrap();
}
*robust_list = None;
}

410
kernel/src/process/posix_thread/futex.rs Normal file

@@ -0,0 +1,410 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink};
use ostd::{
cpu::num_cpus,
sync::{Waiter, Waker},
};
use spin::Once;
use crate::prelude::*;
type FutexBitSet = u32;
type FutexBucketRef = Arc<Mutex<FutexBucket>>;
const FUTEX_OP_MASK: u32 = 0x0000_000F;
const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0;
const FUTEX_BITSET_MATCH_ANY: FutexBitSet = 0xFFFF_FFFF;
/// Does a futex wait.
pub fn futex_wait(futex_addr: u64, futex_val: i32, timeout: &Option<FutexTimeout>) -> Result<()> {
futex_wait_bitset(futex_addr as _, futex_val, timeout, FUTEX_BITSET_MATCH_ANY)
}
/// Does a futex wait with a bitset.
pub fn futex_wait_bitset(
futex_addr: Vaddr,
futex_val: i32,
timeout: &Option<FutexTimeout>,
bitset: FutexBitSet,
) -> Result<()> {
debug!(
"futex_wait_bitset addr: {:#x}, val: {}, timeout: {:?}, bitset: {:#x}",
futex_addr, futex_val, timeout, bitset
);
let futex_key = FutexKey::new(futex_addr, bitset);
let (futex_item, waiter) = FutexItem::create(futex_key);
let (_, futex_bucket_ref) = get_futex_bucket(futex_key);
// lock futex bucket ref here to avoid data race
let mut futex_bucket = futex_bucket_ref.lock();
if futex_key.load_val() != futex_val {
return_errno_with_message!(Errno::EAGAIN, "futex value does not match");
}
futex_bucket.add_item(futex_item);
// drop lock
drop(futex_bucket);
// TODO: wait on the futex item with a timeout.
waiter.wait();
Ok(())
}
/// Does a futex wake.
pub fn futex_wake(futex_addr: Vaddr, max_count: usize) -> Result<usize> {
futex_wake_bitset(futex_addr, max_count, FUTEX_BITSET_MATCH_ANY)
}
/// Does a futex wake with a bitset.
pub fn futex_wake_bitset(
futex_addr: Vaddr,
max_count: usize,
bitset: FutexBitSet,
) -> Result<usize> {
debug!(
"futex_wake_bitset addr: {:#x}, max_count: {}, bitset: {:#x}",
futex_addr, max_count, bitset
);
let futex_key = FutexKey::new(futex_addr, bitset);
let (_, futex_bucket_ref) = get_futex_bucket(futex_key);
let mut futex_bucket = futex_bucket_ref.lock();
let res = futex_bucket.remove_and_wake_items(futex_key, max_count);
drop(futex_bucket);
Ok(res)
}
/// Does a futex requeue.
pub fn futex_requeue(
futex_addr: Vaddr,
max_nwakes: usize,
max_nrequeues: usize,
futex_new_addr: Vaddr,
) -> Result<usize> {
if futex_new_addr == futex_addr {
return futex_wake(futex_addr, max_nwakes);
}
let futex_key = FutexKey::new(futex_addr, FUTEX_BITSET_MATCH_ANY);
let futex_new_key = FutexKey::new(futex_new_addr, FUTEX_BITSET_MATCH_ANY);
let (bucket_idx, futex_bucket_ref) = get_futex_bucket(futex_key);
let (new_bucket_idx, futex_new_bucket_ref) = get_futex_bucket(futex_new_key);
let nwakes = {
if bucket_idx == new_bucket_idx {
let mut futex_bucket = futex_bucket_ref.lock();
let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes);
futex_bucket.update_item_keys(futex_key, futex_new_key, max_nrequeues);
drop(futex_bucket);
nwakes
} else {
let (mut futex_bucket, mut futex_new_bucket) = {
if bucket_idx < new_bucket_idx {
let futex_bucket = futex_bucket_ref.lock();
let futex_new_bucket = futex_new_bucket_ref.lock();
(futex_bucket, futex_new_bucket)
} else {
// bucket_idx > new_bucket_idx
let futex_new_bucket = futex_new_bucket_ref.lock();
let futex_bucket = futex_bucket_ref.lock();
(futex_bucket, futex_new_bucket)
}
};
let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes);
futex_bucket.requeue_items_to_another_bucket(
futex_key,
&mut futex_new_bucket,
futex_new_key,
max_nrequeues,
);
nwakes
}
};
Ok(nwakes)
}
static FUTEX_BUCKETS: Once<FutexBucketVec> = Once::new();
/// Get the futex hash bucket count.
///
/// This number is calculated the same way as Linux's:
/// <https://github.com/torvalds/linux/blob/master/kernel/futex/core.c>
fn get_bucket_count() -> usize {
((1 << 8) * num_cpus()).next_power_of_two() as usize
}
fn get_futex_bucket(key: FutexKey) -> (usize, FutexBucketRef) {
FUTEX_BUCKETS.get().unwrap().get_bucket(key)
}
/// Initialize the futex system.
pub fn init() {
FUTEX_BUCKETS.call_once(|| FutexBucketVec::new(get_bucket_count()));
}
#[derive(Debug, Clone)]
pub struct FutexTimeout {}
impl FutexTimeout {
pub fn new() -> Self {
todo!()
}
}
struct FutexBucketVec {
vec: Vec<FutexBucketRef>,
}
impl FutexBucketVec {
pub fn new(size: usize) -> FutexBucketVec {
let mut buckets = FutexBucketVec {
vec: Vec::with_capacity(size),
};
for _ in 0..size {
let bucket = Arc::new(Mutex::new(FutexBucket::new()));
buckets.vec.push(bucket);
}
buckets
}
pub fn get_bucket(&self, key: FutexKey) -> (usize, FutexBucketRef) {
let index = (self.vec.len() - 1) & {
// The addr is a multiple of 4, so we ignore the last 2 bits
let addr = key.addr() >> 2;
// simple hash
addr / self.size()
};
(index, self.vec[index].clone())
}
fn size(&self) -> usize {
self.vec.len()
}
}
struct FutexBucket {
items: LinkedList<FutexItemAdapter>,
}
intrusive_adapter!(FutexItemAdapter = Box<FutexItem>: FutexItem { link: LinkedListAtomicLink });
impl FutexBucket {
pub fn new() -> FutexBucket {
FutexBucket {
items: LinkedList::new(FutexItemAdapter::new()),
}
}
pub fn add_item(&mut self, item: Box<FutexItem>) {
self.items.push_back(item);
}
pub fn remove_item(&mut self, item: &FutexItem) {
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() {
// The item_cursor has been checked not null.
let futex_item = item_cursor.get().unwrap();
if !futex_item.match_up(item) {
item_cursor.move_next();
continue;
} else {
let _ = item_cursor.remove();
break;
}
}
}
pub fn remove_and_wake_items(&mut self, key: FutexKey, max_count: usize) -> usize {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_count {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let item = item_cursor.remove().unwrap();
item.wake();
count += 1;
}
count
}
pub fn update_item_keys(&mut self, key: FutexKey, new_key: FutexKey, max_count: usize) {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_count {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let mut item = item_cursor.remove().unwrap();
item.key = new_key;
item_cursor.insert_before(item);
count += 1;
}
}
pub fn requeue_items_to_another_bucket(
&mut self,
key: FutexKey,
another: &mut Self,
new_key: FutexKey,
max_nrequeues: usize,
) {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_nrequeues {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let mut item = item_cursor.remove().unwrap();
item.key = new_key;
another.add_item(item);
count += 1;
}
}
}
struct FutexItem {
key: FutexKey,
waker: Arc<Waker>,
link: LinkedListAtomicLink,
}
impl FutexItem {
pub fn create(key: FutexKey) -> (Box<Self>, Waiter) {
let (waiter, waker) = Waiter::new_pair();
let futex_item = Box::new(FutexItem {
key,
waker,
link: LinkedListAtomicLink::new(),
});
(futex_item, waiter)
}
pub fn wake(&self) {
self.waker.wake_up();
}
pub fn match_up(&self, another: &Self) -> bool {
self.key.match_up(&another.key)
}
}
// The addr of a futex; it is used to distinguish different futex words.
#[derive(Debug, Clone, Copy)]
struct FutexKey {
addr: Vaddr,
bitset: FutexBitSet,
}
impl FutexKey {
pub fn new(addr: Vaddr, bitset: FutexBitSet) -> Self {
Self { addr, bitset }
}
pub fn load_val(&self) -> i32 {
// FIXME: how to implement an atomic load?
warn!("implement an atomic load");
CurrentUserSpace::get().read_val(self.addr).unwrap()
}
pub fn addr(&self) -> Vaddr {
self.addr
}
pub fn bitset(&self) -> FutexBitSet {
self.bitset
}
pub fn match_up(&self, another: &Self) -> bool {
self.addr == another.addr && (self.bitset & another.bitset) != 0
}
}
// The implementation is from Occlum.
#[derive(PartialEq, Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum FutexOp {
FUTEX_WAIT = 0,
FUTEX_WAKE = 1,
FUTEX_FD = 2,
FUTEX_REQUEUE = 3,
FUTEX_CMP_REQUEUE = 4,
FUTEX_WAKE_OP = 5,
FUTEX_LOCK_PI = 6,
FUTEX_UNLOCK_PI = 7,
FUTEX_TRYLOCK_PI = 8,
FUTEX_WAIT_BITSET = 9,
FUTEX_WAKE_BITSET = 10,
}
impl FutexOp {
pub fn from_u32(bits: u32) -> Result<FutexOp> {
match bits {
0 => Ok(FutexOp::FUTEX_WAIT),
1 => Ok(FutexOp::FUTEX_WAKE),
2 => Ok(FutexOp::FUTEX_FD),
3 => Ok(FutexOp::FUTEX_REQUEUE),
4 => Ok(FutexOp::FUTEX_CMP_REQUEUE),
5 => Ok(FutexOp::FUTEX_WAKE_OP),
6 => Ok(FutexOp::FUTEX_LOCK_PI),
7 => Ok(FutexOp::FUTEX_UNLOCK_PI),
8 => Ok(FutexOp::FUTEX_TRYLOCK_PI),
9 => Ok(FutexOp::FUTEX_WAIT_BITSET),
10 => Ok(FutexOp::FUTEX_WAKE_BITSET),
_ => return_errno_with_message!(Errno::EINVAL, "Unknown futex op"),
}
}
}
bitflags! {
pub struct FutexFlags : u32 {
const FUTEX_PRIVATE = 128;
const FUTEX_CLOCK_REALTIME = 256;
}
}
impl FutexFlags {
pub fn from_u32(bits: u32) -> Result<FutexFlags> {
FutexFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown futex flags"))
}
}
pub fn futex_op_and_flags_from_u32(bits: u32) -> Result<(FutexOp, FutexFlags)> {
let op = {
let op_bits = bits & FUTEX_OP_MASK;
FutexOp::from_u32(op_bits)?
};
let flags = {
let flags_bits = bits & FUTEX_FLAGS_MASK;
FutexFlags::from_u32(flags_bits)?
};
Ok((op, flags))
}
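// A small sketch of decoding the raw `futex_op` syscall argument with the
// helper above: 9 | 128 encodes FUTEX_WAIT_BITSET together with the
// FUTEX_PRIVATE flag.
fn decode_sketch() -> Result<()> {
    let (op, flags) = futex_op_and_flags_from_u32(9 | 128)?;
    assert_eq!(op, FutexOp::FUTEX_WAIT_BITSET);
    assert!(flags.contains(FutexFlags::FUTEX_PRIVATE));
    Ok(())
}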

View File

@ -0,0 +1,267 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use core::sync::atomic::Ordering;
use aster_rights::{ReadOp, WriteOp};
use super::{
kill::SignalSenderIds,
signal::{
sig_mask::{AtomicSigMask, SigMask, SigSet},
sig_num::SigNum,
sig_queues::SigQueues,
signals::Signal,
SigEvents, SigEventsFilter, SigStack,
},
Credentials, Process,
};
use crate::{
events::Observer,
prelude::*,
process::signal::constants::SIGCONT,
thread::Tid,
time::{clocks::ProfClock, Timer, TimerManager},
};
mod builder;
mod exit;
pub mod futex;
mod name;
mod posix_thread_ext;
mod robust_list;
pub use builder::PosixThreadBuilder;
pub use exit::do_exit;
pub use name::{ThreadName, MAX_THREAD_NAME_LEN};
pub use posix_thread_ext::PosixThreadExt;
pub use robust_list::RobustListHead;
pub struct PosixThread {
// Immutable part
process: Weak<Process>,
// Mutable part
name: Mutex<Option<ThreadName>>,
// Linux specific attributes.
// https://man7.org/linux/man-pages/man2/set_tid_address.2.html
set_child_tid: Mutex<Vaddr>,
clear_child_tid: Mutex<Vaddr>,
robust_list: Mutex<Option<RobustListHead>>,
/// Process credentials. At the kernel level, credentials are a per-thread attribute.
credentials: Credentials,
// Signal
/// Blocked signals
sig_mask: AtomicSigMask,
/// Thread-directed sigqueue
sig_queues: SigQueues,
/// Signal handler ucontext address
/// FIXME: This field may be removed. For glibc applications with the RESTORER flag set, the sig_context is always equal to rsp.
sig_context: Mutex<Option<Vaddr>>,
sig_stack: Mutex<Option<SigStack>>,
/// A profiling clock measures the user CPU time and kernel CPU time in the thread.
prof_clock: Arc<ProfClock>,
/// A manager that manages timers based on the user CPU time of the current thread.
virtual_timer_manager: Arc<TimerManager>,
/// A manager that manages timers based on the profiling clock of the current thread.
prof_timer_manager: Arc<TimerManager>,
}
impl PosixThread {
pub fn process(&self) -> Arc<Process> {
self.process.upgrade().unwrap()
}
pub fn weak_process(&self) -> Weak<Process> {
Weak::clone(&self.process)
}
pub fn thread_name(&self) -> &Mutex<Option<ThreadName>> {
&self.name
}
pub fn set_child_tid(&self) -> &Mutex<Vaddr> {
&self.set_child_tid
}
pub fn clear_child_tid(&self) -> &Mutex<Vaddr> {
&self.clear_child_tid
}
/// Get the reference to the signal mask of the thread.
///
/// Note that while this function offers mutable access to the signal mask,
/// it is not sound for callers other than the current thread to modify the
/// signal mask. They may only read the signal mask.
pub fn sig_mask(&self) -> &AtomicSigMask {
&self.sig_mask
}
pub fn sig_pending(&self) -> SigSet {
self.sig_queues.sig_pending()
}
/// Returns whether the thread has some pending signals
/// that are not blocked.
pub fn has_pending(&self) -> bool {
let blocked = self.sig_mask().load(Ordering::Relaxed);
self.sig_queues.has_pending(blocked)
}
/// Returns whether the signal is blocked by the thread.
pub(in crate::process) fn has_signal_blocked(&self, signal: &dyn Signal) -> bool {
self.sig_mask.contains(signal.num(), Ordering::Relaxed)
}
/// Checks whether the signal can be delivered to the thread.
///
/// For a signal to be delivered to the thread, the sending thread must either
/// be privileged, or the real or effective user ID of the sending thread must equal
/// the real or saved set-user-ID of the target thread.
///
/// For SIGCONT, the sending and receiving processes should belong to the same session.
pub(in crate::process) fn check_signal_perm(
&self,
signum: Option<&SigNum>,
sender: &SignalSenderIds,
) -> Result<()> {
if sender.euid().is_root() {
return Ok(());
}
if let Some(signum) = signum
&& *signum == SIGCONT
{
let receiver_sid = self.process().session().unwrap().sid();
if receiver_sid == sender.sid() {
return Ok(());
}
return_errno_with_message!(
Errno::EPERM,
"sigcont requires that sender and receiver belongs to the same session"
);
}
let (receiver_ruid, receiver_suid) = {
let credentials = self.credentials();
(credentials.ruid(), credentials.suid())
};
// FIXME: further check the below code to ensure the behavior is same as Linux. According
// to man(2) kill, the real or effective user ID of the sending process must equal the
// real or saved set-user-ID of the target process.
if sender.ruid() == receiver_ruid
|| sender.ruid() == receiver_suid
|| sender.euid() == receiver_ruid
|| sender.euid() == receiver_suid
{
return Ok(());
}
return_errno_with_message!(Errno::EPERM, "sending signal to the thread is not allowed.");
}
/// Enqueues a thread-directed signal. This method should only be used to enqueue
/// kernel signals and fault signals.
pub fn enqueue_signal(&self, signal: Box<dyn Signal>) {
self.sig_queues.enqueue(signal);
}
/// Returns a reference to the profiling clock of the current thread.
pub fn prof_clock(&self) -> &Arc<ProfClock> {
&self.prof_clock
}
/// Creates a timer based on the profiling CPU clock of the current thread.
pub fn create_prof_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.prof_timer_manager.create_timer(func)
}
/// Creates a timer based on the user CPU clock of the current thread.
pub fn create_virtual_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.virtual_timer_manager.create_timer(func)
}
/// Checks the `TimerCallback`s that are managed by the `prof_timer_manager`.
/// If any have timed out, call the corresponding callback functions.
pub fn process_expired_timers(&self) {
self.prof_timer_manager.process_expired_timers();
}
pub fn dequeue_signal(&self, mask: &SigMask) -> Option<Box<dyn Signal>> {
self.sig_queues.dequeue(mask)
}
pub fn register_sigqueue_observer(
&self,
observer: Weak<dyn Observer<SigEvents>>,
filter: SigEventsFilter,
) {
self.sig_queues.register_observer(observer, filter);
}
pub fn unregiser_sigqueue_observer(&self, observer: &Weak<dyn Observer<SigEvents>>) {
self.sig_queues.unregister_observer(observer);
}
pub fn sig_context(&self) -> &Mutex<Option<Vaddr>> {
&self.sig_context
}
pub fn sig_stack(&self) -> &Mutex<Option<SigStack>> {
&self.sig_stack
}
pub fn robust_list(&self) -> &Mutex<Option<RobustListHead>> {
&self.robust_list
}
fn is_main_thread(&self, tid: Tid) -> bool {
let process = self.process();
let pid = process.pid();
tid == pid
}
fn is_last_thread(&self) -> bool {
let process = self.process.upgrade().unwrap();
let threads = process.threads().lock();
threads
.iter()
.filter(|thread| !thread.status().is_exited())
.count()
== 0
}
/// Gets the read-only credentials of the thread.
pub fn credentials(&self) -> Credentials<ReadOp> {
self.credentials.dup().restrict()
}
/// Gets the write-only credentials of the current thread.
///
/// It is illegal to mutate the credentials from a thread other than the
/// current thread. For performance reasons, this function only checks it
/// using debug assertions.
pub fn credentials_mut(&self) -> Credentials<WriteOp> {
debug_assert!(core::ptr::eq(
current_thread!().as_posix_thread().unwrap(),
self
));
self.credentials.dup().restrict()
}
}
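// A minimal sketch of the thread-directed signal flow, reusing `KernelSignal`
// and `SIGALRM` from elsewhere in this commit (both are assumptions here, as
// this file does not import them); actual delivery logic is elided.
fn signal_flow_sketch() {
    let thread = current_thread!();
    let posix_thread = thread.as_posix_thread().unwrap();
    posix_thread.enqueue_signal(Box::new(KernelSignal::new(SIGALRM)));
    // A signal can be dequeued only if it is not blocked by the current mask.
    let blocked = posix_thread.sig_mask().load(Ordering::Relaxed);
    if let Some(_signal) = posix_thread.dequeue_signal(&blocked) {
        // Deliver the signal according to its disposition.
    }
}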

View File

@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
pub const MAX_THREAD_NAME_LEN: usize = 16;
#[derive(Debug)]
pub struct ThreadName {
inner: [u8; MAX_THREAD_NAME_LEN],
count: usize,
}
impl Default for ThreadName {
fn default() -> Self {
ThreadName::new()
}
}
impl ThreadName {
pub fn new() -> Self {
ThreadName {
inner: [0; MAX_THREAD_NAME_LEN],
count: 0,
}
}
pub fn new_from_executable_path(executable_path: &str) -> Result<Self> {
let mut thread_name = ThreadName::new();
let executable_file_name = executable_path
.split('/')
.last()
.ok_or(Error::with_message(Errno::EINVAL, "invalid elf path"))?;
let name = CString::new(executable_file_name)?;
thread_name.set_name(&name)?;
Ok(thread_name)
}
pub fn set_name(&mut self, name: &CStr) -> Result<()> {
let bytes = name.to_bytes_with_nul();
let bytes_len = bytes.len();
if bytes_len > MAX_THREAD_NAME_LEN {
// if len > MAX_THREAD_NAME_LEN, truncate it.
self.count = MAX_THREAD_NAME_LEN;
self.inner[..MAX_THREAD_NAME_LEN].clone_from_slice(&bytes[..MAX_THREAD_NAME_LEN]);
self.inner[MAX_THREAD_NAME_LEN - 1] = 0;
return Ok(());
}
self.count = bytes_len;
self.inner[..bytes_len].clone_from_slice(bytes);
Ok(())
}
pub fn name(&self) -> Result<Option<&CStr>> {
Ok(Some(CStr::from_bytes_until_nul(&self.inner)?))
}
}
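// A usage sketch: deriving a thread name from an executable path. Names longer
// than MAX_THREAD_NAME_LEN bytes (NUL included) are truncated; the path used
// here is purely illustrative.
fn thread_name_sketch() -> Result<()> {
    let name = ThreadName::new_from_executable_path("/bin/busybox")?;
    assert_eq!(name.name()?.unwrap().to_bytes(), b"busybox");
    Ok(())
}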

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::{
cpu::UserContext,
user::{UserContextApi, UserSpace},
};
use super::{builder::PosixThreadBuilder, name::ThreadName, PosixThread};
use crate::{
fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD},
prelude::*,
process::{process_vm::ProcessVm, program_loader::load_program_to_vm, Credentials, Process},
thread::{Thread, Tid},
};
pub trait PosixThreadExt {
fn as_posix_thread(&self) -> Option<&PosixThread>;
#[allow(clippy::too_many_arguments)]
fn new_posix_thread_from_executable(
tid: Tid,
credentials: Credentials,
process_vm: &ProcessVm,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>>;
}
impl PosixThreadExt for Thread {
/// This function should only be called when launching the shell.
fn new_posix_thread_from_executable(
tid: Tid,
credentials: Credentials,
process_vm: &ProcessVm,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
let elf_file = {
let fs_path = FsPath::new(AT_FDCWD, executable_path)?;
fs_resolver.lookup(&fs_path)?
};
let (_, elf_load_info) =
load_program_to_vm(process_vm, elf_file, argv, envp, fs_resolver, 1)?;
let vm_space = process_vm.root_vmar().vm_space().clone();
let mut cpu_ctx = UserContext::default();
cpu_ctx.set_instruction_pointer(elf_load_info.entry_point() as _);
cpu_ctx.set_stack_pointer(elf_load_info.user_stack_top() as _);
let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
let thread_name = Some(ThreadName::new_from_executable_path(executable_path)?);
let thread_builder = PosixThreadBuilder::new(tid, user_space, credentials)
.thread_name(thread_name)
.process(process);
Ok(thread_builder.build())
}
fn as_posix_thread(&self) -> Option<&PosixThread> {
self.data().downcast_ref::<PosixThread>()
}
}
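// A small sketch: `as_posix_thread` is a checked downcast over `Thread::data`,
// so pure kernel threads simply yield `None`. (`Pid` is assumed to be in scope
// here; it is defined in the parent process module.)
fn pid_of(thread: &Thread) -> Option<Pid> {
    thread.as_posix_thread().map(|posix| posix.process().pid())
}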

View File

@ -0,0 +1,158 @@
// SPDX-License-Identifier: MPL-2.0
//! The implementation of the robust list is from Occlum.
use crate::{
prelude::*,
process::{posix_thread::futex::futex_wake, Pid},
};
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
struct RobustList {
next: Vaddr, // *const Robust list
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
pub struct RobustListHead {
/// Linked list of lock entries
///
/// If it points to the head of the list, then it is the end of the list.
/// If it is an invalid user space pointer or a null pointer, stop iterating
/// the list.
list: RobustList,
/// Specifies the offset from the address of the lock entry to the address
/// of the futex.
futex_offset: isize,
/// Contains transient copy of the address of the lock entry, during list
/// insertion and removal.
list_op_pending: Vaddr, // *const RobustList
}
impl RobustListHead {
/// Returns an iterator over all futexes in the robust list.
///
/// The futex referred to by `list_op_pending`, if any, will be returned as
/// the last item.
pub fn futexes(&self) -> FutexIter<'_> {
FutexIter::new(self)
}
/// Returns the pending futex address, if one exists.
fn pending_futex_addr(&self) -> Option<Vaddr> {
if self.list_op_pending == 0 {
None
} else {
Some(self.futex_addr(self.list_op_pending))
}
}
/// Get the futex address
fn futex_addr(&self, entry_ptr: Vaddr) -> Vaddr {
(entry_ptr as isize + self.futex_offset) as _
}
}
pub struct FutexIter<'a> {
robust_list: &'a RobustListHead,
entry_ptr: Vaddr,
count: isize,
}
impl<'a> FutexIter<'a> {
pub fn new(robust_list: &'a RobustListHead) -> Self {
Self {
robust_list,
entry_ptr: robust_list.list.next,
count: 0,
}
}
// `self.count` is normally a non-negative value used to bound iteration of
// the list, avoiding excessively long or circular lists; the special value
// -1 represents the end of the iterator.
fn set_end(&mut self) {
self.count = -1;
}
fn is_end(&self) -> bool {
self.count < 0
}
}
const ROBUST_LIST_LIMIT: isize = 2048;
impl<'a> Iterator for FutexIter<'a> {
type Item = Vaddr;
fn next(&mut self) -> Option<Self::Item> {
if self.is_end() {
return None;
}
let end_ptr = self.robust_list.list.next;
while self.entry_ptr != end_ptr || self.count == 0 {
if self.count == ROBUST_LIST_LIMIT {
break;
}
if self.entry_ptr == 0 {
return None;
}
let futex_addr = if self.entry_ptr != self.robust_list.list_op_pending {
Some(self.robust_list.futex_addr(self.entry_ptr))
} else {
None
};
let Ok(robust_list) = CurrentUserSpace::get().read_val::<RobustList>(self.entry_ptr)
else {
return None;
};
self.entry_ptr = robust_list.next;
self.count += 1;
if futex_addr.is_some() {
return futex_addr;
}
}
self.set_end();
self.robust_list.pending_futex_addr()
}
}
const FUTEX_WAITERS: u32 = 0x8000_0000;
const FUTEX_OWNER_DIED: u32 = 0x4000_0000;
const FUTEX_TID_MASK: u32 = 0x3FFF_FFFF;
/// Wakes up one robust futex owned by the thread.
/// FIXME: requires atomic operations here
pub fn wake_robust_futex(futex_addr: Vaddr, tid: Pid) -> Result<()> {
let user_space = CurrentUserSpace::get();
let futex_val = {
if futex_addr == 0 {
return_errno_with_message!(Errno::EINVAL, "invalid futext addr");
}
user_space.read_val::<u32>(futex_addr)?
};
let mut old_val = futex_val;
loop {
// This futex may be held by another thread; do nothing
if old_val & FUTEX_TID_MASK != tid {
break;
}
let new_val = (old_val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
let cur_val = user_space.read_val(futex_addr)?;
if cur_val != new_val {
// The futex value has changed, let's retry with current value
old_val = cur_val;
user_space.write_val(futex_addr, &new_val)?;
continue;
}
// Wakeup one waiter
if cur_val & FUTEX_WAITERS != 0 {
debug!("wake robust futex addr: {:?}", futex_addr);
futex_wake(futex_addr, 1)?;
}
break;
}
Ok(())
}
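// A sketch of the thread-exit path: walk the dying thread's robust list and
// wake every robust futex it owns, including the entry a pending lock
// operation points at. `PosixThread` is assumed to be in scope here.
fn wake_robust_list_sketch(thread: &PosixThread, tid: Pid) {
    let robust_list = thread.robust_list().lock();
    if let Some(head) = robust_list.as_ref() {
        for futex_addr in head.futexes() {
            // An error on one entry should not stop the remaining wake-ups.
            let _ = wake_robust_futex(futex_addr, tid);
        }
    }
}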

View File

@ -0,0 +1,215 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{Pid, Process};
use crate::{
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
process::{
posix_thread::{PosixThreadBuilder, PosixThreadExt},
process_vm::ProcessVm,
rlimit::ResourceLimits,
signal::sig_disposition::SigDispositions,
Credentials,
},
sched::nice::Nice,
thread::Thread,
};
pub struct ProcessBuilder<'a> {
// Essential parts
pid: Pid,
executable_path: &'a str,
parent: Weak<Process>,
// Optional parts
main_thread_builder: Option<PosixThreadBuilder>,
argv: Option<Vec<CString>>,
envp: Option<Vec<CString>>,
process_vm: Option<ProcessVm>,
file_table: Option<Arc<Mutex<FileTable>>>,
fs: Option<Arc<RwMutex<FsResolver>>>,
umask: Option<Arc<RwLock<FileCreationMask>>>,
resource_limits: Option<ResourceLimits>,
sig_dispositions: Option<Arc<Mutex<SigDispositions>>>,
credentials: Option<Credentials>,
nice: Option<Nice>,
}
impl<'a> ProcessBuilder<'a> {
pub fn new(pid: Pid, executable_path: &'a str, parent: Weak<Process>) -> Self {
ProcessBuilder {
pid,
executable_path,
parent,
main_thread_builder: None,
argv: None,
envp: None,
process_vm: None,
file_table: None,
fs: None,
umask: None,
resource_limits: None,
sig_dispositions: None,
credentials: None,
nice: None,
}
}
pub fn main_thread_builder(&mut self, builder: PosixThreadBuilder) -> &mut Self {
self.main_thread_builder = Some(builder);
self
}
pub fn process_vm(&mut self, process_vm: ProcessVm) -> &mut Self {
self.process_vm = Some(process_vm);
self
}
pub fn file_table(&mut self, file_table: Arc<Mutex<FileTable>>) -> &mut Self {
self.file_table = Some(file_table);
self
}
pub fn fs(&mut self, fs: Arc<RwMutex<FsResolver>>) -> &mut Self {
self.fs = Some(fs);
self
}
pub fn umask(&mut self, umask: Arc<RwLock<FileCreationMask>>) -> &mut Self {
self.umask = Some(umask);
self
}
pub fn resource_limits(&mut self, resource_limits: ResourceLimits) -> &mut Self {
self.resource_limits = Some(resource_limits);
self
}
pub fn sig_dispositions(&mut self, sig_dispositions: Arc<Mutex<SigDispositions>>) -> &mut Self {
self.sig_dispositions = Some(sig_dispositions);
self
}
pub fn argv(&mut self, argv: Vec<CString>) -> &mut Self {
self.argv = Some(argv);
self
}
pub fn envp(&mut self, envp: Vec<CString>) -> &mut Self {
self.envp = Some(envp);
self
}
pub fn credentials(&mut self, credentials: Credentials) -> &mut Self {
self.credentials = Some(credentials);
self
}
pub fn nice(&mut self, nice: Nice) -> &mut Self {
self.nice = Some(nice);
self
}
fn check_build(&self) -> Result<()> {
if self.main_thread_builder.is_some() {
debug_assert!(self.parent.upgrade().is_some());
debug_assert!(self.argv.is_none());
debug_assert!(self.envp.is_none());
debug_assert!(self.credentials.is_none());
}
if self.main_thread_builder.is_none() {
debug_assert!(self.parent.upgrade().is_none());
debug_assert!(self.argv.is_some());
debug_assert!(self.envp.is_some());
debug_assert!(self.credentials.is_some());
}
Ok(())
}
pub fn build(self) -> Result<Arc<Process>> {
self.check_build()?;
let Self {
pid,
executable_path,
parent,
main_thread_builder,
argv,
envp,
process_vm,
file_table,
fs,
umask,
resource_limits,
sig_dispositions,
credentials,
nice,
} = self;
let process_vm = process_vm.unwrap_or_else(ProcessVm::alloc);
let file_table =
    file_table.unwrap_or_else(|| Arc::new(Mutex::new(FileTable::new_with_stdio())));
let fs = fs.unwrap_or_else(|| Arc::new(RwMutex::new(FsResolver::new())));
let umask = umask.unwrap_or_else(|| Arc::new(RwLock::new(FileCreationMask::default())));
let resource_limits = resource_limits.unwrap_or_default();
let sig_dispositions =
    sig_dispositions.unwrap_or_else(|| Arc::new(Mutex::new(SigDispositions::new())));
let nice = nice.unwrap_or_default();
let process = {
let threads = Vec::new();
Process::new(
pid,
parent,
threads,
executable_path.to_string(),
process_vm,
fs,
file_table,
umask,
resource_limits,
nice,
sig_dispositions,
)
};
let thread = if let Some(thread_builder) = main_thread_builder {
let builder = thread_builder.process(Arc::downgrade(&process));
builder.build()
} else {
Thread::new_posix_thread_from_executable(
pid,
credentials.unwrap(),
process.vm(),
&process.fs().read(),
executable_path,
Arc::downgrade(&process),
argv.unwrap(),
envp.unwrap(),
)?
};
process.threads().lock().push(thread);
process.set_runnable();
Ok(process)
}
}
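// A usage sketch: building a root process without a main-thread builder, so
// the thread is created from the executable (`check_build` requires argv,
// envp, and credentials in this mode). The path is illustrative, and
// `allocate_tid`/`Credentials::new_root` are taken from elsewhere in this
// commit rather than this file's imports.
fn build_sketch() -> Result<Arc<Process>> {
    let pid = allocate_tid();
    let mut builder = ProcessBuilder::new(pid, "/bin/init", Weak::new());
    builder
        .argv(vec![CString::new("/bin/init")?])
        .envp(Vec::new())
        .credentials(Credentials::new_root());
    builder.build()
}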

View File

@ -0,0 +1,171 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(unused_variables)]
use crate::{
prelude::*,
process::{
signal::{
constants::{SIGCONT, SIGHUP},
signals::kernel::KernelSignal,
Pauser,
},
ProcessGroup, Session,
},
};
/// The job control for terminals like tty and pty.
///
/// This struct is used to support shell job control, which allows users to
/// run commands in the foreground or in the background. This struct manages
/// the session and foreground process group for a terminal.
pub struct JobControl {
foreground: SpinLock<Weak<ProcessGroup>>,
session: SpinLock<Weak<Session>>,
pauser: Arc<Pauser>,
}
impl JobControl {
/// Creates a new `JobControl`
pub fn new() -> Self {
Self {
foreground: SpinLock::new(Weak::new()),
session: SpinLock::new(Weak::new()),
pauser: Pauser::new(),
}
}
// *************** Session ***************
/// Returns the session whose controlling terminal is the terminal.
fn session(&self) -> Option<Arc<Session>> {
self.session.lock().upgrade()
}
/// Sets the terminal as the controlling terminal of the `session`.
///
/// # Panics
///
/// This terminal should not belong to any session.
pub fn set_session(&self, session: &Arc<Session>) {
debug_assert!(self.session().is_none());
*self.session.lock() = Arc::downgrade(session);
}
/// Sets the terminal as the controlling terminal of the session of current process.
///
/// # Panics
///
/// This function should only be called in process context.
pub fn set_current_session(&self) -> Result<()> {
if self.session().is_some() {
return_errno_with_message!(
Errno::EPERM,
"the terminal is already controlling terminal of another session"
);
}
let current = current!();
let process_group = current.process_group().unwrap();
*self.foreground.lock() = Arc::downgrade(&process_group);
let session = current.session().unwrap();
*self.session.lock() = Arc::downgrade(&session);
self.pauser.resume_all();
Ok(())
}
/// Releases the current session from this terminal.
pub fn release_current_session(&self) -> Result<()> {
let Some(session) = self.session() else {
return_errno_with_message!(
Errno::ENOTTY,
"the terminal is not controlling terminal now"
);
};
if let Some(foreground) = self.foreground() {
foreground.broadcast_signal(KernelSignal::new(SIGHUP));
foreground.broadcast_signal(KernelSignal::new(SIGCONT));
}
Ok(())
}
// *************** Foreground process group ***************
/// Returns the foreground process group
pub fn foreground(&self) -> Option<Arc<ProcessGroup>> {
self.foreground.lock().upgrade()
}
/// Sets the foreground process group.
///
/// # Panics
///
/// The process group should belong to one session.
pub fn set_foreground(&self, process_group: Option<&Arc<ProcessGroup>>) -> Result<()> {
let Some(process_group) = process_group else {
// FIXME: should we allow this branch?
*self.foreground.lock() = Weak::new();
return Ok(());
};
let session = process_group.session().unwrap();
let Some(terminal_session) = self.session() else {
return_errno_with_message!(
Errno::EPERM,
"the terminal does not become controlling terminal of one session."
);
};
if !Arc::ptr_eq(&terminal_session, &session) {
return_errno_with_message!(
Errno::EPERM,
"the process proup belongs to different session"
);
}
*self.foreground.lock() = Arc::downgrade(process_group);
self.pauser.resume_all();
Ok(())
}
/// Waits until the current process belongs to the foreground process group.
/// If the foreground process group is `None`, this method returns immediately.
///
/// # Panics
///
/// This function should only be called in process context.
pub fn wait_until_in_foreground(&self) -> Result<()> {
// Fast path
if self.current_belongs_to_foreground() {
return Ok(());
}
// Slow path
self.pauser.pause_until(|| {
if self.current_belongs_to_foreground() {
Some(())
} else {
None
}
})
}
fn current_belongs_to_foreground(&self) -> bool {
let Some(foreground) = self.foreground() else {
return true;
};
foreground.contains_process(current!().pid())
}
}
impl Default for JobControl {
fn default() -> Self {
Self::new()
}
}
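// A sketch of how a terminal read path would use the job control: park
// background readers first, so only the foreground process group consumes
// input (SIGTTIN handling is elided here).
fn terminal_read_sketch(job_control: &JobControl) -> Result<()> {
    job_control.wait_until_in_foreground()?;
    // ... read from the terminal's input buffer ...
    Ok(())
}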

View File

@ -0,0 +1,751 @@
// SPDX-License-Identifier: MPL-2.0
use self::timer_manager::PosixTimerManager;
use super::{
posix_thread::PosixThreadExt,
process_table,
process_vm::{Heap, InitStackReader, ProcessVm},
rlimit::ResourceLimits,
signal::{
constants::SIGCHLD,
sig_disposition::SigDispositions,
sig_num::{AtomicSigNum, SigNum},
signals::Signal,
Pauser,
},
status::ProcessStatus,
Credentials, TermStatus,
};
use crate::{
device::tty::open_ntty_as_controlling_terminal,
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
sched::nice::Nice,
thread::{allocate_tid, Thread},
time::clocks::ProfClock,
vm::vmar::Vmar,
};
mod builder;
mod job_control;
mod process_group;
mod session;
mod terminal;
mod timer_manager;
use aster_rights::Full;
use atomic::Atomic;
pub use builder::ProcessBuilder;
pub use job_control::JobControl;
pub use process_group::ProcessGroup;
pub use session::Session;
pub use terminal::Terminal;
/// Process id.
pub type Pid = u32;
/// Process group id.
pub type Pgid = u32;
/// Session Id.
pub type Sid = u32;
pub type ExitCode = u32;
pub(super) fn init() {
timer_manager::init();
}
/// A `Process` represents a set of threads that share the same user space.
pub struct Process {
// Immutable Part
pid: Pid,
process_vm: ProcessVm,
/// Wait for child status changed
children_pauser: Arc<Pauser>,
// Mutable Part
/// The executable path.
executable_path: RwLock<String>,
/// The threads
threads: Mutex<Vec<Arc<Thread>>>,
/// Process status
status: Mutex<ProcessStatus>,
/// Parent process
pub(super) parent: Mutex<Weak<Process>>,
/// Children processes
children: Mutex<BTreeMap<Pid, Arc<Process>>>,
/// Process group
pub(super) process_group: Mutex<Weak<ProcessGroup>>,
/// File table
file_table: Arc<Mutex<FileTable>>,
/// FsResolver
fs: Arc<RwMutex<FsResolver>>,
/// umask
umask: Arc<RwLock<FileCreationMask>>,
/// resource limits
resource_limits: Mutex<ResourceLimits>,
/// Scheduling priority nice value
/// According to POSIX.1, the nice value is a per-process attribute,
/// the threads in a process should share a nice value.
nice: Atomic<Nice>,
// Signal
/// Sig dispositions
sig_dispositions: Arc<Mutex<SigDispositions>>,
/// The signal that the process should receive when parent process exits.
parent_death_signal: AtomicSigNum,
/// A profiling clock measures the user CPU time and kernel CPU time of the current process.
prof_clock: Arc<ProfClock>,
/// A manager that manages timer resources and utilities of the process.
timer_manager: PosixTimerManager,
}
impl Process {
/// Returns the current process.
///
/// It returns `None` if:
/// - the function is called in the bootstrap context;
/// - or if the current task is not associated with a process.
pub fn current() -> Option<Arc<Process>> {
Some(Thread::current()?.as_posix_thread()?.process())
}
#[allow(clippy::too_many_arguments)]
fn new(
pid: Pid,
parent: Weak<Process>,
threads: Vec<Arc<Thread>>,
executable_path: String,
process_vm: ProcessVm,
fs: Arc<RwMutex<FsResolver>>,
file_table: Arc<Mutex<FileTable>>,
umask: Arc<RwLock<FileCreationMask>>,
resource_limits: ResourceLimits,
nice: Nice,
sig_dispositions: Arc<Mutex<SigDispositions>>,
) -> Arc<Self> {
// SIGCHLD does not interrupt the pauser. A child process will
// resume the paused parent when it exits.
let children_pauser = Pauser::new_with_mask(SIGCHLD.into());
let prof_clock = ProfClock::new();
Arc::new_cyclic(|process_ref: &Weak<Process>| Self {
pid,
threads: Mutex::new(threads),
executable_path: RwLock::new(executable_path),
process_vm,
children_pauser,
status: Mutex::new(ProcessStatus::Uninit),
parent: Mutex::new(parent),
children: Mutex::new(BTreeMap::new()),
process_group: Mutex::new(Weak::new()),
file_table,
fs,
umask,
sig_dispositions,
parent_death_signal: AtomicSigNum::new_empty(),
resource_limits: Mutex::new(resource_limits),
nice: Atomic::new(nice),
timer_manager: PosixTimerManager::new(&prof_clock, process_ref),
prof_clock,
})
}
/// Initializes a user process and runs it.
pub fn spawn_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
// Spawning a user process requires an absolute executable path.
debug_assert!(executable_path.starts_with('/'));
let process = Process::create_user_process(executable_path, argv, envp)?;
open_ntty_as_controlling_terminal(&process)?;
process.run();
Ok(process)
}
fn create_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
let process_builder = {
let pid = allocate_tid();
let parent = Weak::new();
let credentials = Credentials::new_root();
let mut builder = ProcessBuilder::new(pid, executable_path, parent);
builder.argv(argv).envp(envp).credentials(credentials);
builder
};
let process = process_builder.build()?;
// Lock order: session table -> group table -> process table -> group of process
// -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut process_table_mut = process_table::process_table_mut();
// Creates new group
let group = ProcessGroup::new(process.clone());
*process.process_group.lock() = Arc::downgrade(&group);
group_table_mut.insert(group.pgid(), group.clone());
// Creates new session
let session = Session::new(group.clone());
group.inner.lock().session = Arc::downgrade(&session);
session.inner.lock().leader = Some(process.clone());
session_table_mut.insert(session.sid(), session);
process_table_mut.insert(process.pid(), process.clone());
Ok(process)
}
/// Starts running the current process.
pub fn run(&self) {
let threads = self.threads.lock();
// When the process starts running, it should have exactly one thread.
debug_assert!(threads.len() == 1);
debug_assert!(self.is_runnable());
let thread = threads[0].clone();
// Do not hold the lock while running the thread.
drop(threads);
thread.run();
}
// *********** Basic structures ***********
pub fn pid(&self) -> Pid {
self.pid
}
/// Gets the profiling clock of the process.
pub fn prof_clock(&self) -> &Arc<ProfClock> {
&self.prof_clock
}
/// Gets the timer resources and utilities of the process.
pub fn timer_manager(&self) -> &PosixTimerManager {
&self.timer_manager
}
pub fn threads(&self) -> &Mutex<Vec<Arc<Thread>>> {
&self.threads
}
pub fn executable_path(&self) -> String {
self.executable_path.read().clone()
}
pub fn set_executable_path(&self, executable_path: String) {
*self.executable_path.write() = executable_path;
}
pub fn resource_limits(&self) -> &Mutex<ResourceLimits> {
&self.resource_limits
}
pub fn nice(&self) -> &Atomic<Nice> {
&self.nice
}
pub fn main_thread(&self) -> Option<Arc<Thread>> {
self.threads
.lock()
.iter()
.find(|thread| thread.tid() == self.pid)
.cloned()
}
// *********** Parent and child ***********
pub fn parent(&self) -> Option<Arc<Process>> {
self.parent.lock().upgrade()
}
pub fn is_init_process(&self) -> bool {
self.parent().is_none()
}
pub(super) fn children(&self) -> &Mutex<BTreeMap<Pid, Arc<Process>>> {
&self.children
}
pub fn has_child(&self, pid: &Pid) -> bool {
self.children.lock().contains_key(pid)
}
pub fn children_pauser(&self) -> &Arc<Pauser> {
&self.children_pauser
}
// *********** Process group & Session***********
/// Returns the process group ID of the process.
pub fn pgid(&self) -> Pgid {
if let Some(process_group) = self.process_group.lock().upgrade() {
process_group.pgid()
} else {
0
}
}
/// Returns the process group which the process belongs to.
pub fn process_group(&self) -> Option<Arc<ProcessGroup>> {
self.process_group.lock().upgrade()
}
/// Returns whether `self` is the leader of process group.
fn is_group_leader(self: &Arc<Self>) -> bool {
let Some(process_group) = self.process_group() else {
return false;
};
let Some(leader) = process_group.leader() else {
return false;
};
Arc::ptr_eq(self, &leader)
}
/// Returns the session which the process belongs to.
pub fn session(&self) -> Option<Arc<Session>> {
let process_group = self.process_group()?;
process_group.session()
}
/// Returns whether the process is session leader.
pub fn is_session_leader(self: &Arc<Self>) -> bool {
let session = self.session().unwrap();
let Some(leading_process) = session.leader() else {
return false;
};
Arc::ptr_eq(self, &leading_process)
}
/// Moves the process to the new session.
///
/// If the process is already session leader, this method does nothing.
///
/// Otherwise, this method creates a new process group in a new session
/// and moves the process to the session, returning the new session.
///
/// This method may return the following errors:
/// * `EPERM`, if the process is a process group leader, or some existing session
/// or process group has the same ID as the process.
pub fn to_new_session(self: &Arc<Self>) -> Result<Arc<Session>> {
if self.is_session_leader() {
return Ok(self.session().unwrap());
}
if self.is_group_leader() {
return_errno_with_message!(
Errno::EPERM,
"process group leader cannot be moved to new session."
);
}
let session = self.session().unwrap();
// Lock order: session table -> group table -> group of process -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
if session_table_mut.contains_key(&self.pid) {
return_errno_with_message!(Errno::EPERM, "cannot create new session");
}
if group_table_mut.contains_key(&self.pid) {
return_errno_with_message!(Errno::EPERM, "cannot create process group");
}
// Removes the process from old group
if let Some(old_group) = self_group_mut.upgrade() {
let mut group_inner = old_group.inner.lock();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
debug_assert!(session_inner.process_groups.contains_key(&old_group.pgid()));
session_inner.process_groups.remove(&old_group.pgid());
if session_inner.is_empty() {
session_table_mut.remove(&session.sid());
}
}
}
// Creates a new process group
let new_group = ProcessGroup::new(self.clone());
*self_group_mut = Arc::downgrade(&new_group);
group_table_mut.insert(new_group.pgid(), new_group.clone());
// Creates a new session
let new_session = Session::new(new_group.clone());
let mut new_group_inner = new_group.inner.lock();
new_group_inner.session = Arc::downgrade(&new_session);
new_session.inner.lock().leader = Some(self.clone());
session_table_mut.insert(new_session.sid(), new_session.clone());
// Removes the process from session.
let mut session_inner = session.inner.lock();
session_inner.remove_process(self);
Ok(new_session)
}
/// Moves the process to other process group.
///
/// * If the group already exists, the process and the group should belong to the same session.
/// * If the group does not exist, this method creates a new group for the process and moves the
/// process to the group. The group is added to the session of the process.
///
/// This method may return `EPERM` in following cases:
/// * The process is session leader;
/// * The group already exists, but the group does not belong to the same session as the process;
/// * The group does not exist, but `pgid` is not equal to `pid` of the process.
pub fn to_other_group(self: &Arc<Self>, pgid: Pgid) -> Result<()> {
// if the process already belongs to the process group
if self.pgid() == pgid {
return Ok(());
}
if self.is_session_leader() {
return_errno_with_message!(Errno::EPERM, "the process cannot be a session leader");
}
if let Some(process_group) = process_table::get_process_group(&pgid) {
let session = self.session().unwrap();
if !session.contains_process_group(&process_group) {
return_errno_with_message!(
Errno::EPERM,
"the group and process does not belong to same session"
);
}
self.to_specified_group(&process_group)?;
} else {
if pgid != self.pid() {
return_errno_with_message!(
Errno::EPERM,
"the new process group should have the same ID as the process."
);
}
self.to_new_group()?;
}
Ok(())
}
/// Creates a new process group and moves the process to the group.
///
/// The new group will be added to the same session as the process.
fn to_new_group(self: &Arc<Self>) -> Result<()> {
let session = self.session().unwrap();
// Lock order: group table -> group of process -> group inner -> session inner
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
// Removes the process from old group
if let Some(old_group) = self_group_mut.upgrade() {
let mut group_inner = old_group.inner.lock();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
debug_assert!(session_inner.process_groups.contains_key(&old_group.pgid()));
// The old session won't be empty, since we will add a new group to the session.
session_inner.process_groups.remove(&old_group.pgid());
}
}
// Creates a new process group. Adds the new group to group table and session.
let new_group = ProcessGroup::new(self.clone());
let mut new_group_inner = new_group.inner.lock();
let mut session_inner = session.inner.lock();
*self_group_mut = Arc::downgrade(&new_group);
group_table_mut.insert(new_group.pgid(), new_group.clone());
new_group_inner.session = Arc::downgrade(&session);
session_inner
.process_groups
.insert(new_group.pgid(), new_group.clone());
Ok(())
}
/// Moves the process to a specified group.
///
/// The caller needs to ensure that the process and the group belongs to the same session.
fn to_specified_group(self: &Arc<Process>, group: &Arc<ProcessGroup>) -> Result<()> {
// Lock order: group table -> group of process -> group inner (small pgid -> big pgid)
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
// Removes the process from old group
let mut group_inner = if let Some(old_group) = self_group_mut.upgrade() {
// Lock order: group with smaller pgid first
let (mut old_group_inner, group_inner) = match old_group.pgid().cmp(&group.pgid()) {
core::cmp::Ordering::Equal => return Ok(()),
core::cmp::Ordering::Less => (old_group.inner.lock(), group.inner.lock()),
core::cmp::Ordering::Greater => {
let group_inner = group.inner.lock();
let old_group_inner = old_group.inner.lock();
(old_group_inner, group_inner)
}
};
old_group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if old_group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
}
group_inner
} else {
group.inner.lock()
};
// Adds the process to the specified group
group_inner.processes.insert(self.pid, self.clone());
*self_group_mut = Arc::downgrade(group);
Ok(())
}
// ************** Virtual Memory *************
pub fn vm(&self) -> &ProcessVm {
&self.process_vm
}
pub fn root_vmar(&self) -> &Vmar<Full> {
self.process_vm.root_vmar()
}
pub fn heap(&self) -> &Heap {
self.process_vm.heap()
}
pub fn init_stack_reader(&self) -> InitStackReader {
self.process_vm.init_stack_reader()
}
// ************** File system ****************
pub fn file_table(&self) -> &Arc<Mutex<FileTable>> {
&self.file_table
}
pub fn fs(&self) -> &Arc<RwMutex<FsResolver>> {
&self.fs
}
pub fn umask(&self) -> &Arc<RwLock<FileCreationMask>> {
&self.umask
}
// ****************** Signal ******************
pub fn sig_dispositions(&self) -> &Arc<Mutex<SigDispositions>> {
&self.sig_dispositions
}
/// Enqueues a process-directed signal. This method should only be used to enqueue
/// kernel signals and fault signals.
///
/// The signal may be delivered to any one of the threads that does not currently have the
/// signal blocked. If more than one of the threads has the signal unblocked, then this method
/// chooses an arbitrary thread to which to deliver the signal.
///
/// TODO: restrict this method with an access control tool.
pub fn enqueue_signal(&self, signal: impl Signal + Clone + 'static) {
if self.is_zombie() {
return;
}
// TODO: check that the signal is not user signal
// Enqueue signal to the first thread that does not block the signal
let threads = self.threads.lock();
for thread in threads.iter() {
let posix_thread = thread.as_posix_thread().unwrap();
if !posix_thread.has_signal_blocked(&signal) {
posix_thread.enqueue_signal(Box::new(signal));
return;
}
}
// If all threads block the signal, enqueue signal to the first thread
let thread = threads.iter().next().unwrap();
let posix_thread = thread.as_posix_thread().unwrap();
posix_thread.enqueue_signal(Box::new(signal));
}
/// Clears the parent death signal.
pub fn clear_parent_death_signal(&self) {
self.parent_death_signal.clear();
}
/// Sets the parent death signal as `signum`.
pub fn set_parent_death_signal(&self, sig_num: SigNum) {
self.parent_death_signal.set(sig_num);
}
/// Returns the parent death signal.
///
/// The parent death signal is the signal that will be sent to child processes
/// when the process exits.
pub fn parent_death_signal(&self) -> Option<SigNum> {
self.parent_death_signal.as_sig_num()
}
// ******************* Status ********************
fn set_runnable(&self) {
self.status.lock().set_runnable();
}
fn is_runnable(&self) -> bool {
self.status.lock().is_runnable()
}
pub fn is_zombie(&self) -> bool {
self.status.lock().is_zombie()
}
pub fn set_zombie(&self, term_status: TermStatus) {
*self.status.lock() = ProcessStatus::Zombie(term_status);
}
pub fn exit_code(&self) -> Option<ExitCode> {
match &*self.status.lock() {
ProcessStatus::Runnable | ProcessStatus::Uninit => None,
ProcessStatus::Zombie(term_status) => Some(term_status.as_u32()),
}
}
}
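// A usage sketch: spawning the first user process, assuming the root
// filesystem provides /bin/sh (path and environment are illustrative).
fn spawn_sketch() -> Result<()> {
    let argv = vec![CString::new("/bin/sh")?];
    let envp = vec![CString::new("PATH=/bin")?];
    let process = Process::spawn_user_process("/bin/sh", argv, envp)?;
    debug!("spawned the init process, pid = {}", process.pid());
    Ok(())
}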
#[cfg(ktest)]
mod test {
use ostd::prelude::*;
use super::*;
fn new_process(parent: Option<Arc<Process>>) -> Arc<Process> {
crate::util::random::init();
crate::fs::rootfs::init_root_mount();
let pid = allocate_tid();
let parent = if let Some(parent) = parent {
Arc::downgrade(&parent)
} else {
Weak::new()
};
Process::new(
pid,
parent,
vec![],
String::new(),
ProcessVm::alloc(),
Arc::new(RwMutex::new(FsResolver::new())),
Arc::new(Mutex::new(FileTable::new())),
Arc::new(RwLock::new(FileCreationMask::default())),
ResourceLimits::default(),
Nice::default(),
Arc::new(Mutex::new(SigDispositions::default())),
)
}
fn new_process_in_session(parent: Option<Arc<Process>>) -> Arc<Process> {
// Lock order: session table -> group table -> group of process -> group inner
// -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let process = new_process(parent);
// Creates new group
let group = ProcessGroup::new(process.clone());
*process.process_group.lock() = Arc::downgrade(&group);
// Creates new session
let sess = Session::new(group.clone());
group.inner.lock().session = Arc::downgrade(&sess);
sess.inner.lock().leader = Some(process.clone());
group_table_mut.insert(group.pgid(), group);
session_table_mut.insert(sess.sid(), sess);
process
}
fn remove_session_and_group(process: Arc<Process>) {
// Lock order: session table -> group table
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
if let Some(sess) = process.session() {
session_table_mut.remove(&sess.sid());
}
if let Some(group) = process.process_group() {
group_table_mut.remove(&group.pgid());
}
}
#[ktest]
fn init_process() {
crate::time::clocks::init_for_ktest();
let process = new_process(None);
assert!(process.process_group().is_none());
assert!(process.session().is_none());
}
#[ktest]
fn init_process_in_session() {
crate::time::clocks::init_for_ktest();
let process = new_process_in_session(None);
assert!(process.is_group_leader());
assert!(process.is_session_leader());
remove_session_and_group(process);
}
#[ktest]
fn to_new_session() {
crate::time::clocks::init_for_ktest();
let process = new_process_in_session(None);
let sess = process.session().unwrap();
sess.inner.lock().leader = None;
assert!(!process.is_session_leader());
assert!(process
.to_new_session()
.is_err_and(|e| e.error() == Errno::EPERM));
let group = process.process_group().unwrap();
group.inner.lock().leader = None;
assert!(!process.is_group_leader());
assert!(process
.to_new_session()
.is_err_and(|e| e.error() == Errno::EPERM));
}
}

View File

@ -0,0 +1,131 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::collections::btree_map::Values;
use super::{Pgid, Pid, Process, Session};
use crate::{prelude::*, process::signal::signals::Signal};
/// `ProcessGroup` represents a set of processes. Each `ProcessGroup` has a unique
/// identifier `pgid`.
pub struct ProcessGroup {
pgid: Pgid,
pub(in crate::process) inner: Mutex<Inner>,
}
pub(in crate::process) struct Inner {
pub(in crate::process) processes: BTreeMap<Pid, Arc<Process>>,
pub(in crate::process) leader: Option<Arc<Process>>,
pub(in crate::process) session: Weak<Session>,
}
impl Inner {
pub(in crate::process) fn remove_process(&mut self, pid: &Pid) {
let Some(process) = self.processes.remove(pid) else {
return;
};
if let Some(leader) = &self.leader
&& Arc::ptr_eq(leader, &process)
{
self.leader = None;
}
}
pub(in crate::process) fn is_empty(&self) -> bool {
self.processes.is_empty()
}
}
impl ProcessGroup {
/// Creates a new process group with one process. The pgid is the same as the process
/// id. The process will become the leading process of the new process group.
///
/// The caller needs to ensure that the process does not belong to any group.
pub(in crate::process) fn new(process: Arc<Process>) -> Arc<Self> {
let pid = process.pid();
let inner = {
let mut processes = BTreeMap::new();
processes.insert(pid, process.clone());
Inner {
processes,
leader: Some(process.clone()),
session: Weak::new(),
}
};
Arc::new(ProcessGroup {
pgid: pid,
inner: Mutex::new(inner),
})
}
/// Returns whether self contains a process with `pid`.
pub(in crate::process) fn contains_process(&self, pid: Pid) -> bool {
self.inner.lock().processes.contains_key(&pid)
}
/// Returns the process group identifier
pub fn pgid(&self) -> Pgid {
self.pgid
}
/// Acquires a lock on the process group.
pub fn lock(&self) -> ProcessGroupGuard {
ProcessGroupGuard {
inner: self.inner.lock(),
}
}
/// Broadcasts a signal to all processes in the group.
///
/// This method should only be used to broadcast fault signals and kernel signals.
///
/// TODO: do more checks to forbid user signals
pub fn broadcast_signal(&self, signal: impl Signal + Clone + 'static) {
for process in self.inner.lock().processes.values() {
process.enqueue_signal(signal.clone());
}
}
/// Returns the leader process.
pub fn leader(&self) -> Option<Arc<Process>> {
self.inner.lock().leader.clone()
}
/// Returns the session which the group belongs to
pub fn session(&self) -> Option<Arc<Session>> {
self.inner.lock().session.upgrade()
}
}
/// A scoped lock for a process group.
///
/// It provides some public methods to prevent the exposure of the inner type.
#[clippy::has_significant_drop]
#[must_use]
pub struct ProcessGroupGuard<'a> {
inner: MutexGuard<'a, Inner>,
}
impl<'a> ProcessGroupGuard<'a> {
/// Returns an iterator over the processes in the group.
pub fn iter(&self) -> ProcessGroupIter {
ProcessGroupIter {
inner: self.inner.processes.values(),
}
}
}
/// An iterator over the processes of the process group.
pub struct ProcessGroupIter<'a> {
inner: Values<'a, Pid, Arc<Process>>,
}
impl<'a> Iterator for ProcessGroupIter<'a> {
type Item = &'a Arc<Process>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
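// A small sketch: iterating the members of a group through the scoped guard,
// which keeps the inner lock held for exactly the duration of the loop.
fn log_members(group: &ProcessGroup) {
    let guard = group.lock();
    for process in guard.iter() {
        debug!("pgid {} member: pid {}", group.pgid(), process.pid());
    }
}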

View File

@ -0,0 +1,136 @@
// SPDX-License-Identifier: MPL-2.0
use super::{Pgid, Process, ProcessGroup, Sid, Terminal};
use crate::prelude::*;
/// A `Session` is a collection of related process groups. Each session has a
/// unique identifier `sid`. Process groups and sessions form a two-level
/// hierarchical relationship between processes.
///
/// **Leader**: A *session leader* is the process that creates a new session and whose process
/// ID becomes the session ID.
///
/// **Controlling terminal**: The terminal can be used to manage all processes in the session. The
/// controlling terminal is established when the session leader first opens a terminal.
pub struct Session {
sid: Sid,
pub(in crate::process) inner: Mutex<Inner>,
}
pub(in crate::process) struct Inner {
pub(in crate::process) process_groups: BTreeMap<Pgid, Arc<ProcessGroup>>,
pub(in crate::process) leader: Option<Arc<Process>>,
pub(in crate::process) terminal: Option<Arc<dyn Terminal>>,
}
impl Inner {
pub(in crate::process) fn is_empty(&self) -> bool {
self.process_groups.is_empty()
}
pub(in crate::process) fn remove_process(&mut self, process: &Arc<Process>) {
if let Some(leader) = &self.leader
&& Arc::ptr_eq(leader, process)
{
self.leader = None;
}
}
pub(in crate::process) fn remove_process_group(&mut self, pgid: &Pgid) {
self.process_groups.remove(pgid);
}
}
impl Session {
/// Creates a new session for the process group. The process group becomes a member of
/// the new session.
///
/// The caller needs to ensure that the group does not belong to any session, and the caller
/// should set the leader process after creating the session.
pub(in crate::process) fn new(group: Arc<ProcessGroup>) -> Arc<Self> {
let sid = group.pgid();
let inner = {
let mut process_groups = BTreeMap::new();
process_groups.insert(group.pgid(), group);
Inner {
process_groups,
leader: None,
terminal: None,
}
};
Arc::new(Self {
sid,
inner: Mutex::new(inner),
})
}
/// Returns the session id
pub fn sid(&self) -> Sid {
self.sid
}
/// Returns the leader process.
pub fn leader(&self) -> Option<Arc<Process>> {
self.inner.lock().leader.clone()
}
/// Returns whether `self` contains the `process_group`
pub(in crate::process) fn contains_process_group(
self: &Arc<Self>,
process_group: &Arc<ProcessGroup>,
) -> bool {
self.inner
.lock()
.process_groups
.contains_key(&process_group.pgid())
}
/// Sets the terminal as the controlling terminal of the session. The `get_terminal` closure
/// should set the session for the terminal and return the terminal.
///
/// If the session already has a controlling terminal, this method will return `Err(EPERM)`.
pub fn set_terminal<F>(&self, get_terminal: F) -> Result<()>
where
F: Fn() -> Result<Arc<dyn Terminal>>,
{
let mut inner = self.inner.lock();
if inner.terminal.is_some() {
return_errno_with_message!(
Errno::EPERM,
"current session already has controlling terminal"
);
}
let terminal = get_terminal()?;
inner.terminal = Some(terminal);
Ok(())
}
/// Releases the controlling terminal of the session.
///
/// If the session does not have controlling terminal, this method will return `ENOTTY`.
pub fn release_terminal<F>(&self, release_session: F) -> Result<()>
where
F: Fn(&Arc<dyn Terminal>) -> Result<()>,
{
let mut inner = self.inner.lock();
if inner.terminal.is_none() {
return_errno_with_message!(
Errno::ENOTTY,
"current session does not has controlling terminal"
);
}
let terminal = inner.terminal.as_ref().unwrap();
release_session(terminal)?;
inner.terminal = None;
Ok(())
}
/// Returns the controlling terminal of `self`.
pub fn terminal(&self) -> Option<Arc<dyn Terminal>> {
self.inner.lock().terminal.clone()
}
}

View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: MPL-2.0
use super::JobControl;
use crate::{
fs::inode_handle::FileIo,
prelude::*,
process::{process_table, Pgid, ProcessGroup},
};
/// A terminal is used to interact with the system. A terminal can support
/// shell job control.
///
/// We currently support two kinds of terminals: the tty and the pty.
pub trait Terminal: Send + Sync + FileIo {
// *************** Foreground ***************
/// Returns the foreground process group
fn foreground(&self) -> Option<Arc<ProcessGroup>> {
self.job_control().foreground()
}
/// Sets the foreground process group of this terminal.
///
/// If the terminal is not controlling terminal, this method returns `ENOTTY`.
///
/// # Panics
///
/// This method should be called in process context.
fn set_foreground(&self, pgid: &Pgid) -> Result<()> {
if !self.is_controlling_terminal() {
return_errno_with_message!(Errno::ENOTTY, "self is not controlling terminal");
}
let foreground = process_table::get_process_group(pgid);
self.job_control().set_foreground(foreground.as_ref())
}
// *************** Session and controlling terminal ***************
/// Returns whether the terminal is the controlling terminal of current process.
///
/// # Panics
///
/// This method should be called in process context.
fn is_controlling_terminal(&self) -> bool {
let session = current!().session().unwrap();
let Some(terminal) = session.terminal() else {
return false;
};
let arc_self = self.arc_self();
Arc::ptr_eq(&terminal, &arc_self)
}
/// Sets the terminal as the controlling terminal of the session of current process.
///
/// If `self` is not the session leader, or the terminal is the controlling terminal of another
/// session, or the session already has a controlling terminal, this method returns `EPERM`.
///
/// # Panics
///
/// This method should only be called in process context.
fn set_current_session(&self) -> Result<()> {
if !current!().is_session_leader() {
return_errno_with_message!(Errno::EPERM, "current process is not session leader");
}
let get_terminal = || {
self.job_control().set_current_session()?;
Ok(self.arc_self())
};
let session = current!().session().unwrap();
session.set_terminal(get_terminal)
}
/// Releases the terminal from the session of current process if the terminal is the controlling
/// terminal of the session.
///
/// If the terminal is not the controlling terminal of the session, this method will return `ENOTTY`.
///
/// # Panics
///
/// This method should only be called in process context.
fn release_current_session(&self) -> Result<()> {
if !self.is_controlling_terminal() {
return_errno_with_message!(Errno::ENOTTY, "release wrong tty");
}
let current = current!();
if !current.is_session_leader() {
warn!("TODO: release tty for process that is not session leader");
return Ok(());
}
let release_session = |_: &Arc<dyn Terminal>| self.job_control().release_current_session();
let session = current.session().unwrap();
session.release_terminal(release_session)
}
/// Returns the job control of the terminal.
fn job_control(&self) -> &JobControl;
fn arc_self(&self) -> Arc<dyn Terminal>;
}

View File

@ -0,0 +1,212 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::{
boxed::Box,
sync::{Arc, Weak},
vec::Vec,
};
use core::time::Duration;
use id_alloc::IdAlloc;
use ostd::{
arch::{
timer::{self, TIMER_FREQ},
x86::trap::is_kernel_interrupted,
},
sync::Mutex,
};
use super::Process;
use crate::{
process::{
posix_thread::PosixThreadExt,
signal::{constants::SIGALRM, signals::kernel::KernelSignal},
},
thread::{
work_queue::{submit_work_item, work_item::WorkItem},
Thread,
},
time::{
clocks::{ProfClock, RealTimeClock},
Timer, TimerManager,
},
};
/// Updates the CPU time recorded in the CPU clocks of current Process.
///
/// This function will be invoked at the system timer interrupt, and
/// invoke the callbacks of expired timers which are based on the updated
/// CPU clock.
fn update_cpu_time() {
let Some(current_thread) = Thread::current() else {
return;
};
let Some(posix_thread) = current_thread.as_posix_thread() else {
return;
};
let process = posix_thread.process();
let timer_manager = process.timer_manager();
let jiffies_interval = Duration::from_millis(1000 / TIMER_FREQ);
// Based on whether the timer interrupt occurs in kernel mode or user mode,
// the function will add the duration of one timer interrupt interval to the
// corresponding CPU clocks.
if is_kernel_interrupted() {
posix_thread
.prof_clock()
.kernel_clock()
.add_time(jiffies_interval);
process
.prof_clock()
.kernel_clock()
.add_time(jiffies_interval);
} else {
posix_thread
.prof_clock()
.user_clock()
.add_time(jiffies_interval);
process.prof_clock().user_clock().add_time(jiffies_interval);
timer_manager
.virtual_timer()
.timer_manager()
.process_expired_timers();
}
timer_manager
.prof_timer()
.timer_manager()
.process_expired_timers();
posix_thread.process_expired_timers();
}
/// Registers a function to update the CPU clock in processes and
/// threads during the system timer interrupt.
pub(super) fn init() {
timer::register_callback(update_cpu_time);
}
/// Represents timer resources and utilities for a POSIX process.
pub struct PosixTimerManager {
/// A real-time countdown timer, measuring in wall clock time.
alarm_timer: Arc<Timer>,
/// A timer based on user CPU clock.
virtual_timer: Arc<Timer>,
/// A timer based on the profiling clock.
prof_timer: Arc<Timer>,
/// An ID allocator to allocate unique timer IDs.
id_allocator: Mutex<IdAlloc>,
/// A container managing all POSIX timers created by `timer_create()` syscall
/// within the process context.
posix_timers: Mutex<Vec<Option<Arc<Timer>>>>,
}
fn create_process_timer_callback(process_ref: &Weak<Process>) -> impl Fn() + Clone {
let current_process = process_ref.clone();
let send_signal = move || {
let signal = KernelSignal::new(SIGALRM);
if let Some(process) = current_process.upgrade() {
process.enqueue_signal(signal);
}
};
let work_func = Box::new(send_signal);
let work_item = Arc::new(WorkItem::new(work_func));
move || {
submit_work_item(
work_item.clone(),
crate::thread::work_queue::WorkPriority::High,
);
}
}
impl PosixTimerManager {
pub(super) fn new(prof_clock: &Arc<ProfClock>, process_ref: &Weak<Process>) -> Self {
const MAX_NUM_OF_POSIX_TIMERS: usize = 10000;
let callback = create_process_timer_callback(process_ref);
let alarm_timer = RealTimeClock::timer_manager().create_timer(callback.clone());
let virtual_timer =
TimerManager::new(prof_clock.user_clock().clone()).create_timer(callback.clone());
let prof_timer = TimerManager::new(prof_clock.clone()).create_timer(callback);
Self {
alarm_timer,
virtual_timer,
prof_timer,
id_allocator: Mutex::new(IdAlloc::with_capacity(MAX_NUM_OF_POSIX_TIMERS)),
posix_timers: Mutex::new(Vec::new()),
}
}
/// Gets the alarm timer of the corresponding process.
pub fn alarm_timer(&self) -> &Arc<Timer> {
&self.alarm_timer
}
/// Gets the virtual timer of the corresponding process.
pub fn virtual_timer(&self) -> &Arc<Timer> {
&self.virtual_timer
}
/// Gets the profiling timer of the corresponding process.
pub fn prof_timer(&self) -> &Arc<Timer> {
&self.prof_timer
}
/// Creates a timer based on the profiling CPU clock of the current process.
pub fn create_prof_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.prof_timer.timer_manager().create_timer(func)
}
/// Creates a timer based on the user CPU clock of the current process.
pub fn create_virtual_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.virtual_timer.timer_manager().create_timer(func)
}
/// Adds a POSIX timer to the managed `posix_timers` and allocates a timer ID for it.
/// Returns the timer ID.
pub fn add_posix_timer(&self, posix_timer: Arc<Timer>) -> usize {
let mut timers = self.posix_timers.lock();
// Holding the lock of `posix_timers` is required to operate the `id_allocator`.
let timer_id = self.id_allocator.lock().alloc().unwrap();
if timers.len() < timer_id + 1 {
timers.resize(timer_id + 1, None);
}
// The ID allocated is not used by any other timers so this index in `timers`
// must be `None`.
timers[timer_id] = Some(posix_timer);
timer_id
}
/// Finds a POSIX timer by the input `timer_id`.
pub fn find_posix_timer(&self, timer_id: usize) -> Option<Arc<Timer>> {
let timers = self.posix_timers.lock();
if timer_id >= timers.len() {
return None;
}
timers[timer_id].clone()
}
/// Removes the POSIX timer with the ID `timer_id`.
pub fn remove_posix_timer(&self, timer_id: usize) -> Option<Arc<Timer>> {
let mut timers = self.posix_timers.lock();
if timer_id >= timers.len() {
return None;
}
let timer = timers[timer_id].take();
if timer.is_some() {
// Holding the lock of `posix_timers` is required to operate the `id_allocator`.
self.id_allocator.lock().free(timer_id);
}
timer
}
}
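// A hypothetical usage sketch of the POSIX timer bookkeeping above (not part of
// this commit; `timer_manager` is an assumed `PosixTimerManager` binding): a timer
// backing `timer_create()` is registered, looked up by its ID, and finally removed.
//
//     let timer = timer_manager.create_prof_timer(|| { /* deliver the signal */ });
//     let id = timer_manager.add_posix_timer(timer);
//     assert!(timer_manager.find_posix_timer(id).is_some());
//     assert!(timer_manager.remove_posix_timer(id).is_some());
//     assert!(timer_manager.find_posix_timer(id).is_none());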

@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{Pgid, Pid};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessFilter {
Any,
WithPid(Pid),
WithPgid(Pgid),
}
impl ProcessFilter {
// used for waitid
pub fn from_which_and_id(which: u64, id: u64) -> Result<Self> {
// Does not support P_PIDFD (which = 3) for now.
// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/wait.h#L20
match which {
0 => Ok(ProcessFilter::Any),
1 => Ok(ProcessFilter::WithPid(id as Pid)),
2 => Ok(ProcessFilter::WithPgid(id as Pgid)),
3 => todo!(),
_ => return_errno_with_message!(Errno::EINVAL, "invalid which"),
}
}
// used for wait4 and kill
pub fn from_id(wait_pid: i32) -> Self {
// https://man7.org/linux/man-pages/man2/waitpid.2.html
// https://man7.org/linux/man-pages/man2/kill.2.html
if wait_pid < -1 {
// process group ID is equal to the absolute value of pid.
ProcessFilter::WithPgid((-wait_pid) as Pgid)
} else if wait_pid == -1 {
// wait for any child process
ProcessFilter::Any
} else if wait_pid == 0 {
// wait for any child process with same process group ID
let pgid = current!().pgid();
ProcessFilter::WithPgid(pgid)
} else {
// pid > 0. wait for the child whose process ID is equal to the value of pid.
ProcessFilter::WithPid(wait_pid as Pid)
}
}
pub fn contains_pid(&self, pid: Pid) -> bool {
match self {
ProcessFilter::Any => true,
ProcessFilter::WithPid(filter_pid) => *filter_pid == pid,
ProcessFilter::WithPgid(_) => todo!(),
}
}
}
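// A hypothetical illustration of the `from_id` decoding rules above:
//
//     assert_eq!(ProcessFilter::from_id(-5), ProcessFilter::WithPgid(5));
//     assert_eq!(ProcessFilter::from_id(-1), ProcessFilter::Any);
//     // `from_id(0)` yields `WithPgid(pgid)` of the current process's group.
//     assert_eq!(ProcessFilter::from_id(42), ProcessFilter::WithPid(42));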

@ -0,0 +1,113 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! A global table that stores the PID-to-process mapping.
//! This table can be used to get a process by its PID.
//! TODO: process groups and threads need similar mappings.
use alloc::collections::btree_map::Values;
use super::{Pgid, Pid, Process, ProcessGroup, Session, Sid};
use crate::{
events::{Events, Observer, Subject},
prelude::*,
};
static PROCESS_TABLE: Mutex<BTreeMap<Pid, Arc<Process>>> = Mutex::new(BTreeMap::new());
static PROCESS_GROUP_TABLE: Mutex<BTreeMap<Pgid, Arc<ProcessGroup>>> = Mutex::new(BTreeMap::new());
static PROCESS_TABLE_SUBJECT: Subject<PidEvent> = Subject::new();
static SESSION_TABLE: Mutex<BTreeMap<Sid, Arc<Session>>> = Mutex::new(BTreeMap::new());
// ************ Process *************
/// Gets a process with the given `pid`
pub fn get_process(pid: Pid) -> Option<Arc<Process>> {
PROCESS_TABLE.lock().get(&pid).cloned()
}
pub(super) fn process_table_mut() -> MutexGuard<'static, BTreeMap<Pid, Arc<Process>>> {
PROCESS_TABLE.lock()
}
/// Acquires a lock on the process table and returns a `ProcessTable`.
pub fn process_table() -> ProcessTable<'static> {
ProcessTable {
inner: PROCESS_TABLE.lock(),
}
}
/// A wrapper for the mutex-protected process table.
///
/// It provides the `iter` method to iterate over the processes in the table.
pub struct ProcessTable<'a> {
inner: MutexGuard<'a, BTreeMap<Pid, Arc<Process>>>,
}
impl<'a> ProcessTable<'a> {
/// Returns an iterator over the processes in the table.
pub fn iter(&self) -> ProcessTableIter {
ProcessTableIter {
inner: self.inner.values(),
}
}
}
/// An iterator over the processes of the process table.
pub struct ProcessTableIter<'a> {
inner: Values<'a, Pid, Arc<Process>>,
}
impl<'a> Iterator for ProcessTableIter<'a> {
type Item = &'a Arc<Process>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
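// A hypothetical usage of the lock-holding wrapper above: the mutex guard lives
// inside `ProcessTable`, so the table stays locked exactly for the scope of the
// iteration.
//
//     let table = process_table();
//     for process in table.iter() {
//         // Inspect each `&Arc<Process>` here; dropping `table` unlocks the map.
//     }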
// ************ Process Group *************
/// Gets a process group with `pgid`
pub fn get_process_group(pgid: &Pgid) -> Option<Arc<ProcessGroup>> {
PROCESS_GROUP_TABLE.lock().get(pgid).cloned()
}
/// Returns whether the process group table contains a process group with the given `pgid`
pub fn contain_process_group(pgid: &Pgid) -> bool {
PROCESS_GROUP_TABLE.lock().contains_key(pgid)
}
pub(super) fn group_table_mut() -> MutexGuard<'static, BTreeMap<Pgid, Arc<ProcessGroup>>> {
PROCESS_GROUP_TABLE.lock()
}
// ************ Session *************
/// Gets a session with `sid`.
pub fn get_session(sid: &Sid) -> Option<Arc<Session>> {
SESSION_TABLE.lock().get(sid).map(Arc::clone)
}
pub(super) fn session_table_mut() -> MutexGuard<'static, BTreeMap<Sid, Arc<Session>>> {
SESSION_TABLE.lock()
}
// ************ Observer *************
/// Registers an observer which watches `PidEvent`.
pub fn register_observer(observer: Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.register_observer(observer, ());
}
/// Unregisters an observer which watches `PidEvent`.
pub fn unregister_observer(observer: &Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.unregister_observer(observer);
}
#[derive(Copy, Clone)]
pub enum PidEvent {
Exit(Pid),
}
impl Events for PidEvent {}

@ -0,0 +1,101 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicUsize, Ordering};
use align_ext::AlignExt;
use aster_rights::Full;
use crate::{
prelude::*,
vm::{perms::VmPerms, vmar::Vmar},
};
/// The base address of the user heap
pub const USER_HEAP_BASE: Vaddr = 0x0000_0000_1000_0000;
/// The max allowed size of the user heap
pub const USER_HEAP_SIZE_LIMIT: usize = 16 * 1024 * PAGE_SIZE; // 16 * 1024 pages, i.e., 64 MiB with 4 KiB pages
#[derive(Debug)]
pub struct Heap {
/// The lowest address of the heap
base: Vaddr,
/// The heap size limit
limit: usize,
/// The current highest address of the heap
current_heap_end: AtomicUsize,
}
impl Heap {
pub const fn new() -> Self {
Heap {
base: USER_HEAP_BASE,
limit: USER_HEAP_SIZE_LIMIT,
current_heap_end: AtomicUsize::new(USER_HEAP_BASE),
}
}
/// Inits and maps the heap VMO
pub(super) fn alloc_and_map_vmo(&self, root_vmar: &Vmar<Full>) -> Result<()> {
let vmar_map_options = {
let perms = VmPerms::READ | VmPerms::WRITE;
root_vmar
// FIXME: Our current implementation of mapping resize cannot move
// existing mappings within the new range, which may cause the resize
// operation to fail. Therefore, if there are already mappings within
// the heap expansion range, the brk operation will fail.
.new_map(PAGE_SIZE, perms)
.unwrap()
.offset(self.base)
};
vmar_map_options.build()?;
self.set_uninitialized();
Ok(())
}
pub fn brk(&self, new_heap_end: Option<Vaddr>) -> Result<Vaddr> {
let current = current!();
let root_vmar = current.root_vmar();
match new_heap_end {
None => Ok(self.current_heap_end.load(Ordering::Relaxed)),
Some(new_heap_end) => {
if new_heap_end > self.base + self.limit {
return_errno_with_message!(Errno::ENOMEM, "the heap size limit is exceeded");
}
let current_heap_end = self.current_heap_end.load(Ordering::Acquire);
if new_heap_end <= current_heap_end {
// FIXME: should we allow shrinking the current user heap?
return Ok(current_heap_end);
}
let old_size = (current_heap_end - self.base).align_up(PAGE_SIZE);
let new_size = (new_heap_end - self.base).align_up(PAGE_SIZE);
root_vmar.resize_mapping(self.base, old_size, new_size)?;
self.current_heap_end.store(new_heap_end, Ordering::Release);
Ok(new_heap_end)
}
}
}
pub(super) fn set_uninitialized(&self) {
self.current_heap_end
.store(self.base + PAGE_SIZE, Ordering::Relaxed);
}
}
impl Clone for Heap {
fn clone(&self) -> Self {
let current_heap_end = self.current_heap_end.load(Ordering::Relaxed);
Self {
base: self.base,
limit: self.limit,
current_heap_end: AtomicUsize::new(current_heap_end),
}
}
}
impl Default for Heap {
fn default() -> Self {
Self::new()
}
}
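// A hypothetical sketch of the brk(2)-style contract implemented above (it must
// run in the context of the owning process, since `brk` uses `current!()`; `heap`
// is an assumed binding):
//
//     let cur = heap.brk(None)?;                   // query the current break
//     let new = heap.brk(Some(cur + PAGE_SIZE))?;  // grow the heap by one page
//     assert_eq!(new, cur + PAGE_SIZE);
//     // Requests beyond `base + limit` fail with ENOMEM; shrink requests are
//     // currently ignored (see the FIXME in `brk`).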

@ -0,0 +1,96 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use crate::prelude::*;
/// Auxiliary Vector.
///
/// # What is Auxiliary Vector?
///
/// Here is a concise description of Auxiliary Vector from GNU's manual:
///
/// > When a program is executed, it receives information from the operating system
/// > about the environment in which it is operating. The form of this information
/// > is a table of key-value pairs, where the keys are from the set of AT_
/// > values in elf.h.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum AuxKey {
AT_NULL = 0, /* end of vector */
AT_IGNORE = 1, /* entry should be ignored */
AT_EXECFD = 2, /* file descriptor of program */
AT_PHDR = 3, /* program headers for program */
AT_PHENT = 4, /* size of program header entry */
AT_PHNUM = 5, /* number of program headers */
AT_PAGESZ = 6, /* system page size */
AT_BASE = 7, /* base address of interpreter */
AT_FLAGS = 8, /* flags */
AT_ENTRY = 9, /* entry point of program */
AT_NOTELF = 10, /* program is not ELF */
AT_UID = 11, /* real uid */
AT_EUID = 12, /* effective uid */
AT_GID = 13, /* real gid */
AT_EGID = 14, /* effective gid */
AT_PLATFORM = 15, /* string identifying CPU for optimizations */
AT_HWCAP = 16, /* arch dependent hints at CPU capabilities */
AT_CLKTCK = 17, /* frequency at which times() increments */
/* 18...22 not used */
AT_SECURE = 23, /* secure mode boolean */
AT_BASE_PLATFORM = 24, /* string identifying real platform, may
* differ from AT_PLATFORM. */
AT_RANDOM = 25, /* address of 16 random bytes */
AT_HWCAP2 = 26, /* extension of AT_HWCAP */
/* 28...30 not used */
AT_EXECFN = 31, /* filename of program */
AT_SYSINFO = 32,
AT_SYSINFO_EHDR = 33, /* the start address of the page containing the VDSO */
}
impl AuxKey {
pub fn as_u64(&self) -> u64 {
*self as u64
}
}
#[derive(Clone, Default, Debug)]
pub struct AuxVec {
table: BTreeMap<AuxKey, u64>,
}
impl AuxVec {
pub const fn new() -> AuxVec {
AuxVec {
table: BTreeMap::new(),
}
}
}
impl AuxVec {
pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
if key == AuxKey::AT_NULL || key == AuxKey::AT_IGNORE {
return_errno_with_message!(Errno::EINVAL, "Illegal key");
}
self.table
.entry(key)
.and_modify(|val_mut| *val_mut = val)
.or_insert(val);
Ok(())
}
pub fn get(&self, key: AuxKey) -> Option<u64> {
self.table.get(&key).copied()
}
pub fn del(&mut self, key: AuxKey) -> Option<u64> {
self.table.remove(&key)
}
pub fn table(&self) -> &BTreeMap<AuxKey, u64> {
&self.table
}
}
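// A hypothetical round-trip through the auxiliary vector above:
//
//     let mut auxvec = AuxVec::new();
//     auxvec.set(AuxKey::AT_PAGESZ, 4096)?;
//     assert_eq!(auxvec.get(AuxKey::AT_PAGESZ), Some(4096));
//     assert!(auxvec.set(AuxKey::AT_NULL, 0).is_err()); // terminator keys are rejected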

@ -0,0 +1,443 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! The init stack for the process.
//! The init stack is used to store the `argv`, `envp` and auxiliary vectors.
//! We can read the `argv` and `envp` of a process from its init stack.
//! Usually, the lowest address of the init stack is
//! the highest address of the user stack of the first thread.
//!
//! However, the init stack is mapped into user space
//! and the user process can overwrite its content,
//! so the content read from the init stack may not match the initial state of the process.
//!
use core::{
mem,
sync::atomic::{AtomicUsize, Ordering},
};
use align_ext::AlignExt;
use aster_rights::Full;
use ostd::mm::{VmIo, MAX_USERSPACE_VADDR};
use self::aux_vec::{AuxKey, AuxVec};
use crate::{
prelude::*,
util::random::getrandom,
vm::{
perms::VmPerms,
vmar::Vmar,
vmo::{Vmo, VmoOptions, VmoRightsOp},
},
};
pub mod aux_vec;
/// Set the initial stack size to 8 megabytes, following the default Linux stack size limit.
pub const INIT_STACK_SIZE: usize = 8 * 1024 * 1024; // 8 MB
/// The max number of arguments that can be used to create a new process.
pub const MAX_ARGV_NUMBER: usize = 128;
/// The max number of environment variables that can be used to create a new process.
pub const MAX_ENVP_NUMBER: usize = 128;
/// The max length of each argument used to create a new process.
pub const MAX_ARG_LEN: usize = 2048;
/// The max length of each environment variable (the total length of the key-value pair) used to create a new process.
pub const MAX_ENV_LEN: usize = 128;
/*
* Illustration of the virtual memory space containing the processes' init stack:
*
* (high address)
* +---------------------+ <------+ Highest address
* | | Random stack paddings
* +---------------------+ <------+ The base of stack (stack grows down)
* | |
* | Null-terminated |
* | strings referenced |
* | by variables below |
* | |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | ... |
* +---------------------+
* | aux_val[0] |
* +---------------------+
* | aux_key[0] | <------+ Auxiliary table
* +---------------------+
* | NULL |
* +---------------------+
* | ... |
* +---------------------+
* | char* envp[0] | <------+ Environment variables
* +---------------------+
* | NULL |
* +---------------------+
* | char* argv[argc-1] |
* +---------------------+
* | ... |
* +---------------------+
* | char* argv[0] |
* +---------------------+
* | long argc | <------+ Program arguments
* +---------------------+
* | |
* | |
* +---------------------+
* | |
* +---------------------+ <------+ User stack default rlimit
* (low address)
*/
/// The initial portion of the main stack of a process.
pub struct InitStack {
/// The initial highest address.
/// The stack grows down from this address
initial_top: Vaddr,
/// The max allowed stack size
max_size: usize,
/// The current stack pointer.
/// Before initialization, `pos` points to `initial_top`.
/// After initialization, `pos` points to the user stack pointer (rsp)
/// of the process.
pos: Arc<AtomicUsize>,
vmo: Vmo<Full>,
}
impl Clone for InitStack {
fn clone(&self) -> Self {
Self {
initial_top: self.initial_top,
max_size: self.max_size,
pos: self.pos.clone(),
vmo: self.vmo.dup(),
}
}
}
impl InitStack {
pub(super) fn new() -> Self {
let nr_pages_padding = {
let mut random_nr_pages_padding: u8 = 0;
getrandom(random_nr_pages_padding.as_bytes_mut()).unwrap();
random_nr_pages_padding as usize
};
let initial_top = MAX_USERSPACE_VADDR - PAGE_SIZE * nr_pages_padding;
let max_size = INIT_STACK_SIZE;
let vmo = {
let vmo_options = VmoOptions::<Full>::new(max_size);
vmo_options.alloc().unwrap()
};
Self {
initial_top,
max_size,
pos: Arc::new(AtomicUsize::new(initial_top)),
vmo,
}
}
/// Maps the vmo of the init stack.
pub(super) fn map_init_stack_vmo(&self, root_vmar: &Vmar<Full>) -> Result<()> {
let vmar_map_options = {
let perms = VmPerms::READ | VmPerms::WRITE;
let map_addr = self.initial_top - self.max_size;
debug_assert!(map_addr % PAGE_SIZE == 0);
root_vmar
.new_map(self.max_size, perms)?
.offset(map_addr)
.vmo(self.vmo.dup().to_dyn())
};
vmar_map_options.build()?;
self.set_uninitialized();
Ok(())
}
/// Returns the user stack top (highest address), used to set up rsp.
///
/// This method should only be called after the stack is initialized.
pub fn user_stack_top(&self) -> Vaddr {
let stack_top = self.pos();
debug_assert!(self.is_initialized());
stack_top
}
/// Constructs a writer to initialize the content of an `InitStack`.
pub(super) fn writer(
&self,
argv: Vec<CString>,
envp: Vec<CString>,
auxvec: AuxVec,
) -> InitStackWriter<'_> {
// The stack should be written only once.
debug_assert!(!self.is_initialized());
InitStackWriter {
pos: self.pos.clone(),
vmo: &self.vmo,
argv,
envp,
auxvec,
map_addr: self.initial_top - self.max_size,
}
}
/// Constructs a reader to parse the content of an `InitStack`.
/// The `InitStack` should only be read after it is initialized.
pub(super) fn reader(&self) -> InitStackReader<'_> {
debug_assert!(self.is_initialized());
InitStackReader {
base: self.pos(),
vmo: &self.vmo,
map_addr: self.initial_top - self.max_size,
}
}
fn is_initialized(&self) -> bool {
self.pos() != self.initial_top
}
fn set_uninitialized(&self) {
self.pos.store(self.initial_top, Ordering::Relaxed);
}
fn pos(&self) -> Vaddr {
self.pos.load(Ordering::Relaxed)
}
}
/// A writer to initialize the content of an `InitStack`.
pub struct InitStackWriter<'a> {
pos: Arc<AtomicUsize>,
vmo: &'a Vmo<Full>,
argv: Vec<CString>,
envp: Vec<CString>,
auxvec: AuxVec,
/// The mapping address of the `InitStack`.
map_addr: usize,
}
impl<'a> InitStackWriter<'a> {
pub fn write(mut self) -> Result<()> {
// FIXME: Some OSes may put the first page of the executable file here
// for interpreting ELF headers.
let argc = self.argv.len() as u64;
// Write envp string
let envp_pointers = self.write_envp_strings()?;
// Write argv string
let argv_pointers = self.write_argv_strings()?;
// Generate random values for auxvec
let random_value_pointer = {
let random_value = generate_random_for_aux_vec();
self.write_bytes(&random_value)?
};
self.auxvec.set(AuxKey::AT_RANDOM, random_value_pointer)?;
self.adjust_stack_alignment(&envp_pointers, &argv_pointers)?;
self.write_aux_vec()?;
self.write_envp_pointers(envp_pointers)?;
self.write_argv_pointers(argv_pointers)?;
// write argc
self.write_u64(argc)?;
// Ensure the stack top is 16-byte aligned
debug_assert_eq!(self.pos() & !0xf, self.pos());
Ok(())
}
fn write_envp_strings(&self) -> Result<Vec<u64>> {
let mut envp_pointers = Vec::with_capacity(self.envp.len());
for envp in self.envp.iter() {
let pointer = self.write_cstring(envp)?;
envp_pointers.push(pointer);
}
Ok(envp_pointers)
}
fn write_argv_strings(&self) -> Result<Vec<u64>> {
let mut argv_pointers = Vec::with_capacity(self.argv.len());
for argv in self.argv.iter().rev() {
let pointer = self.write_cstring(argv)?;
debug!("argv address = 0x{:x}", pointer);
argv_pointers.push(pointer);
}
argv_pointers.reverse();
Ok(argv_pointers)
}
/// The libc ABI requires the stack entry point to be 16-byte aligned.
/// The current position of the stack is already 8-byte aligned, so insert
/// 8 more bytes to meet the requirement if necessary.
fn adjust_stack_alignment(&self, envp_pointers: &[u64], argv_pointers: &[u64]) -> Result<()> {
// Ensure 8-byte alignment
self.write_u64(0)?;
let auxvec_size = (self.auxvec.table().len() + 1) * (mem::size_of::<u64>() * 2);
let envp_pointers_size = (envp_pointers.len() + 1) * mem::size_of::<u64>();
let argv_pointers_size = (argv_pointers.len() + 1) * mem::size_of::<u64>();
let argc_size = mem::size_of::<u64>();
let to_write_size = auxvec_size + envp_pointers_size + argv_pointers_size + argc_size;
if (self.pos() - to_write_size) % 16 != 0 {
self.write_u64(0)?;
}
Ok(())
}
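// A worked example of the check above (hypothetical addresses): with
// pos() == 0x7fff_ffd8 and to_write_size == 0x80, the final rsp would be
// 0x7fff_ff58, which is not 16-byte aligned, so one extra zero u64 is
// written; with to_write_size == 0x88, no extra padding is needed.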
fn write_aux_vec(&self) -> Result<()> {
// Write the NULL auxiliary entry
self.write_u64(0)?;
self.write_u64(AuxKey::AT_NULL as u64)?;
// Write Auxiliary vectors
let aux_vec: Vec<_> = self
.auxvec
.table()
.iter()
.map(|(aux_key, aux_value)| (*aux_key, *aux_value))
.collect();
for (aux_key, aux_value) in aux_vec.iter() {
self.write_u64(*aux_value)?;
self.write_u64(*aux_key as u64)?;
}
Ok(())
}
fn write_envp_pointers(&self, mut envp_pointers: Vec<u64>) -> Result<()> {
// write NULL pointer
self.write_u64(0)?;
// write envp pointers
envp_pointers.reverse();
for envp_pointer in envp_pointers {
self.write_u64(envp_pointer)?;
}
Ok(())
}
fn write_argv_pointers(&self, mut argv_pointers: Vec<u64>) -> Result<()> {
// write 0
self.write_u64(0)?;
// write argv pointers
argv_pointers.reverse();
for argv_pointer in argv_pointers {
self.write_u64(argv_pointer)?;
}
Ok(())
}
/// Writes a u64 to the stack.
/// Returns the address of the written value.
fn write_u64(&self, val: u64) -> Result<u64> {
let start_address = (self.pos() - 8).align_down(8);
self.pos.store(start_address, Ordering::Relaxed);
self.vmo.write_val(start_address - self.map_addr, &val)?;
Ok(self.pos() as u64)
}
/// Writes a CString, including the terminating null byte, to the stack.
/// Returns the address of the written string.
fn write_cstring(&self, val: &CString) -> Result<u64> {
let bytes = val.as_bytes_with_nul();
self.write_bytes(bytes)
}
/// Writes bytes to the stack.
/// Returns the address of the written bytes.
fn write_bytes(&self, bytes: &[u8]) -> Result<u64> {
let len = bytes.len();
self.pos.fetch_sub(len, Ordering::Relaxed);
let pos = self.pos();
self.vmo.write_bytes(pos - self.map_addr, bytes)?;
Ok(pos as u64)
}
fn pos(&self) -> Vaddr {
self.pos.load(Ordering::Relaxed)
}
}
fn generate_random_for_aux_vec() -> [u8; 16] {
let mut rand_val = [0; 16];
getrandom(&mut rand_val).unwrap();
rand_val
}
/// A reader to parse the content of an `InitStack`.
pub struct InitStackReader<'a> {
base: Vaddr,
vmo: &'a Vmo<Full>,
/// The mapping address of the `InitStack`.
map_addr: usize,
}
impl<'a> InitStackReader<'a> {
/// Reads argc from the process init stack
pub fn argc(&self) -> Result<u64> {
let stack_base = self.init_stack_bottom();
Ok(self.vmo.read_val(stack_base - self.map_addr)?)
}
/// Reads argv from the process init stack
pub fn argv(&self) -> Result<Vec<CString>> {
let argc = self.argc()? as usize;
// The reading offset in the initial stack is:
// the initial stack bottom address + the size of `argc` in memory
let read_offset = self.init_stack_bottom() + size_of::<usize>();
let mut argv = Vec::with_capacity(argc);
let user_space = CurrentUserSpace::get();
let mut argv_reader = user_space.reader(read_offset, argc * size_of::<usize>())?;
for _ in 0..argc {
let arg = {
let arg_ptr = argv_reader.read_val::<Vaddr>()?;
user_space.read_cstring(arg_ptr, MAX_ARG_LEN)?
};
argv.push(arg);
}
Ok(argv)
}
/// Reads envp from the process init stack
pub fn envp(&self) -> Result<Vec<CString>> {
let argc = self.argc()? as usize;
// The reading offset in the initial stack is:
// the initial stack bottom address
// + the size of argc(8)
// + the size of arg pointer(8) * the number of arg(argc)
// + the size of null pointer(8)
let read_offset = self.init_stack_bottom()
+ size_of::<usize>()
+ size_of::<usize>() * argc
+ size_of::<usize>();
let mut envp = Vec::new();
let user_space = CurrentUserSpace::get();
let mut envp_reader = user_space.reader(read_offset, MAX_ENVP_NUMBER * size_of::<usize>())?;
for _ in 0..MAX_ENVP_NUMBER {
let envp_ptr = envp_reader.read_val::<Vaddr>()?;
if envp_ptr == 0 {
break;
}
let env = user_space.read_cstring(envp_ptr, MAX_ENV_LEN)?;
envp.push(env);
}
Ok(envp)
}
/// Returns the bottom address of the init stack (lowest address).
pub const fn init_stack_bottom(&self) -> Vaddr {
self.base
}
}
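// A hypothetical read-back of an initialized stack, mirroring the layout diagram
// above (argc at the lowest address, then the argv pointers, a NULL, then the
// envp pointers; `init_stack` is an assumed binding):
//
//     let reader = init_stack.reader();
//     let argc = reader.argc()?;
//     let argv = reader.argv()?; // argv.len() == argc as usize
//     let envp = reader.envp()?;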

@ -0,0 +1,142 @@
// SPDX-License-Identifier: MPL-2.0
//! This module defines the struct `ProcessVm`
//! to represent the layout of user space process virtual memory.
//!
//! The `ProcessVm` struct contains `Vmar`,
//! which stores all existing memory mappings.
//! The `ProcessVm` also contains
//! the basic info of process-level VM segments,
//! like the init stack and the heap.
mod heap;
mod init_stack;
use aster_rights::Full;
pub use heap::Heap;
pub use self::{
heap::USER_HEAP_SIZE_LIMIT,
init_stack::{
aux_vec::{AuxKey, AuxVec},
InitStack, InitStackReader, InitStackWriter, INIT_STACK_SIZE, MAX_ARGV_NUMBER, MAX_ARG_LEN,
MAX_ENVP_NUMBER, MAX_ENV_LEN,
},
};
use crate::{prelude::*, vm::vmar::Vmar};
/*
* The user's virtual memory space layout looks like below.
* TODO: The layout of the user heap does not match the current implementation,
* and currently the initial program break is a fixed value.
*
* (high address)
* +---------------------+ <------+ The top of Vmar, which is the highest address usable
* | | Randomly padded pages
* +---------------------+ <------+ The base of the initial user stack
* | User stack |
* | |
* +---------||----------+ <------+ The user stack limit, can be extended lower
* | \/ |
* | ... |
* | |
* | MMAP Spaces |
* | |
* | ... |
* | /\ |
* +---------||----------+ <------+ The current program break
* | User heap |
* | |
* +---------------------+ <------+ The original program break
* | | Randomly padded pages
* +---------------------+ <------+ The end of the program's last segment
* | |
* | Loaded segments |
* | .text, .data, .bss |
* | , etc. |
* | |
* +---------------------+ <------+ The bottom of Vmar at 0x1_0000
* | | 64 KiB unusable space
* +---------------------+
* (low address)
*/
/// The process user space virtual memory
pub struct ProcessVm {
root_vmar: Vmar<Full>,
init_stack: InitStack,
heap: Heap,
}
impl Clone for ProcessVm {
fn clone(&self) -> Self {
Self {
root_vmar: self.root_vmar.dup().unwrap(),
init_stack: self.init_stack.clone(),
heap: self.heap.clone(),
}
}
}
impl ProcessVm {
/// Allocates a new `ProcessVm`
pub fn alloc() -> Self {
let root_vmar = Vmar::<Full>::new_root();
let init_stack = InitStack::new();
init_stack.map_init_stack_vmo(&root_vmar).unwrap();
let heap = Heap::new();
heap.alloc_and_map_vmo(&root_vmar).unwrap();
Self {
root_vmar,
heap,
init_stack,
}
}
/// Forks a `ProcessVm` from `other`.
///
/// The returned `ProcessVm` will have a forked `Vmar`.
pub fn fork_from(other: &ProcessVm) -> Result<Self> {
let root_vmar = Vmar::<Full>::fork_from(&other.root_vmar)?;
Ok(Self {
root_vmar,
heap: other.heap.clone(),
init_stack: other.init_stack.clone(),
})
}
pub fn root_vmar(&self) -> &Vmar<Full> {
&self.root_vmar
}
/// Returns a reader for reading contents from
/// the `InitStack`.
pub fn init_stack_reader(&self) -> InitStackReader {
self.init_stack.reader()
}
/// Returns the top address of the user stack.
pub fn user_stack_top(&self) -> Vaddr {
self.init_stack.user_stack_top()
}
pub(super) fn init_stack_writer(
&self,
argv: Vec<CString>,
envp: Vec<CString>,
aux_vec: AuxVec,
) -> InitStackWriter {
self.init_stack.writer(argv, envp, aux_vec)
}
pub(super) fn heap(&self) -> &Heap {
&self.heap
}
/// Clears existing mappings and then maps stack and heap vmo.
pub(super) fn clear_and_map(&self) {
self.root_vmar.clear().unwrap();
self.init_stack.map_init_stack_vmo(&self.root_vmar).unwrap();
self.heap.alloc_and_map_vmo(&self.root_vmar).unwrap();
}
}

@ -0,0 +1,217 @@
// SPDX-License-Identifier: MPL-2.0
//! A wrapper of xmas_elf's ELF parsing.
use xmas_elf::{
header::{self, Header, HeaderPt1, HeaderPt2, HeaderPt2_, Machine_, Type_},
program::{self, ProgramHeader64},
};
use crate::prelude::*;
pub struct Elf {
pub elf_header: ElfHeader,
pub program_headers: Vec<ProgramHeader64>,
}
impl Elf {
pub fn parse_elf(input: &[u8]) -> Result<Self> {
// First, parse the ELF header.
// The ELF header is usually 64 bytes: pt1 is 16 bytes and pt2 is 48 bytes.
// We require 128 bytes here to keep consistency with Linux implementations.
debug_assert!(input.len() >= 128);
let header = xmas_elf::header::parse_header(input)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse elf header fails"))?;
let elf_header = ElfHeader::parse_elf_header(header)?;
check_elf_header(&elf_header)?;
// Then parse the program header table.
// FIXME: we should acquire enough pages before parsing.
let ph_offset = elf_header.pt2.ph_offset;
let ph_count = elf_header.pt2.ph_count;
let ph_entry_size = elf_header.pt2.ph_entry_size;
debug_assert!(
input.len() >= ph_offset as usize + ph_count as usize * ph_entry_size as usize
);
let mut program_headers = Vec::with_capacity(ph_count as usize);
for index in 0..ph_count {
let program_header = xmas_elf::program::parse_program_header(input, header, index)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header fails"))?;
let ph64 = match program_header {
xmas_elf::program::ProgramHeader::Ph64(ph64) => *ph64,
xmas_elf::program::ProgramHeader::Ph32(_) => {
return_errno_with_message!(Errno::ENOEXEC, "not a 64-bit executable")
}
};
program_headers.push(ph64);
}
Ok(Self {
elf_header,
program_headers,
})
}
// The following info is used to set up the init stack.
/// The entry point of the ELF
pub fn entry_point(&self) -> Vaddr {
self.elf_header.pt2.entry_point as Vaddr
}
/// The offset of the program header table
pub fn ph_off(&self) -> u64 {
self.elf_header.pt2.ph_offset
}
/// The number of program headers
pub fn ph_count(&self) -> u16 {
self.elf_header.pt2.ph_count
}
/// The size of a program header
pub fn ph_ent(&self) -> u16 {
self.elf_header.pt2.ph_entry_size
}
/// The virtual address of the program header table
pub fn ph_addr(&self) -> Result<Vaddr> {
let ph_offset = self.ph_off();
for program_header in &self.program_headers {
if program_header.offset <= ph_offset
&& ph_offset < program_header.offset + program_header.file_size
{
return Ok(
(ph_offset - program_header.offset + program_header.virtual_addr) as Vaddr,
);
}
}
return_errno_with_message!(
Errno::ENOEXEC,
"can not find program header table address in elf"
);
}
/// Whether the ELF is a shared object
pub fn is_shared_object(&self) -> bool {
self.elf_header.pt2.type_.as_type() == header::Type::SharedObject
}
/// Reads the ldso path from the ELF INTERP segment
pub fn ldso_path(&self, file_header_buf: &[u8]) -> Result<Option<String>> {
for program_header in &self.program_headers {
let type_ = program_header.get_type().map_err(|_| {
Error::with_message(Errno::ENOEXEC, "parse program header type fails")
})?;
if type_ == program::Type::Interp {
let file_size = program_header.file_size as usize;
let file_offset = program_header.offset as usize;
debug_assert!(file_offset + file_size <= file_header_buf.len());
let ldso = CStr::from_bytes_with_nul(
&file_header_buf[file_offset..file_offset + file_size],
)?;
return Ok(Some(ldso.to_string_lossy().to_string()));
}
}
Ok(None)
}
// An offset to be subtracted from ELF vaddr for PIE
pub fn base_load_address_offset(&self) -> u64 {
let phdr = self.program_headers.first().unwrap();
phdr.virtual_addr - phdr.offset
}
}
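// A hypothetical parse of a page-sized file-header buffer (callers such as
// `load_program_to_vm` read at least the first page of the file beforehand;
// `file_header_buf` is an assumed binding):
//
//     let elf = Elf::parse_elf(&file_header_buf)?;
//     let entry = elf.entry_point();
//     if elf.is_shared_object() {
//         // PIE or ldso: `entry` must be relocated by the chosen base address.
//     }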
pub struct ElfHeader {
pub pt1: HeaderPt1,
pub pt2: HeaderPt2_64,
}
impl ElfHeader {
fn parse_elf_header(header: Header) -> Result<Self> {
let pt1 = *header.pt1;
let pt2 = match header.pt2 {
HeaderPt2::Header64(header_pt2) => {
let HeaderPt2_ {
type_,
machine,
version,
entry_point,
ph_offset,
sh_offset,
flags,
header_size,
ph_entry_size,
ph_count,
sh_entry_size,
sh_count,
sh_str_index,
} = header_pt2;
HeaderPt2_64 {
type_: *type_,
machine: *machine,
version: *version,
entry_point: *entry_point,
ph_offset: *ph_offset,
sh_offset: *sh_offset,
flags: *flags,
header_size: *header_size,
ph_entry_size: *ph_entry_size,
ph_count: *ph_count,
sh_entry_size: *sh_entry_size,
sh_count: *sh_count,
sh_str_index: *sh_str_index,
}
}
_ => return_errno_with_message!(Errno::ENOEXEC, "parse elf header failed"),
};
Ok(ElfHeader { pt1, pt2 })
}
}
pub struct HeaderPt2_64 {
pub type_: Type_,
pub machine: Machine_,
#[allow(dead_code)]
pub version: u32,
pub entry_point: u64,
pub ph_offset: u64,
#[allow(dead_code)]
pub sh_offset: u64,
#[allow(dead_code)]
pub flags: u32,
#[allow(dead_code)]
pub header_size: u16,
pub ph_entry_size: u16,
pub ph_count: u16,
#[allow(dead_code)]
pub sh_entry_size: u16,
#[allow(dead_code)]
pub sh_count: u16,
#[allow(dead_code)]
pub sh_str_index: u16,
}
fn check_elf_header(elf_header: &ElfHeader) -> Result<()> {
// 64-bit
debug_assert_eq!(elf_header.pt1.class(), header::Class::SixtyFour);
if elf_header.pt1.class() != header::Class::SixtyFour {
return_errno_with_message!(Errno::ENOEXEC, "not a 64-bit executable");
}
// little endian
debug_assert_eq!(elf_header.pt1.data(), header::Data::LittleEndian);
if elf_header.pt1.data() != header::Data::LittleEndian {
return_errno_with_message!(Errno::ENOEXEC, "not a little-endian executable");
}
// system V ABI
// debug_assert_eq!(elf_header.pt1.os_abi(), header::OsAbi::SystemV);
// if elf_header.pt1.os_abi() != header::OsAbi::SystemV {
// return Error::new(Errno::ENOEXEC);
// }
// x86_64 architecture
debug_assert_eq!(elf_header.pt2.machine.as_machine(), header::Machine::X86_64);
if elf_header.pt2.machine.as_machine() != header::Machine::X86_64 {
return_errno_with_message!(Errno::ENOEXEC, "not an x86_64 executable");
}
// Executable file or shared object
let elf_type = elf_header.pt2.type_.as_type();
debug_assert!(elf_type == header::Type::Executable || elf_type == header::Type::SharedObject);
if elf_type != header::Type::Executable && elf_type != header::Type::SharedObject {
return_errno_with_message!(Errno::ENOEXEC, "not an executable file");
}
Ok(())
}

@ -0,0 +1,434 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(unused_variables)]
//! This module is used to parse ELF file content to get `ElfLoadInfo`.
//! When creating a process from an ELF file, we use the `ElfLoadInfo` to construct the `VmSpace`.
use align_ext::AlignExt;
use aster_rights::Full;
use ostd::mm::VmIo;
use xmas_elf::program::{self, ProgramHeader64};
use super::elf_file::Elf;
use crate::{
fs::{
fs_resolver::{FsPath, FsResolver, AT_FDCWD},
path::Dentry,
},
prelude::*,
process::{
do_exit_group,
process_vm::{AuxKey, AuxVec, ProcessVm},
TermStatus,
},
vdso::{vdso_vmo, VDSO_VMO_SIZE},
vm::{perms::VmPerms, util::duplicate_frame, vmar::Vmar, vmo::VmoRightsOp},
};
/// Loads elf to the process vm.
///
/// This function will map elf segments and
/// initialize process init stack.
pub fn load_elf_to_vm(
process_vm: &ProcessVm,
file_header: &[u8],
elf_file: Arc<Dentry>,
fs_resolver: &FsResolver,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<ElfLoadInfo> {
let parsed_elf = Elf::parse_elf(file_header)?;
let ldso = lookup_and_parse_ldso(&parsed_elf, file_header, fs_resolver)?;
match init_and_map_vmos(process_vm, ldso, &parsed_elf, &elf_file) {
Ok((entry_point, mut aux_vec)) => {
// Map and set vdso entry.
// Since vdso does not require being mapped to any specific address,
// vdso is mapped after the elf file, heap and stack are mapped.
if let Some(vdso_text_base) = map_vdso_to_vm(process_vm) {
aux_vec
.set(AuxKey::AT_SYSINFO_EHDR, vdso_text_base as u64)
.unwrap();
}
let init_stack_writer = process_vm.init_stack_writer(argv, envp, aux_vec);
init_stack_writer.write().unwrap();
let user_stack_top = process_vm.user_stack_top();
Ok(ElfLoadInfo {
entry_point,
user_stack_top,
})
}
Err(err) => {
// Since the process_vm is in invalid state,
// the process cannot return to user space again,
// so `Vmar::clear` and `do_exit_group` are called here.
// FIXME: sending a fault signal is an alternative approach.
process_vm.root_vmar().clear().unwrap();
// FIXME: `current` macro will be used in `do_exit_group`.
// if the macro is used when creating the init process,
// the macro will panic. This corner case should be handled later.
// FIXME: how to set the correct exit status?
do_exit_group(TermStatus::Exited(1));
// The process will exit and the error code will be ignored.
Err(err)
}
}
}
fn lookup_and_parse_ldso(
elf: &Elf,
file_header: &[u8],
fs_resolver: &FsResolver,
) -> Result<Option<(Arc<Dentry>, Elf)>> {
let ldso_file = {
let Some(ldso_path) = elf.ldso_path(file_header)? else {
return Ok(None);
};
let fs_path = FsPath::new(AT_FDCWD, &ldso_path)?;
fs_resolver.lookup(&fs_path)?
};
let ldso_elf = {
let mut buf = Box::new([0u8; PAGE_SIZE]);
let inode = ldso_file.inode();
inode.read_bytes_at(0, &mut *buf)?;
Elf::parse_elf(&*buf)?
};
Ok(Some((ldso_file, ldso_elf)))
}
fn load_ldso(root_vmar: &Vmar<Full>, ldso_file: &Dentry, ldso_elf: &Elf) -> Result<LdsoLoadInfo> {
let map_addr = map_segment_vmos(ldso_elf, root_vmar, ldso_file)?;
Ok(LdsoLoadInfo::new(
ldso_elf.entry_point() + map_addr,
map_addr,
))
}
fn init_and_map_vmos(
process_vm: &ProcessVm,
ldso: Option<(Arc<Dentry>, Elf)>,
parsed_elf: &Elf,
elf_file: &Dentry,
) -> Result<(Vaddr, AuxVec)> {
let root_vmar = process_vm.root_vmar();
// After we clear the process VM, if any error happens, we must call exit_group instead of returning to user space.
let ldso_load_info = if let Some((ldso_file, ldso_elf)) = ldso {
Some(load_ldso(root_vmar, &ldso_file, &ldso_elf)?)
} else {
None
};
let elf_map_addr = map_segment_vmos(parsed_elf, root_vmar, elf_file)?;
let aux_vec = {
let ldso_base = ldso_load_info
.as_ref()
.map(|load_info| load_info.base_addr());
init_aux_vec(parsed_elf, elf_map_addr, ldso_base)?
};
let entry_point = if let Some(ldso_load_info) = ldso_load_info {
// Normal shared object
ldso_load_info.entry_point()
} else if parsed_elf.is_shared_object() {
// ldso itself
parsed_elf.entry_point() + elf_map_addr
} else {
// statically linked executable
parsed_elf.entry_point()
};
Ok((entry_point, aux_vec))
}
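// A summary of the entry-point selection above:
//   dynamically linked executable -> the ldso's relocated entry point
//   ldso or PIE loaded directly   -> parsed_elf.entry_point() + elf_map_addr
//   statically linked executable  -> parsed_elf.entry_point() unchanged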
pub struct LdsoLoadInfo {
entry_point: Vaddr,
base_addr: Vaddr,
}
impl LdsoLoadInfo {
pub fn new(entry_point: Vaddr, base_addr: Vaddr) -> Self {
Self {
entry_point,
base_addr,
}
}
pub fn entry_point(&self) -> Vaddr {
self.entry_point
}
pub fn base_addr(&self) -> Vaddr {
self.base_addr
}
}
pub struct ElfLoadInfo {
entry_point: Vaddr,
user_stack_top: Vaddr,
}
impl ElfLoadInfo {
pub fn new(entry_point: Vaddr, user_stack_top: Vaddr) -> Self {
Self {
entry_point,
user_stack_top,
}
}
pub fn entry_point(&self) -> Vaddr {
self.entry_point
}
pub fn user_stack_top(&self) -> Vaddr {
self.user_stack_top
}
}
/// Inits a VMO for each segment and then maps the segments to the root vmar
pub fn map_segment_vmos(elf: &Elf, root_vmar: &Vmar<Full>, elf_file: &Dentry) -> Result<Vaddr> {
// All segments of a shared object must be mapped to a contiguous VM range
// to ensure that the relative offsets between segments stay unchanged.
let base_addr = if elf.is_shared_object() {
base_map_addr(elf, root_vmar)?
} else {
0
};
for program_header in &elf.program_headers {
let type_ = program_header
.get_type()
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header type fails"))?;
if type_ == program::Type::Load {
check_segment_align(program_header)?;
map_segment_vmo(program_header, elf_file, root_vmar, base_addr)?;
}
}
Ok(base_addr)
}
fn base_map_addr(elf: &Elf, root_vmar: &Vmar<Full>) -> Result<Vaddr> {
let elf_size = elf
.program_headers
.iter()
.filter_map(|program_header| {
if let Ok(type_) = program_header.get_type()
&& type_ == program::Type::Load
{
let ph_max_addr = program_header.virtual_addr + program_header.mem_size;
Some(ph_max_addr as usize)
} else {
None
}
})
.max()
.ok_or(Error::with_message(
Errno::ENOEXEC,
"executable file does not has loadable sections",
))?;
let map_size = elf_size.align_up(PAGE_SIZE);
let vmar_map_options = root_vmar.new_map(map_size, VmPerms::empty())?;
vmar_map_options.build()
}
/// Creates and maps the corresponding segment VMO to `root_vmar`.
/// If needed, creates an additional anonymous mapping to represent the .bss segment.
fn map_segment_vmo(
program_header: &ProgramHeader64,
elf_file: &Dentry,
root_vmar: &Vmar<Full>,
base_addr: Vaddr,
) -> Result<()> {
trace!(
"mem range = 0x{:x} - 0x{:x}, mem_size = 0x{:x}",
program_header.virtual_addr,
program_header.virtual_addr + program_header.mem_size,
program_header.mem_size
);
trace!(
"file range = 0x{:x} - 0x{:x}, file_size = 0x{:x}",
program_header.offset,
program_header.offset + program_header.file_size,
program_header.file_size
);
let file_offset = program_header.offset as usize;
let virtual_addr = program_header.virtual_addr as usize;
debug_assert!(file_offset % PAGE_SIZE == virtual_addr % PAGE_SIZE);
let segment_vmo = {
let inode = elf_file.inode();
inode
.page_cache()
.ok_or(Error::with_message(
Errno::ENOENT,
"executable has no page cache",
))?
.to_dyn()
.dup_independent()?
};
let total_map_size = {
let vmap_start = virtual_addr.align_down(PAGE_SIZE);
let vmap_end = (virtual_addr + program_header.mem_size as usize).align_up(PAGE_SIZE);
vmap_end - vmap_start
};
let (segment_offset, segment_size) = {
let start = file_offset.align_down(PAGE_SIZE);
let end = (file_offset + program_header.file_size as usize).align_up(PAGE_SIZE);
debug_assert!(total_map_size >= (program_header.file_size as usize).align_up(PAGE_SIZE));
(start, end - start)
};
// Write zeros as padding. There are head padding and tail padding.
// Head padding: if the segment's virtual address is not page-aligned,
// the bytes in the first page from the page start up to the virtual address should be zeroed.
// Tail padding: if the segment's mem_size is larger than its file_size,
// the bytes that are not backed by file content should be zeros (usually the .data/.bss sections).
// Head padding.
let page_offset = file_offset % PAGE_SIZE;
if page_offset != 0 {
let new_frame = {
let head_frame = segment_vmo.commit_page(segment_offset)?;
let new_frame = duplicate_frame(&head_frame)?;
let buffer = vec![0u8; page_offset];
new_frame.write_bytes(0, &buffer).unwrap();
new_frame
};
let head_idx = segment_offset / PAGE_SIZE;
segment_vmo.replace(new_frame, head_idx)?;
}
// Tail padding.
let tail_padding_offset = program_header.file_size as usize + page_offset;
if segment_size > tail_padding_offset {
let new_frame = {
let tail_frame = segment_vmo.commit_page(segment_offset + tail_padding_offset)?;
let new_frame = duplicate_frame(&tail_frame)?;
let buffer = vec![0u8; (segment_size - tail_padding_offset) % PAGE_SIZE];
new_frame
.write_bytes(tail_padding_offset % PAGE_SIZE, &buffer)
.unwrap();
new_frame
};
let tail_idx = (segment_offset + tail_padding_offset) / PAGE_SIZE;
segment_vmo.replace(new_frame, tail_idx).unwrap();
}
let perms = parse_segment_perm(program_header.flags);
let mut vm_map_options = root_vmar
.new_map(segment_size, perms)?
.vmo(segment_vmo)
.vmo_offset(segment_offset)
.vmo_limit(segment_offset + segment_size)
.can_overwrite(true);
let offset = base_addr + (program_header.virtual_addr as Vaddr).align_down(PAGE_SIZE);
vm_map_options = vm_map_options.offset(offset);
let map_addr = vm_map_options.build()?;
let anonymous_map_size: usize = if total_map_size > segment_size {
total_map_size - segment_size
} else {
0
};
if anonymous_map_size > 0 {
let mut anonymous_map_options = root_vmar
.new_map(anonymous_map_size, perms)?
.can_overwrite(true);
anonymous_map_options = anonymous_map_options.offset(offset + segment_size);
anonymous_map_options.build()?;
}
Ok(())
}
fn parse_segment_perm(flags: xmas_elf::program::Flags) -> VmPerms {
let mut vm_perm = VmPerms::empty();
if flags.is_read() {
vm_perm |= VmPerms::READ;
}
if flags.is_write() {
vm_perm |= VmPerms::WRITE;
}
if flags.is_execute() {
vm_perm |= VmPerms::EXEC;
}
vm_perm
}
fn check_segment_align(program_header: &ProgramHeader64) -> Result<()> {
let align = program_header.align;
if align == 0 || align == 1 {
// no align requirement
return Ok(());
}
debug_assert!(align.is_power_of_two());
if !align.is_power_of_two() {
return_errno_with_message!(Errno::ENOEXEC, "segment align is invalid.");
}
debug_assert!(program_header.offset % align == program_header.virtual_addr % align);
if program_header.offset % align != program_header.virtual_addr % align {
return_errno_with_message!(Errno::ENOEXEC, "segment align is not satisfied.");
}
Ok(())
}
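// A worked example of the congruence check above (hypothetical values): a PT_LOAD
// segment with offset 0x1234, virtual_addr 0x40_1234 and align 0x1000 passes,
// since 0x1234 % 0x1000 == 0x40_1234 % 0x1000 == 0x234.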
pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr, ldso_base: Option<Vaddr>) -> Result<AuxVec> {
let mut aux_vec = AuxVec::new();
aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _)?;
let ph_addr = if elf.is_shared_object() {
elf.ph_addr()? + elf_map_addr
} else {
elf.ph_addr()?
};
aux_vec.set(AuxKey::AT_PHDR, ph_addr as u64)?;
aux_vec.set(AuxKey::AT_PHNUM, elf.ph_count() as u64)?;
aux_vec.set(AuxKey::AT_PHENT, elf.ph_ent() as u64)?;
let elf_entry = if elf.is_shared_object() {
let base_load_offset = elf.base_load_address_offset();
elf.entry_point() + elf_map_addr - base_load_offset as usize
} else {
elf.entry_point()
};
aux_vec.set(AuxKey::AT_ENTRY, elf_entry as u64)?;
if let Some(ldso_base) = ldso_base {
aux_vec.set(AuxKey::AT_BASE, ldso_base as u64)?;
}
Ok(aux_vec)
}
/// Maps the VDSO VMO to the corresponding virtual memory address.
fn map_vdso_to_vm(process_vm: &ProcessVm) -> Option<Vaddr> {
let root_vmar = process_vm.root_vmar();
let vdso_vmo = vdso_vmo()?;
let options = root_vmar
.new_map(VDSO_VMO_SIZE, VmPerms::empty())
.unwrap()
.vmo(vdso_vmo.dup().unwrap());
let vdso_data_base = options.build().unwrap();
let vdso_text_base = vdso_data_base + 0x4000;
let data_perms = VmPerms::READ | VmPerms::WRITE;
let text_perms = VmPerms::READ | VmPerms::EXEC;
root_vmar
.protect(data_perms, vdso_data_base..vdso_data_base + PAGE_SIZE)
.unwrap();
root_vmar
.protect(text_perms, vdso_text_base..vdso_text_base + PAGE_SIZE)
.unwrap();
Some(vdso_text_base)
}

@ -0,0 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
mod elf_file;
mod load_elf;
pub use load_elf::{load_elf_to_vm, ElfLoadInfo};

@ -0,0 +1,85 @@
// SPDX-License-Identifier: MPL-2.0
pub mod elf;
mod shebang;
use self::{
elf::{load_elf_to_vm, ElfLoadInfo},
shebang::parse_shebang_line,
};
use super::process_vm::ProcessVm;
use crate::{
fs::{
fs_resolver::{FsPath, FsResolver, AT_FDCWD},
path::Dentry,
},
prelude::*,
};
/// Loads an executable to the root vmar, including loading the program image,
/// preparing the heap and stack, and initializing the argv, envp and aux tables.
///
/// About `recursion_limit`: the recursion limit bounds the recursion depth of shebang executables.
/// If the interpreter (the program after `#!`) of a shebang executable is itself a shebang,
/// it triggers recursion, and we try to set up the root vmar for that interpreter as well.
/// For most cases, setting `recursion_limit` to 1 should be enough,
/// because the interpreter is usually an ELF binary (e.g., /bin/bash).
pub fn load_program_to_vm(
process_vm: &ProcessVm,
elf_file: Arc<Dentry>,
argv: Vec<CString>,
envp: Vec<CString>,
fs_resolver: &FsResolver,
recursion_limit: usize,
) -> Result<(String, ElfLoadInfo)> {
let abs_path = elf_file.abs_path();
let inode = elf_file.inode();
let file_header = {
// read the first page of file header
let mut file_header_buffer = Box::new([0u8; PAGE_SIZE]);
inode.read_bytes_at(0, &mut *file_header_buffer)?;
file_header_buffer
};
if let Some(mut new_argv) = parse_shebang_line(&*file_header)? {
if recursion_limit == 0 {
return_errno_with_message!(Errno::ELOOP, "the recursion limit is reached");
}
new_argv.extend_from_slice(&argv);
let interpreter = {
let filename = new_argv[0].to_str()?.to_string();
let fs_path = FsPath::new(AT_FDCWD, &filename)?;
fs_resolver.lookup(&fs_path)?
};
check_executable_file(&interpreter)?;
return load_program_to_vm(
process_vm,
interpreter,
new_argv,
envp,
fs_resolver,
recursion_limit - 1,
);
}
process_vm.clear_and_map();
let elf_load_info =
load_elf_to_vm(process_vm, &*file_header, elf_file, fs_resolver, argv, envp)?;
Ok((abs_path, elf_load_info))
}
pub fn check_executable_file(dentry: &Arc<Dentry>) -> Result<()> {
if dentry.type_().is_directory() {
return_errno_with_message!(Errno::EISDIR, "the file is a directory");
}
if !dentry.type_().is_regular_file() {
return_errno_with_message!(Errno::EACCES, "the dentry is not a regular file");
}
if !dentry.mode()?.is_executable() {
return_errno_with_message!(Errno::EACCES, "the dentry is not executable");
}
Ok(())
}

@ -0,0 +1,33 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
/// Tries to parse a buffer as a shebang line.
///
/// If the buffer starts with `#!` and its header is a valid shebang sequence,
/// then the function returns `Ok(Some(parts))`,
/// where `parts` is a `Vec` that contains the interpreter path and its arguments.
/// If the buffer starts with `#!` but some error occurs while parsing the file,
/// then `Err(_)` is returned.
/// If the buffer does not start with `#!`, then `Ok(None)` is returned.
pub fn parse_shebang_line(file_header_buffer: &[u8]) -> Result<Option<Vec<CString>>> {
if !file_header_buffer.starts_with(b"#!") || !file_header_buffer.contains(&b'\n') {
// the file is not a shebang
return Ok(None);
}
let first_line_len = file_header_buffer.iter().position(|&c| c == b'\n').unwrap();
// skip #!
let shebang_header = &file_header_buffer[2..first_line_len];
let mut shebang_argv = Vec::new();
for arg in shebang_header.split(|&c| c == b' ') {
let arg = CString::new(arg)?;
shebang_argv.push(arg);
}
if shebang_argv.len() != 1 {
return_errno_with_message!(
Errno::EINVAL,
"One and only one intpreter program should be specified"
);
}
Ok(Some(shebang_argv))
}
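// Hypothetical behavior of the parser above:
//
//     assert_eq!(parse_shebang_line(b"#!/bin/sh\n")?,
//                Some(vec![CString::new("/bin/sh")?]));
//     assert_eq!(parse_shebang_line(b"\x7fELF...\n")?, None); // not a shebang
//
// Note that with the current single-part check, a shebang line that carries
// interpreter arguments (e.g. b"#!/bin/sh -e\n") is rejected with EINVAL.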

@ -0,0 +1,89 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use super::process_vm::{INIT_STACK_SIZE, USER_HEAP_SIZE_LIMIT};
use crate::prelude::*;
pub struct ResourceLimits {
rlimits: [RLimit64; RLIMIT_COUNT],
}
impl ResourceLimits {
pub fn get_rlimit(&self, resource: ResourceType) -> &RLimit64 {
&self.rlimits[resource as usize]
}
pub fn get_rlimit_mut(&mut self, resource: ResourceType) -> &mut RLimit64 {
&mut self.rlimits[resource as usize]
}
}
impl Default for ResourceLimits {
fn default() -> Self {
let stack_size = RLimit64::new(INIT_STACK_SIZE as u64);
let heap_size = RLimit64::new(USER_HEAP_SIZE_LIMIT as u64);
let open_files = RLimit64::new(1024);
let mut rlimits = Self {
rlimits: [RLimit64::default(); RLIMIT_COUNT],
};
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_STACK) = stack_size;
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_DATA) = heap_size;
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_NOFILE) = open_files;
rlimits
}
}
#[repr(u32)]
#[derive(Debug, Clone, Copy, TryFromInt)]
pub enum ResourceType {
RLIMIT_CPU = 0,
RLIMIT_FSIZE = 1,
RLIMIT_DATA = 2,
RLIMIT_STACK = 3,
RLIMIT_CORE = 4,
RLIMIT_RSS = 5,
RLIMIT_NPROC = 6,
RLIMIT_NOFILE = 7,
RLIMIT_MEMLOCK = 8,
RLIMIT_AS = 9,
RLIMIT_LOCKS = 10,
RLIMIT_SIGPENDING = 11,
RLIMIT_MSGQUEUE = 12,
RLIMIT_NICE = 13,
RLIMIT_RTPRIO = 14,
RLIMIT_RTTIME = 15,
}
pub const RLIMIT_COUNT: usize = 16;
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct RLimit64 {
cur: u64,
max: u64,
}
impl RLimit64 {
pub fn new(cur: u64) -> Self {
Self { cur, max: u64::MAX }
}
pub fn get_cur(&self) -> u64 {
self.cur
}
pub fn get_max(&self) -> u64 {
self.max
}
}
impl Default for RLimit64 {
fn default() -> Self {
Self {
cur: u64::MAX,
max: u64::MAX,
}
}
}
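// A hypothetical usage of the defaults above:
//
//     let limits = ResourceLimits::default();
//     assert_eq!(limits.get_rlimit(ResourceType::RLIMIT_NOFILE).get_cur(), 1024);
//     assert_eq!(limits.get_rlimit(ResourceType::RLIMIT_CPU).get_max(), u64::MAX);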

@ -0,0 +1,264 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(non_camel_case_types)]
use core::mem::{self, size_of};
use aster_util::{read_union_fields, union_read_ptr::UnionReadPtr};
use super::sig_num::SigNum;
use crate::{
arch::cpu::GpRegs,
prelude::*,
process::{Pid, Uid},
};
pub type sigset_t = u64;
// FIXME: this type should be put at suitable place
pub type clock_t = i64;
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct sigaction_t {
pub handler_ptr: Vaddr,
pub flags: u32,
pub restorer_ptr: Vaddr,
pub mask: sigset_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct siginfo_t {
pub si_signo: i32,
pub si_errno: i32,
pub si_code: i32,
_padding: i32,
/// `siginfo_fields` should be a union type (see the Occlum definition), but union
/// types have unsafe interfaces. Here we use a simple byte array.
siginfo_fields: siginfo_fields_t,
}
impl siginfo_t {
pub fn new(num: SigNum, code: i32) -> Self {
siginfo_t {
si_signo: num.as_u8() as i32,
si_errno: 0,
si_code: code,
_padding: 0,
siginfo_fields: siginfo_fields_t::zero_fields(),
}
}
pub fn set_si_addr(&mut self, si_addr: Vaddr) {
self.siginfo_fields.sigfault.addr = si_addr;
}
pub fn si_addr(&self) -> Vaddr {
// let siginfo = *self;
read_union_fields!(self.siginfo_fields.sigfault.addr)
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_fields_t {
bytes: [u8; 128 - mem::size_of::<i32>() * 4],
common: siginfo_common_t,
sigfault: siginfo_sigfault_t,
}
impl siginfo_fields_t {
fn zero_fields() -> Self {
Self {
bytes: [0; 128 - mem::size_of::<i32>() * 4],
}
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_t {
first: siginfo_common_first_t,
second: siginfo_common_second_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_first_t {
piduid: siginfo_piduid_t,
timer: siginfo_timer_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_piduid_t {
pid: Pid,
uid: Uid,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_timer_t {
timerid: i32,
overrun: i32,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_second_t {
value: sigval_t,
sigchild: siginfo_sigchild_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub union sigval_t {
sigval_int: i32,
sigval_ptr: Vaddr, //*mut c_void
}
impl sigval_t {
pub fn read_int(&self) -> i32 {
read_union_fields!(self.sigval_int)
}
pub fn read_ptr(&self) -> Vaddr {
read_union_fields!(self.sigval_ptr)
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigchild_t {
status: i32,
utime: clock_t,
stime: clock_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_sigfault_t {
addr: Vaddr, //*const c_void
addr_lsb: i16,
first: siginfo_sigfault_first_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigfault_first_t {
addr_bnd: siginfo_addr_bnd_t,
pkey: u32,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_addr_bnd_t {
lower: Vaddr, // *const c_void
upper: Vaddr, // *const c_void,
}
#[derive(Clone, Copy, Debug, Pod)]
#[repr(C)]
pub struct ucontext_t {
pub uc_flags: u64,
pub uc_link: Vaddr, // *mut ucontext_t
pub uc_stack: stack_t,
pub uc_mcontext: mcontext_t,
pub uc_sigmask: sigset_t,
pub fpregs: [u8; 64 * 8], //fxsave structure
}
impl Default for ucontext_t {
fn default() -> Self {
Self {
uc_flags: Default::default(),
uc_link: Default::default(),
uc_stack: Default::default(),
uc_mcontext: Default::default(),
uc_sigmask: Default::default(),
fpregs: [0u8; 64 * 8],
}
}
}
pub type stack_t = sigaltstack_t;
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct sigaltstack_t {
pub ss_sp: Vaddr, // *mut c_void
pub ss_flags: i32,
pub ss_size: usize,
}
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct mcontext_t {
pub inner: SignalCpuContext,
// TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2
_unused0: [u64; 5],
// TODO: this field should be `fpregs: fpregset_t,`
_unused1: usize,
_reserved: [u64; 8],
}
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct SignalCpuContext {
pub gp_regs: GpRegs,
pub fpregs_on_heap: u64,
pub fpregs: Vaddr, // *mut FpRegs,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct _sigev_thread {
pub function: Vaddr,
pub attribute: Vaddr,
}
const SIGEV_MAX_SIZE: usize = 64;
/// The total size of the fields `sigev_value`, `sigev_signo` and `sigev_notify`.
const SIGEV_PREAMBLE_SIZE: usize = size_of::<i32>() * 2 + size_of::<sigval_t>();
const SIGEV_PAD_SIZE: usize = (SIGEV_MAX_SIZE - SIGEV_PREAMBLE_SIZE) / size_of::<i32>();
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub union _sigev_un {
pub _pad: [i32; SIGEV_PAD_SIZE],
pub _tid: i32,
pub _sigev_thread: _sigev_thread,
}
impl _sigev_un {
pub fn read_tid(&self) -> i32 {
read_union_fields!(self._tid)
}
pub fn read_function(&self) -> Vaddr {
read_union_fields!(self._sigev_thread.function)
}
pub fn read_attribute(&self) -> Vaddr {
read_union_fields!(self._sigev_thread.attribute)
}
}
#[derive(Debug, Copy, Clone, TryFromInt, PartialEq)]
#[repr(i32)]
pub enum SigNotify {
SIGEV_SIGNAL = 0,
SIGEV_NONE = 1,
SIGEV_THREAD = 2,
SIGEV_THREAD_ID = 4,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct sigevent_t {
pub sigev_value: sigval_t,
pub sigev_signo: i32,
pub sigev_notify: i32,
pub sigev_un: _sigev_un,
}

@ -0,0 +1,107 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
/// Standard signals
pub(super) const MIN_STD_SIG_NUM: u8 = 1;
pub(super) const MAX_STD_SIG_NUM: u8 = 31; // inclusive
/// Real-time signals
pub(super) const MIN_RT_SIG_NUM: u8 = 32;
pub(super) const MAX_RT_SIG_NUM: u8 = 64; // inclusive
/// The counts of standard, real-time, and all signals
pub(super) const COUNT_STD_SIGS: usize = 31;
pub(super) const COUNT_RT_SIGS: usize = 33;
pub(super) const COUNT_ALL_SIGS: usize = 64;
pub const SIG_DFL: usize = 0;
pub const SIG_IGN: usize = 1;
use super::sig_num::SigNum;
macro_rules! define_std_signums {
( $( $name: ident = $num: expr ),+, ) => {
$(
pub const $name : SigNum = SigNum::from_u8($num);
)*
}
}
define_std_signums! {
SIGHUP = 1, // Hangup detected on controlling terminal or death of controlling process
SIGINT = 2, // Interrupt from keyboard
SIGQUIT = 3, // Quit from keyboard
SIGILL = 4, // Illegal Instruction
SIGTRAP = 5, // Trace/breakpoint trap
SIGABRT = 6, // Abort signal from abort(3)
SIGBUS = 7, // Bus error (bad memory access)
SIGFPE = 8, // Floating-point exception
SIGKILL = 9, // Kill signal
SIGUSR1 = 10, // User-defined signal 1
SIGSEGV = 11, // Invalid memory reference
SIGUSR2 = 12, // User-defined signal 2
SIGPIPE = 13, // Broken pipe: write to pipe with no readers; see pipe(7)
SIGALRM = 14, // Timer signal from alarm(2)
SIGTERM = 15, // Termination signal
SIGSTKFLT = 16, // Stack fault on coprocessor (unused)
SIGCHLD = 17, // Child stopped or terminated
SIGCONT = 18, // Continue if stopped
SIGSTOP = 19, // Stop process
SIGTSTP = 20, // Stop typed at terminal
SIGTTIN = 21, // Terminal input for background process
SIGTTOU = 22, // Terminal output for background process
SIGURG = 23, // Urgent condition on socket (4.2BSD)
SIGXCPU = 24, // CPU time limit exceeded (4.2BSD); see setrlimit(2)
SIGXFSZ = 25, // File size limit exceeded (4.2BSD); see setrlimit(2)
SIGVTALRM = 26, // Virtual alarm clock (4.2BSD)
SIGPROF = 27, // Profiling timer expired
SIGWINCH = 28, // Window resize signal (4.3BSD, Sun)
SIGIO = 29, // I/O now possible (4.2BSD)
SIGPWR = 30, // Power failure (System V)
SIGSYS = 31, // Bad system call (SVr4); see also seccomp(2)
}
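For reference, each entry of the invocation above expands to a plain constant; the first one becomes:
```rust
pub const SIGHUP: SigNum = SigNum::from_u8(1);
```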
pub const SI_ASYNCNL: i32 = -60;
pub const SI_TKILL: i32 = -6;
pub const SI_SIGIO: i32 = -5;
pub const SI_ASYNCIO: i32 = -4;
pub const SI_MESGQ: i32 = -3;
pub const SI_TIMER: i32 = -2;
pub const SI_QUEUE: i32 = -1;
pub const SI_USER: i32 = 0;
pub const SI_KERNEL: i32 = 128;
pub const FPE_INTDIV: i32 = 1;
pub const FPE_INTOVF: i32 = 2;
pub const FPE_FLTDIV: i32 = 3;
pub const FPE_FLTOVF: i32 = 4;
pub const FPE_FLTUND: i32 = 5;
pub const FPE_FLTRES: i32 = 6;
pub const FPE_FLTINV: i32 = 7;
pub const FPE_FLTSUB: i32 = 8;
pub const ILL_ILLOPC: i32 = 1;
pub const ILL_ILLOPN: i32 = 2;
pub const ILL_ILLADR: i32 = 3;
pub const ILL_ILLTRP: i32 = 4;
pub const ILL_PRVOPC: i32 = 5;
pub const ILL_PRVREG: i32 = 6;
pub const ILL_COPROC: i32 = 7;
pub const ILL_BADSTK: i32 = 8;
pub const SEGV_MAPERR: i32 = 1;
pub const SEGV_ACCERR: i32 = 2;
pub const SEGV_BNDERR: i32 = 3;
pub const SEGV_PKUERR: i32 = 4;
pub const BUS_ADRALN: i32 = 1;
pub const BUS_ADRERR: i32 = 2;
pub const BUS_OBJERR: i32 = 3;
pub const BUS_MCEERR_AR: i32 = 4;
pub const BUS_MCEERR_AO: i32 = 5;
pub const CLD_EXITED: i32 = 1;
pub const CLD_KILLED: i32 = 2;
pub const CLD_DUMPED: i32 = 3;
pub const CLD_TRAPPED: i32 = 4;
pub const CLD_STOPPED: i32 = 5;
pub const CLD_CONTINUED: i32 = 6;

View File

@ -0,0 +1,33 @@
// SPDX-License-Identifier: MPL-2.0
use super::{sig_mask::SigMask, sig_num::SigNum};
use crate::{
events::{Events, EventsFilter},
prelude::*,
};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigEvents(SigNum);
impl SigEvents {
pub fn new(sig_num: SigNum) -> Self {
Self(sig_num)
}
}
impl Events for SigEvents {}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigEventsFilter(SigMask);
impl SigEventsFilter {
pub fn new(mask: SigMask) -> Self {
Self(mask)
}
}
impl EventsFilter<SigEvents> for SigEventsFilter {
fn filter(&self, event: &SigEvents) -> bool {
!self.0.contains(event.0)
}
}
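A short sketch of the filter semantics (the demo function is made up; `SIGINT` comes from the sibling `constants` module): a filter built from a mask rejects exactly the signals contained in that mask.
```rust
fn filter_demo() {
    let mut mask = SigMask::new_empty();
    mask += SIGINT; // block SIGINT
    let filter = SigEventsFilter::new(mask);
    // A masked signal does not pass the filter.
    assert!(!filter.filter(&SigEvents::new(SIGINT)));
}
```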

View File

@ -0,0 +1,275 @@
// SPDX-License-Identifier: MPL-2.0
pub mod c_types;
pub mod constants;
mod events;
mod pauser;
mod poll;
pub mod sig_action;
pub mod sig_disposition;
pub mod sig_mask;
pub mod sig_num;
pub mod sig_queues;
mod sig_stack;
pub mod signals;
use core::{mem, sync::atomic::Ordering};
use align_ext::AlignExt;
use c_types::{siginfo_t, ucontext_t};
pub use events::{SigEvents, SigEventsFilter};
use ostd::{cpu::UserContext, user::UserContextApi};
pub use pauser::Pauser;
pub use poll::{Pollable, Pollee, Poller};
use sig_action::{SigAction, SigActionFlags, SigDefaultAction};
use sig_mask::SigMask;
use sig_num::SigNum;
pub use sig_stack::{SigStack, SigStackFlags};
use super::posix_thread::{PosixThread, PosixThreadExt};
use crate::{
prelude::*,
process::{do_exit_group, TermStatus},
thread::{status::ThreadStatus, Thread},
};
pub trait SignalContext {
/// Set signal handler arguments
fn set_arguments(&mut self, sig_num: SigNum, siginfo_addr: usize, ucontext_addr: usize);
}
// TODO: The interface of this method is error-prone.
// The method takes the current thread as an argument for efficiency.
/// Handle pending signal for current process.
pub fn handle_pending_signal(
context: &mut UserContext,
current_thread: &Arc<Thread>,
) -> Result<()> {
// We first deal with signals in the current thread, then signals in the current process.
let posix_thread = current_thread.as_posix_thread().unwrap();
let signal = {
let sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed);
if let Some(signal) = posix_thread.dequeue_signal(&sig_mask) {
signal
} else {
return Ok(());
}
};
let sig_num = signal.num();
trace!("sig_num = {:?}, sig_name = {}", sig_num, sig_num.sig_name());
let current = posix_thread.process();
let mut sig_dispositions = current.sig_dispositions().lock();
let sig_action = sig_dispositions.get(sig_num);
trace!("sig action: {:x?}", sig_action);
match sig_action {
SigAction::Ign => {
trace!("Ignore signal {:?}", sig_num);
}
SigAction::User {
handler_addr,
flags,
restorer_addr,
mask,
} => {
if flags.contains(SigActionFlags::SA_RESETHAND) {
// In Linux, SA_RESETHAND corresponds to SA_ONESHOT,
// which means the user handler will be executed only once and then reset to the default.
// Refer to https://elixir.bootlin.com/linux/v6.0.9/source/kernel/signal.c#L2761.
sig_dispositions.set_default(sig_num);
}
drop(sig_dispositions);
handle_user_signal(
posix_thread,
sig_num,
handler_addr,
flags,
restorer_addr,
mask,
context,
signal.to_info(),
)?
}
SigAction::Dfl => {
drop(sig_dispositions);
let sig_default_action = SigDefaultAction::from_signum(sig_num);
trace!("sig_default_action: {:?}", sig_default_action);
match sig_default_action {
SigDefaultAction::Core | SigDefaultAction::Term => {
warn!(
"{:?}: terminating on signal {}",
current.executable_path(),
sig_num.sig_name()
);
// We should exit the current process here, since we cannot restore a valid status from the trap now.
do_exit_group(TermStatus::Killed(sig_num));
}
SigDefaultAction::Ign => {}
SigDefaultAction::Stop => {
let _ = current_thread.atomic_status().compare_exchange(
ThreadStatus::Running,
ThreadStatus::Stopped,
Ordering::AcqRel,
Ordering::Relaxed,
);
}
SigDefaultAction::Cont => {
let _ = current_thread.atomic_status().compare_exchange(
ThreadStatus::Stopped,
ThreadStatus::Running,
Ordering::AcqRel,
Ordering::Relaxed,
);
}
}
}
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn handle_user_signal(
posix_thread: &PosixThread,
sig_num: SigNum,
handler_addr: Vaddr,
flags: SigActionFlags,
restorer_addr: Vaddr,
mut mask: SigMask,
context: &mut UserContext,
sig_info: siginfo_t,
) -> Result<()> {
debug!("sig_num = {:?}, signame = {}", sig_num, sig_num.sig_name());
debug!("handler_addr = 0x{:x}", handler_addr);
debug!("flags = {:?}", flags);
debug!("restorer_addr = 0x{:x}", restorer_addr);
// FIXME: How to respect flags?
if flags.contains_unsupported_flag() {
warn!("Unsupported Signal flags: {:?}", flags);
}
if !flags.contains(SigActionFlags::SA_NODEFER) {
// add current signal to mask
mask += sig_num;
}
// block signals in sigmask when running signal handler
let old_mask = posix_thread.sig_mask().load(Ordering::Relaxed);
posix_thread
.sig_mask()
.store(old_mask + mask, Ordering::Relaxed);
// Set up signal stack.
let mut stack_pointer = if let Some(sp) = use_alternate_signal_stack(posix_thread) {
sp as u64
} else {
// just use user stack
context.stack_pointer() as u64
};
// To avoid corrupting the signal stack, we first subtract 128 bytes (skipping the red zone).
stack_pointer -= 128;
let user_space = CurrentUserSpace::get();
// 1. write siginfo_t
stack_pointer -= mem::size_of::<siginfo_t>() as u64;
user_space.write_val(stack_pointer as _, &sig_info)?;
let siginfo_addr = stack_pointer;
// 2. write ucontext_t.
stack_pointer = alloc_aligned_in_user_stack(stack_pointer, mem::size_of::<ucontext_t>(), 16)?;
let mut ucontext = ucontext_t {
uc_sigmask: mask.into(),
..Default::default()
};
ucontext
.uc_mcontext
.inner
.gp_regs
.copy_from_raw(context.general_regs());
let mut sig_context = posix_thread.sig_context().lock();
if let Some(sig_context_addr) = *sig_context {
ucontext.uc_link = sig_context_addr;
} else {
ucontext.uc_link = 0;
}
// TODO: store fp regs in ucontext
user_space.write_val(stack_pointer as _, &ucontext)?;
let ucontext_addr = stack_pointer;
// Store the ucontext addr in sig context of current thread.
*sig_context = Some(ucontext_addr as Vaddr);
// 3. Set the address of the trampoline code.
if flags.contains(SigActionFlags::SA_RESTORER) {
// If contains SA_RESTORER flag, trampoline code is provided by libc in restorer_addr.
// We just store restorer_addr on the user stack so that, when the handler returns, control transfers to the trampoline code.
stack_pointer = write_u64_to_user_stack(stack_pointer, restorer_addr as u64)?;
trace!("After set restorer addr: user_rsp = 0x{:x}", stack_pointer);
} else {
// Otherwise we create a trampoline.
// FIXME: This may cause problems if we read old_context from rsp.
const TRAMPOLINE: &[u8] = &[
0xb8, 0x0f, 0x00, 0x00, 0x00, // mov eax, 15(syscall number of rt_sigreturn)
0x0f, 0x05, // syscall (call rt_sigreturn)
0x90, // nop (for alignment)
];
stack_pointer -= TRAMPOLINE.len() as u64;
let trampoline_rip = stack_pointer;
user_space.write_bytes(stack_pointer as Vaddr, &mut VmReader::from(TRAMPOLINE))?;
stack_pointer = write_u64_to_user_stack(stack_pointer, trampoline_rip)?;
}
// 4. Set correct register values
context.set_instruction_pointer(handler_addr as _);
context.set_stack_pointer(stack_pointer as usize);
// parameters of signal handler
if flags.contains(SigActionFlags::SA_SIGINFO) {
context.set_arguments(sig_num, siginfo_addr as usize, ucontext_addr as usize);
} else {
context.set_arguments(sig_num, 0, 0);
}
Ok(())
}
/// Uses an alternate signal stack, which was installed by sigaltstack.
/// If the stack is already active, we just increase the handler counter and return None, since
/// the stack pointer can be read from the context.
/// If the stack is not used by any handler, we will return the new sp in the alternate signal stack.
fn use_alternate_signal_stack(posix_thread: &PosixThread) -> Option<usize> {
let mut sig_stack = posix_thread.sig_stack().lock();
let sig_stack = (*sig_stack).as_mut()?;
if sig_stack.is_disabled() {
return None;
}
if sig_stack.is_active() {
// The stack is already active, so we just use sp in context.
sig_stack.increase_handler_counter();
return None;
}
sig_stack.increase_handler_counter();
// Align sp to 16 bytes. FIXME: is this required?
let stack_pointer = (sig_stack.base() + sig_stack.size()).align_down(16);
Some(stack_pointer)
}
fn write_u64_to_user_stack(rsp: u64, value: u64) -> Result<u64> {
let rsp = rsp - 8;
CurrentUserSpace::get().write_val(rsp as Vaddr, &value)?;
Ok(rsp)
}
/// Allocates `size` bytes of memory on the user stack; the returned address respects the `align` argument.
fn alloc_aligned_in_user_stack(rsp: u64, size: usize, align: usize) -> Result<u64> {
if !align.is_power_of_two() {
return_errno_with_message!(Errno::EINVAL, "align must be power of two");
}
let start = (rsp - size as u64).align_down(align as u64);
Ok(start)
}
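To illustrate the arithmetic with made-up numbers (the demo function is hypothetical): reserving 24 bytes below `rsp = 0x7fff_f000` with 16-byte alignment first subtracts the size, then rounds down to the alignment boundary.
```rust
fn align_demo() -> Result<()> {
    let rsp: u64 = 0x7fff_f000;
    let start = alloc_aligned_in_user_stack(rsp, 24, 16)?;
    // 0x7fff_f000 - 24 = 0x7fff_efe8; aligned down to 16 => 0x7fff_efe0.
    assert_eq!(start, 0x7fff_efe0);
    Ok(())
}
```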

View File

@ -0,0 +1,297 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(unused_variables)]
use core::{
sync::atomic::{AtomicBool, Ordering},
time::Duration,
};
use ostd::sync::WaitQueue;
use super::{sig_mask::SigMask, SigEvents, SigEventsFilter};
use crate::{
events::Observer,
prelude::*,
process::posix_thread::{PosixThread, PosixThreadExt},
thread::Thread,
time::wait::WaitTimeout,
};
/// A `Pauser` allows pausing the execution of the current thread until certain conditions are reached.
///
/// Behind the scene, `Pauser` is implemented with [`Waiter`] and [`WaitQueue`].
/// But unlike its [`Waiter`] relatives, `Pauser` is aware of POSIX signals:
/// if a thread paused by a `Pauser` receives a signal, then the thread will resume its execution.
///
/// Another key difference is that `Pauser` combines the two roles of [`Waiter`] and [`WaitQueue`]
/// into one. Both putting the current thread to sleep and waking it up can be done through the
/// same `Pauser` object, using its `pause`- and `resume`-family methods.
///
/// [`Waiter`]: ostd::sync::Waiter
///
/// # Example
///
/// Here is how the current thread can be put to sleep with a `Pauser`.
///
/// ```no_run
/// let pauser = Pauser::new_with_mask(SigMask::new_full());
/// // Pause the execution of the current thread until a user-given condition is met
/// // or the current thread is interrupted by a signal.
/// let res = pauser.pause_until(|| {
/// if cond() {
/// Some(())
/// } else {
/// None
/// }
/// });
/// match res {
/// Ok(_) => {
/// // The user-given condition is met...
/// }
/// Err(err) if err.error() == Errno::EINTR => {
/// // A signal is received...
/// }
/// _ => unreachable!()
/// }
/// ```
///
/// Let's assume that another thread has access to the same object of `Arc<Pauser>`.
/// Then, this second thread can resume the execution of the first thread
/// even when `cond()` does not return `true`.
///
/// ```no_run
/// pauser.resume_all();
/// ```
pub struct Pauser {
wait_queue: WaitQueue,
sig_mask: SigMask,
}
impl Pauser {
/// Creates a new `Pauser`.
///
/// The `Pauser` can be interrupted by all signals
/// except those blocked by the current thread.
pub fn new() -> Arc<Self> {
Self::new_with_mask(SigMask::new_empty())
}
/// Creates a new `Pauser` with specified `sig_mask`.
///
/// The `Pauser` will ignore signals that are in `sig_mask`
/// or blocked by the current thread.
pub fn new_with_mask(sig_mask: SigMask) -> Arc<Self> {
let wait_queue = WaitQueue::new();
Arc::new(Self {
wait_queue,
sig_mask,
})
}
/// Pauses the execution of the current thread until the `cond` is met (i.e., `cond()`
/// returns `Some(_)`), or some signal is received by the current thread or process.
///
/// # Errors
///
/// If some signal is received before `cond` is met, this method returns `Err(EINTR)`.
pub fn pause_until<F, R>(self: &Arc<Self>, cond: F) -> Result<R>
where
F: FnMut() -> Option<R>,
{
self.do_pause(cond, None)
}
/// Pauses the execution of the current thread until the `cond` is met (i.e., `cond()` returns
/// `Some(_)`), or some signal is received by the current thread or process, or the given
/// `timeout` expires.
///
/// # Errors
///
/// If `timeout` expires before the `cond` is met or some signal is received,
/// this method returns [`ETIME`].
///
/// [`ETIME`]: crate::error::Errno::ETIME
pub fn pause_until_or_timeout<F, R>(self: &Arc<Self>, cond: F, timeout: &Duration) -> Result<R>
where
F: FnMut() -> Option<R>,
{
self.do_pause(cond, Some(timeout))
}
fn do_pause<F, R>(self: &Arc<Self>, mut cond: F, timeout: Option<&Duration>) -> Result<R>
where
F: FnMut() -> Option<R>,
{
let current_thread = Thread::current();
let sig_queue_waiter =
SigObserverRegistrar::new(current_thread.as_ref(), self.sig_mask, self.clone());
let cond = || {
if let Some(res) = cond() {
return Some(Ok(res));
}
if sig_queue_waiter.is_interrupted() {
return Some(Err(Error::with_message(
Errno::EINTR,
"the current thread is interrupted by a signal",
)));
}
None
};
if let Some(timeout) = timeout {
self.wait_queue
.wait_until_or_timeout(cond, timeout)
.ok_or_else(|| Error::with_message(Errno::ETIME, "the time limit is reached"))?
} else {
self.wait_queue.wait_until(cond)
}
}
/// Resumes all paused threads on this pauser.
pub fn resume_all(&self) {
self.wait_queue.wake_all();
}
/// Resumes one paused thread on this pauser.
pub fn resume_one(&self) {
self.wait_queue.wake_one();
}
}
enum SigObserverRegistrar<'a> {
// A POSIX thread may be interrupted by a signal if the signal is not masked.
PosixThread {
thread: &'a PosixThread,
old_mask: SigMask,
observer: Arc<SigQueueObserver>,
},
// A kernel thread ignores all signals. It is not necessary to wait for them.
KernelThread,
}
impl<'a> SigObserverRegistrar<'a> {
fn new(
current_thread: Option<&'a Arc<Thread>>,
sig_mask: SigMask,
pauser: Arc<Pauser>,
) -> Self {
let Some(thread) = current_thread.and_then(|thread| thread.as_posix_thread()) else {
return Self::KernelThread;
};
// Block `sig_mask`.
let (old_mask, filter) = {
let old_mask = thread.sig_mask().load(Ordering::Relaxed);
let new_mask = old_mask + sig_mask;
thread.sig_mask().store(new_mask, Ordering::Relaxed);
(old_mask, SigEventsFilter::new(new_mask))
};
// Register `SigQueueObserver`.
let observer = SigQueueObserver::new(pauser);
thread.register_sigqueue_observer(Arc::downgrade(&observer) as _, filter);
// Check pending signals after registering the observer to avoid race conditions.
if thread.has_pending() {
observer.set_interrupted();
}
Self::PosixThread {
thread,
old_mask,
observer,
}
}
fn is_interrupted(&self) -> bool {
match self {
Self::PosixThread { observer, .. } => observer.is_interrupted(),
Self::KernelThread => false,
}
}
}
impl<'a> Drop for SigObserverRegistrar<'a> {
fn drop(&mut self) {
let Self::PosixThread {
thread,
old_mask,
observer,
} = self
else {
return;
};
// Restore the state, assuming no one else can modify the current thread's signal mask
// during the pause.
thread.unregiser_sigqueue_observer(&(Arc::downgrade(observer) as _));
thread.sig_mask().store(*old_mask, Ordering::Relaxed);
}
}
struct SigQueueObserver {
is_interrupted: AtomicBool,
pauser: Arc<Pauser>,
}
impl SigQueueObserver {
fn new(pauser: Arc<Pauser>) -> Arc<Self> {
Arc::new(Self {
is_interrupted: AtomicBool::new(false),
pauser,
})
}
fn is_interrupted(&self) -> bool {
self.is_interrupted.load(Ordering::Acquire)
}
fn set_interrupted(&self) {
self.is_interrupted.store(true, Ordering::Release);
}
}
impl Observer<SigEvents> for SigQueueObserver {
fn on_events(&self, _: &SigEvents) {
self.set_interrupted();
self.pauser.wait_queue.wake_all();
}
}
#[cfg(ktest)]
mod test {
use ostd::prelude::*;
use super::*;
use crate::thread::{
kernel_thread::{KernelThreadExt, ThreadOptions},
Thread,
};
#[ktest]
fn test_pauser() {
let pauser = Pauser::new();
let pauser_cloned = pauser.clone();
let boolean = Arc::new(AtomicBool::new(false));
let boolean_cloned = boolean.clone();
let thread = Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
boolean_cloned.store(true, Ordering::Relaxed);
pauser_cloned.resume_all();
}));
pauser
.pause_until(|| boolean.load(Ordering::Relaxed).then_some(()))
.unwrap();
thread.join();
}
}

View File

@ -0,0 +1,282 @@
// SPDX-License-Identifier: MPL-2.0
use core::{
sync::atomic::{AtomicU32, AtomicUsize, Ordering},
time::Duration,
};
use crate::{
events::{IoEvents, Observer, Subject},
prelude::*,
process::signal::Pauser,
};
/// A pollee maintains a set of active events, which can be polled with
/// pollers or be monitored with observers.
#[derive(Clone)]
pub struct Pollee {
inner: Arc<PolleeInner>,
}
struct PolleeInner {
// A subject which is monitored with pollers.
subject: Subject<IoEvents, IoEvents>,
// For efficient manipulation, we use AtomicU32 instead of RwLock<IoEvents>.
events: AtomicU32,
}
impl Pollee {
/// Creates a new instance of pollee.
pub fn new(init_events: IoEvents) -> Self {
let inner = PolleeInner {
subject: Subject::new(),
events: AtomicU32::new(init_events.bits()),
};
Self {
inner: Arc::new(inner),
}
}
/// Returns the current events of the pollee given an event mask.
///
/// If no interesting events are polled and a poller is provided, then
/// the poller will start monitoring the pollee and receive event
/// notification once the pollee gets any interesting events.
///
/// This operation is _atomic_ in the sense that either some interesting
/// events are returned or the poller is registered (if a poller is provided).
pub fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents {
let mask = mask | IoEvents::ALWAYS_POLL;
// Fast path: return events immediately
let revents = self.events() & mask;
if !revents.is_empty() || poller.is_none() {
return revents;
}
// Register the provided poller.
self.register_poller(poller.unwrap(), mask);
// It is important to check events again to handle race conditions
self.events() & mask
}
fn register_poller(&self, poller: &mut Poller, mask: IoEvents) {
self.inner
.subject
.register_observer(poller.observer(), mask);
poller.pollees.push(Arc::downgrade(&self.inner));
}
/// Register an IoEvents observer.
///
/// A registered observer will get notified (through its `on_events` method)
/// every time new events specified by the `mask` argument happen on the
/// pollee (through the `add_events` method).
///
/// If the given observer has already been registered, then its registered
/// event mask will be updated.
///
/// Note that the observer will always get notified of the events in
/// `IoEvents::ALWAYS_POLL` regardless of the value of `mask`.
pub fn register_observer(&self, observer: Weak<dyn Observer<IoEvents>>, mask: IoEvents) {
let mask = mask | IoEvents::ALWAYS_POLL;
self.inner.subject.register_observer(observer, mask);
}
/// Unregister an IoEvents observer.
///
/// If such an observer is found, then the registered observer will be
/// removed from the pollee and returned as the return value. Otherwise,
/// a `None` will be returned.
pub fn unregister_observer(
&self,
observer: &Weak<dyn Observer<IoEvents>>,
) -> Option<Weak<dyn Observer<IoEvents>>> {
self.inner.subject.unregister_observer(observer)
}
/// Add some events to the pollee's state.
///
/// This method wakes up all registered pollers that are interested in
/// the added events.
pub fn add_events(&self, events: IoEvents) {
self.inner.events.fetch_or(events.bits(), Ordering::Release);
self.inner.subject.notify_observers(&events);
}
/// Remove some events from the pollee's state.
///
/// This method will not wake up registered pollers even when
/// the pollee still has some interesting events to the pollers.
pub fn del_events(&self, events: IoEvents) {
self.inner
.events
.fetch_and(!events.bits(), Ordering::Release);
}
/// Reset the pollee's state.
///
/// Reset means removing all events on the pollee.
pub fn reset_events(&self) {
self.inner
.events
.fetch_and(!IoEvents::all().bits(), Ordering::Release);
}
fn events(&self) -> IoEvents {
let event_bits = self.inner.events.load(Ordering::Acquire);
IoEvents::from_bits(event_bits).unwrap()
}
}
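A minimal usage sketch (assuming an `IoEvents::IN` flag, as in epoll-style event sets; the demo function is made up): events added to a `Pollee` show up in a subsequent `poll`, and no poller needs to be registered when the fast path already finds something.
```rust
fn pollee_demo() {
    let pollee = Pollee::new(IoEvents::empty());
    // Nothing interesting yet; without a poller this returns immediately.
    assert!(pollee.poll(IoEvents::IN, None).is_empty());
    // A producer marks the pollee readable...
    pollee.add_events(IoEvents::IN);
    // ...and a consumer now observes the event.
    assert!(pollee.poll(IoEvents::IN, None).contains(IoEvents::IN));
}
```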
/// A poller gets notified when its associated pollees have interesting events.
pub struct Poller {
// Use event counter to wait or wake up a poller
event_counter: Arc<EventCounter>,
// All pollees that are interesting to this poller
pollees: Vec<Weak<PolleeInner>>,
}
impl Default for Poller {
fn default() -> Self {
Self::new()
}
}
impl Poller {
/// Constructs a new `Poller`.
pub fn new() -> Self {
Self {
event_counter: Arc::new(EventCounter::new()),
pollees: Vec::new(),
}
}
/// Waits until any interesting events have happened since the last `wait`. The wait
/// can be interrupted by a signal.
pub fn wait(&self) -> Result<()> {
self.event_counter.read(None)?;
Ok(())
}
/// Waits until any interesting events have happened since the last `wait` or the given timeout
/// expires. This method can be interrupted by a signal.
pub fn wait_timeout(&self, timeout: &Duration) -> Result<()> {
self.event_counter.read(Some(timeout))?;
Ok(())
}
fn observer(&self) -> Weak<dyn Observer<IoEvents>> {
Arc::downgrade(&self.event_counter) as _
}
}
impl Drop for Poller {
fn drop(&mut self) {
let observer = self.observer();
self.pollees
.iter()
.filter_map(Weak::upgrade)
.for_each(|pollee| {
pollee.subject.unregister_observer(&observer);
});
}
}
/// A counter for wait and wakeup.
struct EventCounter {
counter: AtomicUsize,
pauser: Arc<Pauser>,
}
impl EventCounter {
pub fn new() -> Self {
let pauser = Pauser::new();
Self {
counter: AtomicUsize::new(0),
pauser,
}
}
pub fn read(&self, timeout: Option<&Duration>) -> Result<usize> {
let cond = || {
let val = self.counter.swap(0, Ordering::Relaxed);
if val > 0 {
Some(val)
} else {
None
}
};
if let Some(timeout) = timeout {
self.pauser.pause_until_or_timeout(cond, timeout)
} else {
self.pauser.pause_until(cond)
}
}
pub fn write(&self) {
self.counter.fetch_add(1, Ordering::Relaxed);
self.pauser.resume_one();
}
}
impl Observer<IoEvents> for EventCounter {
fn on_events(&self, _events: &IoEvents) {
self.write();
}
}
/// The `Pollable` trait allows for waiting for events and performing event-based operations.
///
/// Implementors are required to provide a method, [`Pollable::poll`], which is usually implemented
/// by simply calling [`Pollee::poll`] on the internal [`Pollee`]. This trait provides another
/// method, [`Pollable::wait_events`], to allow waiting for events and performing operations
/// according to the events.
///
/// This trait is added instead of creating a new method in [`Pollee`] because sometimes we do not
/// have access to the internal [`Pollee`], but there is a method that provides the same semantics
/// as [`Pollee::poll`] and we need to perform event-based operations using that method.
pub trait Pollable {
/// Returns the interesting events if there are any, or waits for them to happen if there are
/// none.
///
/// This method has the same semantics as [`Pollee::poll`].
fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents;
/// Waits for events and performs event-based operations.
///
/// If a call to `cond()` succeeds or fails with an error code other than `EAGAIN`, the method
/// will return whatever the call to `cond()` returns. Otherwise, the method will wait for some
/// interesting events specified in `mask` to happen and try again.
///
/// The user must ensure that a call to `cond()` does not fail with `EAGAIN` when the
/// interesting events occur. However, it is allowed to have spurious `EAGAIN` failures due to
/// race conditions where the events are consumed by another thread.
fn wait_events<F, R>(&self, mask: IoEvents, mut cond: F) -> Result<R>
where
Self: Sized,
F: FnMut() -> Result<R>,
{
let mut poller = Poller::new();
loop {
match cond() {
Err(err) if err.error() == Errno::EAGAIN => (),
result => return result,
};
let events = self.poll(mask, Some(&mut poller));
if !events.is_empty() {
continue;
}
// TODO: Support timeout
poller.wait()?;
}
}
}
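As a sketch of a typical implementor (the `Channel` type and its `try_read` are hypothetical, and `IoEvents::IN` is assumed): `poll` forwards to an internal `Pollee`, and `wait_events` then turns a non-blocking operation into a blocking one.
```rust
// A hypothetical pollable byte source; only the forwarding pattern matters.
struct Channel {
    pollee: Pollee,
    // ... buffer, etc. ...
}

impl Pollable for Channel {
    fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents {
        self.pollee.poll(mask, poller)
    }
}

impl Channel {
    /// Blocking read built on a non-blocking `try_read` that fails
    /// with `EAGAIN` when no data is available.
    fn read(&self, buf: &mut [u8]) -> Result<usize> {
        self.wait_events(IoEvents::IN, || self.try_read(buf))
    }

    fn try_read(&self, _buf: &mut [u8]) -> Result<usize> {
        // Stub for the sketch: a real channel would copy buffered bytes.
        return_errno_with_message!(Errno::EAGAIN, "no data yet")
    }
}
```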

View File

@ -0,0 +1,149 @@
// SPDX-License-Identifier: MPL-2.0
use bitflags::bitflags;
use super::{c_types::sigaction_t, constants::*, sig_mask::SigMask, sig_num::SigNum};
use crate::prelude::*;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub enum SigAction {
#[default]
Dfl, // Default action
Ign, // Ignore this signal
User {
// User-given handler
handler_addr: usize,
flags: SigActionFlags,
restorer_addr: usize,
mask: SigMask,
},
}
impl TryFrom<sigaction_t> for SigAction {
type Error = Error;
fn try_from(input: sigaction_t) -> Result<Self> {
let action = match input.handler_ptr {
SIG_DFL => SigAction::Dfl,
SIG_IGN => SigAction::Ign,
_ => {
let flags = SigActionFlags::from_bits_truncate(input.flags);
let mask = input.mask.into();
SigAction::User {
handler_addr: input.handler_ptr,
flags,
restorer_addr: input.restorer_ptr,
mask,
}
}
};
Ok(action)
}
}
impl SigAction {
pub fn as_c_type(&self) -> sigaction_t {
match self {
SigAction::Dfl => sigaction_t {
handler_ptr: SIG_DFL,
flags: 0,
restorer_ptr: 0,
mask: 0,
},
SigAction::Ign => sigaction_t {
handler_ptr: SIG_IGN,
flags: 0,
restorer_ptr: 0,
mask: 0,
},
SigAction::User {
handler_addr,
flags,
restorer_addr,
mask,
} => sigaction_t {
handler_ptr: *handler_addr,
flags: flags.as_u32(),
restorer_ptr: *restorer_addr,
mask: (*mask).into(),
},
}
}
}
bitflags! {
pub struct SigActionFlags: u32 {
const SA_NOCLDSTOP = 1;
const SA_NOCLDWAIT = 2;
const SA_SIGINFO = 4;
const SA_ONSTACK = 0x08000000;
const SA_RESTART = 0x10000000;
const SA_NODEFER = 0x40000000;
const SA_RESETHAND = 0x80000000;
const SA_RESTORER = 0x04000000;
}
}
impl TryFrom<u32> for SigActionFlags {
type Error = Error;
fn try_from(bits: u32) -> Result<Self> {
let flags = SigActionFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid sig action flag"))?;
if flags.contains(SigActionFlags::SA_RESTART) {
warn!("SA_RESTART is not supported");
}
Ok(flags)
}
}
impl SigActionFlags {
pub fn as_u32(&self) -> u32 {
self.bits()
}
pub fn contains_unsupported_flag(&self) -> bool {
self.intersects(SigActionFlags::SA_NOCLDSTOP | SigActionFlags::SA_NOCLDWAIT)
}
}
/// The default action to signals
#[derive(Debug, Copy, Clone)]
pub enum SigDefaultAction {
Term, // Default action is to terminate the process.
Ign, // Default action is to ignore the signal.
Core, // Default action is to terminate the process and dump core (see core(5)).
Stop, // Default action is to stop the process.
Cont, // Default action is to continue the process if it is currently stopped.
}
impl SigDefaultAction {
pub fn from_signum(num: SigNum) -> SigDefaultAction {
match num {
SIGABRT | // = SIGIOT
SIGBUS |
SIGFPE |
SIGILL |
SIGQUIT |
SIGSEGV |
SIGSYS | // = SIGUNUSED
SIGTRAP |
SIGXCPU |
SIGXFSZ
=> SigDefaultAction::Core,
SIGCHLD |
SIGURG |
SIGWINCH
=> SigDefaultAction::Ign,
SIGCONT
=> SigDefaultAction::Cont,
SIGSTOP |
SIGTSTP |
SIGTTIN |
SIGTTOU
=> SigDefaultAction::Stop,
_
=> SigDefaultAction::Term,
}
}
}

View File

@ -0,0 +1,54 @@
// SPDX-License-Identifier: MPL-2.0
use super::{constants::*, sig_action::SigAction, sig_num::SigNum};
#[derive(Copy, Clone)]
pub struct SigDispositions {
// SigNum -> SigAction
map: [SigAction; COUNT_ALL_SIGS],
}
impl Default for SigDispositions {
fn default() -> Self {
Self::new()
}
}
impl SigDispositions {
pub fn new() -> Self {
Self {
map: [SigAction::default(); COUNT_ALL_SIGS],
}
}
pub fn get(&self, num: SigNum) -> SigAction {
let idx = Self::num_to_idx(num);
self.map[idx]
}
pub fn set(&mut self, num: SigNum, sa: SigAction) {
let idx = Self::num_to_idx(num);
self.map[idx] = sa;
}
pub fn set_default(&mut self, num: SigNum) {
let idx = Self::num_to_idx(num);
self.map[idx] = SigAction::Dfl;
}
/// According to man 7 signal:
/// upon execve, handled signals are reset to the default, while the dispositions of
/// ignored signals are left unchanged.
/// This function should be called upon execve.
pub fn inherit(&mut self) {
for sigaction in &mut self.map {
if let SigAction::User { .. } = sigaction {
*sigaction = SigAction::Dfl;
}
}
}
fn num_to_idx(num: SigNum) -> usize {
(num.as_u8() - MIN_STD_SIG_NUM) as usize
}
}
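A quick sketch of the execve semantics described above (constants and types from the sibling modules; the handler address is made up): user handlers fall back to the default, while an explicit `Ign` survives.
```rust
fn execve_disposition_demo() {
    let mut dispositions = SigDispositions::new();
    dispositions.set(SIGINT, SigAction::Ign);
    dispositions.set(
        SIGTERM,
        SigAction::User {
            handler_addr: 0x1000, // hypothetical handler address
            flags: SigActionFlags::empty(),
            restorer_addr: 0,
            mask: SigMask::new_empty(),
        },
    );
    dispositions.inherit();
    assert_eq!(dispositions.get(SIGINT), SigAction::Ign); // unchanged
    assert_eq!(dispositions.get(SIGTERM), SigAction::Dfl); // reset
}
```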

View File

@ -0,0 +1,225 @@
// SPDX-License-Identifier: MPL-2.0
//! Signal sets and atomic masks.
//!
//! A signal set is a bit-set of signals. A signal mask is a set of signals
//! that are blocked from delivery to a thread. An atomic signal mask
//! implementation is provided for shared access to signal masks.
use core::{
fmt::LowerHex,
ops,
sync::atomic::{AtomicU64, Ordering},
};
use super::{constants::MIN_STD_SIG_NUM, sig_num::SigNum};
use crate::prelude::*;
/// A signal mask.
///
/// This is an alias of [`SigSet`]. All the signals in the set are blocked
/// from delivery to a thread.
pub type SigMask = SigSet;
/// A bit-set of signals.
///
/// Because all signal numbers are in the range of 1 to 64, converting
/// a `u64` to a `SigSet` always succeeds.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Pod)]
#[repr(C)]
pub struct SigSet {
bits: u64,
}
impl From<SigNum> for SigSet {
fn from(signum: SigNum) -> Self {
let idx = signum.as_u8() - MIN_STD_SIG_NUM;
Self { bits: 1_u64 << idx }
}
}
impl From<u64> for SigSet {
fn from(bits: u64) -> Self {
SigSet { bits }
}
}
impl From<SigSet> for u64 {
fn from(set: SigSet) -> u64 {
set.bits
}
}
impl<T: Into<SigSet>> ops::BitAnd<T> for SigSet {
type Output = Self;
fn bitand(self, rhs: T) -> Self {
SigSet {
bits: self.bits & rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::BitAndAssign<T> for SigSet {
fn bitand_assign(&mut self, rhs: T) {
self.bits &= rhs.into().bits;
}
}
impl<T: Into<SigSet>> ops::BitOr<T> for SigSet {
type Output = Self;
fn bitor(self, rhs: T) -> Self {
SigSet {
bits: self.bits | rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::BitOrAssign<T> for SigSet {
fn bitor_assign(&mut self, rhs: T) {
self.bits |= rhs.into().bits;
}
}
#[allow(clippy::suspicious_arithmetic_impl)]
impl<T: Into<SigSet>> ops::Add<T> for SigSet {
type Output = Self;
fn add(self, rhs: T) -> Self {
SigSet {
bits: self.bits | rhs.into().bits,
}
}
}
#[allow(clippy::suspicious_op_assign_impl)]
impl<T: Into<SigSet>> ops::AddAssign<T> for SigSet {
fn add_assign(&mut self, rhs: T) {
self.bits |= rhs.into().bits;
}
}
impl<T: Into<SigSet>> ops::Sub<T> for SigSet {
type Output = Self;
fn sub(self, rhs: T) -> Self {
SigSet {
bits: self.bits & !rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::SubAssign<T> for SigSet {
fn sub_assign(&mut self, rhs: T) {
self.bits &= !rhs.into().bits;
}
}
impl SigSet {
pub fn new_empty() -> Self {
SigSet { bits: 0 }
}
pub fn new_full() -> Self {
SigSet { bits: !0 }
}
pub const fn is_empty(&self) -> bool {
self.bits == 0
}
pub const fn is_full(&self) -> bool {
self.bits == !0
}
pub fn count(&self) -> usize {
self.bits.count_ones() as usize
}
pub fn contains(&self, set: impl Into<Self>) -> bool {
let set = set.into();
self.bits & set.bits == set.bits
}
}
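A short sketch of the set arithmetic defined above (the demo function is made up; signal constants come from the `constants` module): `+` and `|` both union, `-` removes, and `contains` tests subset membership.
```rust
fn sigset_demo() {
    let mut set = SigSet::new_empty();
    set += SIGINT; // add one signal
    set |= SIGQUIT; // `|=` unions, just like `+=`
    assert!(set.contains(SIGINT) && set.contains(SIGQUIT));
    assert_eq!(set.count(), 2);
    set -= SIGINT; // remove it again
    assert!(!set.contains(SIGINT));
}
```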
// This is to allow hexadecimally formatting a `SigSet` when debug printing it.
impl LowerHex for SigSet {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
LowerHex::fmt(&self.bits, f) // delegate to u64's implementation
}
}
/// An atomic signal mask.
///
/// This is an alias of [`AtomicSigSet`]. All the signals in the set are
/// blocked from delivery to a thread.
pub type AtomicSigMask = AtomicSigSet;
/// An atomic signal set.
pub struct AtomicSigSet(AtomicU64);
impl From<SigSet> for AtomicSigSet {
fn from(set: SigSet) -> Self {
AtomicSigSet(AtomicU64::new(set.bits))
}
}
impl AtomicSigSet {
pub fn new_empty() -> Self {
AtomicSigSet(AtomicU64::new(0))
}
pub fn new_full() -> Self {
AtomicSigSet(AtomicU64::new(!0))
}
pub fn load(&self, ordering: Ordering) -> SigSet {
SigSet {
bits: self.0.load(ordering),
}
}
pub fn store(&self, new_mask: impl Into<SigMask>, ordering: Ordering) {
self.0.store(new_mask.into().bits, ordering);
}
pub fn swap(&self, new_mask: impl Into<SigMask>, ordering: Ordering) -> SigSet {
let bits = self.0.swap(new_mask.into().bits, ordering);
SigSet { bits }
}
pub fn contains(&self, signals: impl Into<SigSet>, ordering: Ordering) -> bool {
SigSet {
bits: self.0.load(ordering),
}
.contains(signals.into())
}
/// Applies an update to the signal set.
///
/// This is the same as [`AtomicU64::fetch_update`], but the closure `f`
/// operates on a [`SigMask`] instead of a `u64`.
///
/// It can be a bit slow, since it checks whether the value has been written by
/// another thread while the closure `f` is being evaluated. If you are confident
/// that no such race can occur, don't use this method.
pub fn fetch_update<F>(
&self,
set_order: Ordering,
fetch_order: Ordering,
mut f: F,
) -> core::result::Result<SigMask, SigMask>
where
F: FnMut(SigMask) -> Option<SigMask>,
{
self.0
.fetch_update(set_order, fetch_order, |bits| {
f(SigMask { bits }).map(|set| set.bits)
})
.map(SigMask::from)
.map_err(SigMask::from)
}
}
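For example, atomically blocking one more signal in a shared mask could look like this sketch (the function name is made up):
```rust
fn block_one_more(mask: &AtomicSigMask, num: SigNum) {
    // The retry loop lives inside `fetch_update`; the closure only
    // describes the transformation from the old mask to the new one.
    mask.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |old| Some(old + num))
        .expect("the closure never returns `None`, so this cannot fail");
}
```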

View File

@ -0,0 +1,128 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use core::sync::atomic::AtomicU8;
use atomic::Ordering;
use super::constants::*;
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigNum {
sig_num: u8,
}
impl TryFrom<u8> for SigNum {
type Error = Error;
fn try_from(sig_num: u8) -> Result<Self> {
if !(MIN_STD_SIG_NUM..=MAX_RT_SIG_NUM).contains(&sig_num) {
return_errno_with_message!(Errno::EINVAL, "invalid signal number");
}
Ok(SigNum { sig_num })
}
}
impl SigNum {
/// The caller must ensure that `sig_num` is valid. Otherwise, use `try_from`, which checks `sig_num` and does not panic.
pub const fn from_u8(sig_num: u8) -> Self {
if sig_num > MAX_RT_SIG_NUM || sig_num < MIN_STD_SIG_NUM {
panic!("invalid signal number")
}
SigNum { sig_num }
}
pub const fn as_u8(&self) -> u8 {
self.sig_num
}
pub fn is_std(&self) -> bool {
self.sig_num <= MAX_STD_SIG_NUM
}
pub fn is_real_time(&self) -> bool {
self.sig_num >= MIN_RT_SIG_NUM
}
pub const fn sig_name(&self) -> &'static str {
match *self {
SIGHUP => "SIGHUP",
SIGINT => "SIGINT",
SIGQUIT => "SIGQUIT",
SIGILL => "SIGILL",
SIGTRAP => "SIGTRAP",
SIGABRT => "SIGABRT",
SIGBUS => "SIGBUS",
SIGFPE => "SIGFPE",
SIGKILL => "SIGKILL",
SIGUSR1 => "SIGUSR1",
SIGSEGV => "SIGSEGV",
SIGUSR2 => "SIGUSR2",
SIGPIPE => "SIGPIPE",
SIGALRM => "SIGALRM",
SIGTERM => "SIGTERM",
SIGSTKFLT => "SIGSTKFLT",
SIGCHLD => "SIGCHLD",
SIGCONT => "SIGCONT",
SIGSTOP => "SIGSTOP",
SIGTSTP => "SIGTSTP",
SIGTTIN => "SIGTTIN",
SIGTTOU => "SIGTTOU",
SIGURG => "SIGURG",
SIGXCPU => "SIGXCPU",
SIGXFSZ => "SIGXFSZ",
SIGVTALRM => "SIGVTALRM",
SIGPROF => "SIGPROF",
SIGWINCH => "SIGWINCH",
SIGIO => "SIGIO",
SIGPWR => "SIGPWR",
SIGSYS => "SIGSYS",
_ => "Realtime Signal",
}
}
}
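Both constructors enforce the 1..=64 range; the difference is the failure mode, as this sketch shows (the `SIGRTMAX` name is made up for the demo):
```rust
fn signum_demo() {
    // Fallible conversion: out-of-range numbers yield `EINVAL`.
    assert!(SigNum::try_from(0u8).is_err());
    assert!(SigNum::try_from(64u8).is_ok());
    // Const construction: fine for valid numbers, panics otherwise.
    const SIGRTMAX: SigNum = SigNum::from_u8(64);
    assert!(SIGRTMAX.is_real_time());
}
```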
/// Atomic signal number.
///
/// This struct represents a signal number and is different from [`SigNum`]
/// in that it allows for an empty signal number.
pub struct AtomicSigNum(AtomicU8);
impl AtomicSigNum {
/// Creates a new empty atomic signal number
pub const fn new_empty() -> Self {
Self(AtomicU8::new(0))
}
/// Creates a new signal number with the specified value
pub const fn new(sig_num: SigNum) -> Self {
Self(AtomicU8::new(sig_num.as_u8()))
}
/// Determines whether the signal number is empty
pub fn is_empty(&self) -> bool {
self.0.load(Ordering::Relaxed) == 0
}
/// Returns the corresponding [`SigNum`]
pub fn as_sig_num(&self) -> Option<SigNum> {
let sig_num = self.0.load(Ordering::Relaxed);
if sig_num == 0 {
return None;
}
Some(SigNum::from_u8(sig_num))
}
/// Sets the new `sig_num`
pub fn set(&self, sig_num: SigNum) {
self.0.store(sig_num.as_u8(), Ordering::Relaxed)
}
/// Clears the signal number
pub fn clear(&self) {
self.0.store(0, Ordering::Relaxed)
}
}

View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicUsize, Ordering};
use super::{
constants::*,
sig_mask::{SigMask, SigSet},
sig_num::SigNum,
signals::Signal,
SigEvents, SigEventsFilter,
};
use crate::{
events::{Observer, Subject},
prelude::*,
};
pub struct SigQueues {
// The number of pending signals.
// Useful for quickly determining if any signals are pending without locking `queues`.
count: AtomicUsize,
queues: Mutex<Queues>,
subject: Subject<SigEvents, SigEventsFilter>,
}
impl SigQueues {
pub fn new() -> Self {
Self {
count: AtomicUsize::new(0),
queues: Mutex::new(Queues::new()),
subject: Subject::new(),
}
}
pub fn is_empty(&self) -> bool {
self.count.load(Ordering::Relaxed) == 0
}
pub fn enqueue(&self, signal: Box<dyn Signal>) {
let signum = signal.num();
let mut queues = self.queues.lock();
if queues.enqueue(signal) {
self.count.fetch_add(1, Ordering::Relaxed);
// Avoid holding lock when notifying observers
drop(queues);
self.subject.notify_observers(&SigEvents::new(signum));
}
}
pub fn dequeue(&self, blocked: &SigMask) -> Option<Box<dyn Signal>> {
// Fast path for the common case of no pending signals
if self.is_empty() {
return None;
}
let mut queues = self.queues.lock();
let signal = queues.dequeue(blocked);
if signal.is_some() {
self.count.fetch_sub(1, Ordering::Relaxed);
}
signal
}
/// Returns the pending signals
pub fn sig_pending(&self) -> SigSet {
let queues = self.queues.lock();
queues.sig_pending()
}
/// Returns whether there are pending signals that are not blocked
pub fn has_pending(&self, blocked: SigMask) -> bool {
self.queues.lock().has_pending(blocked)
}
pub fn register_observer(
&self,
observer: Weak<dyn Observer<SigEvents>>,
filter: SigEventsFilter,
) {
self.subject.register_observer(observer, filter);
}
pub fn unregister_observer(&self, observer: &Weak<dyn Observer<SigEvents>>) {
self.subject.unregister_observer(observer);
}
}
impl Default for SigQueues {
fn default() -> Self {
Self::new()
}
}
struct Queues {
std_queues: Vec<Option<Box<dyn Signal>>>,
rt_queues: Vec<VecDeque<Box<dyn Signal>>>,
}
impl Queues {
fn new() -> Self {
let std_queues = (0..COUNT_STD_SIGS).map(|_| None).collect();
let rt_queues = (0..COUNT_RT_SIGS).map(|_| Default::default()).collect();
Self {
std_queues,
rt_queues,
}
}
fn enqueue(&mut self, signal: Box<dyn Signal>) -> bool {
let signum = signal.num();
if signum.is_std() {
// Standard signals
//
// From signal(7):
//
// Standard signals do not queue. If multiple instances of a standard
// signal are generated while that signal is blocked, then only one
// instance of the signal is marked as pending (and the signal will be
// delivered just once when it is unblocked). In the case where a
// standard signal is already pending, the siginfo_t structure (see
// sigaction(2)) associated with that signal is not overwritten on
// arrival of subsequent instances of the same signal. Thus, the
// process will receive the information associated with the first
// instance of the signal.
let queue = self.get_std_queue_mut(signum);
if queue.is_some() {
// If there is already a signal pending, just ignore all subsequent signals
return false;
}
*queue = Some(signal);
} else {
// Real-time signals
let queue = self.get_rt_queue_mut(signum);
queue.push_back(signal);
}
true
}
fn dequeue(&mut self, blocked: &SigMask) -> Option<Box<dyn Signal>> {
// Deliver standard signals.
//
// According to signal(7):
// If both standard and real-time signals are pending for a process,
// POSIX leaves it unspecified which is delivered first. Linux, like
// many other implementations, gives priority to standard signals in
// this case.
// POSIX leaves unspecified which to deliver first if there are multiple
// pending standard signals. So we are free to define our own. The
// principle is to give more urgent signals higher priority (like SIGKILL).
// FIXME: the gvisor pty_test JobControlTest::ReleaseTTY requires that
// the SIGHUP signal should be handled before SIGCONT.
const ORDERED_STD_SIGS: [SigNum; COUNT_STD_SIGS] = [
SIGKILL, SIGTERM, SIGSTOP, SIGSEGV, SIGILL, SIGHUP, SIGCONT, SIGINT, SIGQUIT, SIGTRAP,
SIGABRT, SIGBUS, SIGFPE, SIGUSR1, SIGUSR2, SIGPIPE, SIGALRM, SIGSTKFLT, SIGCHLD,
SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
SIGIO, SIGPWR, SIGSYS,
];
for &signum in &ORDERED_STD_SIGS {
if blocked.contains(signum) {
continue;
}
let queue = self.get_std_queue_mut(signum);
let signal = queue.take();
if signal.is_some() {
return signal;
}
}
// If no standard signals, then deliver real-time signals.
//
// According to signal (7):
// Real-time signals are delivered in a guaranteed order. Multiple
// real-time signals of the same type are delivered in the order
// they were sent. If different real-time signals are sent to a
// process, they are delivered starting with the lowest-numbered
// signal. (I.e., low-numbered signals have highest priority.)
for signum in MIN_RT_SIG_NUM..=MAX_RT_SIG_NUM {
let signum = SigNum::try_from(signum).unwrap();
if blocked.contains(signum) {
continue;
}
let queue = self.get_rt_queue_mut(signum);
let signal = queue.pop_front();
if signal.is_some() {
return signal;
}
}
// There must be pending but blocked signals
None
}
/// Returns whether the queues contain pending signals that are not blocked
fn has_pending(&self, blocked: SigMask) -> bool {
self.std_queues.iter().any(|signal| {
signal
.as_ref()
.is_some_and(|signal| !blocked.contains(signal.num()))
}) || self.rt_queues.iter().enumerate().any(|(idx, rt_queue)| {
// Also respect `blocked` for real-time signals, mirroring the
// check for standard signals above.
let signum = SigNum::from_u8(idx as u8 + MIN_RT_SIG_NUM);
!rt_queue.is_empty() && !blocked.contains(signum)
})
}
fn get_std_queue_mut(&mut self, signum: SigNum) -> &mut Option<Box<dyn Signal>> {
debug_assert!(signum.is_std());
let idx = (signum.as_u8() - MIN_STD_SIG_NUM) as usize;
&mut self.std_queues[idx]
}
fn get_rt_queue_mut(&mut self, signum: SigNum) -> &mut VecDeque<Box<dyn Signal>> {
debug_assert!(signum.is_real_time());
let idx = (signum.as_u8() - MIN_RT_SIG_NUM) as usize;
&mut self.rt_queues[idx]
}
fn sig_pending(&self) -> SigSet {
let mut pending = SigSet::new_empty();
// Process standard signal queues
for (idx, signal) in self.std_queues.iter().enumerate() {
if signal.is_some() {
pending += SigNum::from_u8(idx as u8 + MIN_STD_SIG_NUM);
}
}
// Process real-time signal queues
for (idx, signals) in self.rt_queues.iter().enumerate() {
if !signals.is_empty() {
pending += SigNum::from_u8(idx as u8 + MIN_RT_SIG_NUM);
}
}
pending
}
}
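A sketch of the coalescing rule for standard signals, using the `KernelSignal` type defined later in this commit (the demo function is made up): enqueuing the same standard signal twice leaves only one pending instance, whereas a real-time signal would queue both.
```rust
fn coalescing_demo() {
    let queues = SigQueues::new();
    queues.enqueue(Box::new(KernelSignal::new(SIGINT)));
    queues.enqueue(Box::new(KernelSignal::new(SIGINT))); // coalesced away
    let none_blocked = SigMask::new_empty();
    assert!(queues.dequeue(&none_blocked).is_some()); // the single SIGINT
    assert!(queues.dequeue(&none_blocked).is_none()); // nothing left
}
```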

View File

@ -0,0 +1,95 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
/// User-provided signal stack. `SigStack` is per-thread, and each thread can have
/// at most one `SigStack`. If a signal handler specifies the `SA_ONSTACK` flag,
/// the handler is executed on the `SigStack` instead of on the default stack.
///
/// A `SigStack` can be registered and unregistered via the `sigaltstack` syscall.
#[derive(Debug, Clone)]
pub struct SigStack {
base: Vaddr,
flags: SigStackFlags,
size: usize,
/// The number of handlers that are currently using the stack
handler_counter: usize,
}
bitflags! {
pub struct SigStackFlags: u32 {
const SS_AUTODISARM = 1 << 31;
}
}
#[repr(u8)]
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SigStackStatus {
#[default]
SS_INACTIVE = 0,
// The thread is currently executing on the alternate signal stack
SS_ONSTACK = 1,
// The stack is currently disabled.
SS_DISABLE = 2,
}
impl SigStack {
pub fn new(base: Vaddr, flags: SigStackFlags, size: usize) -> Self {
Self {
base,
flags,
size,
handler_counter: 0,
}
}
pub fn base(&self) -> Vaddr {
self.base
}
pub fn flags(&self) -> SigStackFlags {
self.flags
}
pub fn size(&self) -> usize {
self.size
}
pub fn status(&self) -> SigStackStatus {
if self.handler_counter == 0 {
return SigStackStatus::SS_INACTIVE;
}
// Following the [sigaltstack doc](https://man7.org/linux/man-pages/man2/sigaltstack.2.html):
// if the stack is currently being executed on,
// 1. if the stack was established with the SS_AUTODISARM flag, the stack status is SS_DISABLE;
// 2. otherwise, the stack status is SS_ONSTACK.
if self.flags.contains(SigStackFlags::SS_AUTODISARM) {
SigStackStatus::SS_DISABLE
} else {
SigStackStatus::SS_ONSTACK
}
}
/// Marks that the stack is currently used by a signal handler.
pub fn increase_handler_counter(&mut self) {
self.handler_counter += 1;
}
/// Marks that the stack has been released by the current handler.
pub fn decrease_handler_counter(&mut self) {
// FIXME: deal with SS_AUTODISARM flag
self.handler_counter -= 1
}
/// Determines whether the stack is being executed on by any signal handler
pub fn is_active(&self) -> bool {
// FIXME: can DISABLE stack be used?
self.handler_counter != 0 && !self.flags.contains(SigStackFlags::SS_AUTODISARM)
}
pub fn is_disabled(&self) -> bool {
self.handler_counter != 0 && self.flags.contains(SigStackFlags::SS_AUTODISARM)
}
}
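A sketch of the status transitions (addresses and sizes made up): without `SS_AUTODISARM`, a stack in use reports `SS_ONSTACK`; with the flag, it would report `SS_DISABLE` while a handler runs.
```rust
fn sigstack_demo() {
    let mut stack = SigStack::new(0x7000_0000, SigStackFlags::empty(), 8192);
    assert_eq!(stack.status(), SigStackStatus::SS_INACTIVE);
    stack.increase_handler_counter(); // a handler starts running on it
    assert_eq!(stack.status(), SigStackStatus::SS_ONSTACK);
    stack.decrease_handler_counter(); // the handler returns
    assert_eq!(stack.status(), SigStackStatus::SS_INACTIVE);
}
```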

View File

@ -0,0 +1,60 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::cpu::{
CpuException, CpuExceptionInfo, ALIGNMENT_CHECK, BOUND_RANGE_EXCEEDED, DIVIDE_BY_ZERO,
GENERAL_PROTECTION_FAULT, INVALID_OPCODE, PAGE_FAULT, SIMD_FLOATING_POINT_EXCEPTION,
X87_FLOATING_POINT_EXCEPTION,
};
use super::Signal;
use crate::{
prelude::*,
process::signal::{c_types::siginfo_t, constants::*, sig_num::SigNum},
};
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FaultSignal {
num: SigNum,
code: i32,
addr: Option<u64>,
}
impl FaultSignal {
pub fn new(trap_info: &CpuExceptionInfo) -> FaultSignal {
debug!("Trap id: {}", trap_info.id);
let exception = CpuException::to_cpu_exception(trap_info.id as u16).unwrap();
let (num, code, addr) = match *exception {
DIVIDE_BY_ZERO => (SIGFPE, FPE_INTDIV, None),
X87_FLOATING_POINT_EXCEPTION | SIMD_FLOATING_POINT_EXCEPTION => {
(SIGFPE, FPE_FLTDIV, None)
}
BOUND_RANGE_EXCEEDED => (SIGSEGV, SEGV_BNDERR, None),
ALIGNMENT_CHECK => (SIGBUS, BUS_ADRALN, None),
INVALID_OPCODE => (SIGILL, ILL_ILLOPC, None),
GENERAL_PROTECTION_FAULT => (SIGBUS, BUS_ADRERR, None),
PAGE_FAULT => {
const PF_ERR_FLAG_PRESENT: usize = 1usize << 0;
let code = if trap_info.error_code & PF_ERR_FLAG_PRESENT != 0 {
SEGV_ACCERR
} else {
SEGV_MAPERR
};
let addr = Some(trap_info.page_fault_addr as u64);
(SIGSEGV, code, addr)
}
_ => panic!("Exception cannnot be a signal"),
};
FaultSignal { num, code, addr }
}
}
impl Signal for FaultSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
siginfo_t::new(self.num, self.code)
// info.set_si_addr(self.addr.unwrap_or_default() as *const c_void);
// info
}
}

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: MPL-2.0
use super::Signal;
use crate::process::signal::{c_types::siginfo_t, constants::SI_KERNEL, sig_num::SigNum};
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct KernelSignal {
num: SigNum,
}
impl KernelSignal {
pub const fn new(num: SigNum) -> Self {
Self { num }
}
}
impl Signal for KernelSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
siginfo_t::new(self.num, SI_KERNEL)
}
}

View File

@ -0,0 +1,16 @@
// SPDX-License-Identifier: MPL-2.0
pub mod fault;
pub mod kernel;
pub mod user;
use core::{any::Any, fmt::Debug};
use super::{c_types::siginfo_t, sig_num::SigNum};
pub trait Signal: Send + Sync + Debug + Any {
/// Returns the number of the signal.
fn num(&self) -> SigNum;
/// Returns the siginfo_t that gives more details about a signal.
fn to_info(&self) -> siginfo_t;
}

View File

@ -0,0 +1,68 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::Signal;
use crate::process::{
signal::{
c_types::siginfo_t,
constants::{SI_QUEUE, SI_TKILL, SI_USER},
sig_num::SigNum,
},
Pid, Uid,
};
#[derive(Debug, Clone, Copy)]
pub struct UserSignal {
num: SigNum,
pid: Pid,
uid: Uid,
kind: UserSignalKind,
}
#[derive(Debug, Copy, Clone)]
pub enum UserSignalKind {
Kill,
Tkill,
Sigqueue,
}
impl UserSignal {
pub fn new(num: SigNum, kind: UserSignalKind, pid: Pid, uid: Uid) -> Self {
Self {
num,
kind,
pid,
uid,
}
}
pub fn pid(&self) -> Pid {
self.pid
}
pub fn kind(&self) -> UserSignalKind {
self.kind
}
}
impl Signal for UserSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
let code = match self.kind {
UserSignalKind::Kill => SI_USER,
UserSignalKind::Tkill => SI_TKILL,
UserSignalKind::Sigqueue => SI_QUEUE,
};
siginfo_t::new(self.num, code)
// info.set_si_pid(self.pid);
// info.set_si_uid(self.uid);
// if let UserSignalKind::Sigqueue(val) = self.kind {
// info.set_si_value(val);
// }
}
}

View File

@ -0,0 +1,35 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! The process status
use super::TermStatus;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessStatus {
// Not ready to run
Uninit,
/// Can be scheduled to run
Runnable,
/// Exit while not reaped by parent
Zombie(TermStatus),
}
impl ProcessStatus {
pub fn set_zombie(&mut self, term_status: TermStatus) {
*self = ProcessStatus::Zombie(term_status);
}
pub fn is_zombie(&self) -> bool {
matches!(self, ProcessStatus::Zombie(_))
}
pub fn set_runnable(&mut self) {
*self = ProcessStatus::Runnable;
}
pub fn is_runnable(&self) -> bool {
*self == ProcessStatus::Runnable
}
}

View File

@ -0,0 +1,366 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(unused_variables)]
use alloc::sync::Arc;
use core::time::Duration;
use ostd::sync::{MutexGuard, SpinLock, WaitQueue};
use crate::time::wait::WaitTimeout;
/// Represents potential errors during lock operations on synchronization primitives,
/// specifically for operations associated with a `Condvar` (Condition Variable).
pub enum LockErr<Guard> {
Timeout(Guard),
Unknown(Guard),
}
/// The result of a lock operation.
/// Note that this differs from `LockResult` in the Rust standard library.
pub type LockResult<Guard> = Result<Guard, LockErr<Guard>>;
impl<Guard> LockErr<Guard> {
pub fn into_guard(self) -> Guard {
match self {
LockErr::Timeout(guard) => guard,
LockErr::Unknown(guard) => guard,
}
}
}
/// A `Condvar` (Condition Variable) is a synchronization primitive that can block threads
/// until a certain condition becomes true.
///
/// Although a `Condvar` can block threads, it is primarily used to achieve thread synchronization.
/// Threads waiting on a `Condvar` must acquire a mutex before proceeding. This setup is commonly
/// used with a shared mutable state to ensure safe concurrent access. A typical use involves one
/// or more threads waiting for a condition to become true to proceed with their operations.
///
/// # Usage
///
/// Pair a `Condvar` with a `Mutex` to allow threads to wait for certain conditions safely.
/// A waiting thread will sleep and atomically release the associated mutex.
/// Another thread can then update the shared state and notify the `Condvar`, allowing the
/// waiting thread to reacquire the mutex and proceed.
///
/// ## Example
///
/// This example demonstrates how a `Condvar` can synchronize threads:
///
/// ```rust
/// use alloc::sync::Arc;
/// use ostd::sync::Mutex;
/// use crate::{process::sync::Condvar, thread::{kernel_thread::{KernelThreadExt, ThreadOptions}, Thread}};
///
/// // Initializing a shared condition between threads
/// let pair = Arc::new((Mutex::new(false), Condvar::new()));
/// let pair2 = Arc::clone(&pair);
///
/// // Spawning a new kernel thread to change a shared state and notify the Condvar
/// Thread::spawn_kernel_thread(ThreadOptions::new(move || {
/// let (lock, cvar) = &*pair2;
/// Thread::yield_now();
/// let mut started = lock.lock();
/// *started = true; // Modifying the shared state
/// cvar.notify_one(); // Notifying one waiting thread
/// }));
///
/// // Main thread waiting for the shared state to be set to true
/// {
/// let (lock, cvar) = &*pair;
/// let mut started = lock.lock();
/// while !*started {
/// started = cvar.wait(started).unwrap_or_else(|err| err.into_guard());
/// }
/// }
/// ```
///
/// In this example, the main thread and a child thread synchronize access to a boolean flag
/// using a `Mutex` and a `Condvar`.
/// The main thread waits for the flag to be set to `true`,
/// utilizing the `Condvar` to sleep efficiently until the condition is met.
pub struct Condvar {
waitqueue: Arc<WaitQueue>,
counter: SpinLock<Inner>,
}
struct Inner {
waiter_count: u64,
notify_count: u64,
}
impl Condvar {
/// Creates a new condition variable.
pub fn new() -> Self {
Condvar {
waitqueue: Arc::new(WaitQueue::new()),
counter: SpinLock::new(Inner {
waiter_count: 0,
notify_count: 0,
}),
}
}
/// Atomically releases the given `MutexGuard`,
/// blocking the current thread until the condition variable
/// is notified, after which the mutex will be reacquired.
///
/// Returns a new `MutexGuard` if the operation is successful,
/// or returns the provided guard
/// within a `LockErr` if the waiting operation fails.
pub fn wait<'a, T>(&self, guard: MutexGuard<'a, T>) -> LockResult<MutexGuard<'a, T>> {
let cond = || {
// Check if the notify counter is greater than 0.
let mut counter = self.counter.lock();
if counter.notify_count > 0 {
// Decrement the notify counter.
counter.notify_count -= 1;
Some(())
} else {
None
}
};
{
let mut counter = self.counter.lock();
counter.waiter_count += 1;
}
let lock = MutexGuard::get_lock(&guard);
drop(guard);
self.waitqueue.wait_until(cond);
Ok(lock.lock())
}
/// Waits for the condition variable to be signaled or broadcast,
/// or for a timeout to elapse.
///
/// The function returns a tuple containing a `MutexGuard`
/// and a boolean that is true if the timeout elapsed
/// before the condition variable was notified.
pub fn wait_timeout<'a, T>(
&self,
guard: MutexGuard<'a, T>,
timeout: Duration,
) -> LockResult<(MutexGuard<'a, T>, bool)> {
let cond = || {
// Check if the notify counter is greater than 0.
let mut counter = self.counter.lock();
if counter.notify_count > 0 {
// Decrement the notify counter.
counter.notify_count -= 1;
Some(())
} else {
None
}
};
{
let mut counter = self.counter.lock();
counter.waiter_count += 1;
}
let lock = MutexGuard::get_lock(&guard);
drop(guard);
// Wait until the condition becomes true, we're explicitly woken up, or the timeout elapses.
let res = self.waitqueue.wait_until_or_timeout(cond, &timeout);
match res {
Some(_) => Ok((lock.lock(), false)),
None => {
let mut counter = self.counter.lock();
counter.waiter_count -= 1;
Err(LockErr::Timeout((lock.lock(), true)))
}
}
}
/// Waits while the given condition holds,
/// until the timeout elapses
/// or the thread is explicitly woken up.
///
/// Similar to `wait_timeout`,
/// it returns a tuple containing the `MutexGuard`
/// and a boolean value indicating
/// whether the wait operation terminated due to a timeout.
pub fn wait_timeout_while<'a, T, F>(
&self,
mut guard: MutexGuard<'a, T>,
timeout: Duration,
mut condition: F,
) -> LockResult<(MutexGuard<'a, T>, bool)>
where
F: FnMut(&mut T) -> bool,
{
loop {
if !condition(&mut *guard) {
return Ok((guard, false));
}
guard = match self.wait_timeout(guard, timeout) {
Ok((guard, timeout_flag)) => guard,
Err(LockErr::Timeout((guard, timeout_flag))) => {
return Err(LockErr::Timeout((guard, timeout_flag)))
}
Err(LockErr::Unknown(guard)) => return Err(LockErr::Unknown(guard)),
}
}
}
/// Waits while the given condition holds,
/// until the condition variable is explicitly notified or the wait is interrupted.
///
/// This function blocks until either the condition becomes false
/// or the condition variable is explicitly notified.
/// Returns the `MutexGuard` if the operation completes successfully.
pub fn wait_while<'a, T, F>(
&self,
mut guard: MutexGuard<'a, T>,
mut condition: F,
) -> LockResult<MutexGuard<'a, T>>
where
F: FnMut(&mut T) -> bool,
{
loop {
if !condition(&mut *guard) {
return Ok(guard);
}
guard = match self.wait(guard) {
Ok(guard) => guard,
Err(LockErr::Unknown(guard)) => return Err(LockErr::Unknown(guard)),
_ => unreachable!(),
}
}
}
/// Wakes up one blocked thread waiting on this condition variable.
///
/// If there is a waiting thread, it will be unblocked
/// and allowed to reacquire the associated mutex.
/// If no threads are waiting, this function is a no-op.
pub fn notify_one(&self) {
let mut counter = self.counter.lock();
if counter.waiter_count == 0 {
return;
}
counter.notify_count += 1;
self.waitqueue.wake_one();
counter.waiter_count -= 1;
}
/// Wakes up all blocked threads waiting on this condition variable.
///
/// This method will unblock all waiting threads
/// and they will be allowed to reacquire the associated mutex.
/// If no threads are waiting, this function is a no-op.
pub fn notify_all(&self) {
let mut counter = self.counter.lock();
if counter.waiter_count == 0 {
return;
}
// Use `+=` rather than `=` so that tickets already granted by
// `notify_one` (whose waiters are no longer counted) are not lost.
counter.notify_count += counter.waiter_count;
self.waitqueue.wake_all();
counter.waiter_count = 0;
}
}
#[cfg(ktest)]
mod test {
use ostd::{prelude::*, sync::Mutex};
use super::*;
use crate::thread::{
kernel_thread::{KernelThreadExt, ThreadOptions},
Thread,
};
#[ktest]
fn test_condvar_wait() {
let pair = Arc::new((Mutex::new(false), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = true;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let mut started = lock.lock();
while !*started {
started = cvar.wait(started).unwrap_or_else(|err| err.into_guard());
}
assert!(*started);
}
}
#[ktest]
fn test_condvar_wait_timeout() {
let pair = Arc::new((Mutex::new(false), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = true;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let mut started = lock.lock();
while !*started {
(started, _) = cvar
.wait_timeout(started, Duration::from_secs(1))
.unwrap_or_else(|err| err.into_guard());
}
assert!(*started);
}
}
#[ktest]
fn test_condvar_wait_while() {
let pair = Arc::new((Mutex::new(true), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = false;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let started = cvar
.wait_while(lock.lock(), |started| *started)
.unwrap_or_else(|err| err.into_guard());
assert!(!*started);
}
}
#[ktest]
fn test_condvar_wait_timeout_while() {
let pair = Arc::new((Mutex::new(true), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = false;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let (started, _) = cvar
.wait_timeout_while(lock.lock(), Duration::from_secs(1), |started| *started)
.unwrap_or_else(|err| err.into_guard());
assert!(!*started);
}
}
}
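
The waiter_count/notify_count pair is the heart of this Condvar: notify_one posts at most one wakeup "ticket" per registered waiter, and each waiter consumes exactly one ticket in its wake condition, so notifications that race with waiters are neither lost nor multiplied. A minimal user-space sketch of the same bookkeeping may help; it is illustrative only (plain Rust with std, names invented here, not the kernel API):

use std::sync::Mutex;

// Illustrative model of the Condvar bookkeeping above.
struct Counter {
    waiter_count: usize,
    notify_count: usize,
}

fn main() {
    let counter = Mutex::new(Counter { waiter_count: 0, notify_count: 0 });

    // A thread about to wait registers itself first (cf. `wait`).
    counter.lock().unwrap().waiter_count += 1;

    // notify_one: post a ticket only if someone is actually waiting.
    {
        let mut c = counter.lock().unwrap();
        if c.waiter_count > 0 {
            c.notify_count += 1;
            c.waiter_count -= 1;
        }
    }

    // The waiter's wake condition (cf. the `cond` closure): consume a ticket.
    let woke = {
        let mut c = counter.lock().unwrap();
        if c.notify_count > 0 {
            c.notify_count -= 1;
            true
        } else {
            false
        }
    };
    assert!(woke);
}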


@ -0,0 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
mod condvar;
#[allow(unused_imports)]
pub use self::condvar::{Condvar, LockErr};


@ -0,0 +1,19 @@
// SPDX-License-Identifier: MPL-2.0
use super::signal::sig_num::SigNum;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TermStatus {
Exited(u8),
Killed(SigNum),
}
impl TermStatus {
/// Returns the status as a 32-bit integer, encoded as specified in the wait(2) man page.
pub fn as_u32(&self) -> u32 {
match self {
TermStatus::Exited(status) => (*status as u32) << 8,
TermStatus::Killed(signum) => signum.as_u8() as u32,
}
}
}
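
A quick worked example of this encoding (a standalone sketch; the value 9 for SIGKILL is the usual Linux signal number, an assumption not taken from the code above):

fn main() {
    // TermStatus::Exited(42).as_u32(): the exit status occupies bits 8..16,
    // matching what the WIFEXITED/WEXITSTATUS macros in wait(2) expect.
    let exited = (42u8 as u32) << 8;
    assert_eq!(exited, 0x2a00);
    assert_eq!((exited >> 8) & 0xff, 42);

    // TermStatus::Killed(SIGKILL).as_u32(): the signal number sits in the
    // low bits, matching WIFSIGNALED/WTERMSIG.
    let killed = 9u32; // SIGKILL is 9 on Linux
    assert_eq!(killed & 0x7f, 9);
}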

113
kernel/src/process/wait.rs Normal file

@ -0,0 +1,113 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{process_filter::ProcessFilter, ExitCode, Pid, Process};
use crate::{prelude::*, process::process_table, thread::thread_table};
// The definition of WaitOptions is from Occlum
bitflags! {
pub struct WaitOptions: u32 {
const WNOHANG = 0x1;
// Note: the flags below are not supported yet
const WSTOPPED = 0x2; // Same as WUNTRACED
const WEXITED = 0x4;
const WCONTINUED = 0x8;
const WNOWAIT = 0x01000000;
}
}
impl WaitOptions {
pub fn supported(&self) -> bool {
let unsupported_flags = WaitOptions::all() - WaitOptions::WNOHANG;
!self.intersects(unsupported_flags)
}
}
pub fn wait_child_exit(
child_filter: ProcessFilter,
wait_options: WaitOptions,
) -> Result<Option<Arc<Process>>> {
let current = current!();
let zombie_child = current.children_pauser().pause_until(|| {
let unwaited_children = current
.children()
.lock()
.values()
.filter(|child| match child_filter {
ProcessFilter::Any => true,
ProcessFilter::WithPid(pid) => child.pid() == pid,
ProcessFilter::WithPgid(pgid) => child.pgid() == pgid,
})
.cloned()
.collect::<Vec<_>>();
if unwaited_children.is_empty() {
return Some(Err(Error::with_message(
Errno::ECHILD,
"the process has no child to wait",
)));
}
// Return immediately if we find a zombie child.
let zombie_child = unwaited_children.iter().find(|child| child.is_zombie());
if let Some(zombie_child) = zombie_child {
// With WNOWAIT, report the zombie child but do not reap it.
if !wait_options.contains(WaitOptions::WNOWAIT) {
reap_zombie_child(&current, zombie_child.pid());
}
return Some(Ok(Some(zombie_child.clone())));
}
if wait_options.contains(WaitOptions::WNOHANG) {
return Some(Ok(None));
}
// wait
None
})??;
Ok(zombie_child)
}
/// Frees the zombie child with the given pid and returns its exit code.
fn reap_zombie_child(process: &Process, pid: Pid) -> ExitCode {
let child_process = process.children().lock().remove(&pid).unwrap();
assert!(child_process.is_zombie());
for thread in &*child_process.threads().lock() {
thread_table::remove_thread(thread.tid());
}
// Lock order: session table -> group table -> process table -> group of process
// -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut process_table_mut = process_table::process_table_mut();
let mut child_group_mut = child_process.process_group.lock();
let process_group = child_group_mut.upgrade().unwrap();
let mut group_inner = process_group.inner.lock();
let session = group_inner.session.upgrade().unwrap();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&child_process.pid());
session_inner.remove_process(&child_process);
*child_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&process_group.pgid());
session_inner.remove_process_group(&process_group.pgid());
if session_inner.is_empty() {
session_table_mut.remove(&session.sid());
}
}
process_table_mut.remove(&child_process.pid());
child_process.exit_code().unwrap()
}