Remove the shim kernel crate

Zhang Junyang
2024-08-19 19:15:22 +08:00
committed by Tate, Hongliang Tian
parent d76c7a5b1e
commit dafd16075f
416 changed files with 231 additions and 273 deletions

443
kernel/src/process/clone.rs Normal file

@@ -0,0 +1,443 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use ostd::{
cpu::UserContext,
user::{UserContextApi, UserSpace},
};
use super::{
posix_thread::{PosixThread, PosixThreadBuilder, PosixThreadExt, ThreadName},
process_table,
process_vm::ProcessVm,
signal::sig_disposition::SigDispositions,
Credentials, Process, ProcessBuilder,
};
use crate::{
cpu::LinuxAbi,
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
thread::{allocate_tid, thread_table, Thread, Tid},
};
bitflags! {
pub struct CloneFlags: u32 {
const CLONE_VM = 0x00000100; /* Set if VM shared between processes. */
const CLONE_FS = 0x00000200; /* Set if fs info shared between processes. */
const CLONE_FILES = 0x00000400; /* Set if open files shared between processes. */
const CLONE_SIGHAND = 0x00000800; /* Set if signal handlers shared. */
const CLONE_PIDFD = 0x00001000; /* Set if a pidfd should be placed in parent. */
const CLONE_PTRACE = 0x00002000; /* Set if tracing continues on the child. */
const CLONE_VFORK = 0x00004000; /* Set if the parent wants the child to wake it up on mm_release. */
const CLONE_PARENT = 0x00008000; /* Set if we want to have the same parent as the cloner. */
const CLONE_THREAD = 0x00010000; /* Set to add to same thread group. */
const CLONE_NEWNS = 0x00020000; /* Set to create new namespace. */
const CLONE_SYSVSEM = 0x00040000; /* Set to shared SVID SEM_UNDO semantics. */
const CLONE_SETTLS = 0x00080000; /* Set TLS info. */
const CLONE_PARENT_SETTID = 0x00100000; /* Store TID in userlevel buffer before MM copy. */
const CLONE_CHILD_CLEARTID = 0x00200000;/* Register exit futex and memory location to clear. */
const CLONE_DETACHED = 0x00400000; /* Create clone detached. */
const CLONE_UNTRACED = 0x00800000; /* Set if the tracing process can't force CLONE_PTRACE on this clone. */
const CLONE_CHILD_SETTID = 0x01000000; /* Store TID in userlevel buffer in the child. */
const CLONE_NEWCGROUP = 0x02000000; /* New cgroup namespace. */
const CLONE_NEWUTS = 0x04000000; /* New utsname group. */
const CLONE_NEWIPC = 0x08000000; /* New ipcs. */
const CLONE_NEWUSER = 0x10000000; /* New user namespace. */
const CLONE_NEWPID = 0x20000000; /* New pid namespace. */
const CLONE_NEWNET = 0x40000000; /* New network namespace. */
const CLONE_IO = 0x80000000; /* Clone I/O context. */
}
}
#[derive(Debug, Clone, Copy)]
pub struct CloneArgs {
new_sp: u64,
stack_size: usize,
parent_tidptr: Vaddr,
child_tidptr: Vaddr,
tls: u64,
clone_flags: CloneFlags,
}
impl CloneArgs {
/// Clone arguments for the `fork` syscall.
/// TODO: set the correct values
pub const fn for_fork() -> Self {
CloneArgs {
new_sp: 0,
stack_size: 0,
parent_tidptr: 0,
child_tidptr: 0,
tls: 0,
clone_flags: CloneFlags::empty(),
}
}
pub const fn new(
new_sp: u64,
stack_size: usize,
parent_tidptr: Vaddr,
child_tidptr: Vaddr,
tls: u64,
clone_flags: CloneFlags,
) -> Self {
CloneArgs {
new_sp,
stack_size,
parent_tidptr,
child_tidptr,
tls,
clone_flags,
}
}
}
impl From<u64> for CloneFlags {
fn from(flags: u64) -> Self {
// We use the lower 32 bits
let clone_flags = (flags & 0xffff_ffff) as u32;
CloneFlags::from_bits_truncate(clone_flags)
}
}
impl CloneFlags {
fn check_unsupported_flags(&self) -> Result<()> {
let supported_flags = CloneFlags::CLONE_VM
| CloneFlags::CLONE_FS
| CloneFlags::CLONE_FILES
| CloneFlags::CLONE_SIGHAND
| CloneFlags::CLONE_THREAD
| CloneFlags::CLONE_SYSVSEM
| CloneFlags::CLONE_SETTLS
| CloneFlags::CLONE_PARENT_SETTID
| CloneFlags::CLONE_CHILD_SETTID
| CloneFlags::CLONE_CHILD_CLEARTID;
let unsupported_flags = *self - supported_flags;
if !unsupported_flags.is_empty() {
panic!("contains unsupported clone flags: {:?}", unsupported_flags);
}
Ok(())
}
}
/// Clone a child thread or child process.
///
/// FIXME: currently, the child process or thread will be scheduled to run at once,
/// but this may not be the expected behavior.
pub fn clone_child(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Tid> {
clone_args.clone_flags.check_unsupported_flags()?;
if clone_args.clone_flags.contains(CloneFlags::CLONE_THREAD) {
let child_thread = clone_child_thread(ctx, parent_context, clone_args)?;
child_thread.run();
let child_tid = child_thread.tid();
Ok(child_tid)
} else {
let child_process = clone_child_process(ctx, parent_context, clone_args)?;
child_process.run();
let child_pid = child_process.pid();
Ok(child_pid)
}
}
fn clone_child_thread(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Arc<Thread>> {
let Context {
process,
posix_thread,
thread: _,
task: _,
} = ctx;
let clone_flags = clone_args.clone_flags;
debug_assert!(clone_flags.contains(CloneFlags::CLONE_VM));
debug_assert!(clone_flags.contains(CloneFlags::CLONE_FILES));
debug_assert!(clone_flags.contains(CloneFlags::CLONE_SIGHAND));
let child_root_vmar = process.root_vmar();
let child_user_space = {
let child_vm_space = child_root_vmar.vm_space().clone();
let child_cpu_context = clone_cpu_context(
parent_context,
clone_args.new_sp,
clone_args.stack_size,
clone_args.tls,
clone_flags,
);
Arc::new(UserSpace::new(child_vm_space, child_cpu_context))
};
clone_sysvsem(clone_flags)?;
// Inherit sigmask from current thread
let sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into();
let child_tid = allocate_tid();
let child_thread = {
let credentials = {
let credentials = ctx.posix_thread.credentials();
Credentials::new_from(&credentials)
};
let thread_builder = PosixThreadBuilder::new(child_tid, child_user_space, credentials)
.process(posix_thread.weak_process())
.sig_mask(sig_mask);
thread_builder.build()
};
process.threads().lock().push(child_thread.clone());
let child_posix_thread = child_thread.as_posix_thread().unwrap();
clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
clone_child_settid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
Ok(child_thread)
}
fn clone_child_process(
ctx: &Context,
parent_context: &UserContext,
clone_args: CloneArgs,
) -> Result<Arc<Process>> {
let Context {
process,
posix_thread,
thread: _,
task: _,
} = ctx;
let clone_flags = clone_args.clone_flags;
// clone vm
let child_process_vm = {
let parent_process_vm = process.vm();
clone_vm(parent_process_vm, clone_flags)?
};
// clone user space
let child_user_space = {
let child_cpu_context = clone_cpu_context(
parent_context,
clone_args.new_sp,
clone_args.stack_size,
clone_args.tls,
clone_flags,
);
let child_vm_space = {
let child_root_vmar = child_process_vm.root_vmar();
child_root_vmar.vm_space().clone()
};
Arc::new(UserSpace::new(child_vm_space, child_cpu_context))
};
// clone file table
let child_file_table = clone_files(process.file_table(), clone_flags);
// clone fs
let child_fs = clone_fs(process.fs(), clone_flags);
// clone umask
let child_umask = {
let parent_umask = process.umask().read().get();
Arc::new(RwLock::new(FileCreationMask::new(parent_umask)))
};
// clone sig dispositions
let child_sig_dispositions = clone_sighand(process.sig_dispositions(), clone_flags);
// clone system V semaphore
clone_sysvsem(clone_flags)?;
// inherit parent's sig mask
let child_sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed).into();
// inherit parent's nice value
let child_nice = process.nice().load(Ordering::Relaxed);
let child_tid = allocate_tid();
let child = {
let child_elf_path = process.executable_path();
let child_thread_builder = {
let child_thread_name = ThreadName::new_from_executable_path(&child_elf_path)?;
let credentials = {
let credentials = ctx.posix_thread.credentials();
Credentials::new_from(&credentials)
};
PosixThreadBuilder::new(child_tid, child_user_space, credentials)
.thread_name(Some(child_thread_name))
.sig_mask(child_sig_mask)
};
let mut process_builder =
ProcessBuilder::new(child_tid, &child_elf_path, posix_thread.weak_process());
process_builder
.main_thread_builder(child_thread_builder)
.process_vm(child_process_vm)
.file_table(child_file_table)
.fs(child_fs)
.umask(child_umask)
.sig_dispositions(child_sig_dispositions)
.nice(child_nice);
process_builder.build()?
};
// Deals with clone flags
let child_thread = thread_table::get_thread(child_tid).unwrap();
let child_posix_thread = child_thread.as_posix_thread().unwrap();
clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
clone_child_settid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
// Sets parent process and group for child process.
set_parent_and_group(process, &child);
Ok(child)
}
fn clone_child_cleartid(
child_posix_thread: &PosixThread,
child_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
*child_posix_thread.clear_child_tid().lock() = child_tidptr;
}
Ok(())
}
fn clone_child_settid(
child_posix_thread: &PosixThread,
child_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
*child_posix_thread.set_child_tid().lock() = child_tidptr;
}
Ok(())
}
fn clone_parent_settid(
child_tid: Tid,
parent_tidptr: Vaddr,
clone_flags: CloneFlags,
) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
CurrentUserSpace::get().write_val(parent_tidptr, &child_tid)?;
}
Ok(())
}
/// Clone child process vm. If CLONE_VM is set, both threads share the same root vmar.
/// Otherwise, fork a new copy-on-write vmar.
fn clone_vm(parent_process_vm: &ProcessVm, clone_flags: CloneFlags) -> Result<ProcessVm> {
if clone_flags.contains(CloneFlags::CLONE_VM) {
Ok(parent_process_vm.clone())
} else {
ProcessVm::fork_from(parent_process_vm)
}
}
fn clone_cpu_context(
parent_context: &UserContext,
new_sp: u64,
stack_size: usize,
tls: u64,
clone_flags: CloneFlags,
) -> UserContext {
let mut child_context = *parent_context;
// The return value of child thread is zero
child_context.set_syscall_ret(0);
if clone_flags.contains(CloneFlags::CLONE_VM) {
// If the parent and the child share the same address space, a new stack must be specified.
debug_assert!(new_sp != 0);
}
if new_sp != 0 {
// If the stack size is not 0, `new_sp` points to the BOTTOMMOST byte of the stack.
if stack_size != 0 {
child_context.set_stack_pointer(new_sp as usize + stack_size);
}
// If the stack size is 0, `new_sp` points to the TOPMOST byte of the stack.
else {
child_context.set_stack_pointer(new_sp as usize);
}
}
if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
child_context.set_tls_pointer(tls as usize);
}
child_context
}
fn clone_fs(
parent_fs: &Arc<RwMutex<FsResolver>>,
clone_flags: CloneFlags,
) -> Arc<RwMutex<FsResolver>> {
if clone_flags.contains(CloneFlags::CLONE_FS) {
parent_fs.clone()
} else {
Arc::new(RwMutex::new(parent_fs.read().clone()))
}
}
fn clone_files(
parent_file_table: &Arc<Mutex<FileTable>>,
clone_flags: CloneFlags,
) -> Arc<Mutex<FileTable>> {
// If CLONE_FILES is set, the child and the parent share the same file table.
// Otherwise, the child gets a deep copy of the file table.
// FIXME: the clone may not be a deep copy.
if clone_flags.contains(CloneFlags::CLONE_FILES) {
parent_file_table.clone()
} else {
Arc::new(Mutex::new(parent_file_table.lock().clone()))
}
}
fn clone_sighand(
parent_sig_dispositions: &Arc<Mutex<SigDispositions>>,
clone_flags: CloneFlags,
) -> Arc<Mutex<SigDispositions>> {
// similar to CLONE_FILES
if clone_flags.contains(CloneFlags::CLONE_SIGHAND) {
parent_sig_dispositions.clone()
} else {
Arc::new(Mutex::new(*parent_sig_dispositions.lock()))
}
}
fn clone_sysvsem(clone_flags: CloneFlags) -> Result<()> {
if clone_flags.contains(CloneFlags::CLONE_SYSVSEM) {
warn!("CLONE_SYSVSEM is not supported now");
}
Ok(())
}
fn set_parent_and_group(parent: &Process, child: &Arc<Process>) {
let process_group = parent.process_group().unwrap();
let mut process_table_mut = process_table::process_table_mut();
let mut group_inner = process_group.inner.lock();
let mut child_group_mut = child.process_group.lock();
let mut children_mut = parent.children().lock();
children_mut.insert(child.pid(), child.clone());
group_inner.processes.insert(child.pid(), child.clone());
*child_group_mut = Arc::downgrade(&process_group);
process_table_mut.insert(child.pid(), child.clone());
}

21
kernel/src/process/credentials/c_types.rs Normal file

@@ -0,0 +1,21 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use crate::{prelude::*, process::Pid};
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct cap_user_header_t {
pub version: u32,
pub pid: Pid,
}
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct cap_user_data_t {
pub effective: u32,
pub permitted: u32,
pub inheritable: u32,
}
pub const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522;
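
For illustration only (a hypothetical capget-style helper, assuming `CapSet` from capabilities.rs is in scope): the three u32 fields would be filled from 64-bit capability sets. Note that `CapSet::as_u32` keeps only the low 32 bits, so the v3 ABI's upper halves are not represented in this sketch.

fn fill_cap_data(effective: CapSet, permitted: CapSet, inheritable: CapSet) -> cap_user_data_t {
    // Hypothetical helper, not part of the commit: truncates each 64-bit
    // capability set to its low 32 bits.
    cap_user_data_t {
        effective: effective.as_u32(),
        permitted: permitted.as_u32(),
        inheritable: inheritable.as_u32(),
    }
}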

94
kernel/src/process/credentials/capabilities.rs Normal file

@@ -0,0 +1,94 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU64, Ordering};
use bitflags::bitflags;
bitflags! {
/// Represents a set of Linux capabilities.
pub struct CapSet: u64 {
const CHOWN = 1 << 0;
const DAC_OVERRIDE = 1 << 1;
const DAC_READ_SEARCH = 1 << 2;
const FOWNER = 1 << 3;
const FSETID = 1 << 4;
const KILL = 1 << 5;
const SETGID = 1 << 6;
const SETUID = 1 << 7;
const SETPCAP = 1 << 8;
const LINUX_IMMUTABLE = 1 << 9;
const NET_BIND_SERVICE = 1 << 10;
const NET_BROADCAST = 1 << 11;
const NET_ADMIN = 1 << 12;
const NET_RAW = 1 << 13;
const IPC_LOCK = 1 << 14;
const IPC_OWNER = 1 << 15;
const SYS_MODULE = 1 << 16;
const SYS_RAWIO = 1 << 17;
const SYS_CHROOT = 1 << 18;
const SYS_PTRACE = 1 << 19;
const SYS_PACCT = 1 << 20;
const SYS_ADMIN = 1 << 21;
const SYS_BOOT = 1 << 22;
const SYS_NICE = 1 << 23;
const SYS_RESOURCE = 1 << 24;
const SYS_TIME = 1 << 25;
const SYS_TTY_CONFIG = 1 << 26;
const MKNOD = 1 << 27;
const LEASE = 1 << 28;
const AUDIT_WRITE = 1 << 29;
const AUDIT_CONTROL = 1 << 30;
const SETFCAP = 1 << 31;
const MAC_OVERRIDE = 1 << 32;
const MAC_ADMIN = 1 << 33;
const SYSLOG = 1 << 34;
const WAKE_ALARM = 1 << 35;
const BLOCK_SUSPEND = 1 << 36;
const AUDIT_READ = 1 << 37;
const PERFMON = 1 << 38;
const BPF = 1 << 39;
const CHECKPOINT_RESTORE = 1u64 << 40;
// ... include other capabilities as needed
}
}
impl CapSet {
/// Converts the capability set to a `u32`. The higher bits are truncated.
pub fn as_u32(&self) -> u32 {
self.bits() as u32
}
/// Creates a new `CapSet` with the `SYS_ADMIN` capability set, typically for a root user.
pub const fn new_root() -> Self {
CapSet::SYS_ADMIN
}
/// The most significant bit in a 64-bit `CapSet` that may be set to represent a Linux capability.
pub fn most_significant_bit() -> u8 {
// CHECKPOINT_RESTORE is the Linux capability with the largest numerical value
40
}
}
#[derive(Debug)]
pub(super) struct AtomicCapSet(AtomicU64);
impl AtomicCapSet {
pub const fn new(capset: CapSet) -> Self {
Self(AtomicU64::new(capset.bits))
}
pub fn set(&self, capset: CapSet) {
self.0.store(capset.bits(), Ordering::Relaxed);
}
pub fn get(&self) -> CapSet {
CapSet::from_bits_truncate(self.0.load(Ordering::Relaxed))
}
}
impl Clone for AtomicCapSet {
fn clone(&self) -> Self {
Self::new(self.get())
}
}
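
A small illustrative sketch (not part of the commit): `CapSet` behaves like a plain bit set, and `as_u32` silently drops capabilities numbered 32 and above.

fn demo_capset() {
    let caps = CapSet::NET_BIND_SERVICE | CapSet::SYSLOG;
    assert!(caps.contains(CapSet::NET_BIND_SERVICE));
    // SYSLOG is bit 34, so it is lost when round-tripping through u32.
    assert_eq!(
        CapSet::from_bits_truncate(caps.as_u32() as u64),
        CapSet::NET_BIND_SERVICE
    );
}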

440
kernel/src/process/credentials/credentials_.rs Normal file

@@ -0,0 +1,440 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::sync::{RwLockReadGuard, RwLockWriteGuard};
use super::{group::AtomicGid, user::AtomicUid, Gid, Uid};
use crate::{
prelude::*,
process::credentials::capabilities::{AtomicCapSet, CapSet},
};
#[derive(Debug)]
pub(super) struct Credentials_ {
/// Real user id. The user to which the process belongs.
ruid: AtomicUid,
/// Effective user id. Used to determine the permissions granted to a process when it tries to perform various operations (i.e., system calls)
euid: AtomicUid,
/// Saved set-user-id. When executing a set-uid ELF, the saved set-user-id is set if the ELF has the setuid bit.
suid: AtomicUid,
/// User id used for filesystem checks.
fsuid: AtomicUid,
/// Real group id. The group to which the process belongs
rgid: AtomicGid,
/// Effective group id.
egid: AtomicGid,
/// Saved set-group-id. When executing a set-gid ELF, the saved set-group-id is set if the ELF has the setgid bit.
sgid: AtomicGid,
/// Group id used for file system checks.
fsgid: AtomicGid,
/// A set of additional groups to which a process belongs.
supplementary_gids: RwLock<BTreeSet<Gid>>,
/// The Linux capabilities.
/// This is not the capability (in static_cap.rs) enforced on Rust objects.
/// Capabilities that child processes can inherit.
inheritable_capset: AtomicCapSet,
/// Capabilities that a process can potentially be granted.
/// It defines the maximum set of privileges that the process could possibly have.
/// Even if the process is not currently using these privileges, it has the potential ability to enable them.
permitted_capset: AtomicCapSet,
/// Capabilities that the process can actually use.
effective_capset: AtomicCapSet,
}
impl Credentials_ {
/// Creates new credentials. `ruid`, `euid`, `suid`, and `fsuid` are all set to `uid`; likewise for the group ids.
pub fn new(uid: Uid, gid: Gid, capset: CapSet) -> Self {
let mut supplementary_gids = BTreeSet::new();
supplementary_gids.insert(gid);
Self {
ruid: AtomicUid::new(uid),
euid: AtomicUid::new(uid),
suid: AtomicUid::new(uid),
fsuid: AtomicUid::new(uid),
rgid: AtomicGid::new(gid),
egid: AtomicGid::new(gid),
sgid: AtomicGid::new(gid),
fsgid: AtomicGid::new(gid),
supplementary_gids: RwLock::new(supplementary_gids),
inheritable_capset: AtomicCapSet::new(capset),
permitted_capset: AtomicCapSet::new(capset),
effective_capset: AtomicCapSet::new(capset),
}
}
fn is_privileged(&self) -> bool {
self.euid.is_root()
}
// ******* Uid methods *******
pub(super) fn ruid(&self) -> Uid {
self.ruid.get()
}
pub(super) fn euid(&self) -> Uid {
self.euid.get()
}
pub(super) fn suid(&self) -> Uid {
self.suid.get()
}
pub(super) fn fsuid(&self) -> Uid {
self.fsuid.get()
}
pub(super) fn set_uid(&self, uid: Uid) {
if self.is_privileged() {
self.ruid.set(uid);
self.euid.set(uid);
self.suid.set(uid);
} else {
self.euid.set(uid);
}
}
pub(super) fn set_reuid(&self, ruid: Option<Uid>, euid: Option<Uid>) -> Result<()> {
self.check_uid_perm(ruid.as_ref(), euid.as_ref(), None, false)?;
let should_set_suid = ruid.is_some() || euid.is_some_and(|euid| euid != self.ruid());
self.set_resuid_unchecked(ruid, euid, None);
if should_set_suid {
self.suid.set(self.euid());
}
// FIXME: should we set fsuid here? The Linux document for syscall `setfsuid` contradicts
// the document of syscall `setreuid`. The `setfsuid` document says the `fsuid` is always
// the same as `euid`, but `setreuid` does not mention that the `fsuid` should be set.
self.fsuid.set(self.euid());
Ok(())
}
pub(super) fn set_resuid(
&self,
ruid: Option<Uid>,
euid: Option<Uid>,
suid: Option<Uid>,
) -> Result<()> {
self.check_uid_perm(ruid.as_ref(), euid.as_ref(), suid.as_ref(), true)?;
self.set_resuid_unchecked(ruid, euid, suid);
self.fsuid.set(self.euid());
Ok(())
}
pub(super) fn set_fsuid(&self, fsuid: Option<Uid>) -> Result<Uid> {
let old_fsuid = self.fsuid();
let Some(fsuid) = fsuid else {
return Ok(old_fsuid);
};
if self.is_privileged() {
self.fsuid.set(fsuid);
return Ok(old_fsuid);
}
if fsuid != self.ruid() && fsuid != self.euid() && fsuid != self.suid() {
return_errno_with_message!(
Errno::EPERM,
"fsuid can only be one of old ruid, old euid and old suid."
)
}
self.fsuid.set(fsuid);
Ok(old_fsuid)
}
pub(super) fn set_euid(&self, euid: Uid) {
self.euid.set(euid);
}
pub(super) fn set_suid(&self, suid: Uid) {
self.suid.set(suid);
}
// For `setreuid`, ruid can *NOT* be set to old suid,
// while for `setresuid`, ruid can be set to old suid.
fn check_uid_perm(
&self,
ruid: Option<&Uid>,
euid: Option<&Uid>,
suid: Option<&Uid>,
ruid_may_be_old_suid: bool,
) -> Result<()> {
if self.is_privileged() {
return Ok(());
}
if let Some(ruid) = ruid
&& *ruid != self.ruid()
&& *ruid != self.euid()
&& (!ruid_may_be_old_suid || *ruid != self.suid())
{
return_errno_with_message!(
Errno::EPERM,
"ruid can only be one of old ruid, old euid (and old suid)."
);
}
if let Some(euid) = euid
&& *euid != self.ruid()
&& *euid != self.euid()
&& *euid != self.suid()
{
return_errno_with_message!(
Errno::EPERM,
"euid can only be one of old ruid, old euid and old suid."
)
}
if let Some(suid) = suid
&& *suid != self.ruid()
&& *suid != self.euid()
&& *suid != self.suid()
{
return_errno_with_message!(
Errno::EPERM,
"suid can only be one of old ruid, old euid and old suid."
)
}
Ok(())
}
fn set_resuid_unchecked(&self, ruid: Option<Uid>, euid: Option<Uid>, suid: Option<Uid>) {
if let Some(ruid) = ruid {
self.ruid.set(ruid);
}
if let Some(euid) = euid {
self.euid.set(euid);
}
if let Some(suid) = suid {
self.suid.set(suid);
}
}
// ******* Gid methods *******
pub(super) fn rgid(&self) -> Gid {
self.rgid.get()
}
pub(super) fn egid(&self) -> Gid {
self.egid.get()
}
pub(super) fn sgid(&self) -> Gid {
self.sgid.get()
}
pub(super) fn fsgid(&self) -> Gid {
self.fsgid.get()
}
pub(super) fn set_gid(&self, gid: Gid) {
if self.is_privileged() {
self.rgid.set(gid);
self.egid.set(gid);
self.sgid.set(gid);
} else {
self.egid.set(gid);
}
}
pub(super) fn set_regid(&self, rgid: Option<Gid>, egid: Option<Gid>) -> Result<()> {
self.check_gid_perm(rgid.as_ref(), egid.as_ref(), None, false)?;
let should_set_sgid = rgid.is_some() || egid.is_some_and(|egid| egid != self.rgid());
self.set_resgid_unchecked(rgid, egid, None);
if should_set_sgid {
self.sgid.set(self.egid());
}
self.fsgid.set(self.egid());
Ok(())
}
pub(super) fn set_resgid(
&self,
rgid: Option<Gid>,
egid: Option<Gid>,
sgid: Option<Gid>,
) -> Result<()> {
self.check_gid_perm(rgid.as_ref(), egid.as_ref(), sgid.as_ref(), true)?;
self.set_resgid_unchecked(rgid, egid, sgid);
self.fsgid.set(self.egid());
Ok(())
}
pub(super) fn set_fsgid(&self, fsgid: Option<Gid>) -> Result<Gid> {
let old_fsgid = self.fsgid();
let Some(fsgid) = fsgid else {
return Ok(old_fsgid);
};
if self.is_privileged() {
self.fsgid.set(fsgid);
return Ok(old_fsgid);
}
if fsgid != self.rgid() && fsgid != self.egid() && fsgid != self.sgid() {
return_errno_with_message!(
Errno::EPERM,
"fsuid can only be one of old ruid, old euid and old suid."
)
}
self.fsgid.set(fsgid);
Ok(old_fsgid)
}
pub(super) fn set_egid(&self, egid: Gid) {
self.egid.set(egid);
}
pub(super) fn set_sgid(&self, sgid: Gid) {
self.sgid.set(sgid);
}
// For `setregid`, rgid can *NOT* be set to old sgid,
// while for `setresgid`, rgid can be set to old sgid.
fn check_gid_perm(
&self,
rgid: Option<&Gid>,
egid: Option<&Gid>,
sgid: Option<&Gid>,
rgid_may_be_old_sgid: bool,
) -> Result<()> {
if self.is_privileged() {
return Ok(());
}
if let Some(rgid) = rgid
&& *rgid != self.rgid()
&& *rgid != self.egid()
&& (!rgid_may_be_old_sgid || *rgid != self.sgid())
{
return_errno_with_message!(
Errno::EPERM,
"rgid can only be one of old rgid, old egid (and old sgid)."
);
}
if let Some(egid) = egid
&& *egid != self.rgid()
&& *egid != self.egid()
&& *egid != self.sgid()
{
return_errno_with_message!(
Errno::EPERM,
"egid can only be one of old rgid, old egid and old sgid."
)
}
if let Some(sgid) = sgid
&& *sgid != self.rgid()
&& *sgid != self.egid()
&& *sgid != self.sgid()
{
return_errno_with_message!(
Errno::EPERM,
"sgid can only be one of old rgid, old egid and old sgid."
)
}
Ok(())
}
fn set_resgid_unchecked(&self, rgid: Option<Gid>, egid: Option<Gid>, sgid: Option<Gid>) {
if let Some(rgid) = rgid {
self.rgid.set(rgid);
}
if let Some(egid) = egid {
self.egid.set(egid);
}
if let Some(sgid) = sgid {
self.sgid.set(sgid);
}
}
// ******* Supplementary groups methods *******
pub(super) fn groups(&self) -> RwLockReadGuard<BTreeSet<Gid>> {
self.supplementary_gids.read()
}
pub(super) fn groups_mut(&self) -> RwLockWriteGuard<BTreeSet<Gid>> {
self.supplementary_gids.write()
}
// ******* Linux Capability methods *******
pub(super) fn inheritable_capset(&self) -> CapSet {
self.inheritable_capset.get()
}
pub(super) fn permitted_capset(&self) -> CapSet {
self.permitted_capset.get()
}
pub(super) fn effective_capset(&self) -> CapSet {
self.effective_capset.get()
}
pub(super) fn set_inheritable_capset(&self, inheritable_capset: CapSet) {
self.inheritable_capset.set(inheritable_capset);
}
pub(super) fn set_permitted_capset(&self, permitted_capset: CapSet) {
self.permitted_capset.set(permitted_capset);
}
pub(super) fn set_effective_capset(&self, effective_capset: CapSet) {
self.effective_capset.set(effective_capset);
}
}
impl Clone for Credentials_ {
fn clone(&self) -> Self {
Self {
ruid: self.ruid.clone(),
euid: self.euid.clone(),
suid: self.suid.clone(),
fsuid: self.fsuid.clone(),
rgid: self.rgid.clone(),
egid: self.egid.clone(),
sgid: self.sgid.clone(),
fsgid: self.fsgid.clone(),
supplementary_gids: RwLock::new(self.supplementary_gids.read().clone()),
inheritable_capset: self.inheritable_capset.clone(),
permitted_capset: self.permitted_capset.clone(),
effective_capset: self.effective_capset.clone(),
}
}
}
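
To make the unprivileged rule concrete, here is a hypothetical in-module sketch (not part of the commit): once all uids belong to an ordinary user, `fsuid` may only be set to one of the old ruid, euid, or suid.

fn demo_fsuid_rule() {
    let creds = Credentials_::new(Uid::new(1000), Gid::new(1000), CapSet::empty());
    // Allowed: 1000 equals the current ruid/euid/suid.
    assert!(creds.set_fsuid(Some(Uid::new(1000))).is_ok());
    // Rejected with EPERM: 0 matches none of ruid, euid, suid.
    assert!(creds.set_fsuid(Some(Uid::new(0))).is_err());
}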

52
kernel/src/process/credentials/group.rs Normal file

@@ -0,0 +1,52 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU32, Ordering};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, Pod, Default, PartialEq, Eq, PartialOrd, Ord)]
#[repr(C)]
pub struct Gid(u32);
impl Gid {
pub const fn new(gid: u32) -> Self {
Self(gid)
}
pub const fn new_root() -> Self {
Self(ROOT_GID)
}
pub const fn as_u32(&self) -> u32 {
self.0
}
pub const fn is_root(&self) -> bool {
self.0 == ROOT_GID
}
}
const ROOT_GID: u32 = 0;
#[derive(Debug)]
pub(super) struct AtomicGid(AtomicU32);
impl AtomicGid {
pub const fn new(gid: Gid) -> Self {
Self(AtomicU32::new(gid.as_u32()))
}
pub fn set(&self, gid: Gid) {
self.0.store(gid.as_u32(), Ordering::Relaxed)
}
pub fn get(&self) -> Gid {
Gid(self.0.load(Ordering::Relaxed))
}
}
impl Clone for AtomicGid {
fn clone(&self) -> Self {
Self(AtomicU32::new(self.0.load(Ordering::Relaxed)))
}
}

26
kernel/src/process/credentials/mod.rs Normal file

@@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
pub mod c_types;
pub mod capabilities;
mod credentials_;
mod group;
mod static_cap;
mod user;
use aster_rights::FullOp;
use credentials_::Credentials_;
pub use group::Gid;
pub use user::Uid;
use crate::prelude::*;
/// `Credentials` represents a set of associated numeric user ids (UIDs) and group identifiers (GIDs)
/// for a process.
/// These identifiers are as follows:
/// - real user ID and group ID;
/// - effective user ID and group ID;
/// - saved-set user ID and saved-set group ID;
/// - file system user ID and group ID (Linux-specific);
/// - supplementary group IDs;
/// - Linux capabilities.
pub struct Credentials<R = FullOp>(Arc<Credentials_>, R);

303
kernel/src/process/credentials/static_cap.rs Normal file

@@ -0,0 +1,303 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use aster_rights::{Dup, Read, TRights, Write};
use aster_rights_proc::require;
use ostd::sync::{RwLockReadGuard, RwLockWriteGuard};
use super::{capabilities::CapSet, credentials_::Credentials_, Credentials, Gid, Uid};
use crate::prelude::*;
impl<R: TRights> Credentials<R> {
/// Creates a root `Credentials`. This method can only be used when creating the first process
pub fn new_root() -> Self {
let uid = Uid::new_root();
let gid = Gid::new_root();
let cap = CapSet::new_root();
let credentials_ = Arc::new(Credentials_::new(uid, gid, cap));
Self(credentials_, R::new())
}
/// Clones a new `Credentials` from an existing `Credentials`.
///
/// This method requires the `Read` right.
#[require(R1 > Read)]
pub fn new_from<R1: TRights>(credentials: &Credentials<R1>) -> Self {
let credentials_ = Arc::new(credentials.0.as_ref().clone());
Self(credentials_, R::new())
}
/// Duplicates the capabilities.
///
/// This method requires the `Dup` right.
#[require(R > Dup)]
pub fn dup(&self) -> Self {
Self(self.0.clone(), self.1)
}
/// Restricts capabilities to a smaller set.
#[require(R > R1)]
pub fn restrict<R1: TRights>(self) -> Credentials<R1> {
let Credentials(credentials_, _) = self;
Credentials(credentials_, R1::new())
}
// *********** Uid methods **********
/// Gets real user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn ruid(&self) -> Uid {
self.0.ruid()
}
/// Gets effective user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn euid(&self) -> Uid {
self.0.euid()
}
/// Gets saved-set user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn suid(&self) -> Uid {
self.0.suid()
}
/// Gets file system user id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn fsuid(&self) -> Uid {
self.0.fsuid()
}
/// Sets the uid. If self is privileged, sets the real, effective, and saved-set user ids as `uid`.
/// Otherwise, sets only the effective user id as `uid`.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_uid(&self, uid: Uid) {
self.0.set_uid(uid);
}
/// Sets the real and effective user ids as `ruid` and `euid` respectively. If `ruid` or `euid`
/// is `None`, the corresponding user id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_reuid(&self, ruid: Option<Uid>, euid: Option<Uid>) -> Result<()> {
self.0.set_reuid(ruid, euid)
}
/// Sets the real, effective, and saved-set user ids as `ruid`, `euid`, and `suid` respectively. If
/// `ruid`, `euid` or `suid` is `None`, the corresponding user id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_resuid(
&self,
ruid: Option<Uid>,
euid: Option<Uid>,
suid: Option<Uid>,
) -> Result<()> {
self.0.set_resuid(ruid, euid, suid)
}
/// Sets file system user id as `fsuid`. Returns the original file system user id.
/// If `fsuid` is None, leaves file system user id unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_fsuid(&self, fsuid: Option<Uid>) -> Result<Uid> {
self.0.set_fsuid(fsuid)
}
/// Sets effective user id as `euid`. This method should only be used when executing a file
/// whose `setuid` bit is set.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_euid(&self, euid: Uid) {
self.0.set_euid(euid);
}
/// Sets the saved-set user id to the effective user id. This method should only be used when
/// executing a new executable file.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn reset_suid(&self) {
let euid = self.0.euid();
self.0.set_suid(euid);
}
// *********** Gid methods **********
/// Gets real group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn rgid(&self) -> Gid {
self.0.rgid()
}
/// Gets effective group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn egid(&self) -> Gid {
self.0.egid()
}
/// Gets saved-set group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn sgid(&self) -> Gid {
self.0.sgid()
}
/// Gets file system group id.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn fsgid(&self) -> Gid {
self.0.fsgid()
}
/// Sets the gid. If self is privileged, sets the real, effective, and saved-set group ids as `gid`.
/// Otherwise, sets only the effective group id as `gid`.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_gid(&self, gid: Gid) {
self.0.set_gid(gid);
}
/// Sets the real and effective group ids as `rgid` and `egid` respectively. If `rgid` or `egid`
/// is `None`, the corresponding group id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_regid(&self, rgid: Option<Gid>, egid: Option<Gid>) -> Result<()> {
self.0.set_regid(rgid, egid)
}
/// Sets the real, effective, and saved-set group ids as `rgid`, `egid`, and `sgid` respectively. If
/// `rgid`, `egid` or `sgid` is `None`, the corresponding group id is left unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_resgid(
&self,
rgid: Option<Gid>,
egid: Option<Gid>,
sgid: Option<Gid>,
) -> Result<()> {
self.0.set_resgid(rgid, egid, sgid)
}
/// Sets file system group id as `fsgid`. Returns the original file system group id.
/// If `fsgid` is None, leaves file system group id unchanged.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_fsgid(&self, fsgid: Option<Gid>) -> Result<Gid> {
self.0.set_fsgid(fsgid)
}
/// Sets effective group id as `egid`. This method should only be used when executing a file
/// whose `setgid` bit is set.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_egid(&self, egid: Gid) {
self.0.set_egid(egid);
}
/// Sets the saved-set group id to the effective group id. This method should only be used when
/// executing a new executable file.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn reset_sgid(&self) {
let egid = self.0.egid();
self.0.set_sgid(egid);
}
// *********** Supplementary group methods **********
/// Acquires the read lock of supplementary group ids.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn groups(&self) -> RwLockReadGuard<BTreeSet<Gid>> {
self.0.groups()
}
/// Acquires the write lock of supplementary group ids.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn groups_mut(&self) -> RwLockWriteGuard<BTreeSet<Gid>> {
self.0.groups_mut()
}
// *********** Linux Capability methods **********
/// Gets the capabilities that child process can inherit.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn inheritable_capset(&self) -> CapSet {
self.0.inheritable_capset()
}
/// Gets the capabilities that are permitted.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn permitted_capset(&self) -> CapSet {
self.0.permitted_capset()
}
/// Gets the capabilities that are actually in effect.
///
/// This method requires the `Read` right.
#[require(R > Read)]
pub fn effective_capset(&self) -> CapSet {
self.0.effective_capset()
}
/// Sets the capabilities that child process can inherit.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_inheritable_capset(&self, inheritable_capset: CapSet) {
self.0.set_inheritable_capset(inheritable_capset);
}
/// Sets the capabilities that are permitted.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_permitted_capset(&self, permitted_capset: CapSet) {
self.0.set_permitted_capset(permitted_capset);
}
/// Sets the capabilities that are actually in effect.
///
/// This method requires the `Write` right.
#[require(R > Write)]
pub fn set_effective_capset(&self, effective_capset: CapSet) {
self.0.set_effective_capset(effective_capset);
}
}
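
A sketch of how the rights typestate is meant to be used (illustrative, assuming `aster_rights::ReadOp` is imported): a full-rights credential can be duplicated and restricted, after which `Write`-gated methods no longer compile for the restricted handle.

fn read_only_view(full: &Credentials) -> Credentials<ReadOp> {
    // `dup` requires the `Dup` right; `restrict` narrows FullOp down to ReadOp.
    // The returned view can call `euid()` but `set_uid()` would not compile.
    full.dup().restrict()
}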

56
kernel/src/process/credentials/user.rs Normal file

@@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicU32, Ordering};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Pod)]
#[repr(C)]
pub struct Uid(u32);
const ROOT_UID: u32 = 0;
impl Uid {
pub const fn new_root() -> Self {
Self(ROOT_UID)
}
pub const fn new(uid: u32) -> Self {
Self(uid)
}
pub const fn is_root(&self) -> bool {
self.0 == ROOT_UID
}
pub const fn as_u32(&self) -> u32 {
self.0
}
}
#[derive(Debug)]
pub(super) struct AtomicUid(AtomicU32);
impl AtomicUid {
pub const fn new(uid: Uid) -> Self {
Self(AtomicU32::new(uid.as_u32()))
}
pub fn set(&self, uid: Uid) {
self.0.store(uid.as_u32(), Ordering::Release)
}
pub fn get(&self) -> Uid {
Uid(self.0.load(Ordering::Acquire))
}
pub fn is_root(&self) -> bool {
self.get().is_root()
}
}
impl Clone for AtomicUid {
fn clone(&self) -> Self {
Self(AtomicU32::new(self.0.load(Ordering::Acquire)))
}
}

74
kernel/src/process/exit.rs Normal file

@@ -0,0 +1,74 @@
// SPDX-License-Identifier: MPL-2.0
use super::{process_table, Pid, Process, TermStatus};
use crate::{
prelude::*,
process::{
posix_thread::do_exit,
signal::{constants::SIGCHLD, signals::kernel::KernelSignal},
},
};
pub fn do_exit_group(term_status: TermStatus) {
let current = current!();
debug!("exit group was called");
if current.is_zombie() {
return;
}
current.set_zombie(term_status);
// Exit all threads
let threads = current.threads().lock().clone();
for thread in threads {
if let Err(e) = do_exit(thread, term_status) {
debug!("Ignore error when call exit: {:?}", e);
}
}
// Sends parent-death signal
// FIXME: according to the Linux spec, the signal should be sent when the POSIX thread
// that created the child process exits, not when the whole process exits the group.
for (_, child) in current.children().lock().iter() {
let Some(signum) = child.parent_death_signal() else {
continue;
};
// FIXME: set pid of the signal
let signal = KernelSignal::new(signum);
child.enqueue_signal(signal);
}
// Close all files then exit the process
let files = current.file_table().lock().close_all();
drop(files);
// Move children to the init process
if !is_init_process(&current) {
if let Some(init_process) = get_init_process() {
let mut init_children = init_process.children().lock();
for (_, child_process) in current.children().lock().extract_if(|_, _| true) {
let mut parent = child_process.parent.lock();
init_children.insert(child_process.pid(), child_process.clone());
*parent = Arc::downgrade(&init_process);
}
}
}
if let Some(parent) = current.parent() {
// Notify parent
let signal = KernelSignal::new(SIGCHLD);
parent.enqueue_signal(signal);
parent.children_pauser().resume_all();
}
}
const INIT_PROCESS_PID: Pid = 1;
/// Gets the init process
fn get_init_process() -> Option<Arc<Process>> {
process_table::get_process(INIT_PROCESS_PID)
}
fn is_init_process(process: &Process) -> bool {
process.pid() == INIT_PROCESS_PID
}

178
kernel/src/process/kill.rs Normal file

@@ -0,0 +1,178 @@
// SPDX-License-Identifier: MPL-2.0
use super::{
posix_thread::PosixThreadExt,
process_table,
signal::signals::{user::UserSignal, Signal},
Pgid, Pid, Process, Sid, Uid,
};
use crate::{
prelude::*,
thread::{thread_table, Tid},
};
/// Sends a signal to a process, using the current process as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to this particular target process.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn kill(pid: Pid, signal: Option<UserSignal>) -> Result<()> {
let process = process_table::get_process(pid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "the target process does not exist"))?;
kill_process(&process, signal)
}
/// Sends a signal to all processes in a group, using the current process
/// as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to the target group.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn kill_group(pgid: Pgid, signal: Option<UserSignal>) -> Result<()> {
let process_group = process_table::get_process_group(&pgid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "target group does not exist"))?;
let inner = process_group.inner.lock();
for process in inner.processes.values() {
kill_process(process, signal)?;
}
Ok(())
}
/// Sends a signal to a target thread, using the current process
/// as the sender.
///
/// If `signal` is `None`, this method will only check permission without sending
/// any signal.
pub fn tgkill(tid: Tid, tgid: Pid, signal: Option<UserSignal>) -> Result<()> {
let thread = thread_table::get_thread(tid)
.ok_or_else(|| Error::with_message(Errno::ESRCH, "target thread does not exist"))?;
if thread.status().is_exited() {
return Ok(());
}
let posix_thread = thread.as_posix_thread().unwrap();
// Check tgid
let pid = posix_thread.process().pid();
if pid != tgid {
return_errno_with_message!(
Errno::EINVAL,
"the combination of tgid and pid is not valid"
);
}
// Check permission
let signum = signal.map(|signal| signal.num());
let sender = current_thread_sender_ids();
posix_thread.check_signal_perm(signum.as_ref(), &sender)?;
if let Some(signal) = signal {
posix_thread.enqueue_signal(Box::new(signal));
}
Ok(())
}
/// Sends a signal to all processes except the current process and the init process, using
/// the current process as the sender.
///
/// The credentials of the current process will be checked to determine
/// if it is authorized to send the signal to the target group.
pub fn kill_all(signal: Option<UserSignal>) -> Result<()> {
let current = current!();
for process in process_table::process_table().iter() {
if Arc::ptr_eq(&current, process) || process.is_init_process() {
continue;
}
kill_process(process, signal)?;
}
Ok(())
}
fn kill_process(process: &Process, signal: Option<UserSignal>) -> Result<()> {
let threads = process.threads().lock();
let posix_threads = threads
.iter()
.map(|thread| thread.as_posix_thread().unwrap());
// First check permission
let signum = signal.map(|signal| signal.num());
let sender_ids = current_thread_sender_ids();
let mut permitted_threads = {
posix_threads.clone().filter(|posix_thread| {
posix_thread
.check_signal_perm(signum.as_ref(), &sender_ids)
.is_ok()
})
};
if permitted_threads.clone().count() == 0 {
return_errno_with_message!(Errno::EPERM, "cannot send signal to the target process");
}
let Some(signal) = signal else { return Ok(()) };
// Send the signal to any thread that does not block the signal.
for thread in permitted_threads.clone() {
if !thread.has_signal_blocked(&signal) {
thread.enqueue_signal(Box::new(signal));
return Ok(());
}
}
// If all threads block the signal, send the signal to the first thread.
let first_thread = permitted_threads.next().unwrap();
first_thread.enqueue_signal(Box::new(signal));
Ok(())
}
fn current_thread_sender_ids() -> SignalSenderIds {
let current_thread = current_thread!();
let current_posix_thread = current_thread.as_posix_thread().unwrap();
let current_process = current_posix_thread.process();
let credentials = current_posix_thread.credentials();
let ruid = credentials.ruid();
let euid = credentials.euid();
let sid = current_process.session().unwrap().sid();
SignalSenderIds::new(ruid, euid, sid)
}
/// The ids of the signal sender process.
///
/// This struct now includes effective user id, real user id and session id.
pub(super) struct SignalSenderIds {
ruid: Uid,
euid: Uid,
sid: Sid,
}
impl SignalSenderIds {
fn new(ruid: Uid, euid: Uid, sid: Sid) -> Self {
Self { ruid, euid, sid }
}
pub(super) fn ruid(&self) -> Uid {
self.ruid
}
pub(super) fn euid(&self) -> Uid {
self.euid
}
pub(super) fn sid(&self) -> Sid {
self.sid
}
}
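
Usage sketch (hypothetical helper, not part of the commit): passing `None` as the signal mirrors Linux's `kill(pid, 0)`, performing only the existence and permission checks without delivering anything.

fn can_signal(pid: Pid) -> bool {
    // Ok(()) means the target exists and the caller may signal it;
    // Err(ESRCH) or Err(EPERM) otherwise.
    kill(pid, None).is_ok()
}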

38
kernel/src/process/mod.rs Normal file

@@ -0,0 +1,38 @@
// SPDX-License-Identifier: MPL-2.0
mod clone;
pub mod credentials;
mod exit;
mod kill;
pub mod posix_thread;
#[allow(clippy::module_inception)]
mod process;
mod process_filter;
pub mod process_table;
mod process_vm;
mod program_loader;
mod rlimit;
pub mod signal;
mod status;
pub mod sync;
mod term_status;
mod wait;
pub use clone::{clone_child, CloneArgs, CloneFlags};
pub use credentials::{Credentials, Gid, Uid};
pub use exit::do_exit_group;
pub use kill::{kill, kill_all, kill_group, tgkill};
pub use process::{
ExitCode, JobControl, Pgid, Pid, Process, ProcessBuilder, ProcessGroup, Session, Sid, Terminal,
};
pub use process_filter::ProcessFilter;
pub use process_vm::{MAX_ARGV_NUMBER, MAX_ARG_LEN, MAX_ENVP_NUMBER, MAX_ENV_LEN};
pub use program_loader::{check_executable_file, load_program_to_vm};
pub use rlimit::ResourceType;
pub use term_status::TermStatus;
pub use wait::{wait_child_exit, WaitOptions};
pub(super) fn init() {
process::init();
posix_thread::futex::init();
}

117
kernel/src/process/posix_thread/builder.rs Normal file

@@ -0,0 +1,117 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use ostd::user::UserSpace;
use super::PosixThread;
use crate::{
prelude::*,
process::{
posix_thread::name::ThreadName,
signal::{sig_mask::AtomicSigMask, sig_queues::SigQueues},
Credentials, Process,
},
thread::{status::ThreadStatus, task, thread_table, Thread, Tid},
time::{clocks::ProfClock, TimerManager},
};
/// The builder to build a posix thread
pub struct PosixThreadBuilder {
// The essential part
tid: Tid,
user_space: Arc<UserSpace>,
process: Weak<Process>,
credentials: Credentials,
// Optional part
thread_name: Option<ThreadName>,
set_child_tid: Vaddr,
clear_child_tid: Vaddr,
sig_mask: AtomicSigMask,
sig_queues: SigQueues,
}
impl PosixThreadBuilder {
pub fn new(tid: Tid, user_space: Arc<UserSpace>, credentials: Credentials) -> Self {
Self {
tid,
user_space,
process: Weak::new(),
credentials,
thread_name: None,
set_child_tid: 0,
clear_child_tid: 0,
sig_mask: AtomicSigMask::new_empty(),
sig_queues: SigQueues::new(),
}
}
pub fn process(mut self, process: Weak<Process>) -> Self {
self.process = process;
self
}
pub fn thread_name(mut self, thread_name: Option<ThreadName>) -> Self {
self.thread_name = thread_name;
self
}
pub fn set_child_tid(mut self, set_child_tid: Vaddr) -> Self {
self.set_child_tid = set_child_tid;
self
}
pub fn clear_child_tid(mut self, clear_child_tid: Vaddr) -> Self {
self.clear_child_tid = clear_child_tid;
self
}
pub fn sig_mask(mut self, sig_mask: AtomicSigMask) -> Self {
self.sig_mask = sig_mask;
self
}
pub fn build(self) -> Arc<Thread> {
let Self {
tid,
user_space,
process,
credentials,
thread_name,
set_child_tid,
clear_child_tid,
sig_mask,
sig_queues,
} = self;
let thread = Arc::new_cyclic(|thread_ref| {
let task = task::create_new_user_task(user_space, thread_ref.clone());
let status = ThreadStatus::Init;
let prof_clock = ProfClock::new();
let virtual_timer_manager = TimerManager::new(prof_clock.user_clock().clone());
let prof_timer_manager = TimerManager::new(prof_clock.clone());
let posix_thread = PosixThread {
process,
name: Mutex::new(thread_name),
set_child_tid: Mutex::new(set_child_tid),
clear_child_tid: Mutex::new(clear_child_tid),
credentials,
sig_mask,
sig_queues,
sig_context: Mutex::new(None),
sig_stack: Mutex::new(None),
robust_list: Mutex::new(None),
prof_clock,
virtual_timer_manager,
prof_timer_manager,
};
Thread::new(tid, task, posix_thread, status)
});
thread_table::add_thread(thread.clone());
thread
}
}
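
A minimal usage sketch (hypothetical; it mirrors how clone.rs drives the builder): the essential parts go through `new`, everything else through the chained setters, and `build` registers the thread in the thread table.

fn build_plain_thread(tid: Tid, user_space: Arc<UserSpace>) -> Arc<Thread> {
    // Illustrative only: a thread with root credentials, no name,
    // and default signal state.
    let credentials = Credentials::new_root();
    PosixThreadBuilder::new(tid, user_space, credentials).build()
}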

66
kernel/src/process/posix_thread/exit.rs Normal file

@@ -0,0 +1,66 @@
// SPDX-License-Identifier: MPL-2.0
use super::{futex::futex_wake, robust_list::wake_robust_futex, PosixThread, PosixThreadExt};
use crate::{
prelude::*,
process::{do_exit_group, TermStatus},
thread::{thread_table, Thread, Tid},
};
/// Exits the thread if the thread is a POSIX thread.
///
/// # Panics
///
/// If the thread is not a POSIX thread, this method will panic.
pub fn do_exit(thread: Arc<Thread>, term_status: TermStatus) -> Result<()> {
if thread.status().is_exited() {
return Ok(());
}
thread.exit();
let tid = thread.tid();
let posix_thread = thread.as_posix_thread().unwrap();
let mut clear_ctid = posix_thread.clear_child_tid().lock();
// If clear_ctid != 0, do a futex wake and write zero to the clear_ctid address.
if *clear_ctid != 0 {
futex_wake(*clear_ctid, 1)?;
// FIXME: the correct write length?
CurrentUserSpace::get()
.write_val(*clear_ctid, &0u32)
.unwrap();
*clear_ctid = 0;
}
// Exit the robust list: walk the robust list, mark futex words as dead, and do futex wakes.
wake_robust_list(posix_thread, tid);
if tid != posix_thread.process().pid() {
// We don't remove main thread.
// The main thread is removed when the process is reaped.
thread_table::remove_thread(tid);
}
if posix_thread.is_main_thread(tid) || posix_thread.is_last_thread() {
// exit current process.
do_exit_group(term_status);
}
futex_wake(Arc::as_ptr(&posix_thread.process()) as Vaddr, 1)?;
Ok(())
}
/// Walks the robust futex list, marking futexes dead and waking waiters.
/// It corresponds to Linux's exit_robust_list(); errors are silently ignored.
fn wake_robust_list(thread: &PosixThread, tid: Tid) {
let mut robust_list = thread.robust_list.lock();
let list_head = match *robust_list {
Some(robust_list_head) => robust_list_head,
None => return,
};
trace!("wake the rubust_list: {:?}", list_head);
for futex_addr in list_head.futexes() {
wake_robust_futex(futex_addr, tid).unwrap();
}
*robust_list = None;
}

410
kernel/src/process/posix_thread/futex.rs Normal file

@@ -0,0 +1,410 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink};
use ostd::{
cpu::num_cpus,
sync::{Waiter, Waker},
};
use spin::Once;
use crate::prelude::*;
type FutexBitSet = u32;
type FutexBucketRef = Arc<Mutex<FutexBucket>>;
const FUTEX_OP_MASK: u32 = 0x0000_000F;
const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0;
const FUTEX_BITSET_MATCH_ANY: FutexBitSet = 0xFFFF_FFFF;
/// Does a futex wait.
pub fn futex_wait(futex_addr: u64, futex_val: i32, timeout: &Option<FutexTimeout>) -> Result<()> {
futex_wait_bitset(futex_addr as _, futex_val, timeout, FUTEX_BITSET_MATCH_ANY)
}
/// Does a futex wait with a bitset.
pub fn futex_wait_bitset(
futex_addr: Vaddr,
futex_val: i32,
timeout: &Option<FutexTimeout>,
bitset: FutexBitSet,
) -> Result<()> {
debug!(
"futex_wait_bitset addr: {:#x}, val: {}, timeout: {:?}, bitset: {:#x}",
futex_addr, futex_val, timeout, bitset
);
let futex_key = FutexKey::new(futex_addr, bitset);
let (futex_item, waiter) = FutexItem::create(futex_key);
let (_, futex_bucket_ref) = get_futex_bucket(futex_key);
// lock futex bucket ref here to avoid data race
let mut futex_bucket = futex_bucket_ref.lock();
if futex_key.load_val() != futex_val {
return_errno_with_message!(Errno::EAGAIN, "futex value does not match");
}
futex_bucket.add_item(futex_item);
// drop lock
drop(futex_bucket);
// TODO: wait on the futex item with a timeout.
waiter.wait();
Ok(())
}
/// Does a futex wake.
pub fn futex_wake(futex_addr: Vaddr, max_count: usize) -> Result<usize> {
futex_wake_bitset(futex_addr, max_count, FUTEX_BITSET_MATCH_ANY)
}
/// Does a futex wake with a bitset.
pub fn futex_wake_bitset(
futex_addr: Vaddr,
max_count: usize,
bitset: FutexBitSet,
) -> Result<usize> {
debug!(
"futex_wake_bitset addr: {:#x}, max_count: {}, bitset: {:#x}",
futex_addr, max_count, bitset
);
let futex_key = FutexKey::new(futex_addr, bitset);
let (_, futex_bucket_ref) = get_futex_bucket(futex_key);
let mut futex_bucket = futex_bucket_ref.lock();
let res = futex_bucket.remove_and_wake_items(futex_key, max_count);
drop(futex_bucket);
Ok(res)
}
/// Does a futex requeue.
pub fn futex_requeue(
futex_addr: Vaddr,
max_nwakes: usize,
max_nrequeues: usize,
futex_new_addr: Vaddr,
) -> Result<usize> {
if futex_new_addr == futex_addr {
return futex_wake(futex_addr, max_nwakes);
}
let futex_key = FutexKey::new(futex_addr, FUTEX_BITSET_MATCH_ANY);
let futex_new_key = FutexKey::new(futex_new_addr, FUTEX_BITSET_MATCH_ANY);
let (bucket_idx, futex_bucket_ref) = get_futex_bucket(futex_key);
let (new_bucket_idx, futex_new_bucket_ref) = get_futex_bucket(futex_new_key);
let nwakes = {
if bucket_idx == new_bucket_idx {
let mut futex_bucket = futex_bucket_ref.lock();
let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes);
futex_bucket.update_item_keys(futex_key, futex_new_key, max_nrequeues);
drop(futex_bucket);
nwakes
} else {
let (mut futex_bucket, mut futex_new_bucket) = {
if bucket_idx < new_bucket_idx {
let futex_bucket = futex_bucket_ref.lock();
let futex_new_bucket = futex_new_bucket_ref.lock();
(futex_bucket, futex_new_bucket)
} else {
// bucket_idx > new_bucket_idx
let futex_new_bucket = futex_new_bucket_ref.lock();
let futex_bucket = futex_bucket_ref.lock();
(futex_bucket, futex_new_bucket)
}
};
let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes);
futex_bucket.requeue_items_to_another_bucket(
futex_key,
&mut futex_new_bucket,
futex_new_key,
max_nrequeues,
);
nwakes
}
};
Ok(nwakes)
}
static FUTEX_BUCKETS: Once<FutexBucketVec> = Once::new();
/// Get the futex hash bucket count.
///
/// This number is calculated the same way as Linux's:
/// <https://github.com/torvalds/linux/blob/master/kernel/futex/core.c>
fn get_bucket_count() -> usize {
((1 << 8) * num_cpus()).next_power_of_two() as usize
}
fn get_futex_bucket(key: FutexKey) -> (usize, FutexBucketRef) {
FUTEX_BUCKETS.get().unwrap().get_bucket(key)
}
/// Initialize the futex system.
pub fn init() {
FUTEX_BUCKETS.call_once(|| FutexBucketVec::new(get_bucket_count()));
}
#[derive(Debug, Clone)]
pub struct FutexTimeout {}
impl FutexTimeout {
pub fn new() -> Self {
todo!()
}
}
struct FutexBucketVec {
vec: Vec<FutexBucketRef>,
}
impl FutexBucketVec {
pub fn new(size: usize) -> FutexBucketVec {
let mut buckets = FutexBucketVec {
vec: Vec::with_capacity(size),
};
for _ in 0..size {
let bucket = Arc::new(Mutex::new(FutexBucket::new()));
buckets.vec.push(bucket);
}
buckets
}
pub fn get_bucket(&self, key: FutexKey) -> (usize, FutexBucketRef) {
let index = (self.vec.len() - 1) & {
// The addr is a multiple of 4, so we ignore the last 2 bits
let addr = key.addr() >> 2;
// simple hash
addr / self.size()
};
(index, self.vec[index].clone())
}
fn size(&self) -> usize {
self.vec.len()
}
}
struct FutexBucket {
items: LinkedList<FutexItemAdapter>,
}
intrusive_adapter!(FutexItemAdapter = Box<FutexItem>: FutexItem { link: LinkedListAtomicLink });
impl FutexBucket {
pub fn new() -> FutexBucket {
FutexBucket {
items: LinkedList::new(FutexItemAdapter::new()),
}
}
pub fn add_item(&mut self, item: Box<FutexItem>) {
self.items.push_back(item);
}
pub fn remove_item(&mut self, item: &FutexItem) {
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() {
// The item_cursor has been checked not null.
let futex_item = item_cursor.get().unwrap();
if !futex_item.match_up(item) {
item_cursor.move_next();
continue;
} else {
let _ = item_cursor.remove();
break;
}
}
}
pub fn remove_and_wake_items(&mut self, key: FutexKey, max_count: usize) -> usize {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_count {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let item = item_cursor.remove().unwrap();
item.wake();
count += 1;
}
count
}
pub fn update_item_keys(&mut self, key: FutexKey, new_key: FutexKey, max_count: usize) {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_count {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let mut item = item_cursor.remove().unwrap();
item.key = new_key;
item_cursor.insert_before(item);
count += 1;
}
}
pub fn requeue_items_to_another_bucket(
&mut self,
key: FutexKey,
another: &mut Self,
new_key: FutexKey,
max_nrequeues: usize,
) {
let mut count = 0;
let mut item_cursor = self.items.front_mut();
while !item_cursor.is_null() && count < max_nrequeues {
// The item_cursor has been checked not null.
let item = item_cursor.get().unwrap();
if !item.key.match_up(&key) {
item_cursor.move_next();
continue;
}
let mut item = item_cursor.remove().unwrap();
item.key = new_key;
another.add_item(item);
count += 1;
}
}
}
struct FutexItem {
key: FutexKey,
waker: Arc<Waker>,
link: LinkedListAtomicLink,
}
impl FutexItem {
pub fn create(key: FutexKey) -> (Box<Self>, Waiter) {
let (waiter, waker) = Waiter::new_pair();
let futex_item = Box::new(FutexItem {
key,
waker,
link: LinkedListAtomicLink::new(),
});
(futex_item, waiter)
}
pub fn wake(&self) {
self.waker.wake_up();
}
pub fn match_up(&self, another: &Self) -> bool {
self.key.match_up(&another.key)
}
}
// The addr of a futex; it is used to distinguish different futex words.
#[derive(Debug, Clone, Copy)]
struct FutexKey {
addr: Vaddr,
bitset: FutexBitSet,
}
impl FutexKey {
pub fn new(addr: Vaddr, bitset: FutexBitSet) -> Self {
Self { addr, bitset }
}
pub fn load_val(&self) -> i32 {
// FIXME: how to implement an atomic load?
warn!("implement an atomic load");
CurrentUserSpace::get().read_val(self.addr).unwrap()
}
pub fn addr(&self) -> Vaddr {
self.addr
}
pub fn bitset(&self) -> FutexBitSet {
self.bitset
}
pub fn match_up(&self, another: &Self) -> bool {
self.addr == another.addr && (self.bitset & another.bitset) != 0
}
}
// The implementation is from Occlum.
#[derive(PartialEq, Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum FutexOp {
FUTEX_WAIT = 0,
FUTEX_WAKE = 1,
FUTEX_FD = 2,
FUTEX_REQUEUE = 3,
FUTEX_CMP_REQUEUE = 4,
FUTEX_WAKE_OP = 5,
FUTEX_LOCK_PI = 6,
FUTEX_UNLOCK_PI = 7,
FUTEX_TRYLOCK_PI = 8,
FUTEX_WAIT_BITSET = 9,
FUTEX_WAKE_BITSET = 10,
}
impl FutexOp {
pub fn from_u32(bits: u32) -> Result<FutexOp> {
match bits {
0 => Ok(FutexOp::FUTEX_WAIT),
1 => Ok(FutexOp::FUTEX_WAKE),
2 => Ok(FutexOp::FUTEX_FD),
3 => Ok(FutexOp::FUTEX_REQUEUE),
4 => Ok(FutexOp::FUTEX_CMP_REQUEUE),
5 => Ok(FutexOp::FUTEX_WAKE_OP),
6 => Ok(FutexOp::FUTEX_LOCK_PI),
7 => Ok(FutexOp::FUTEX_UNLOCK_PI),
8 => Ok(FutexOp::FUTEX_TRYLOCK_PI),
9 => Ok(FutexOp::FUTEX_WAIT_BITSET),
10 => Ok(FutexOp::FUTEX_WAKE_BITSET),
_ => return_errno_with_message!(Errno::EINVAL, "Unknown futex op"),
}
}
}
bitflags! {
pub struct FutexFlags : u32 {
const FUTEX_PRIVATE = 128;
const FUTEX_CLOCK_REALTIME = 256;
}
}
impl FutexFlags {
pub fn from_u32(bits: u32) -> Result<FutexFlags> {
FutexFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown futex flags"))
}
}
pub fn futex_op_and_flags_from_u32(bits: u32) -> Result<(FutexOp, FutexFlags)> {
let op = {
let op_bits = bits & FUTEX_OP_MASK;
FutexOp::from_u32(op_bits)?
};
let flags = {
let flags_bits = bits & FUTEX_FLAGS_MASK;
FutexFlags::from_u32(flags_bits)?
};
Ok((op, flags))
}
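// A small sketch of decoding the raw `futex_op` syscall argument with the
// helper above: 9 | 128 encodes FUTEX_WAIT_BITSET together with the
// FUTEX_PRIVATE flag.
fn decode_sketch() -> Result<()> {
    let (op, flags) = futex_op_and_flags_from_u32(9 | 128)?;
    assert_eq!(op, FutexOp::FUTEX_WAIT_BITSET);
    assert!(flags.contains(FutexFlags::FUTEX_PRIVATE));
    Ok(())
}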

View File

@ -0,0 +1,267 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use core::sync::atomic::Ordering;
use aster_rights::{ReadOp, WriteOp};
use super::{
kill::SignalSenderIds,
signal::{
sig_mask::{AtomicSigMask, SigMask, SigSet},
sig_num::SigNum,
sig_queues::SigQueues,
signals::Signal,
SigEvents, SigEventsFilter, SigStack,
},
Credentials, Process,
};
use crate::{
events::Observer,
prelude::*,
process::signal::constants::SIGCONT,
thread::Tid,
time::{clocks::ProfClock, Timer, TimerManager},
};
mod builder;
mod exit;
pub mod futex;
mod name;
mod posix_thread_ext;
mod robust_list;
pub use builder::PosixThreadBuilder;
pub use exit::do_exit;
pub use name::{ThreadName, MAX_THREAD_NAME_LEN};
pub use posix_thread_ext::PosixThreadExt;
pub use robust_list::RobustListHead;
pub struct PosixThread {
// Immutable part
process: Weak<Process>,
// Mutable part
name: Mutex<Option<ThreadName>>,
// Linux specific attributes.
// https://man7.org/linux/man-pages/man2/set_tid_address.2.html
set_child_tid: Mutex<Vaddr>,
clear_child_tid: Mutex<Vaddr>,
robust_list: Mutex<Option<RobustListHead>>,
/// Process credentials. At the kernel level, credentials are a per-thread attribute.
credentials: Credentials,
// Signal
/// Blocked signals
sig_mask: AtomicSigMask,
/// Thread-directed sigqueue
sig_queues: SigQueues,
/// Signal handler ucontext address
/// FIXME: This field may be removed. For glibc applications with the RESTORER flag set, the sig_context is always equal to rsp.
sig_context: Mutex<Option<Vaddr>>,
sig_stack: Mutex<Option<SigStack>>,
/// A profiling clock measures the user CPU time and kernel CPU time in the thread.
prof_clock: Arc<ProfClock>,
/// A manager that manages timers based on the user CPU time of the current thread.
virtual_timer_manager: Arc<TimerManager>,
/// A manager that manages timers based on the profiling clock of the current thread.
prof_timer_manager: Arc<TimerManager>,
}
impl PosixThread {
pub fn process(&self) -> Arc<Process> {
self.process.upgrade().unwrap()
}
pub fn weak_process(&self) -> Weak<Process> {
Weak::clone(&self.process)
}
pub fn thread_name(&self) -> &Mutex<Option<ThreadName>> {
&self.name
}
pub fn set_child_tid(&self) -> &Mutex<Vaddr> {
&self.set_child_tid
}
pub fn clear_child_tid(&self) -> &Mutex<Vaddr> {
&self.clear_child_tid
}
/// Get the reference to the signal mask of the thread.
///
/// Note that while this function offers mutable access to the signal mask,
/// it is not sound for callers other than the current thread to modify the
/// signal mask. They may only read the signal mask.
pub fn sig_mask(&self) -> &AtomicSigMask {
&self.sig_mask
}
pub fn sig_pending(&self) -> SigSet {
self.sig_queues.sig_pending()
}
/// Returns whether the thread has some pending signals
/// that are not blocked.
pub fn has_pending(&self) -> bool {
let blocked = self.sig_mask().load(Ordering::Relaxed);
self.sig_queues.has_pending(blocked)
}
/// Returns whether the signal is blocked by the thread.
pub(in crate::process) fn has_signal_blocked(&self, signal: &dyn Signal) -> bool {
self.sig_mask.contains(signal.num(), Ordering::Relaxed)
}
/// Checks whether the signal can be delivered to the thread.
///
/// For a signal to be delivered to the thread, the sending thread must either
/// be privileged, or the real or effective user ID of the sending thread must equal
/// the real or saved set-user-ID of the target thread.
///
/// For SIGCONT, the sending and receiving processes should belong to the same session.
pub(in crate::process) fn check_signal_perm(
&self,
signum: Option<&SigNum>,
sender: &SignalSenderIds,
) -> Result<()> {
if sender.euid().is_root() {
return Ok(());
}
if let Some(signum) = signum
&& *signum == SIGCONT
{
let receiver_sid = self.process().session().unwrap().sid();
if receiver_sid == sender.sid() {
return Ok(());
}
return_errno_with_message!(
Errno::EPERM,
"sigcont requires that sender and receiver belongs to the same session"
);
}
let (receiver_ruid, receiver_suid) = {
let credentials = self.credentials();
(credentials.ruid(), credentials.suid())
};
// FIXME: further check the below code to ensure the behavior is same as Linux. According
// to man(2) kill, the real or effective user ID of the sending process must equal the
// real or saved set-user-ID of the target process.
if sender.ruid() == receiver_ruid
|| sender.ruid() == receiver_suid
|| sender.euid() == receiver_ruid
|| sender.euid() == receiver_suid
{
return Ok(());
}
return_errno_with_message!(Errno::EPERM, "sending signal to the thread is not allowed.");
}
/// Enqueues a thread-directed signal. This method should only be used to enqueue
/// kernel signals and fault signals.
pub fn enqueue_signal(&self, signal: Box<dyn Signal>) {
self.sig_queues.enqueue(signal);
}
/// Returns a reference to the profiling clock of the current thread.
pub fn prof_clock(&self) -> &Arc<ProfClock> {
&self.prof_clock
}
/// Creates a timer based on the profiling CPU clock of the current thread.
pub fn create_prof_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.prof_timer_manager.create_timer(func)
}
/// Creates a timer based on the user CPU clock of the current thread.
pub fn create_virtual_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.virtual_timer_manager.create_timer(func)
}
/// Checks the `TimerCallback`s that are managed by the `prof_timer_manager`.
/// If any have timed out, call the corresponding callback functions.
pub fn process_expired_timers(&self) {
self.prof_timer_manager.process_expired_timers();
}
pub fn dequeue_signal(&self, mask: &SigMask) -> Option<Box<dyn Signal>> {
self.sig_queues.dequeue(mask)
}
pub fn register_sigqueue_observer(
&self,
observer: Weak<dyn Observer<SigEvents>>,
filter: SigEventsFilter,
) {
self.sig_queues.register_observer(observer, filter);
}
pub fn unregiser_sigqueue_observer(&self, observer: &Weak<dyn Observer<SigEvents>>) {
self.sig_queues.unregister_observer(observer);
}
pub fn sig_context(&self) -> &Mutex<Option<Vaddr>> {
&self.sig_context
}
pub fn sig_stack(&self) -> &Mutex<Option<SigStack>> {
&self.sig_stack
}
pub fn robust_list(&self) -> &Mutex<Option<RobustListHead>> {
&self.robust_list
}
fn is_main_thread(&self, tid: Tid) -> bool {
let process = self.process();
let pid = process.pid();
tid == pid
}
fn is_last_thread(&self) -> bool {
let process = self.process.upgrade().unwrap();
let threads = process.threads().lock();
threads
.iter()
.filter(|thread| !thread.status().is_exited())
.count()
== 0
}
/// Gets the read-only credentials of the thread.
pub fn credentials(&self) -> Credentials<ReadOp> {
self.credentials.dup().restrict()
}
/// Gets the write-only credentials of the current thread.
///
/// It is illegal to mutate the credentials from a thread other than the
/// current thread. For performance reasons, this function only checks it
/// using debug assertions.
pub fn credentials_mut(&self) -> Credentials<WriteOp> {
debug_assert!(core::ptr::eq(
current_thread!().as_posix_thread().unwrap(),
self
));
self.credentials.dup().restrict()
}
}
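// A minimal sketch of the thread-directed signal flow, reusing `KernelSignal`
// and `SIGALRM` from elsewhere in this commit (both are assumptions here, as
// this file does not import them); actual delivery logic is elided.
fn signal_flow_sketch() {
    let thread = current_thread!();
    let posix_thread = thread.as_posix_thread().unwrap();
    posix_thread.enqueue_signal(Box::new(KernelSignal::new(SIGALRM)));
    // A signal can be dequeued only if it is not blocked by the current mask.
    let blocked = posix_thread.sig_mask().load(Ordering::Relaxed);
    if let Some(_signal) = posix_thread.dequeue_signal(&blocked) {
        // Deliver the signal according to its disposition.
    }
}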

View File

@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
pub const MAX_THREAD_NAME_LEN: usize = 16;
#[derive(Debug)]
pub struct ThreadName {
inner: [u8; MAX_THREAD_NAME_LEN],
count: usize,
}
impl Default for ThreadName {
fn default() -> Self {
ThreadName::new()
}
}
impl ThreadName {
pub fn new() -> Self {
ThreadName {
inner: [0; MAX_THREAD_NAME_LEN],
count: 0,
}
}
pub fn new_from_executable_path(executable_path: &str) -> Result<Self> {
let mut thread_name = ThreadName::new();
let executable_file_name = executable_path
.split('/')
.last()
.ok_or(Error::with_message(Errno::EINVAL, "invalid elf path"))?;
let name = CString::new(executable_file_name)?;
thread_name.set_name(&name)?;
Ok(thread_name)
}
pub fn set_name(&mut self, name: &CStr) -> Result<()> {
let bytes = name.to_bytes_with_nul();
let bytes_len = bytes.len();
if bytes_len > MAX_THREAD_NAME_LEN {
// if len > MAX_THREAD_NAME_LEN, truncate it.
self.count = MAX_THREAD_NAME_LEN;
self.inner[..MAX_THREAD_NAME_LEN].clone_from_slice(&bytes[..MAX_THREAD_NAME_LEN]);
self.inner[MAX_THREAD_NAME_LEN - 1] = 0;
return Ok(());
}
self.count = bytes_len;
self.inner[..bytes_len].clone_from_slice(bytes);
Ok(())
}
pub fn name(&self) -> Result<Option<&CStr>> {
Ok(Some(CStr::from_bytes_until_nul(&self.inner)?))
}
}
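// A usage sketch: deriving a thread name from an executable path. Names longer
// than MAX_THREAD_NAME_LEN bytes (NUL included) are truncated; the path used
// here is purely illustrative.
fn thread_name_sketch() -> Result<()> {
    let name = ThreadName::new_from_executable_path("/bin/busybox")?;
    assert_eq!(name.name()?.unwrap().to_bytes(), b"busybox");
    Ok(())
}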

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::{
cpu::UserContext,
user::{UserContextApi, UserSpace},
};
use super::{builder::PosixThreadBuilder, name::ThreadName, PosixThread};
use crate::{
fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD},
prelude::*,
process::{process_vm::ProcessVm, program_loader::load_program_to_vm, Credentials, Process},
thread::{Thread, Tid},
};
pub trait PosixThreadExt {
fn as_posix_thread(&self) -> Option<&PosixThread>;
#[allow(clippy::too_many_arguments)]
fn new_posix_thread_from_executable(
tid: Tid,
credentials: Credentials,
process_vm: &ProcessVm,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>>;
}
impl PosixThreadExt for Thread {
/// This function should only be called when launching the shell.
fn new_posix_thread_from_executable(
tid: Tid,
credentials: Credentials,
process_vm: &ProcessVm,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
let elf_file = {
let fs_path = FsPath::new(AT_FDCWD, executable_path)?;
fs_resolver.lookup(&fs_path)?
};
let (_, elf_load_info) =
load_program_to_vm(process_vm, elf_file, argv, envp, fs_resolver, 1)?;
let vm_space = process_vm.root_vmar().vm_space().clone();
let mut cpu_ctx = UserContext::default();
cpu_ctx.set_instruction_pointer(elf_load_info.entry_point() as _);
cpu_ctx.set_stack_pointer(elf_load_info.user_stack_top() as _);
let user_space = Arc::new(UserSpace::new(vm_space, cpu_ctx));
let thread_name = Some(ThreadName::new_from_executable_path(executable_path)?);
let thread_builder = PosixThreadBuilder::new(tid, user_space, credentials)
.thread_name(thread_name)
.process(process);
Ok(thread_builder.build())
}
fn as_posix_thread(&self) -> Option<&PosixThread> {
self.data().downcast_ref::<PosixThread>()
}
}
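// A small sketch: `as_posix_thread` is a checked downcast over `Thread::data`,
// so pure kernel threads simply yield `None`. (`Pid` is assumed to be in scope
// here; it is defined in the parent process module.)
fn pid_of(thread: &Thread) -> Option<Pid> {
    thread.as_posix_thread().map(|posix| posix.process().pid())
}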

View File

@ -0,0 +1,158 @@
// SPDX-License-Identifier: MPL-2.0
//! The implementation of the robust list is from Occlum.
use crate::{
prelude::*,
process::{posix_thread::futex::futex_wake, Pid},
};
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
struct RobustList {
next: Vaddr, // *const Robust list
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
pub struct RobustListHead {
/// Linked list of lock entries
///
/// If it points to the head of the list, then it is the end of the list.
/// If it is an invalid user space pointer or a null pointer, stop iterating
/// the list.
list: RobustList,
/// Specifies the offset from the address of the lock entry to the address
/// of the futex.
futex_offset: isize,
/// Contains transient copy of the address of the lock entry, during list
/// insertion and removal.
list_op_pending: Vaddr, // *const RobustList
}
impl RobustListHead {
/// Returns an iterator over all futexes in the robust list.
///
/// The futex referred to by `list_op_pending`, if any, will be returned as
/// the last item.
pub fn futexes(&self) -> FutexIter<'_> {
FutexIter::new(self)
}
/// Returns the pending futex address, if one exists.
fn pending_futex_addr(&self) -> Option<Vaddr> {
if self.list_op_pending == 0 {
None
} else {
Some(self.futex_addr(self.list_op_pending))
}
}
/// Get the futex address
fn futex_addr(&self, entry_ptr: Vaddr) -> Vaddr {
(entry_ptr as isize + self.futex_offset) as _
}
}
pub struct FutexIter<'a> {
robust_list: &'a RobustListHead,
entry_ptr: Vaddr,
count: isize,
}
impl<'a> FutexIter<'a> {
pub fn new(robust_list: &'a RobustListHead) -> Self {
Self {
robust_list,
entry_ptr: robust_list.list.next,
count: 0,
}
}
// `self.count` is normally a non-negative value used to bound iteration of
// the list, avoiding excessively long or circular lists; the special value
// -1 represents the end of the iterator.
fn set_end(&mut self) {
self.count = -1;
}
fn is_end(&self) -> bool {
self.count < 0
}
}
const ROBUST_LIST_LIMIT: isize = 2048;
impl<'a> Iterator for FutexIter<'a> {
type Item = Vaddr;
fn next(&mut self) -> Option<Self::Item> {
if self.is_end() {
return None;
}
let end_ptr = self.robust_list.list.next;
while self.entry_ptr != end_ptr || self.count == 0 {
if self.count == ROBUST_LIST_LIMIT {
break;
}
if self.entry_ptr == 0 {
return None;
}
let futex_addr = if self.entry_ptr != self.robust_list.list_op_pending {
Some(self.robust_list.futex_addr(self.entry_ptr))
} else {
None
};
let Ok(robust_list) = CurrentUserSpace::get().read_val::<RobustList>(self.entry_ptr)
else {
return None;
};
self.entry_ptr = robust_list.next;
self.count += 1;
if futex_addr.is_some() {
return futex_addr;
}
}
self.set_end();
self.robust_list.pending_futex_addr()
}
}
const FUTEX_WAITERS: u32 = 0x8000_0000;
const FUTEX_OWNER_DIED: u32 = 0x4000_0000;
const FUTEX_TID_MASK: u32 = 0x3FFF_FFFF;
/// Wakes up one robust futex owned by the thread.
/// FIXME: requires atomic operations here
pub fn wake_robust_futex(futex_addr: Vaddr, tid: Pid) -> Result<()> {
let user_space = CurrentUserSpace::get();
let futex_val = {
if futex_addr == 0 {
return_errno_with_message!(Errno::EINVAL, "invalid futext addr");
}
user_space.read_val::<u32>(futex_addr)?
};
let mut old_val = futex_val;
loop {
// This futex may be held by another thread; do nothing
if old_val & FUTEX_TID_MASK != tid {
break;
}
let new_val = (old_val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
let cur_val = user_space.read_val(futex_addr)?;
if cur_val != new_val {
// The futex value has changed, let's retry with current value
old_val = cur_val;
user_space.write_val(futex_addr, &new_val)?;
continue;
}
// Wakeup one waiter
if cur_val & FUTEX_WAITERS != 0 {
debug!("wake robust futex addr: {:?}", futex_addr);
futex_wake(futex_addr, 1)?;
}
break;
}
Ok(())
}
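// A sketch of the thread-exit path: walk the dying thread's robust list and
// wake every robust futex it owns, including the entry a pending lock
// operation points at. `PosixThread` is assumed to be in scope here.
fn wake_robust_list_sketch(thread: &PosixThread, tid: Pid) {
    let robust_list = thread.robust_list().lock();
    if let Some(head) = robust_list.as_ref() {
        for futex_addr in head.futexes() {
            // An error on one entry should not stop the remaining wake-ups.
            let _ = wake_robust_futex(futex_addr, tid);
        }
    }
}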

View File

@ -0,0 +1,215 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{Pid, Process};
use crate::{
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
process::{
posix_thread::{PosixThreadBuilder, PosixThreadExt},
process_vm::ProcessVm,
rlimit::ResourceLimits,
signal::sig_disposition::SigDispositions,
Credentials,
},
sched::nice::Nice,
thread::Thread,
};
pub struct ProcessBuilder<'a> {
// Essential parts
pid: Pid,
executable_path: &'a str,
parent: Weak<Process>,
// Optional parts
main_thread_builder: Option<PosixThreadBuilder>,
argv: Option<Vec<CString>>,
envp: Option<Vec<CString>>,
process_vm: Option<ProcessVm>,
file_table: Option<Arc<Mutex<FileTable>>>,
fs: Option<Arc<RwMutex<FsResolver>>>,
umask: Option<Arc<RwLock<FileCreationMask>>>,
resource_limits: Option<ResourceLimits>,
sig_dispositions: Option<Arc<Mutex<SigDispositions>>>,
credentials: Option<Credentials>,
nice: Option<Nice>,
}
impl<'a> ProcessBuilder<'a> {
pub fn new(pid: Pid, executable_path: &'a str, parent: Weak<Process>) -> Self {
ProcessBuilder {
pid,
executable_path,
parent,
main_thread_builder: None,
argv: None,
envp: None,
process_vm: None,
file_table: None,
fs: None,
umask: None,
resource_limits: None,
sig_dispositions: None,
credentials: None,
nice: None,
}
}
pub fn main_thread_builder(&mut self, builder: PosixThreadBuilder) -> &mut Self {
self.main_thread_builder = Some(builder);
self
}
pub fn process_vm(&mut self, process_vm: ProcessVm) -> &mut Self {
self.process_vm = Some(process_vm);
self
}
pub fn file_table(&mut self, file_table: Arc<Mutex<FileTable>>) -> &mut Self {
self.file_table = Some(file_table);
self
}
pub fn fs(&mut self, fs: Arc<RwMutex<FsResolver>>) -> &mut Self {
self.fs = Some(fs);
self
}
pub fn umask(&mut self, umask: Arc<RwLock<FileCreationMask>>) -> &mut Self {
self.umask = Some(umask);
self
}
pub fn resource_limits(&mut self, resource_limits: ResourceLimits) -> &mut Self {
self.resource_limits = Some(resource_limits);
self
}
pub fn sig_dispositions(&mut self, sig_dispositions: Arc<Mutex<SigDispositions>>) -> &mut Self {
self.sig_dispositions = Some(sig_dispositions);
self
}
pub fn argv(&mut self, argv: Vec<CString>) -> &mut Self {
self.argv = Some(argv);
self
}
pub fn envp(&mut self, envp: Vec<CString>) -> &mut Self {
self.envp = Some(envp);
self
}
pub fn credentials(&mut self, credentials: Credentials) -> &mut Self {
self.credentials = Some(credentials);
self
}
pub fn nice(&mut self, nice: Nice) -> &mut Self {
self.nice = Some(nice);
self
}
fn check_build(&self) -> Result<()> {
if self.main_thread_builder.is_some() {
debug_assert!(self.parent.upgrade().is_some());
debug_assert!(self.argv.is_none());
debug_assert!(self.envp.is_none());
debug_assert!(self.credentials.is_none());
}
if self.main_thread_builder.is_none() {
debug_assert!(self.parent.upgrade().is_none());
debug_assert!(self.argv.is_some());
debug_assert!(self.envp.is_some());
debug_assert!(self.credentials.is_some());
}
Ok(())
}
pub fn build(self) -> Result<Arc<Process>> {
self.check_build()?;
let Self {
pid,
executable_path,
parent,
main_thread_builder,
argv,
envp,
process_vm,
file_table,
fs,
umask,
resource_limits,
sig_dispositions,
credentials,
nice,
} = self;
let process_vm = process_vm.unwrap_or_else(ProcessVm::alloc);
let file_table =
    file_table.unwrap_or_else(|| Arc::new(Mutex::new(FileTable::new_with_stdio())));
let fs = fs.unwrap_or_else(|| Arc::new(RwMutex::new(FsResolver::new())));
let umask = umask.unwrap_or_else(|| Arc::new(RwLock::new(FileCreationMask::default())));
let resource_limits = resource_limits.unwrap_or_default();
let sig_dispositions =
    sig_dispositions.unwrap_or_else(|| Arc::new(Mutex::new(SigDispositions::new())));
let nice = nice.unwrap_or_default();
let process = {
let threads = Vec::new();
Process::new(
pid,
parent,
threads,
executable_path.to_string(),
process_vm,
fs,
file_table,
umask,
resource_limits,
nice,
sig_dispositions,
)
};
let thread = if let Some(thread_builder) = main_thread_builder {
let builder = thread_builder.process(Arc::downgrade(&process));
builder.build()
} else {
Thread::new_posix_thread_from_executable(
pid,
credentials.unwrap(),
process.vm(),
&process.fs().read(),
executable_path,
Arc::downgrade(&process),
argv.unwrap(),
envp.unwrap(),
)?
};
process.threads().lock().push(thread);
process.set_runnable();
Ok(process)
}
}
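// A usage sketch: building a root process without a main-thread builder, so
// the thread is created from the executable (`check_build` requires argv,
// envp, and credentials in this mode). The path is illustrative, and
// `allocate_tid`/`Credentials::new_root` are taken from elsewhere in this
// commit rather than this file's imports.
fn build_sketch() -> Result<Arc<Process>> {
    let pid = allocate_tid();
    let mut builder = ProcessBuilder::new(pid, "/bin/init", Weak::new());
    builder
        .argv(vec![CString::new("/bin/init")?])
        .envp(Vec::new())
        .credentials(Credentials::new_root());
    builder.build()
}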

View File

@ -0,0 +1,171 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(unused_variables)]
use crate::{
prelude::*,
process::{
signal::{
constants::{SIGCONT, SIGHUP},
signals::kernel::KernelSignal,
Pauser,
},
ProcessGroup, Session,
},
};
/// The job control for terminals like tty and pty.
///
/// This struct is used to support shell job control, which allows users to
/// run commands in the foreground or in the background. This struct manages
/// the session and foreground process group for a terminal.
pub struct JobControl {
foreground: SpinLock<Weak<ProcessGroup>>,
session: SpinLock<Weak<Session>>,
pauser: Arc<Pauser>,
}
impl JobControl {
/// Creates a new `JobControl`
pub fn new() -> Self {
Self {
foreground: SpinLock::new(Weak::new()),
session: SpinLock::new(Weak::new()),
pauser: Pauser::new(),
}
}
// *************** Session ***************
/// Returns the session whose controlling terminal is the terminal.
fn session(&self) -> Option<Arc<Session>> {
self.session.lock().upgrade()
}
/// Sets the terminal as the controlling terminal of the `session`.
///
/// # Panics
///
/// This terminal should not belong to any session.
pub fn set_session(&self, session: &Arc<Session>) {
debug_assert!(self.session().is_none());
*self.session.lock() = Arc::downgrade(session);
}
/// Sets the terminal as the controlling terminal of the session of current process.
///
/// # Panics
///
/// This function should only be called in process context.
pub fn set_current_session(&self) -> Result<()> {
if self.session().is_some() {
return_errno_with_message!(
Errno::EPERM,
"the terminal is already controlling terminal of another session"
);
}
let current = current!();
let process_group = current.process_group().unwrap();
*self.foreground.lock() = Arc::downgrade(&process_group);
let session = current.session().unwrap();
*self.session.lock() = Arc::downgrade(&session);
self.pauser.resume_all();
Ok(())
}
/// Releases the current session from this terminal.
pub fn release_current_session(&self) -> Result<()> {
let Some(session) = self.session() else {
return_errno_with_message!(
Errno::ENOTTY,
"the terminal is not controlling terminal now"
);
};
if let Some(foreground) = self.foreground() {
foreground.broadcast_signal(KernelSignal::new(SIGHUP));
foreground.broadcast_signal(KernelSignal::new(SIGCONT));
}
Ok(())
}
// *************** Foreground process group ***************
/// Returns the foreground process group
pub fn foreground(&self) -> Option<Arc<ProcessGroup>> {
self.foreground.lock().upgrade()
}
/// Sets the foreground process group.
///
/// # Panics
///
/// The process group should belong to one session.
pub fn set_foreground(&self, process_group: Option<&Arc<ProcessGroup>>) -> Result<()> {
let Some(process_group) = process_group else {
// FIXME: should we allow this branch?
*self.foreground.lock() = Weak::new();
return Ok(());
};
let session = process_group.session().unwrap();
let Some(terminal_session) = self.session() else {
return_errno_with_message!(
Errno::EPERM,
"the terminal does not become controlling terminal of one session."
);
};
if !Arc::ptr_eq(&terminal_session, &session) {
return_errno_with_message!(
Errno::EPERM,
"the process proup belongs to different session"
);
}
*self.foreground.lock() = Arc::downgrade(process_group);
self.pauser.resume_all();
Ok(())
}
/// Waits until the current process belongs to the foreground process group.
/// If the foreground process group is `None`, this method returns immediately.
///
/// # Panics
///
/// This function should only be called in process context.
pub fn wait_until_in_foreground(&self) -> Result<()> {
// Fast path
if self.current_belongs_to_foreground() {
return Ok(());
}
// Slow path
self.pauser.pause_until(|| {
if self.current_belongs_to_foreground() {
Some(())
} else {
None
}
})
}
fn current_belongs_to_foreground(&self) -> bool {
let Some(foreground) = self.foreground() else {
return true;
};
foreground.contains_process(current!().pid())
}
}
impl Default for JobControl {
fn default() -> Self {
Self::new()
}
}
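// A sketch of how a terminal read path would use the job control: park
// background readers first, so only the foreground process group consumes
// input (SIGTTIN handling is elided here).
fn terminal_read_sketch(job_control: &JobControl) -> Result<()> {
    job_control.wait_until_in_foreground()?;
    // ... read from the terminal's input buffer ...
    Ok(())
}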

View File

@ -0,0 +1,751 @@
// SPDX-License-Identifier: MPL-2.0
use self::timer_manager::PosixTimerManager;
use super::{
posix_thread::PosixThreadExt,
process_table,
process_vm::{Heap, InitStackReader, ProcessVm},
rlimit::ResourceLimits,
signal::{
constants::SIGCHLD,
sig_disposition::SigDispositions,
sig_num::{AtomicSigNum, SigNum},
signals::Signal,
Pauser,
},
status::ProcessStatus,
Credentials, TermStatus,
};
use crate::{
device::tty::open_ntty_as_controlling_terminal,
fs::{file_table::FileTable, fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
sched::nice::Nice,
thread::{allocate_tid, Thread},
time::clocks::ProfClock,
vm::vmar::Vmar,
};
mod builder;
mod job_control;
mod process_group;
mod session;
mod terminal;
mod timer_manager;
use aster_rights::Full;
use atomic::Atomic;
pub use builder::ProcessBuilder;
pub use job_control::JobControl;
pub use process_group::ProcessGroup;
pub use session::Session;
pub use terminal::Terminal;
/// Process id.
pub type Pid = u32;
/// Process group id.
pub type Pgid = u32;
/// Session Id.
pub type Sid = u32;
pub type ExitCode = u32;
pub(super) fn init() {
timer_manager::init();
}
/// A `Process` represents a set of threads that share the same user space.
pub struct Process {
// Immutable Part
pid: Pid,
process_vm: ProcessVm,
/// Wait for child status changed
children_pauser: Arc<Pauser>,
// Mutable Part
/// The executable path.
executable_path: RwLock<String>,
/// The threads
threads: Mutex<Vec<Arc<Thread>>>,
/// Process status
status: Mutex<ProcessStatus>,
/// Parent process
pub(super) parent: Mutex<Weak<Process>>,
/// Children processes
children: Mutex<BTreeMap<Pid, Arc<Process>>>,
/// Process group
pub(super) process_group: Mutex<Weak<ProcessGroup>>,
/// File table
file_table: Arc<Mutex<FileTable>>,
/// FsResolver
fs: Arc<RwMutex<FsResolver>>,
/// umask
umask: Arc<RwLock<FileCreationMask>>,
/// resource limits
resource_limits: Mutex<ResourceLimits>,
/// Scheduling priority nice value
/// According to POSIX.1, the nice value is a per-process attribute,
/// the threads in a process should share a nice value.
nice: Atomic<Nice>,
// Signal
/// Sig dispositions
sig_dispositions: Arc<Mutex<SigDispositions>>,
/// The signal that the process should receive when parent process exits.
parent_death_signal: AtomicSigNum,
/// A profiling clock measures the user CPU time and kernel CPU time of the current process.
prof_clock: Arc<ProfClock>,
/// A manager that manages timer resources and utilities of the process.
timer_manager: PosixTimerManager,
}
impl Process {
/// Returns the current process.
///
/// It returns `None` if:
/// - the function is called in the bootstrap context;
/// - or if the current task is not associated with a process.
pub fn current() -> Option<Arc<Process>> {
Some(Thread::current()?.as_posix_thread()?.process())
}
#[allow(clippy::too_many_arguments)]
fn new(
pid: Pid,
parent: Weak<Process>,
threads: Vec<Arc<Thread>>,
executable_path: String,
process_vm: ProcessVm,
fs: Arc<RwMutex<FsResolver>>,
file_table: Arc<Mutex<FileTable>>,
umask: Arc<RwLock<FileCreationMask>>,
resource_limits: ResourceLimits,
nice: Nice,
sig_dispositions: Arc<Mutex<SigDispositions>>,
) -> Arc<Self> {
// SIGCHLD does not interrupt the pauser. A child process will
// resume the paused parent when it exits.
let children_pauser = Pauser::new_with_mask(SIGCHLD.into());
let prof_clock = ProfClock::new();
Arc::new_cyclic(|process_ref: &Weak<Process>| Self {
pid,
threads: Mutex::new(threads),
executable_path: RwLock::new(executable_path),
process_vm,
children_pauser,
status: Mutex::new(ProcessStatus::Uninit),
parent: Mutex::new(parent),
children: Mutex::new(BTreeMap::new()),
process_group: Mutex::new(Weak::new()),
file_table,
fs,
umask,
sig_dispositions,
parent_death_signal: AtomicSigNum::new_empty(),
resource_limits: Mutex::new(resource_limits),
nice: Atomic::new(nice),
timer_manager: PosixTimerManager::new(&prof_clock, process_ref),
prof_clock,
})
}
/// Initializes a user process and runs it.
pub fn spawn_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
// Spawning a user process requires an absolute executable path.
debug_assert!(executable_path.starts_with('/'));
let process = Process::create_user_process(executable_path, argv, envp)?;
open_ntty_as_controlling_terminal(&process)?;
process.run();
Ok(process)
}
fn create_user_process(
executable_path: &str,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>> {
let process_builder = {
let pid = allocate_tid();
let parent = Weak::new();
let credentials = Credentials::new_root();
let mut builder = ProcessBuilder::new(pid, executable_path, parent);
builder.argv(argv).envp(envp).credentials(credentials);
builder
};
let process = process_builder.build()?;
// Lock order: session table -> group table -> process table -> group of process
// -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut process_table_mut = process_table::process_table_mut();
// Creates new group
let group = ProcessGroup::new(process.clone());
*process.process_group.lock() = Arc::downgrade(&group);
group_table_mut.insert(group.pgid(), group.clone());
// Creates new session
let session = Session::new(group.clone());
group.inner.lock().session = Arc::downgrade(&session);
session.inner.lock().leader = Some(process.clone());
session_table_mut.insert(session.sid(), session);
process_table_mut.insert(process.pid(), process.clone());
Ok(process)
}
/// Starts running the current process.
pub fn run(&self) {
let threads = self.threads.lock();
// When the process starts running, it should have exactly one thread.
debug_assert!(threads.len() == 1);
debug_assert!(self.is_runnable());
let thread = threads[0].clone();
// Do not hold the lock while running the thread.
drop(threads);
thread.run();
}
// *********** Basic structures ***********
pub fn pid(&self) -> Pid {
self.pid
}
/// Gets the profiling clock of the process.
pub fn prof_clock(&self) -> &Arc<ProfClock> {
&self.prof_clock
}
/// Gets the timer resources and utilities of the process.
pub fn timer_manager(&self) -> &PosixTimerManager {
&self.timer_manager
}
pub fn threads(&self) -> &Mutex<Vec<Arc<Thread>>> {
&self.threads
}
pub fn executable_path(&self) -> String {
self.executable_path.read().clone()
}
pub fn set_executable_path(&self, executable_path: String) {
*self.executable_path.write() = executable_path;
}
pub fn resource_limits(&self) -> &Mutex<ResourceLimits> {
&self.resource_limits
}
pub fn nice(&self) -> &Atomic<Nice> {
&self.nice
}
pub fn main_thread(&self) -> Option<Arc<Thread>> {
self.threads
.lock()
.iter()
.find(|thread| thread.tid() == self.pid)
.cloned()
}
// *********** Parent and child ***********
pub fn parent(&self) -> Option<Arc<Process>> {
self.parent.lock().upgrade()
}
pub fn is_init_process(&self) -> bool {
self.parent().is_none()
}
pub(super) fn children(&self) -> &Mutex<BTreeMap<Pid, Arc<Process>>> {
&self.children
}
pub fn has_child(&self, pid: &Pid) -> bool {
self.children.lock().contains_key(pid)
}
pub fn children_pauser(&self) -> &Arc<Pauser> {
&self.children_pauser
}
// *********** Process group & Session***********
/// Returns the process group ID of the process.
pub fn pgid(&self) -> Pgid {
if let Some(process_group) = self.process_group.lock().upgrade() {
process_group.pgid()
} else {
0
}
}
/// Returns the process group which the process belongs to.
pub fn process_group(&self) -> Option<Arc<ProcessGroup>> {
self.process_group.lock().upgrade()
}
/// Returns whether `self` is the leader of process group.
fn is_group_leader(self: &Arc<Self>) -> bool {
let Some(process_group) = self.process_group() else {
return false;
};
let Some(leader) = process_group.leader() else {
return false;
};
Arc::ptr_eq(self, &leader)
}
/// Returns the session which the process belongs to.
pub fn session(&self) -> Option<Arc<Session>> {
let process_group = self.process_group()?;
process_group.session()
}
/// Returns whether the process is session leader.
pub fn is_session_leader(self: &Arc<Self>) -> bool {
let session = self.session().unwrap();
let Some(leading_process) = session.leader() else {
return false;
};
Arc::ptr_eq(self, &leading_process)
}
/// Moves the process to the new session.
///
/// If the process is already session leader, this method does nothing.
///
/// Otherwise, this method creates a new process group in a new session
/// and moves the process to the session, returning the new session.
///
/// This method may return the following errors:
/// * `EPERM`, if the process is a process group leader, or some existing session
/// or process group has the same ID as the process.
pub fn to_new_session(self: &Arc<Self>) -> Result<Arc<Session>> {
if self.is_session_leader() {
return Ok(self.session().unwrap());
}
if self.is_group_leader() {
return_errno_with_message!(
Errno::EPERM,
"process group leader cannot be moved to new session."
);
}
let session = self.session().unwrap();
// Lock order: session table -> group table -> group of process -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
if session_table_mut.contains_key(&self.pid) {
return_errno_with_message!(Errno::EPERM, "cannot create new session");
}
if group_table_mut.contains_key(&self.pid) {
return_errno_with_message!(Errno::EPERM, "cannot create process group");
}
// Removes the process from old group
if let Some(old_group) = self_group_mut.upgrade() {
let mut group_inner = old_group.inner.lock();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
debug_assert!(session_inner.process_groups.contains_key(&old_group.pgid()));
session_inner.process_groups.remove(&old_group.pgid());
if session_inner.is_empty() {
session_table_mut.remove(&session.sid());
}
}
}
// Creates a new process group
let new_group = ProcessGroup::new(self.clone());
*self_group_mut = Arc::downgrade(&new_group);
group_table_mut.insert(new_group.pgid(), new_group.clone());
// Creates a new session
let new_session = Session::new(new_group.clone());
let mut new_group_inner = new_group.inner.lock();
new_group_inner.session = Arc::downgrade(&new_session);
new_session.inner.lock().leader = Some(self.clone());
session_table_mut.insert(new_session.sid(), new_session.clone());
// Removes the process from session.
let mut session_inner = session.inner.lock();
session_inner.remove_process(self);
Ok(new_session)
}
/// Moves the process to other process group.
///
/// * If the group already exists, the process and the group should belong to the same session.
/// * If the group does not exist, this method creates a new group for the process and moves the
/// process to the group. The group is added to the session of the process.
///
/// This method may return `EPERM` in following cases:
/// * The process is session leader;
/// * The group already exists, but the group does not belong to the same session as the process;
/// * The group does not exist, but `pgid` is not equal to `pid` of the process.
pub fn to_other_group(self: &Arc<Self>, pgid: Pgid) -> Result<()> {
// if the process already belongs to the process group
if self.pgid() == pgid {
return Ok(());
}
if self.is_session_leader() {
return_errno_with_message!(Errno::EPERM, "the process cannot be a session leader");
}
if let Some(process_group) = process_table::get_process_group(&pgid) {
let session = self.session().unwrap();
if !session.contains_process_group(&process_group) {
return_errno_with_message!(
Errno::EPERM,
"the group and process does not belong to same session"
);
}
self.to_specified_group(&process_group)?;
} else {
if pgid != self.pid() {
return_errno_with_message!(
Errno::EPERM,
"the new process group should have the same ID as the process."
);
}
self.to_new_group()?;
}
Ok(())
}
/// Creates a new process group and moves the process to the group.
///
/// The new group will be added to the same session as the process.
fn to_new_group(self: &Arc<Self>) -> Result<()> {
let session = self.session().unwrap();
// Lock order: group table -> group of process -> group inner -> session inner
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
// Removes the process from old group
if let Some(old_group) = self_group_mut.upgrade() {
let mut group_inner = old_group.inner.lock();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
debug_assert!(session_inner.process_groups.contains_key(&old_group.pgid()));
// The old session won't be empty, since we will add a new group to the session.
session_inner.process_groups.remove(&old_group.pgid());
}
}
// Creates a new process group. Adds the new group to group table and session.
let new_group = ProcessGroup::new(self.clone());
let mut new_group_inner = new_group.inner.lock();
let mut session_inner = session.inner.lock();
*self_group_mut = Arc::downgrade(&new_group);
group_table_mut.insert(new_group.pgid(), new_group.clone());
new_group_inner.session = Arc::downgrade(&session);
session_inner
.process_groups
.insert(new_group.pgid(), new_group.clone());
Ok(())
}
/// Moves the process to a specified group.
///
/// The caller needs to ensure that the process and the group belongs to the same session.
fn to_specified_group(self: &Arc<Process>, group: &Arc<ProcessGroup>) -> Result<()> {
// Lock order: group table -> group of process -> group inner (small pgid -> big pgid)
let mut group_table_mut = process_table::group_table_mut();
let mut self_group_mut = self.process_group.lock();
// Removes the process from old group
let mut group_inner = if let Some(old_group) = self_group_mut.upgrade() {
// Lock order: group with smaller pgid first
let (mut old_group_inner, group_inner) = match old_group.pgid().cmp(&group.pgid()) {
core::cmp::Ordering::Equal => return Ok(()),
core::cmp::Ordering::Less => (old_group.inner.lock(), group.inner.lock()),
core::cmp::Ordering::Greater => {
let group_inner = group.inner.lock();
let old_group_inner = old_group.inner.lock();
(old_group_inner, group_inner)
}
};
old_group_inner.remove_process(&self.pid);
*self_group_mut = Weak::new();
if old_group_inner.is_empty() {
group_table_mut.remove(&old_group.pgid());
}
group_inner
} else {
group.inner.lock()
};
// Adds the process to the specified group
group_inner.processes.insert(self.pid, self.clone());
*self_group_mut = Arc::downgrade(group);
Ok(())
}
// ************** Virtual Memory *************
pub fn vm(&self) -> &ProcessVm {
&self.process_vm
}
pub fn root_vmar(&self) -> &Vmar<Full> {
self.process_vm.root_vmar()
}
pub fn heap(&self) -> &Heap {
self.process_vm.heap()
}
pub fn init_stack_reader(&self) -> InitStackReader {
self.process_vm.init_stack_reader()
}
// ************** File system ****************
pub fn file_table(&self) -> &Arc<Mutex<FileTable>> {
&self.file_table
}
pub fn fs(&self) -> &Arc<RwMutex<FsResolver>> {
&self.fs
}
pub fn umask(&self) -> &Arc<RwLock<FileCreationMask>> {
&self.umask
}
// ****************** Signal ******************
pub fn sig_dispositions(&self) -> &Arc<Mutex<SigDispositions>> {
&self.sig_dispositions
}
/// Enqueues a process-directed signal. This method should only be used to enqueue
/// kernel signals and fault signals.
///
/// The signal may be delivered to any one of the threads that does not currently have the
/// signal blocked. If more than one of the threads has the signal unblocked, then this method
/// chooses an arbitrary thread to which to deliver the signal.
///
/// TODO: restrict this method with an access control tool.
pub fn enqueue_signal(&self, signal: impl Signal + Clone + 'static) {
if self.is_zombie() {
return;
}
// TODO: check that the signal is not user signal
// Enqueue signal to the first thread that does not block the signal
let threads = self.threads.lock();
for thread in threads.iter() {
let posix_thread = thread.as_posix_thread().unwrap();
if !posix_thread.has_signal_blocked(&signal) {
posix_thread.enqueue_signal(Box::new(signal));
return;
}
}
// If all threads block the signal, enqueue signal to the first thread
let thread = threads.iter().next().unwrap();
let posix_thread = thread.as_posix_thread().unwrap();
posix_thread.enqueue_signal(Box::new(signal));
}
/// Clears the parent death signal.
pub fn clear_parent_death_signal(&self) {
self.parent_death_signal.clear();
}
/// Sets the parent death signal as `signum`.
pub fn set_parent_death_signal(&self, sig_num: SigNum) {
self.parent_death_signal.set(sig_num);
}
/// Returns the parent death signal.
///
/// The parent death signal is the signal that will be sent to child processes
/// when the process exits.
pub fn parent_death_signal(&self) -> Option<SigNum> {
self.parent_death_signal.as_sig_num()
}
// ******************* Status ********************
fn set_runnable(&self) {
self.status.lock().set_runnable();
}
fn is_runnable(&self) -> bool {
self.status.lock().is_runnable()
}
pub fn is_zombie(&self) -> bool {
self.status.lock().is_zombie()
}
pub fn set_zombie(&self, term_status: TermStatus) {
*self.status.lock() = ProcessStatus::Zombie(term_status);
}
pub fn exit_code(&self) -> Option<ExitCode> {
match &*self.status.lock() {
ProcessStatus::Runnable | ProcessStatus::Uninit => None,
ProcessStatus::Zombie(term_status) => Some(term_status.as_u32()),
}
}
}
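// A usage sketch: spawning the first user process, assuming the root
// filesystem provides /bin/sh (path and environment are illustrative).
fn spawn_sketch() -> Result<()> {
    let argv = vec![CString::new("/bin/sh")?];
    let envp = vec![CString::new("PATH=/bin")?];
    let process = Process::spawn_user_process("/bin/sh", argv, envp)?;
    debug!("spawned the init process, pid = {}", process.pid());
    Ok(())
}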
#[cfg(ktest)]
mod test {
use ostd::prelude::*;
use super::*;
fn new_process(parent: Option<Arc<Process>>) -> Arc<Process> {
crate::util::random::init();
crate::fs::rootfs::init_root_mount();
let pid = allocate_tid();
let parent = if let Some(parent) = parent {
Arc::downgrade(&parent)
} else {
Weak::new()
};
Process::new(
pid,
parent,
vec![],
String::new(),
ProcessVm::alloc(),
Arc::new(RwMutex::new(FsResolver::new())),
Arc::new(Mutex::new(FileTable::new())),
Arc::new(RwLock::new(FileCreationMask::default())),
ResourceLimits::default(),
Nice::default(),
Arc::new(Mutex::new(SigDispositions::default())),
)
}
fn new_process_in_session(parent: Option<Arc<Process>>) -> Arc<Process> {
// Lock order: session table -> group table -> group of process -> group inner
// -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let process = new_process(parent);
// Creates new group
let group = ProcessGroup::new(process.clone());
*process.process_group.lock() = Arc::downgrade(&group);
// Creates new session
let sess = Session::new(group.clone());
group.inner.lock().session = Arc::downgrade(&sess);
sess.inner.lock().leader = Some(process.clone());
group_table_mut.insert(group.pgid(), group);
session_table_mut.insert(sess.sid(), sess);
process
}
fn remove_session_and_group(process: Arc<Process>) {
// Lock order: session table -> group table
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
if let Some(sess) = process.session() {
session_table_mut.remove(&sess.sid());
}
if let Some(group) = process.process_group() {
group_table_mut.remove(&group.pgid());
}
}
#[ktest]
fn init_process() {
crate::time::clocks::init_for_ktest();
let process = new_process(None);
assert!(process.process_group().is_none());
assert!(process.session().is_none());
}
#[ktest]
fn init_process_in_session() {
crate::time::clocks::init_for_ktest();
let process = new_process_in_session(None);
assert!(process.is_group_leader());
assert!(process.is_session_leader());
remove_session_and_group(process);
}
#[ktest]
fn to_new_session() {
crate::time::clocks::init_for_ktest();
let process = new_process_in_session(None);
let sess = process.session().unwrap();
sess.inner.lock().leader = None;
assert!(!process.is_session_leader());
assert!(process
.to_new_session()
.is_err_and(|e| e.error() == Errno::EPERM));
let group = process.process_group().unwrap();
group.inner.lock().leader = None;
assert!(!process.is_group_leader());
assert!(process
.to_new_session()
.is_err_and(|e| e.error() == Errno::EPERM));
}
}

View File

@ -0,0 +1,131 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::collections::btree_map::Values;
use super::{Pgid, Pid, Process, Session};
use crate::{prelude::*, process::signal::signals::Signal};
/// `ProcessGroup` represents a set of processes. Each `ProcessGroup` has a unique
/// identifier `pgid`.
pub struct ProcessGroup {
pgid: Pgid,
pub(in crate::process) inner: Mutex<Inner>,
}
pub(in crate::process) struct Inner {
pub(in crate::process) processes: BTreeMap<Pid, Arc<Process>>,
pub(in crate::process) leader: Option<Arc<Process>>,
pub(in crate::process) session: Weak<Session>,
}
impl Inner {
pub(in crate::process) fn remove_process(&mut self, pid: &Pid) {
let Some(process) = self.processes.remove(pid) else {
return;
};
if let Some(leader) = &self.leader
&& Arc::ptr_eq(leader, &process)
{
self.leader = None;
}
}
pub(in crate::process) fn is_empty(&self) -> bool {
self.processes.is_empty()
}
}
impl ProcessGroup {
/// Creates a new process group with one process. The pgid is the same as the process
/// id. The process will become the leading process of the new process group.
///
/// The caller needs to ensure that the process does not belong to any group.
pub(in crate::process) fn new(process: Arc<Process>) -> Arc<Self> {
let pid = process.pid();
let inner = {
let mut processes = BTreeMap::new();
processes.insert(pid, process.clone());
Inner {
processes,
leader: Some(process.clone()),
session: Weak::new(),
}
};
Arc::new(ProcessGroup {
pgid: pid,
inner: Mutex::new(inner),
})
}
/// Returns whether self contains a process with `pid`.
pub(in crate::process) fn contains_process(&self, pid: Pid) -> bool {
self.inner.lock().processes.contains_key(&pid)
}
/// Returns the process group identifier
pub fn pgid(&self) -> Pgid {
self.pgid
}
/// Acquires a lock on the process group.
pub fn lock(&self) -> ProcessGroupGuard {
ProcessGroupGuard {
inner: self.inner.lock(),
}
}
/// Broadcasts a signal to all processes in the group.
///
/// This method should only be used to broadcast fault signals and kernel signals.
///
/// TODO: do more checks to forbid user signals
pub fn broadcast_signal(&self, signal: impl Signal + Clone + 'static) {
for process in self.inner.lock().processes.values() {
process.enqueue_signal(signal.clone());
}
}
/// Returns the leader process.
pub fn leader(&self) -> Option<Arc<Process>> {
self.inner.lock().leader.clone()
}
/// Returns the session which the group belongs to
pub fn session(&self) -> Option<Arc<Session>> {
self.inner.lock().session.upgrade()
}
}
/// A scoped lock for a process group.
///
/// It provides some public methods to prevent the exposure of the inner type.
#[clippy::has_significant_drop]
#[must_use]
pub struct ProcessGroupGuard<'a> {
inner: MutexGuard<'a, Inner>,
}
impl<'a> ProcessGroupGuard<'a> {
/// Returns an iterator over the processes in the group.
pub fn iter(&self) -> ProcessGroupIter {
ProcessGroupIter {
inner: self.inner.processes.values(),
}
}
}
/// An iterator over the processes of the process group.
pub struct ProcessGroupIter<'a> {
inner: Values<'a, Pid, Arc<Process>>,
}
impl<'a> Iterator for ProcessGroupIter<'a> {
type Item = &'a Arc<Process>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
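// A small sketch: iterating the members of a group through the scoped guard,
// which keeps the inner lock held for exactly the duration of the loop.
fn log_members(group: &ProcessGroup) {
    let guard = group.lock();
    for process in guard.iter() {
        debug!("pgid {} member: pid {}", group.pgid(), process.pid());
    }
}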

View File

@ -0,0 +1,136 @@
// SPDX-License-Identifier: MPL-2.0
use super::{Pgid, Process, ProcessGroup, Sid, Terminal};
use crate::prelude::*;
/// A `Session` is a collection of related process groups. Each session has a
/// unique identifier `sid`. Process groups and sessions form a two-level
/// hierarchical relationship between processes.
///
/// **Leader**: A *session leader* is the process that creates a new session and whose process
/// ID becomes the session ID.
///
/// **Controlling terminal**: The terminal can be used to manage all processes in the session. The
/// controlling terminal is established when the session leader first opens a terminal.
pub struct Session {
sid: Sid,
pub(in crate::process) inner: Mutex<Inner>,
}
pub(in crate::process) struct Inner {
pub(in crate::process) process_groups: BTreeMap<Pgid, Arc<ProcessGroup>>,
pub(in crate::process) leader: Option<Arc<Process>>,
pub(in crate::process) terminal: Option<Arc<dyn Terminal>>,
}
impl Inner {
pub(in crate::process) fn is_empty(&self) -> bool {
self.process_groups.is_empty()
}
pub(in crate::process) fn remove_process(&mut self, process: &Arc<Process>) {
if let Some(leader) = &self.leader
&& Arc::ptr_eq(leader, process)
{
self.leader = None;
}
}
pub(in crate::process) fn remove_process_group(&mut self, pgid: &Pgid) {
self.process_groups.remove(pgid);
}
}
impl Session {
/// Creates a new session for the process group. The process group becomes a member of
/// the new session.
///
/// The caller needs to ensure that the group does not belong to any session, and the caller
/// should set the leader process after creating the session.
pub(in crate::process) fn new(group: Arc<ProcessGroup>) -> Arc<Self> {
let sid = group.pgid();
let inner = {
let mut process_groups = BTreeMap::new();
process_groups.insert(group.pgid(), group);
Inner {
process_groups,
leader: None,
terminal: None,
}
};
Arc::new(Self {
sid,
inner: Mutex::new(inner),
})
}
/// Returns the session id
pub fn sid(&self) -> Sid {
self.sid
}
/// Returns the leader process.
pub fn leader(&self) -> Option<Arc<Process>> {
self.inner.lock().leader.clone()
}
/// Returns whether `self` contains the `process_group`
pub(in crate::process) fn contains_process_group(
self: &Arc<Self>,
process_group: &Arc<ProcessGroup>,
) -> bool {
self.inner
.lock()
.process_groups
.contains_key(&process_group.pgid())
}
/// Sets the terminal as the controlling terminal of the session. The `get_terminal` closure
/// should set the session for the terminal and return the terminal.
///
/// If the session already has a controlling terminal, this method will return `Err(EPERM)`.
pub fn set_terminal<F>(&self, get_terminal: F) -> Result<()>
where
F: Fn() -> Result<Arc<dyn Terminal>>,
{
let mut inner = self.inner.lock();
if inner.terminal.is_some() {
return_errno_with_message!(
Errno::EPERM,
"current session already has controlling terminal"
);
}
let terminal = get_terminal()?;
inner.terminal = Some(terminal);
Ok(())
}
/// Releases the controlling terminal of the session.
///
/// If the session does not have controlling terminal, this method will return `ENOTTY`.
pub fn release_terminal<F>(&self, release_session: F) -> Result<()>
where
F: Fn(&Arc<dyn Terminal>) -> Result<()>,
{
let mut inner = self.inner.lock();
if inner.terminal.is_none() {
return_errno_with_message!(
Errno::ENOTTY,
"current session does not has controlling terminal"
);
}
let terminal = inner.terminal.as_ref().unwrap();
release_session(terminal)?;
inner.terminal = None;
Ok(())
}
/// Returns the controlling terminal of `self`.
pub fn terminal(&self) -> Option<Arc<dyn Terminal>> {
self.inner.lock().terminal.clone()
}
}

View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: MPL-2.0
use super::JobControl;
use crate::{
fs::inode_handle::FileIo,
prelude::*,
process::{process_table, Pgid, ProcessGroup},
};
/// A terminal is used to interact with the system. A terminal can support
/// shell job control.
///
/// We currently support two kinds of terminals: the tty and the pty.
pub trait Terminal: Send + Sync + FileIo {
// *************** Foreground ***************
/// Returns the foreground process group
fn foreground(&self) -> Option<Arc<ProcessGroup>> {
self.job_control().foreground()
}
/// Sets the foreground process group of this terminal.
///
/// If the terminal is not controlling terminal, this method returns `ENOTTY`.
///
/// # Panics
///
/// This method should be called in process context.
fn set_foreground(&self, pgid: &Pgid) -> Result<()> {
if !self.is_controlling_terminal() {
return_errno_with_message!(Errno::ENOTTY, "self is not controlling terminal");
}
let foreground = process_table::get_process_group(pgid);
self.job_control().set_foreground(foreground.as_ref())
}
// *************** Session and controlling terminal ***************
/// Returns whether the terminal is the controlling terminal of current process.
///
/// # Panics
///
/// This method should be called in process context.
fn is_controlling_terminal(&self) -> bool {
let session = current!().session().unwrap();
let Some(terminal) = session.terminal() else {
return false;
};
let arc_self = self.arc_self();
Arc::ptr_eq(&terminal, &arc_self)
}
/// Sets the terminal as the controlling terminal of the session of current process.
///
/// If `self` is not the session leader, or the terminal is the controlling terminal of another
/// session, or the session already has a controlling terminal, this method returns `EPERM`.
///
/// # Panics
///
/// This method should only be called in process context.
fn set_current_session(&self) -> Result<()> {
if !current!().is_session_leader() {
return_errno_with_message!(Errno::EPERM, "current process is not session leader");
}
let get_terminal = || {
self.job_control().set_current_session()?;
Ok(self.arc_self())
};
let session = current!().session().unwrap();
session.set_terminal(get_terminal)
}
/// Releases the terminal from the session of current process if the terminal is the controlling
/// terminal of the session.
///
/// If the terminal is not the controlling terminal of the session, this method will return `ENOTTY`.
///
/// # Panics
///
/// This method should only be called in process context.
fn release_current_session(&self) -> Result<()> {
if !self.is_controlling_terminal() {
return_errno_with_message!(Errno::ENOTTY, "release wrong tty");
}
let current = current!();
if !current.is_session_leader() {
warn!("TODO: release tty for process that is not session leader");
return Ok(());
}
let release_session = |_: &Arc<dyn Terminal>| self.job_control().release_current_session();
let session = current.session().unwrap();
session.release_terminal(release_session)
}
/// Returns the job control of the terminal.
fn job_control(&self) -> &JobControl;
fn arc_self(&self) -> Arc<dyn Terminal>;
}

View File

@ -0,0 +1,212 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::{
boxed::Box,
sync::{Arc, Weak},
vec::Vec,
};
use core::time::Duration;
use id_alloc::IdAlloc;
use ostd::{
arch::{
timer::{self, TIMER_FREQ},
x86::trap::is_kernel_interrupted,
},
sync::Mutex,
};
use super::Process;
use crate::{
process::{
posix_thread::PosixThreadExt,
signal::{constants::SIGALRM, signals::kernel::KernelSignal},
},
thread::{
work_queue::{submit_work_item, work_item::WorkItem},
Thread,
},
time::{
clocks::{ProfClock, RealTimeClock},
Timer, TimerManager,
},
};
/// Updates the CPU time recorded in the CPU clocks of current Process.
///
/// This function will be invoked at the system timer interrupt, and
/// invoke the callbacks of expired timers which are based on the updated
/// CPU clock.
fn update_cpu_time() {
let Some(current_thread) = Thread::current() else {
return;
};
let Some(posix_thread) = current_thread.as_posix_thread() else {
return;
};
let process = posix_thread.process();
let timer_manager = process.timer_manager();
let jiffies_interval = Duration::from_millis(1000 / TIMER_FREQ);
// Based on whether the timer interrupt occurs in kernel mode or user mode,
// the function will add the duration of one timer interrupt interval to the
// corresponding CPU clocks.
if is_kernel_interrupted() {
posix_thread
.prof_clock()
.kernel_clock()
.add_time(jiffies_interval);
process
.prof_clock()
.kernel_clock()
.add_time(jiffies_interval);
} else {
posix_thread
.prof_clock()
.user_clock()
.add_time(jiffies_interval);
process.prof_clock().user_clock().add_time(jiffies_interval);
timer_manager
.virtual_timer()
.timer_manager()
.process_expired_timers();
}
timer_manager
.prof_timer()
.timer_manager()
.process_expired_timers();
posix_thread.process_expired_timers();
}
/// Registers a function to update the CPU clock in processes and
/// threads during the system timer interrupt.
pub(super) fn init() {
timer::register_callback(update_cpu_time);
}
/// Represents timer resources and utilities for a POSIX process.
pub struct PosixTimerManager {
/// A real-time countdown timer, measuring in wall clock time.
alarm_timer: Arc<Timer>,
/// A timer based on user CPU clock.
virtual_timer: Arc<Timer>,
/// A timer based on the profiling clock.
prof_timer: Arc<Timer>,
/// An ID allocator to allocate unique timer IDs.
id_allocator: Mutex<IdAlloc>,
/// A container managing all POSIX timers created by `timer_create()` syscall
/// within the process context.
posix_timers: Mutex<Vec<Option<Arc<Timer>>>>,
}
fn create_process_timer_callback(process_ref: &Weak<Process>) -> impl Fn() + Clone {
let current_process = process_ref.clone();
let send_signal = move || {
let signal = KernelSignal::new(SIGALRM);
if let Some(process) = current_process.upgrade() {
process.enqueue_signal(signal);
}
};
let work_func = Box::new(send_signal);
let work_item = Arc::new(WorkItem::new(work_func));
move || {
submit_work_item(
work_item.clone(),
crate::thread::work_queue::WorkPriority::High,
);
}
}
impl PosixTimerManager {
pub(super) fn new(prof_clock: &Arc<ProfClock>, process_ref: &Weak<Process>) -> Self {
const MAX_NUM_OF_POSIX_TIMERS: usize = 10000;
let callback = create_process_timer_callback(process_ref);
let alarm_timer = RealTimeClock::timer_manager().create_timer(callback.clone());
let virtual_timer =
TimerManager::new(prof_clock.user_clock().clone()).create_timer(callback.clone());
let prof_timer = TimerManager::new(prof_clock.clone()).create_timer(callback);
Self {
alarm_timer,
virtual_timer,
prof_timer,
id_allocator: Mutex::new(IdAlloc::with_capacity(MAX_NUM_OF_POSIX_TIMERS)),
posix_timers: Mutex::new(Vec::new()),
}
}
/// Gets the alarm timer of the corresponding process.
pub fn alarm_timer(&self) -> &Arc<Timer> {
&self.alarm_timer
}
/// Gets the virtual timer of the corresponding process.
pub fn virtual_timer(&self) -> &Arc<Timer> {
&self.virtual_timer
}
/// Gets the profiling timer of the corresponding process.
pub fn prof_timer(&self) -> &Arc<Timer> {
&self.prof_timer
}
/// Creates a timer based on the profiling CPU clock of the current process.
pub fn create_prof_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.prof_timer.timer_manager().create_timer(func)
}
/// Creates a timer based on the user CPU clock of the current process.
pub fn create_virtual_timer<F>(&self, func: F) -> Arc<Timer>
where
F: Fn() + Send + Sync + 'static,
{
self.virtual_timer.timer_manager().create_timer(func)
}
/// Adds a POSIX timer to the managed `posix_timers` and allocates a timer ID for it.
/// Returns the timer ID.
pub fn add_posix_timer(&self, posix_timer: Arc<Timer>) -> usize {
let mut timers = self.posix_timers.lock();
// Holding the lock of `posix_timers` is required to operate the `id_allocator`.
let timer_id = self.id_allocator.lock().alloc().unwrap();
if timers.len() < timer_id + 1 {
timers.resize(timer_id + 1, None);
}
// The ID allocated is not used by any other timers so this index in `timers`
// must be `None`.
timers[timer_id] = Some(posix_timer);
timer_id
}
/// Finds a POSIX timer by the input `timer_id`.
pub fn find_posix_timer(&self, timer_id: usize) -> Option<Arc<Timer>> {
let timers = self.posix_timers.lock();
if timer_id >= timers.len() {
return None;
}
timers[timer_id].clone()
}
/// Removes the POSIX timer with the ID `timer_id`.
pub fn remove_posix_timer(&self, timer_id: usize) -> Option<Arc<Timer>> {
let mut timers = self.posix_timers.lock();
if timer_id >= timers.len() {
return None;
}
let timer = timers[timer_id].take();
if timer.is_some() {
// Holding the lock of `posix_timers` is required to operate the `id_allocator`.
self.id_allocator.lock().free(timer_id);
}
timer
}
}
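// A hypothetical usage sketch of the POSIX timer bookkeeping above (not part of
// this commit; `timer_manager` is an assumed `PosixTimerManager` binding): a timer
// backing `timer_create()` is registered, looked up by its ID, and finally removed.
//
//     let timer = timer_manager.create_prof_timer(|| { /* deliver the signal */ });
//     let id = timer_manager.add_posix_timer(timer);
//     assert!(timer_manager.find_posix_timer(id).is_some());
//     assert!(timer_manager.remove_posix_timer(id).is_some());
//     assert!(timer_manager.find_posix_timer(id).is_none());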

@ -0,0 +1,56 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{Pgid, Pid};
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessFilter {
Any,
WithPid(Pid),
WithPgid(Pgid),
}
impl ProcessFilter {
// used for waitid
pub fn from_which_and_id(which: u64, id: u64) -> Result<Self> {
// Does not support P_PIDFD (which = 3) for now.
// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/wait.h#L20
match which {
0 => Ok(ProcessFilter::Any),
1 => Ok(ProcessFilter::WithPid(id as Pid)),
2 => Ok(ProcessFilter::WithPgid(id as Pgid)),
3 => todo!(),
_ => return_errno_with_message!(Errno::EINVAL, "invalid which"),
}
}
// used for wait4 and kill
pub fn from_id(wait_pid: i32) -> Self {
// https://man7.org/linux/man-pages/man2/waitpid.2.html
// https://man7.org/linux/man-pages/man2/kill.2.html
if wait_pid < -1 {
// process group ID is equal to the absolute value of pid.
ProcessFilter::WithPgid((-wait_pid) as Pgid)
} else if wait_pid == -1 {
// wait for any child process
ProcessFilter::Any
} else if wait_pid == 0 {
// wait for any child process with same process group ID
let pgid = current!().pgid();
ProcessFilter::WithPgid(pgid)
} else {
// pid > 0. wait for the child whose process ID is equal to the value of pid.
ProcessFilter::WithPid(wait_pid as Pid)
}
}
pub fn contains_pid(&self, pid: Pid) -> bool {
match self {
ProcessFilter::Any => true,
ProcessFilter::WithPid(filter_pid) => *filter_pid == pid,
ProcessFilter::WithPgid(_) => todo!(),
}
}
}
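// A hypothetical illustration of the `from_id` decoding rules above:
//
//     assert_eq!(ProcessFilter::from_id(-5), ProcessFilter::WithPgid(5));
//     assert_eq!(ProcessFilter::from_id(-1), ProcessFilter::Any);
//     // `from_id(0)` yields `WithPgid(pgid)` of the current process's group.
//     assert_eq!(ProcessFilter::from_id(42), ProcessFilter::WithPid(42));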

@ -0,0 +1,113 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! A global table that stores the PID-to-process mapping.
//! This table can be used to get a process by its PID.
//! TODO: process groups and threads need similar mappings.
use alloc::collections::btree_map::Values;
use super::{Pgid, Pid, Process, ProcessGroup, Session, Sid};
use crate::{
events::{Events, Observer, Subject},
prelude::*,
};
static PROCESS_TABLE: Mutex<BTreeMap<Pid, Arc<Process>>> = Mutex::new(BTreeMap::new());
static PROCESS_GROUP_TABLE: Mutex<BTreeMap<Pgid, Arc<ProcessGroup>>> = Mutex::new(BTreeMap::new());
static PROCESS_TABLE_SUBJECT: Subject<PidEvent> = Subject::new();
static SESSION_TABLE: Mutex<BTreeMap<Sid, Arc<Session>>> = Mutex::new(BTreeMap::new());
// ************ Process *************
/// Gets a process with the given `pid`
pub fn get_process(pid: Pid) -> Option<Arc<Process>> {
PROCESS_TABLE.lock().get(&pid).cloned()
}
pub(super) fn process_table_mut() -> MutexGuard<'static, BTreeMap<Pid, Arc<Process>>> {
PROCESS_TABLE.lock()
}
/// Acquires a lock on the process table and returns a `ProcessTable`.
pub fn process_table() -> ProcessTable<'static> {
ProcessTable {
inner: PROCESS_TABLE.lock(),
}
}
/// A wrapper for the mutex-protected process table.
///
/// It provides the `iter` method to iterate over the processes in the table.
pub struct ProcessTable<'a> {
inner: MutexGuard<'a, BTreeMap<Pid, Arc<Process>>>,
}
impl<'a> ProcessTable<'a> {
/// Returns an iterator over the processes in the table.
pub fn iter(&self) -> ProcessTableIter {
ProcessTableIter {
inner: self.inner.values(),
}
}
}
/// An iterator over the processes of the process table.
pub struct ProcessTableIter<'a> {
inner: Values<'a, Pid, Arc<Process>>,
}
impl<'a> Iterator for ProcessTableIter<'a> {
type Item = &'a Arc<Process>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
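// A hypothetical usage of the lock-holding wrapper above: the mutex guard lives
// inside `ProcessTable`, so the table stays locked exactly for the scope of the
// iteration.
//
//     let table = process_table();
//     for process in table.iter() {
//         // Inspect each `&Arc<Process>` here; dropping `table` unlocks the map.
//     }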
// ************ Process Group *************
/// Gets a process group with `pgid`
pub fn get_process_group(pgid: &Pgid) -> Option<Arc<ProcessGroup>> {
PROCESS_GROUP_TABLE.lock().get(pgid).cloned()
}
/// Returns whether the process group table contains a process group with the given `pgid`
pub fn contain_process_group(pgid: &Pgid) -> bool {
PROCESS_GROUP_TABLE.lock().contains_key(pgid)
}
pub(super) fn group_table_mut() -> MutexGuard<'static, BTreeMap<Pgid, Arc<ProcessGroup>>> {
PROCESS_GROUP_TABLE.lock()
}
// ************ Session *************
/// Gets a session with `sid`.
pub fn get_session(sid: &Sid) -> Option<Arc<Session>> {
SESSION_TABLE.lock().get(sid).map(Arc::clone)
}
pub(super) fn session_table_mut() -> MutexGuard<'static, BTreeMap<Sid, Arc<Session>>> {
SESSION_TABLE.lock()
}
// ************ Observer *************
/// Registers an observer which watches `PidEvent`.
pub fn register_observer(observer: Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.register_observer(observer, ());
}
/// Unregisters an observer which watches `PidEvent`.
pub fn unregister_observer(observer: &Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.unregister_observer(observer);
}
#[derive(Copy, Clone)]
pub enum PidEvent {
Exit(Pid),
}
impl Events for PidEvent {}

@ -0,0 +1,101 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicUsize, Ordering};
use align_ext::AlignExt;
use aster_rights::Full;
use crate::{
prelude::*,
vm::{perms::VmPerms, vmar::Vmar},
};
/// The base address of the user heap
pub const USER_HEAP_BASE: Vaddr = 0x0000_0000_1000_0000;
/// The max allowed size of the user heap
pub const USER_HEAP_SIZE_LIMIT: usize = 16 * 1024 * PAGE_SIZE; // 16 * 1024 pages, i.e., 64 MiB with 4 KiB pages
#[derive(Debug)]
pub struct Heap {
/// The lowest address of the heap
base: Vaddr,
/// The heap size limit
limit: usize,
/// The current highest address of the heap
current_heap_end: AtomicUsize,
}
impl Heap {
pub const fn new() -> Self {
Heap {
base: USER_HEAP_BASE,
limit: USER_HEAP_SIZE_LIMIT,
current_heap_end: AtomicUsize::new(USER_HEAP_BASE),
}
}
/// Inits and maps the heap VMO
pub(super) fn alloc_and_map_vmo(&self, root_vmar: &Vmar<Full>) -> Result<()> {
let vmar_map_options = {
let perms = VmPerms::READ | VmPerms::WRITE;
root_vmar
// FIXME: Our current implementation of mapping resize cannot move
// existing mappings within the new range, which may cause the resize
// operation to fail. Therefore, if there are already mappings within
// the heap expansion range, the brk operation will fail.
.new_map(PAGE_SIZE, perms)
.unwrap()
.offset(self.base)
};
vmar_map_options.build()?;
self.set_uninitialized();
Ok(())
}
pub fn brk(&self, new_heap_end: Option<Vaddr>) -> Result<Vaddr> {
let current = current!();
let root_vmar = current.root_vmar();
match new_heap_end {
None => Ok(self.current_heap_end.load(Ordering::Relaxed)),
Some(new_heap_end) => {
if new_heap_end > self.base + self.limit {
return_errno_with_message!(Errno::ENOMEM, "the heap size limit is exceeded");
}
let current_heap_end = self.current_heap_end.load(Ordering::Acquire);
if new_heap_end <= current_heap_end {
// FIXME: should we allow shrinking the current user heap?
return Ok(current_heap_end);
}
let old_size = (current_heap_end - self.base).align_up(PAGE_SIZE);
let new_size = (new_heap_end - self.base).align_up(PAGE_SIZE);
root_vmar.resize_mapping(self.base, old_size, new_size)?;
self.current_heap_end.store(new_heap_end, Ordering::Release);
Ok(new_heap_end)
}
}
}
pub(super) fn set_uninitialized(&self) {
self.current_heap_end
.store(self.base + PAGE_SIZE, Ordering::Relaxed);
}
}
impl Clone for Heap {
fn clone(&self) -> Self {
let current_heap_end = self.current_heap_end.load(Ordering::Relaxed);
Self {
base: self.base,
limit: self.limit,
current_heap_end: AtomicUsize::new(current_heap_end),
}
}
}
impl Default for Heap {
fn default() -> Self {
Self::new()
}
}
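// A hypothetical sketch of the brk(2)-style contract implemented above (it must
// run in the context of the owning process, since `brk` uses `current!()`; `heap`
// is an assumed binding):
//
//     let cur = heap.brk(None)?;                   // query the current break
//     let new = heap.brk(Some(cur + PAGE_SIZE))?;  // grow the heap by one page
//     assert_eq!(new, cur + PAGE_SIZE);
//     // Requests beyond `base + limit` fail with ENOMEM; shrink requests are
//     // currently ignored (see the FIXME in `brk`).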

@ -0,0 +1,96 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use crate::prelude::*;
/// Auxiliary Vector.
///
/// # What is Auxiliary Vector?
///
/// Here is a concise description of Auxiliary Vector from GNU's manual:
///
/// > When a program is executed, it receives information from the operating system
/// > about the environment in which it is operating. The form of this information
/// > is a table of key-value pairs, where the keys are from the set of AT_
/// > values in elf.h.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum AuxKey {
AT_NULL = 0, /* end of vector */
AT_IGNORE = 1, /* entry should be ignored */
AT_EXECFD = 2, /* file descriptor of program */
AT_PHDR = 3, /* program headers for program */
AT_PHENT = 4, /* size of program header entry */
AT_PHNUM = 5, /* number of program headers */
AT_PAGESZ = 6, /* system page size */
AT_BASE = 7, /* base address of interpreter */
AT_FLAGS = 8, /* flags */
AT_ENTRY = 9, /* entry point of program */
AT_NOTELF = 10, /* program is not ELF */
AT_UID = 11, /* real uid */
AT_EUID = 12, /* effective uid */
AT_GID = 13, /* real gid */
AT_EGID = 14, /* effective gid */
AT_PLATFORM = 15, /* string identifying CPU for optimizations */
AT_HWCAP = 16, /* arch dependent hints at CPU capabilities */
AT_CLKTCK = 17, /* frequency at which times() increments */
/* 18...22 not used */
AT_SECURE = 23, /* secure mode boolean */
AT_BASE_PLATFORM = 24, /* string identifying real platform, may
* differ from AT_PLATFORM. */
AT_RANDOM = 25, /* address of 16 random bytes */
AT_HWCAP2 = 26, /* extension of AT_HWCAP */
/* 28...30 not used */
AT_EXECFN = 31, /* filename of program */
AT_SYSINFO = 32,
AT_SYSINFO_EHDR = 33, /* the start address of the page containing the VDSO */
}
impl AuxKey {
pub fn as_u64(&self) -> u64 {
*self as u64
}
}
#[derive(Clone, Default, Debug)]
pub struct AuxVec {
table: BTreeMap<AuxKey, u64>,
}
impl AuxVec {
pub const fn new() -> AuxVec {
AuxVec {
table: BTreeMap::new(),
}
}
}
impl AuxVec {
pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
if key == AuxKey::AT_NULL || key == AuxKey::AT_IGNORE {
return_errno_with_message!(Errno::EINVAL, "Illegal key");
}
self.table
.entry(key)
.and_modify(|val_mut| *val_mut = val)
.or_insert(val);
Ok(())
}
pub fn get(&self, key: AuxKey) -> Option<u64> {
self.table.get(&key).copied()
}
pub fn del(&mut self, key: AuxKey) -> Option<u64> {
self.table.remove(&key)
}
pub fn table(&self) -> &BTreeMap<AuxKey, u64> {
&self.table
}
}
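// A hypothetical round-trip through the auxiliary vector above:
//
//     let mut auxvec = AuxVec::new();
//     auxvec.set(AuxKey::AT_PAGESZ, 4096)?;
//     assert_eq!(auxvec.get(AuxKey::AT_PAGESZ), Some(4096));
//     assert!(auxvec.set(AuxKey::AT_NULL, 0).is_err()); // terminator keys are rejected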

@ -0,0 +1,443 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! The init stack for the process.
//! The init stack is used to store the `argv`, `envp` and auxiliary vectors.
//! We can read the `argv` and `envp` of a process from its init stack.
//! Usually, the lowest address of the init stack is
//! the highest address of the user stack of the first thread.
//!
//! However, the init stack is mapped into user space
//! and the user process can overwrite its content,
//! so the content read from the init stack may not match the initial state of the process.
//!
use core::{
mem,
sync::atomic::{AtomicUsize, Ordering},
};
use align_ext::AlignExt;
use aster_rights::Full;
use ostd::mm::{VmIo, MAX_USERSPACE_VADDR};
use self::aux_vec::{AuxKey, AuxVec};
use crate::{
prelude::*,
util::random::getrandom,
vm::{
perms::VmPerms,
vmar::Vmar,
vmo::{Vmo, VmoOptions, VmoRightsOp},
},
};
pub mod aux_vec;
/// Set the initial stack size to 8 megabytes, following the default Linux stack size limit.
pub const INIT_STACK_SIZE: usize = 8 * 1024 * 1024; // 8 MB
/// The max number of arguments that can be used to create a new process.
pub const MAX_ARGV_NUMBER: usize = 128;
/// The max number of environment variables that can be used to create a new process.
pub const MAX_ENVP_NUMBER: usize = 128;
/// The max length of each argument used to create a new process.
pub const MAX_ARG_LEN: usize = 2048;
/// The max length of each environment variable (the total length of the key-value pair) used to create a new process.
pub const MAX_ENV_LEN: usize = 128;
/*
* Illustration of the virtual memory space containing the processes' init stack:
*
* (high address)
* +---------------------+ <------+ Highest address
* | | Random stack paddings
* +---------------------+ <------+ The base of stack (stack grows down)
* | |
* | Null-terminated |
* | strings referenced |
* | by variables below |
* | |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | ... |
* +---------------------+
* | aux_val[0] |
* +---------------------+
* | aux_key[0] | <------+ Auxiliary table
* +---------------------+
* | NULL |
* +---------------------+
* | ... |
* +---------------------+
* | char* envp[0] | <------+ Environment variables
* +---------------------+
* | NULL |
* +---------------------+
* | char* argv[argc-1] |
* +---------------------+
* | ... |
* +---------------------+
* | char* argv[0] |
* +---------------------+
* | long argc | <------+ Program arguments
* +---------------------+
* | |
* | |
* +---------------------+
* | |
* +---------------------+ <------+ User stack default rlimit
* (low address)
*/
/// The initial portion of the main stack of a process.
pub struct InitStack {
/// The initial highest address.
/// The stack grows down from this address
initial_top: Vaddr,
/// The max allowed stack size
max_size: usize,
/// The current stack pointer.
/// Before initialization, `pos` points to `initial_top`.
/// After initialization, `pos` points to the user stack pointer (rsp)
/// of the process.
pos: Arc<AtomicUsize>,
vmo: Vmo<Full>,
}
impl Clone for InitStack {
fn clone(&self) -> Self {
Self {
initial_top: self.initial_top,
max_size: self.max_size,
pos: self.pos.clone(),
vmo: self.vmo.dup(),
}
}
}
impl InitStack {
pub(super) fn new() -> Self {
let nr_pages_padding = {
let mut random_nr_pages_padding: u8 = 0;
getrandom(random_nr_pages_padding.as_bytes_mut()).unwrap();
random_nr_pages_padding as usize
};
let initial_top = MAX_USERSPACE_VADDR - PAGE_SIZE * nr_pages_padding;
let max_size = INIT_STACK_SIZE;
let vmo = {
let vmo_options = VmoOptions::<Full>::new(max_size);
vmo_options.alloc().unwrap()
};
Self {
initial_top,
max_size,
pos: Arc::new(AtomicUsize::new(initial_top)),
vmo,
}
}
/// Maps the vmo of the init stack.
pub(super) fn map_init_stack_vmo(&self, root_vmar: &Vmar<Full>) -> Result<()> {
let vmar_map_options = {
let perms = VmPerms::READ | VmPerms::WRITE;
let map_addr = self.initial_top - self.max_size;
debug_assert!(map_addr % PAGE_SIZE == 0);
root_vmar
.new_map(self.max_size, perms)?
.offset(map_addr)
.vmo(self.vmo.dup().to_dyn())
};
vmar_map_options.build()?;
self.set_uninitialized();
Ok(())
}
/// Returns the user stack top (highest address), used to set up rsp.
///
/// This method should only be called after the stack is initialized.
pub fn user_stack_top(&self) -> Vaddr {
let stack_top = self.pos();
debug_assert!(self.is_initialized());
stack_top
}
/// Constructs a writer to initialize the content of an `InitStack`.
pub(super) fn writer(
&self,
argv: Vec<CString>,
envp: Vec<CString>,
auxvec: AuxVec,
) -> InitStackWriter<'_> {
// The stack should be written only once.
debug_assert!(!self.is_initialized());
InitStackWriter {
pos: self.pos.clone(),
vmo: &self.vmo,
argv,
envp,
auxvec,
map_addr: self.initial_top - self.max_size,
}
}
/// Constructs a reader to parse the content of an `InitStack`.
/// The `InitStack` should only be read after it is initialized.
pub(super) fn reader(&self) -> InitStackReader<'_> {
debug_assert!(self.is_initialized());
InitStackReader {
base: self.pos(),
vmo: &self.vmo,
map_addr: self.initial_top - self.max_size,
}
}
fn is_initialized(&self) -> bool {
self.pos() != self.initial_top
}
fn set_uninitialized(&self) {
self.pos.store(self.initial_top, Ordering::Relaxed);
}
fn pos(&self) -> Vaddr {
self.pos.load(Ordering::Relaxed)
}
}
/// A writer to initialize the content of an `InitStack`.
pub struct InitStackWriter<'a> {
pos: Arc<AtomicUsize>,
vmo: &'a Vmo<Full>,
argv: Vec<CString>,
envp: Vec<CString>,
auxvec: AuxVec,
/// The mapping address of the `InitStack`.
map_addr: usize,
}
impl<'a> InitStackWriter<'a> {
pub fn write(mut self) -> Result<()> {
// FIXME: Some OSes may put the first page of the executable file here
// for interpreting ELF headers.
let argc = self.argv.len() as u64;
// Write envp string
let envp_pointers = self.write_envp_strings()?;
// Write argv string
let argv_pointers = self.write_argv_strings()?;
// Generate random values for auxvec
let random_value_pointer = {
let random_value = generate_random_for_aux_vec();
self.write_bytes(&random_value)?
};
self.auxvec.set(AuxKey::AT_RANDOM, random_value_pointer)?;
self.adjust_stack_alignment(&envp_pointers, &argv_pointers)?;
self.write_aux_vec()?;
self.write_envp_pointers(envp_pointers)?;
self.write_argv_pointers(argv_pointers)?;
// write argc
self.write_u64(argc)?;
// Ensure the stack top is 16-byte aligned
debug_assert_eq!(self.pos() & !0xf, self.pos());
Ok(())
}
fn write_envp_strings(&self) -> Result<Vec<u64>> {
let mut envp_pointers = Vec::with_capacity(self.envp.len());
for envp in self.envp.iter() {
let pointer = self.write_cstring(envp)?;
envp_pointers.push(pointer);
}
Ok(envp_pointers)
}
fn write_argv_strings(&self) -> Result<Vec<u64>> {
let mut argv_pointers = Vec::with_capacity(self.argv.len());
for argv in self.argv.iter().rev() {
let pointer = self.write_cstring(argv)?;
debug!("argv address = 0x{:x}", pointer);
argv_pointers.push(pointer);
}
argv_pointers.reverse();
Ok(argv_pointers)
}
/// The libc ABI requires the stack entry point to be 16-byte aligned.
/// The current position of the stack is already 8-byte aligned, so insert
/// 8 more bytes to meet the requirement if necessary.
fn adjust_stack_alignment(&self, envp_pointers: &[u64], argv_pointers: &[u64]) -> Result<()> {
// Ensure 8-byte alignment
self.write_u64(0)?;
let auxvec_size = (self.auxvec.table().len() + 1) * (mem::size_of::<u64>() * 2);
let envp_pointers_size = (envp_pointers.len() + 1) * mem::size_of::<u64>();
let argv_pointers_size = (argv_pointers.len() + 1) * mem::size_of::<u64>();
let argc_size = mem::size_of::<u64>();
let to_write_size = auxvec_size + envp_pointers_size + argv_pointers_size + argc_size;
if (self.pos() - to_write_size) % 16 != 0 {
self.write_u64(0)?;
}
Ok(())
}
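// A worked example of the check above (hypothetical addresses): with
// pos() == 0x7fff_ffd8 and to_write_size == 0x80, the final rsp would be
// 0x7fff_ff58, which is not 16-byte aligned, so one extra zero u64 is
// written; with to_write_size == 0x88, no extra padding is needed.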
fn write_aux_vec(&self) -> Result<()> {
// Write the NULL auxiliary entry
self.write_u64(0)?;
self.write_u64(AuxKey::AT_NULL as u64)?;
// Write Auxiliary vectors
let aux_vec: Vec<_> = self
.auxvec
.table()
.iter()
.map(|(aux_key, aux_value)| (*aux_key, *aux_value))
.collect();
for (aux_key, aux_value) in aux_vec.iter() {
self.write_u64(*aux_value)?;
self.write_u64(*aux_key as u64)?;
}
Ok(())
}
fn write_envp_pointers(&self, mut envp_pointers: Vec<u64>) -> Result<()> {
// write NULL pointer
self.write_u64(0)?;
// write envp pointers
envp_pointers.reverse();
for envp_pointer in envp_pointers {
self.write_u64(envp_pointer)?;
}
Ok(())
}
fn write_argv_pointers(&self, mut argv_pointers: Vec<u64>) -> Result<()> {
// write 0
self.write_u64(0)?;
// write argv pointers
argv_pointers.reverse();
for argv_pointer in argv_pointers {
self.write_u64(argv_pointer)?;
}
Ok(())
}
/// Writes a u64 to the stack.
/// Returns the address of the written value.
fn write_u64(&self, val: u64) -> Result<u64> {
let start_address = (self.pos() - 8).align_down(8);
self.pos.store(start_address, Ordering::Relaxed);
self.vmo.write_val(start_address - self.map_addr, &val)?;
Ok(self.pos() as u64)
}
/// Writes a CString, including the terminating null byte, to the stack.
/// Returns the address of the written string.
fn write_cstring(&self, val: &CString) -> Result<u64> {
let bytes = val.as_bytes_with_nul();
self.write_bytes(bytes)
}
/// Writes bytes to the stack.
/// Returns the address of the written bytes.
fn write_bytes(&self, bytes: &[u8]) -> Result<u64> {
let len = bytes.len();
self.pos.fetch_sub(len, Ordering::Relaxed);
let pos = self.pos();
self.vmo.write_bytes(pos - self.map_addr, bytes)?;
Ok(pos as u64)
}
fn pos(&self) -> Vaddr {
self.pos.load(Ordering::Relaxed)
}
}
fn generate_random_for_aux_vec() -> [u8; 16] {
let mut rand_val = [0; 16];
getrandom(&mut rand_val).unwrap();
rand_val
}
/// A reader to parse the content of an `InitStack`.
pub struct InitStackReader<'a> {
base: Vaddr,
vmo: &'a Vmo<Full>,
/// The mapping address of the `InitStack`.
map_addr: usize,
}
impl<'a> InitStackReader<'a> {
/// Reads argc from the process init stack
pub fn argc(&self) -> Result<u64> {
let stack_base = self.init_stack_bottom();
Ok(self.vmo.read_val(stack_base - self.map_addr)?)
}
/// Reads argv from the process init stack
pub fn argv(&self) -> Result<Vec<CString>> {
let argc = self.argc()? as usize;
// The reading offset in the initial stack is:
// the initial stack bottom address + the size of `argc` in memory
let read_offset = self.init_stack_bottom() + size_of::<usize>();
let mut argv = Vec::with_capacity(argc);
let user_space = CurrentUserSpace::get();
let mut argv_reader = user_space.reader(read_offset, argc * size_of::<usize>())?;
for _ in 0..argc {
let arg = {
let arg_ptr = argv_reader.read_val::<Vaddr>()?;
user_space.read_cstring(arg_ptr, MAX_ARG_LEN)?
};
argv.push(arg);
}
Ok(argv)
}
/// Reads envp from the process init stack
pub fn envp(&self) -> Result<Vec<CString>> {
let argc = self.argc()? as usize;
// The reading offset in the initial stack is:
// the initial stack bottom address
// + the size of argc(8)
// + the size of arg pointer(8) * the number of arg(argc)
// + the size of null pointer(8)
let read_offset = self.init_stack_bottom()
+ size_of::<usize>()
+ size_of::<usize>() * argc
+ size_of::<usize>();
let mut envp = Vec::new();
let user_space = CurrentUserSpace::get();
let mut envp_reader = user_space.reader(read_offset, MAX_ENVP_NUMBER * size_of::<usize>())?;
for _ in 0..MAX_ENVP_NUMBER {
let envp_ptr = envp_reader.read_val::<Vaddr>()?;
if envp_ptr == 0 {
break;
}
let env = user_space.read_cstring(envp_ptr, MAX_ENV_LEN)?;
envp.push(env);
}
Ok(envp)
}
/// Returns the bottom address of the init stack (lowest address).
pub const fn init_stack_bottom(&self) -> Vaddr {
self.base
}
}
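// A hypothetical read-back of an initialized stack, mirroring the layout diagram
// above (argc at the lowest address, then the argv pointers, a NULL, then the
// envp pointers; `init_stack` is an assumed binding):
//
//     let reader = init_stack.reader();
//     let argc = reader.argc()?;
//     let argv = reader.argv()?; // argv.len() == argc as usize
//     let envp = reader.envp()?;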

@ -0,0 +1,142 @@
// SPDX-License-Identifier: MPL-2.0
//! This module defines the struct `ProcessVm`
//! to represent the layout of user space process virtual memory.
//!
//! The `ProcessVm` struct contains `Vmar`,
//! which stores all existing memory mappings.
//! The `ProcessVm` also contains
//! the basic info of process-level VM segments,
//! like the init stack and the heap.
mod heap;
mod init_stack;
use aster_rights::Full;
pub use heap::Heap;
pub use self::{
heap::USER_HEAP_SIZE_LIMIT,
init_stack::{
aux_vec::{AuxKey, AuxVec},
InitStack, InitStackReader, InitStackWriter, INIT_STACK_SIZE, MAX_ARGV_NUMBER, MAX_ARG_LEN,
MAX_ENVP_NUMBER, MAX_ENV_LEN,
},
};
use crate::{prelude::*, vm::vmar::Vmar};
/*
* The user's virtual memory space layout looks like below.
* TODO: The layout of the user heap does not match the current implementation,
* and currently the initial program break is a fixed value.
*
* (high address)
* +---------------------+ <------+ The top of Vmar, which is the highest address usable
* | | Randomly padded pages
* +---------------------+ <------+ The base of the initial user stack
* | User stack |
* | |
* +---------||----------+ <------+ The user stack limit, can be extended lower
* | \/ |
* | ... |
* | |
* | MMAP Spaces |
* | |
* | ... |
* | /\ |
* +---------||----------+ <------+ The current program break
* | User heap |
* | |
* +---------------------+ <------+ The original program break
* | | Randomly padded pages
* +---------------------+ <------+ The end of the program's last segment
* | |
* | Loaded segments |
* | .text, .data, .bss |
* | , etc. |
* | |
* +---------------------+ <------+ The bottom of Vmar at 0x1_0000
* | | 64 KiB unusable space
* +---------------------+
* (low address)
*/
/// The process user space virtual memory
pub struct ProcessVm {
root_vmar: Vmar<Full>,
init_stack: InitStack,
heap: Heap,
}
impl Clone for ProcessVm {
fn clone(&self) -> Self {
Self {
root_vmar: self.root_vmar.dup().unwrap(),
init_stack: self.init_stack.clone(),
heap: self.heap.clone(),
}
}
}
impl ProcessVm {
/// Allocates a new `ProcessVm`
pub fn alloc() -> Self {
let root_vmar = Vmar::<Full>::new_root();
let init_stack = InitStack::new();
init_stack.map_init_stack_vmo(&root_vmar).unwrap();
let heap = Heap::new();
heap.alloc_and_map_vmo(&root_vmar).unwrap();
Self {
root_vmar,
heap,
init_stack,
}
}
/// Forks a `ProcessVm` from `other`.
///
/// The returned `ProcessVm` will have a forked `Vmar`.
pub fn fork_from(other: &ProcessVm) -> Result<Self> {
let root_vmar = Vmar::<Full>::fork_from(&other.root_vmar)?;
Ok(Self {
root_vmar,
heap: other.heap.clone(),
init_stack: other.init_stack.clone(),
})
}
pub fn root_vmar(&self) -> &Vmar<Full> {
&self.root_vmar
}
/// Returns a reader for reading contents from
/// the `InitStack`.
pub fn init_stack_reader(&self) -> InitStackReader {
self.init_stack.reader()
}
/// Returns the top address of the user stack.
pub fn user_stack_top(&self) -> Vaddr {
self.init_stack.user_stack_top()
}
pub(super) fn init_stack_writer(
&self,
argv: Vec<CString>,
envp: Vec<CString>,
aux_vec: AuxVec,
) -> InitStackWriter {
self.init_stack.writer(argv, envp, aux_vec)
}
pub(super) fn heap(&self) -> &Heap {
&self.heap
}
/// Clears existing mappings and then maps stack and heap vmo.
pub(super) fn clear_and_map(&self) {
self.root_vmar.clear().unwrap();
self.init_stack.map_init_stack_vmo(&self.root_vmar).unwrap();
self.heap.alloc_and_map_vmo(&self.root_vmar).unwrap();
}
}

@ -0,0 +1,217 @@
// SPDX-License-Identifier: MPL-2.0
//! A wrapper of xmas_elf's ELF parsing.
use xmas_elf::{
header::{self, Header, HeaderPt1, HeaderPt2, HeaderPt2_, Machine_, Type_},
program::{self, ProgramHeader64},
};
use crate::prelude::*;
pub struct Elf {
pub elf_header: ElfHeader,
pub program_headers: Vec<ProgramHeader64>,
}
impl Elf {
pub fn parse_elf(input: &[u8]) -> Result<Self> {
// First, parse the ELF header.
// The ELF header is usually 64 bytes: pt1 is 16 bytes and pt2 is 48 bytes.
// We require 128 bytes here to keep consistency with Linux implementations.
debug_assert!(input.len() >= 128);
let header = xmas_elf::header::parse_header(input)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse elf header fails"))?;
let elf_header = ElfHeader::parse_elf_header(header)?;
check_elf_header(&elf_header)?;
// Then parse the program header table.
// FIXME: we should acquire enough pages before parsing.
let ph_offset = elf_header.pt2.ph_offset;
let ph_count = elf_header.pt2.ph_count;
let ph_entry_size = elf_header.pt2.ph_entry_size;
debug_assert!(
input.len() >= ph_offset as usize + ph_count as usize * ph_entry_size as usize
);
let mut program_headers = Vec::with_capacity(ph_count as usize);
for index in 0..ph_count {
let program_header = xmas_elf::program::parse_program_header(input, header, index)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header fails"))?;
let ph64 = match program_header {
xmas_elf::program::ProgramHeader::Ph64(ph64) => *ph64,
xmas_elf::program::ProgramHeader::Ph32(_) => {
return_errno_with_message!(Errno::ENOEXEC, "not a 64-bit executable")
}
};
program_headers.push(ph64);
}
Ok(Self {
elf_header,
program_headers,
})
}
// The following info is used to set up the init stack.
/// The entry point of the ELF
pub fn entry_point(&self) -> Vaddr {
self.elf_header.pt2.entry_point as Vaddr
}
/// The offset of the program header table
pub fn ph_off(&self) -> u64 {
self.elf_header.pt2.ph_offset
}
/// The number of program headers
pub fn ph_count(&self) -> u16 {
self.elf_header.pt2.ph_count
}
/// The size of a program header
pub fn ph_ent(&self) -> u16 {
self.elf_header.pt2.ph_entry_size
}
/// The virtual address of the program header table
pub fn ph_addr(&self) -> Result<Vaddr> {
let ph_offset = self.ph_off();
for program_header in &self.program_headers {
if program_header.offset <= ph_offset
&& ph_offset < program_header.offset + program_header.file_size
{
return Ok(
(ph_offset - program_header.offset + program_header.virtual_addr) as Vaddr,
);
}
}
return_errno_with_message!(
Errno::ENOEXEC,
"can not find program header table address in elf"
);
}
/// Whether the ELF is a shared object
pub fn is_shared_object(&self) -> bool {
self.elf_header.pt2.type_.as_type() == header::Type::SharedObject
}
/// Reads the ldso path from the ELF INTERP segment
pub fn ldso_path(&self, file_header_buf: &[u8]) -> Result<Option<String>> {
for program_header in &self.program_headers {
let type_ = program_header.get_type().map_err(|_| {
Error::with_message(Errno::ENOEXEC, "parse program header type fails")
})?;
if type_ == program::Type::Interp {
let file_size = program_header.file_size as usize;
let file_offset = program_header.offset as usize;
debug_assert!(file_offset + file_size <= file_header_buf.len());
let ldso = CStr::from_bytes_with_nul(
&file_header_buf[file_offset..file_offset + file_size],
)?;
return Ok(Some(ldso.to_string_lossy().to_string()));
}
}
Ok(None)
}
// An offset to be subtracted from ELF vaddr for PIE
pub fn base_load_address_offset(&self) -> u64 {
let phdr = self.program_headers.first().unwrap();
phdr.virtual_addr - phdr.offset
}
}
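// A hypothetical parse of a page-sized file-header buffer (callers such as
// `load_program_to_vm` read at least the first page of the file beforehand;
// `file_header_buf` is an assumed binding):
//
//     let elf = Elf::parse_elf(&file_header_buf)?;
//     let entry = elf.entry_point();
//     if elf.is_shared_object() {
//         // PIE or ldso: `entry` must be relocated by the chosen base address.
//     }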
pub struct ElfHeader {
pub pt1: HeaderPt1,
pub pt2: HeaderPt2_64,
}
impl ElfHeader {
fn parse_elf_header(header: Header) -> Result<Self> {
let pt1 = *header.pt1;
let pt2 = match header.pt2 {
HeaderPt2::Header64(header_pt2) => {
let HeaderPt2_ {
type_,
machine,
version,
entry_point,
ph_offset,
sh_offset,
flags,
header_size,
ph_entry_size,
ph_count,
sh_entry_size,
sh_count,
sh_str_index,
} = header_pt2;
HeaderPt2_64 {
type_: *type_,
machine: *machine,
version: *version,
entry_point: *entry_point,
ph_offset: *ph_offset,
sh_offset: *sh_offset,
flags: *flags,
header_size: *header_size,
ph_entry_size: *ph_entry_size,
ph_count: *ph_count,
sh_entry_size: *sh_entry_size,
sh_count: *sh_count,
sh_str_index: *sh_str_index,
}
}
_ => return_errno_with_message!(Errno::ENOEXEC, "parse elf header failed"),
};
Ok(ElfHeader { pt1, pt2 })
}
}
pub struct HeaderPt2_64 {
pub type_: Type_,
pub machine: Machine_,
#[allow(dead_code)]
pub version: u32,
pub entry_point: u64,
pub ph_offset: u64,
#[allow(dead_code)]
pub sh_offset: u64,
#[allow(dead_code)]
pub flags: u32,
#[allow(dead_code)]
pub header_size: u16,
pub ph_entry_size: u16,
pub ph_count: u16,
#[allow(dead_code)]
pub sh_entry_size: u16,
#[allow(dead_code)]
pub sh_count: u16,
#[allow(dead_code)]
pub sh_str_index: u16,
}
fn check_elf_header(elf_header: &ElfHeader) -> Result<()> {
// 64-bit
debug_assert_eq!(elf_header.pt1.class(), header::Class::SixtyFour);
if elf_header.pt1.class() != header::Class::SixtyFour {
return_errno_with_message!(Errno::ENOEXEC, "not a 64-bit executable");
}
// little endian
debug_assert_eq!(elf_header.pt1.data(), header::Data::LittleEndian);
if elf_header.pt1.data() != header::Data::LittleEndian {
return_errno_with_message!(Errno::ENOEXEC, "not a little-endian executable");
}
// system V ABI
// debug_assert_eq!(elf_header.pt1.os_abi(), header::OsAbi::SystemV);
// if elf_header.pt1.os_abi() != header::OsAbi::SystemV {
// return Error::new(Errno::ENOEXEC);
// }
// x86_64 architecture
debug_assert_eq!(elf_header.pt2.machine.as_machine(), header::Machine::X86_64);
if elf_header.pt2.machine.as_machine() != header::Machine::X86_64 {
return_errno_with_message!(Errno::ENOEXEC, "not an x86_64 executable");
}
// Executable file or shared object
let elf_type = elf_header.pt2.type_.as_type();
debug_assert!(elf_type == header::Type::Executable || elf_type == header::Type::SharedObject);
if elf_type != header::Type::Executable && elf_type != header::Type::SharedObject {
return_errno_with_message!(Errno::ENOEXEC, "not an executable file");
}
Ok(())
}

@ -0,0 +1,434 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(unused_variables)]
//! This module is used to parse ELF file content to get `ElfLoadInfo`.
//! When creating a process from an ELF file, we use the `ElfLoadInfo` to construct the `VmSpace`.
use align_ext::AlignExt;
use aster_rights::Full;
use ostd::mm::VmIo;
use xmas_elf::program::{self, ProgramHeader64};
use super::elf_file::Elf;
use crate::{
fs::{
fs_resolver::{FsPath, FsResolver, AT_FDCWD},
path::Dentry,
},
prelude::*,
process::{
do_exit_group,
process_vm::{AuxKey, AuxVec, ProcessVm},
TermStatus,
},
vdso::{vdso_vmo, VDSO_VMO_SIZE},
vm::{perms::VmPerms, util::duplicate_frame, vmar::Vmar, vmo::VmoRightsOp},
};
/// Loads elf to the process vm.
///
/// This function will map elf segments and
/// initialize process init stack.
pub fn load_elf_to_vm(
process_vm: &ProcessVm,
file_header: &[u8],
elf_file: Arc<Dentry>,
fs_resolver: &FsResolver,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<ElfLoadInfo> {
let parsed_elf = Elf::parse_elf(file_header)?;
let ldso = lookup_and_parse_ldso(&parsed_elf, file_header, fs_resolver)?;
match init_and_map_vmos(process_vm, ldso, &parsed_elf, &elf_file) {
Ok((entry_point, mut aux_vec)) => {
// Map and set vdso entry.
// Since vdso does not require being mapped to any specific address,
// vdso is mapped after the elf file, heap and stack are mapped.
if let Some(vdso_text_base) = map_vdso_to_vm(process_vm) {
aux_vec
.set(AuxKey::AT_SYSINFO_EHDR, vdso_text_base as u64)
.unwrap();
}
let init_stack_writer = process_vm.init_stack_writer(argv, envp, aux_vec);
init_stack_writer.write().unwrap();
let user_stack_top = process_vm.user_stack_top();
Ok(ElfLoadInfo {
entry_point,
user_stack_top,
})
}
Err(err) => {
// Since the process_vm is in invalid state,
// the process cannot return to user space again,
// so `Vmar::clear` and `do_exit_group` are called here.
// FIXME: sending a fault signal is an alternative approach.
process_vm.root_vmar().clear().unwrap();
// FIXME: `current` macro will be used in `do_exit_group`.
// if the macro is used when creating the init process,
// the macro will panic. This corner case should be handled later.
// FIXME: how to set the correct exit status?
do_exit_group(TermStatus::Exited(1));
// The process will exit and the error code will be ignored.
Err(err)
}
}
}
fn lookup_and_parse_ldso(
elf: &Elf,
file_header: &[u8],
fs_resolver: &FsResolver,
) -> Result<Option<(Arc<Dentry>, Elf)>> {
let ldso_file = {
let Some(ldso_path) = elf.ldso_path(file_header)? else {
return Ok(None);
};
let fs_path = FsPath::new(AT_FDCWD, &ldso_path)?;
fs_resolver.lookup(&fs_path)?
};
let ldso_elf = {
let mut buf = Box::new([0u8; PAGE_SIZE]);
let inode = ldso_file.inode();
inode.read_bytes_at(0, &mut *buf)?;
Elf::parse_elf(&*buf)?
};
Ok(Some((ldso_file, ldso_elf)))
}
fn load_ldso(root_vmar: &Vmar<Full>, ldso_file: &Dentry, ldso_elf: &Elf) -> Result<LdsoLoadInfo> {
let map_addr = map_segment_vmos(ldso_elf, root_vmar, ldso_file)?;
Ok(LdsoLoadInfo::new(
ldso_elf.entry_point() + map_addr,
map_addr,
))
}
fn init_and_map_vmos(
process_vm: &ProcessVm,
ldso: Option<(Arc<Dentry>, Elf)>,
parsed_elf: &Elf,
elf_file: &Dentry,
) -> Result<(Vaddr, AuxVec)> {
let root_vmar = process_vm.root_vmar();
// After we clear the process VM, if any error happens, we must call exit_group instead of returning to user space.
let ldso_load_info = if let Some((ldso_file, ldso_elf)) = ldso {
Some(load_ldso(root_vmar, &ldso_file, &ldso_elf)?)
} else {
None
};
let elf_map_addr = map_segment_vmos(parsed_elf, root_vmar, elf_file)?;
let aux_vec = {
let ldso_base = ldso_load_info
.as_ref()
.map(|load_info| load_info.base_addr());
init_aux_vec(parsed_elf, elf_map_addr, ldso_base)?
};
let entry_point = if let Some(ldso_load_info) = ldso_load_info {
// Normal shared object
ldso_load_info.entry_point()
} else if parsed_elf.is_shared_object() {
// ldso itself
parsed_elf.entry_point() + elf_map_addr
} else {
// statically linked executable
parsed_elf.entry_point()
};
Ok((entry_point, aux_vec))
}
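// A summary of the entry-point selection above:
//   dynamically linked executable -> the ldso's relocated entry point
//   ldso or PIE loaded directly   -> parsed_elf.entry_point() + elf_map_addr
//   statically linked executable  -> parsed_elf.entry_point() unchanged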
pub struct LdsoLoadInfo {
entry_point: Vaddr,
base_addr: Vaddr,
}
impl LdsoLoadInfo {
pub fn new(entry_point: Vaddr, base_addr: Vaddr) -> Self {
Self {
entry_point,
base_addr,
}
}
pub fn entry_point(&self) -> Vaddr {
self.entry_point
}
pub fn base_addr(&self) -> Vaddr {
self.base_addr
}
}
pub struct ElfLoadInfo {
entry_point: Vaddr,
user_stack_top: Vaddr,
}
impl ElfLoadInfo {
pub fn new(entry_point: Vaddr, user_stack_top: Vaddr) -> Self {
Self {
entry_point,
user_stack_top,
}
}
pub fn entry_point(&self) -> Vaddr {
self.entry_point
}
pub fn user_stack_top(&self) -> Vaddr {
self.user_stack_top
}
}
/// Inits a VMO for each segment and then maps the segments to the root vmar
pub fn map_segment_vmos(elf: &Elf, root_vmar: &Vmar<Full>, elf_file: &Dentry) -> Result<Vaddr> {
// All segments of a shared object must be mapped to a contiguous VM range
// to ensure that the relative offsets between segments stay unchanged.
let base_addr = if elf.is_shared_object() {
base_map_addr(elf, root_vmar)?
} else {
0
};
for program_header in &elf.program_headers {
let type_ = program_header
.get_type()
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header type fails"))?;
if type_ == program::Type::Load {
check_segment_align(program_header)?;
map_segment_vmo(program_header, elf_file, root_vmar, base_addr)?;
}
}
Ok(base_addr)
}
fn base_map_addr(elf: &Elf, root_vmar: &Vmar<Full>) -> Result<Vaddr> {
let elf_size = elf
.program_headers
.iter()
.filter_map(|program_header| {
if let Ok(type_) = program_header.get_type()
&& type_ == program::Type::Load
{
let ph_max_addr = program_header.virtual_addr + program_header.mem_size;
Some(ph_max_addr as usize)
} else {
None
}
})
.max()
.ok_or(Error::with_message(
Errno::ENOEXEC,
"executable file does not has loadable sections",
))?;
let map_size = elf_size.align_up(PAGE_SIZE);
let vmar_map_options = root_vmar.new_map(map_size, VmPerms::empty())?;
vmar_map_options.build()
}
/// Creates and maps the corresponding segment VMO to `root_vmar`.
/// If needed, creates an additional anonymous mapping to represent the .bss segment.
fn map_segment_vmo(
program_header: &ProgramHeader64,
elf_file: &Dentry,
root_vmar: &Vmar<Full>,
base_addr: Vaddr,
) -> Result<()> {
trace!(
"mem range = 0x{:x} - 0x{:x}, mem_size = 0x{:x}",
program_header.virtual_addr,
program_header.virtual_addr + program_header.mem_size,
program_header.mem_size
);
trace!(
"file range = 0x{:x} - 0x{:x}, file_size = 0x{:x}",
program_header.offset,
program_header.offset + program_header.file_size,
program_header.file_size
);
let file_offset = program_header.offset as usize;
let virtual_addr = program_header.virtual_addr as usize;
debug_assert!(file_offset % PAGE_SIZE == virtual_addr % PAGE_SIZE);
let segment_vmo = {
let inode = elf_file.inode();
inode
.page_cache()
.ok_or(Error::with_message(
Errno::ENOENT,
"executable has no page cache",
))?
.to_dyn()
.dup_independent()?
};
let total_map_size = {
let vmap_start = virtual_addr.align_down(PAGE_SIZE);
let vmap_end = (virtual_addr + program_header.mem_size as usize).align_up(PAGE_SIZE);
vmap_end - vmap_start
};
let (segment_offset, segment_size) = {
let start = file_offset.align_down(PAGE_SIZE);
let end = (file_offset + program_header.file_size as usize).align_up(PAGE_SIZE);
debug_assert!(total_map_size >= (program_header.file_size as usize).align_up(PAGE_SIZE));
(start, end - start)
};
// Write zeros as padding. There are head padding and tail padding.
// Head padding: if the segment's virtual address is not page-aligned,
// the bytes in the first page from the page start up to the virtual address should be zeroed.
// Tail padding: if the segment's mem_size is larger than its file_size,
// the bytes that are not backed by file content should be zeros (usually the .data/.bss sections).
// Head padding.
let page_offset = file_offset % PAGE_SIZE;
if page_offset != 0 {
let new_frame = {
let head_frame = segment_vmo.commit_page(segment_offset)?;
let new_frame = duplicate_frame(&head_frame)?;
let buffer = vec![0u8; page_offset];
new_frame.write_bytes(0, &buffer).unwrap();
new_frame
};
let head_idx = segment_offset / PAGE_SIZE;
segment_vmo.replace(new_frame, head_idx)?;
}
// Tail padding.
let tail_padding_offset = program_header.file_size as usize + page_offset;
if segment_size > tail_padding_offset {
let new_frame = {
let tail_frame = segment_vmo.commit_page(segment_offset + tail_padding_offset)?;
let new_frame = duplicate_frame(&tail_frame)?;
let buffer = vec![0u8; (segment_size - tail_padding_offset) % PAGE_SIZE];
new_frame
.write_bytes(tail_padding_offset % PAGE_SIZE, &buffer)
.unwrap();
new_frame
};
let tail_idx = (segment_offset + tail_padding_offset) / PAGE_SIZE;
segment_vmo.replace(new_frame, tail_idx).unwrap();
}
let perms = parse_segment_perm(program_header.flags);
let mut vm_map_options = root_vmar
.new_map(segment_size, perms)?
.vmo(segment_vmo)
.vmo_offset(segment_offset)
.vmo_limit(segment_offset + segment_size)
.can_overwrite(true);
let offset = base_addr + (program_header.virtual_addr as Vaddr).align_down(PAGE_SIZE);
vm_map_options = vm_map_options.offset(offset);
let map_addr = vm_map_options.build()?;
let anonymous_map_size: usize = if total_map_size > segment_size {
total_map_size - segment_size
} else {
0
};
if anonymous_map_size > 0 {
let mut anonymous_map_options = root_vmar
.new_map(anonymous_map_size, perms)?
.can_overwrite(true);
anonymous_map_options = anonymous_map_options.offset(offset + segment_size);
anonymous_map_options.build()?;
}
Ok(())
}
fn parse_segment_perm(flags: xmas_elf::program::Flags) -> VmPerms {
let mut vm_perm = VmPerms::empty();
if flags.is_read() {
vm_perm |= VmPerms::READ;
}
if flags.is_write() {
vm_perm |= VmPerms::WRITE;
}
if flags.is_execute() {
vm_perm |= VmPerms::EXEC;
}
vm_perm
}
fn check_segment_align(program_header: &ProgramHeader64) -> Result<()> {
let align = program_header.align;
if align == 0 || align == 1 {
// no align requirement
return Ok(());
}
debug_assert!(align.is_power_of_two());
if !align.is_power_of_two() {
return_errno_with_message!(Errno::ENOEXEC, "segment align is invalid.");
}
debug_assert!(program_header.offset % align == program_header.virtual_addr % align);
if program_header.offset % align != program_header.virtual_addr % align {
return_errno_with_message!(Errno::ENOEXEC, "segment align is not satisfied.");
}
Ok(())
}
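// A worked example of the congruence check above (hypothetical values): a PT_LOAD
// segment with offset 0x1234, virtual_addr 0x40_1234 and align 0x1000 passes,
// since 0x1234 % 0x1000 == 0x40_1234 % 0x1000 == 0x234.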
pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr, ldso_base: Option<Vaddr>) -> Result<AuxVec> {
let mut aux_vec = AuxVec::new();
aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _)?;
let ph_addr = if elf.is_shared_object() {
elf.ph_addr()? + elf_map_addr
} else {
elf.ph_addr()?
};
aux_vec.set(AuxKey::AT_PHDR, ph_addr as u64)?;
aux_vec.set(AuxKey::AT_PHNUM, elf.ph_count() as u64)?;
aux_vec.set(AuxKey::AT_PHENT, elf.ph_ent() as u64)?;
let elf_entry = if elf.is_shared_object() {
let base_load_offset = elf.base_load_address_offset();
elf.entry_point() + elf_map_addr - base_load_offset as usize
} else {
elf.entry_point()
};
aux_vec.set(AuxKey::AT_ENTRY, elf_entry as u64)?;
if let Some(ldso_base) = ldso_base {
aux_vec.set(AuxKey::AT_BASE, ldso_base as u64)?;
}
Ok(aux_vec)
}
/// Maps the VDSO VMO to the corresponding virtual memory address.
fn map_vdso_to_vm(process_vm: &ProcessVm) -> Option<Vaddr> {
let root_vmar = process_vm.root_vmar();
let vdso_vmo = vdso_vmo()?;
let options = root_vmar
.new_map(VDSO_VMO_SIZE, VmPerms::empty())
.unwrap()
.vmo(vdso_vmo.dup().unwrap());
let vdso_data_base = options.build().unwrap();
let vdso_text_base = vdso_data_base + 0x4000;
let data_perms = VmPerms::READ | VmPerms::WRITE;
let text_perms = VmPerms::READ | VmPerms::EXEC;
root_vmar
.protect(data_perms, vdso_data_base..vdso_data_base + PAGE_SIZE)
.unwrap();
root_vmar
.protect(text_perms, vdso_text_base..vdso_text_base + PAGE_SIZE)
.unwrap();
Some(vdso_text_base)
}

@ -0,0 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
mod elf_file;
mod load_elf;
pub use load_elf::{load_elf_to_vm, ElfLoadInfo};

@ -0,0 +1,85 @@
// SPDX-License-Identifier: MPL-2.0
pub mod elf;
mod shebang;
use self::{
elf::{load_elf_to_vm, ElfLoadInfo},
shebang::parse_shebang_line,
};
use super::process_vm::ProcessVm;
use crate::{
fs::{
fs_resolver::{FsPath, FsResolver, AT_FDCWD},
path::Dentry,
},
prelude::*,
};
/// Loads an executable to the root vmar, including loading the program image,
/// preparing the heap and stack, and initializing the argv, envp and aux tables.
///
/// About `recursion_limit`: the recursion limit bounds the recursion depth of shebang executables.
/// If the interpreter (the program after `#!`) of a shebang executable is itself a shebang,
/// it triggers recursion, and we try to set up the root vmar for that interpreter as well.
/// For most cases, setting `recursion_limit` to 1 should be enough,
/// because the interpreter is usually an ELF binary (e.g., /bin/bash).
pub fn load_program_to_vm(
process_vm: &ProcessVm,
elf_file: Arc<Dentry>,
argv: Vec<CString>,
envp: Vec<CString>,
fs_resolver: &FsResolver,
recursion_limit: usize,
) -> Result<(String, ElfLoadInfo)> {
let abs_path = elf_file.abs_path();
let inode = elf_file.inode();
let file_header = {
// read the first page of file header
let mut file_header_buffer = Box::new([0u8; PAGE_SIZE]);
inode.read_bytes_at(0, &mut *file_header_buffer)?;
file_header_buffer
};
if let Some(mut new_argv) = parse_shebang_line(&*file_header)? {
if recursion_limit == 0 {
return_errno_with_message!(Errno::ELOOP, "the recursion limit is reached");
}
new_argv.extend_from_slice(&argv);
let interpreter = {
let filename = new_argv[0].to_str()?.to_string();
let fs_path = FsPath::new(AT_FDCWD, &filename)?;
fs_resolver.lookup(&fs_path)?
};
check_executable_file(&interpreter)?;
return load_program_to_vm(
process_vm,
interpreter,
new_argv,
envp,
fs_resolver,
recursion_limit - 1,
);
}
process_vm.clear_and_map();
let elf_load_info =
load_elf_to_vm(process_vm, &*file_header, elf_file, fs_resolver, argv, envp)?;
Ok((abs_path, elf_load_info))
}
pub fn check_executable_file(dentry: &Arc<Dentry>) -> Result<()> {
if dentry.type_().is_directory() {
return_errno_with_message!(Errno::EISDIR, "the file is a directory");
}
if !dentry.type_().is_regular_file() {
return_errno_with_message!(Errno::EACCES, "the dentry is not a regular file");
}
if !dentry.mode()?.is_executable() {
return_errno_with_message!(Errno::EACCES, "the dentry is not executable");
}
Ok(())
}

@ -0,0 +1,33 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
/// Tries to parse a buffer as a shebang line.
///
/// If the buffer starts with `#!` and its header is a valid shebang sequence,
/// then the function returns `Ok(Some(parts))`,
/// where `parts` is a `Vec` that contains the interpreter path and its arguments.
/// If the buffer starts with `#!` but some error occurs while parsing the file,
/// then `Err(_)` is returned.
/// If the buffer does not start with `#!`, then `Ok(None)` is returned.
pub fn parse_shebang_line(file_header_buffer: &[u8]) -> Result<Option<Vec<CString>>> {
if !file_header_buffer.starts_with(b"#!") || !file_header_buffer.contains(&b'\n') {
// the file is not a shebang
return Ok(None);
}
let first_line_len = file_header_buffer.iter().position(|&c| c == b'\n').unwrap();
// skip #!
let shebang_header = &file_header_buffer[2..first_line_len];
let mut shebang_argv = Vec::new();
for arg in shebang_header.split(|&c| c == b' ') {
let arg = CString::new(arg)?;
shebang_argv.push(arg);
}
if shebang_argv.len() != 1 {
return_errno_with_message!(
Errno::EINVAL,
"One and only one intpreter program should be specified"
);
}
Ok(Some(shebang_argv))
}
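// Hypothetical behavior of the parser above:
//
//     assert_eq!(parse_shebang_line(b"#!/bin/sh\n")?,
//                Some(vec![CString::new("/bin/sh")?]));
//     assert_eq!(parse_shebang_line(b"\x7fELF...\n")?, None); // not a shebang
//
// Note that with the current single-part check, a shebang line that carries
// interpreter arguments (e.g. b"#!/bin/sh -e\n") is rejected with EINVAL.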

@ -0,0 +1,89 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use super::process_vm::{INIT_STACK_SIZE, USER_HEAP_SIZE_LIMIT};
use crate::prelude::*;
pub struct ResourceLimits {
rlimits: [RLimit64; RLIMIT_COUNT],
}
impl ResourceLimits {
pub fn get_rlimit(&self, resource: ResourceType) -> &RLimit64 {
&self.rlimits[resource as usize]
}
pub fn get_rlimit_mut(&mut self, resource: ResourceType) -> &mut RLimit64 {
&mut self.rlimits[resource as usize]
}
}
impl Default for ResourceLimits {
fn default() -> Self {
let stack_size = RLimit64::new(INIT_STACK_SIZE as u64);
let heap_size = RLimit64::new(USER_HEAP_SIZE_LIMIT as u64);
let open_files = RLimit64::new(1024);
let mut rlimits = Self {
rlimits: [RLimit64::default(); RLIMIT_COUNT],
};
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_STACK) = stack_size;
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_DATA) = heap_size;
*rlimits.get_rlimit_mut(ResourceType::RLIMIT_NOFILE) = open_files;
rlimits
}
}
#[repr(u32)]
#[derive(Debug, Clone, Copy, TryFromInt)]
pub enum ResourceType {
RLIMIT_CPU = 0,
RLIMIT_FSIZE = 1,
RLIMIT_DATA = 2,
RLIMIT_STACK = 3,
RLIMIT_CORE = 4,
RLIMIT_RSS = 5,
RLIMIT_NPROC = 6,
RLIMIT_NOFILE = 7,
RLIMIT_MEMLOCK = 8,
RLIMIT_AS = 9,
RLIMIT_LOCKS = 10,
RLIMIT_SIGPENDING = 11,
RLIMIT_MSGQUEUE = 12,
RLIMIT_NICE = 13,
RLIMIT_RTPRIO = 14,
RLIMIT_RTTIME = 15,
}
pub const RLIMIT_COUNT: usize = 16;
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct RLimit64 {
cur: u64,
max: u64,
}
impl RLimit64 {
pub fn new(cur: u64) -> Self {
Self { cur, max: u64::MAX }
}
pub fn get_cur(&self) -> u64 {
self.cur
}
pub fn get_max(&self) -> u64 {
self.max
}
}
impl Default for RLimit64 {
fn default() -> Self {
Self {
cur: u64::MAX,
max: u64::MAX,
}
}
}
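// A hypothetical usage of the defaults above:
//
//     let limits = ResourceLimits::default();
//     assert_eq!(limits.get_rlimit(ResourceType::RLIMIT_NOFILE).get_cur(), 1024);
//     assert_eq!(limits.get_rlimit(ResourceType::RLIMIT_CPU).get_max(), u64::MAX);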

@ -0,0 +1,264 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(non_camel_case_types)]
use core::mem::{self, size_of};
use aster_util::{read_union_fields, union_read_ptr::UnionReadPtr};
use super::sig_num::SigNum;
use crate::{
arch::cpu::GpRegs,
prelude::*,
process::{Pid, Uid},
};
pub type sigset_t = u64;
// FIXME: this type should be put at suitable place
pub type clock_t = i64;
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct sigaction_t {
pub handler_ptr: Vaddr,
pub flags: u32,
pub restorer_ptr: Vaddr,
pub mask: sigset_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct siginfo_t {
pub si_signo: i32,
pub si_errno: i32,
pub si_code: i32,
_padding: i32,
/// `siginfo_fields` should be a union type (see the Occlum definition), but union
/// types have unsafe interfaces. Here we use a simple byte array.
siginfo_fields: siginfo_fields_t,
}
impl siginfo_t {
pub fn new(num: SigNum, code: i32) -> Self {
siginfo_t {
si_signo: num.as_u8() as i32,
si_errno: 0,
si_code: code,
_padding: 0,
siginfo_fields: siginfo_fields_t::zero_fields(),
}
}
pub fn set_si_addr(&mut self, si_addr: Vaddr) {
self.siginfo_fields.sigfault.addr = si_addr;
}
pub fn si_addr(&self) -> Vaddr {
// let siginfo = *self;
read_union_fields!(self.siginfo_fields.sigfault.addr)
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_fields_t {
bytes: [u8; 128 - mem::size_of::<i32>() * 4],
common: siginfo_common_t,
sigfault: siginfo_sigfault_t,
}
impl siginfo_fields_t {
fn zero_fields() -> Self {
Self {
bytes: [0; 128 - mem::size_of::<i32>() * 4],
}
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_t {
first: siginfo_common_first_t,
second: siginfo_common_second_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_first_t {
piduid: siginfo_piduid_t,
timer: siginfo_timer_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_piduid_t {
pid: Pid,
uid: Uid,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_timer_t {
timerid: i32,
overrun: i32,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_second_t {
value: sigval_t,
sigchild: siginfo_sigchild_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub union sigval_t {
sigval_int: i32,
sigval_ptr: Vaddr, //*mut c_void
}
impl sigval_t {
pub fn read_int(&self) -> i32 {
read_union_fields!(self.sigval_int)
}
pub fn read_ptr(&self) -> Vaddr {
read_union_fields!(self.sigval_ptr)
}
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigchild_t {
status: i32,
utime: clock_t,
stime: clock_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_sigfault_t {
addr: Vaddr, //*const c_void
addr_lsb: i16,
first: siginfo_sigfault_first_t,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigfault_first_t {
addr_bnd: siginfo_addr_bnd_t,
pkey: u32,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_addr_bnd_t {
lower: Vaddr, // *const c_void
upper: Vaddr, // *const c_void,
}
#[derive(Clone, Copy, Debug, Pod)]
#[repr(C)]
pub struct ucontext_t {
pub uc_flags: u64,
pub uc_link: Vaddr, // *mut ucontext_t
pub uc_stack: stack_t,
pub uc_mcontext: mcontext_t,
pub uc_sigmask: sigset_t,
pub fpregs: [u8; 64 * 8], //fxsave structure
}
impl Default for ucontext_t {
fn default() -> Self {
Self {
uc_flags: Default::default(),
uc_link: Default::default(),
uc_stack: Default::default(),
uc_mcontext: Default::default(),
uc_sigmask: Default::default(),
fpregs: [0u8; 64 * 8],
}
}
}
pub type stack_t = sigaltstack_t;
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct sigaltstack_t {
pub ss_sp: Vaddr, // *mut c_void
pub ss_flags: i32,
pub ss_size: usize,
}
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct mcontext_t {
pub inner: SignalCpuContext,
// TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2
_unused0: [u64; 5],
// TODO: this field should be `fpregs: fpregset_t,`
_unused1: usize,
_reserved: [u64; 8],
}
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct SignalCpuContext {
pub gp_regs: GpRegs,
pub fpregs_on_heap: u64,
pub fpregs: Vaddr, // *mut FpRegs,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct _sigev_thread {
pub function: Vaddr,
pub attribute: Vaddr,
}
const SIGEV_MAX_SIZE: usize = 64;
/// The total size of the fields `sigev_value`, `sigev_signo` and `sigev_notify`.
const SIGEV_PREAMBLE_SIZE: usize = size_of::<i32>() * 2 + size_of::<sigval_t>();
const SIGEV_PAD_SIZE: usize = (SIGEV_MAX_SIZE - SIGEV_PREAMBLE_SIZE) / size_of::<i32>();
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub union _sigev_un {
pub _pad: [i32; SIGEV_PAD_SIZE],
pub _tid: i32,
pub _sigev_thread: _sigev_thread,
}
impl _sigev_un {
pub fn read_tid(&self) -> i32 {
read_union_fields!(self._tid)
}
pub fn read_function(&self) -> Vaddr {
read_union_fields!(self._sigev_thread.function)
}
pub fn read_attribute(&self) -> Vaddr {
read_union_fields!(self._sigev_thread.attribute)
}
}
#[derive(Debug, Copy, Clone, TryFromInt, PartialEq)]
#[repr(i32)]
pub enum SigNotify {
SIGEV_SIGNAL = 0,
SIGEV_NONE = 1,
SIGEV_THREAD = 2,
SIGEV_THREAD_ID = 4,
}
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct sigevent_t {
pub sigev_value: sigval_t,
pub sigev_signo: i32,
pub sigev_notify: i32,
pub sigev_un: _sigev_un,
}

@ -0,0 +1,107 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
/// Standard signals
pub(super) const MIN_STD_SIG_NUM: u8 = 1;
pub(super) const MAX_STD_SIG_NUM: u8 = 31; // inclusive
/// Real-time signals
pub(super) const MIN_RT_SIG_NUM: u8 = 32;
pub(super) const MAX_RT_SIG_NUM: u8 = 64; // inclusive
/// The counts of standard, real-time, and all signals
pub(super) const COUNT_STD_SIGS: usize = 31;
pub(super) const COUNT_RT_SIGS: usize = 33;
pub(super) const COUNT_ALL_SIGS: usize = 64;
pub const SIG_DFL: usize = 0;
pub const SIG_IGN: usize = 1;
use super::sig_num::SigNum;
macro_rules! define_std_signums {
( $( $name: ident = $num: expr ),+, ) => {
$(
pub const $name : SigNum = SigNum::from_u8($num);
)*
}
}
define_std_signums! {
SIGHUP = 1, // Hangup detected on controlling terminal or death of controlling process
SIGINT = 2, // Interrupt from keyboard
SIGQUIT = 3, // Quit from keyboard
SIGILL = 4, // Illegal Instruction
SIGTRAP = 5, // Trace/breakpoint trap
SIGABRT = 6, // Abort signal from abort(3)
SIGBUS = 7, // Bus error (bad memory access)
SIGFPE = 8, // Floating-point exception
SIGKILL = 9, // Kill signal
SIGUSR1 = 10, // User-defined signal 1
SIGSEGV = 11, // Invalid memory reference
SIGUSR2 = 12, // User-defined signal 2
SIGPIPE = 13, // Broken pipe: write to pipe with no readers; see pipe(7)
SIGALRM = 14, // Timer signal from alarm(2)
SIGTERM = 15, // Termination signal
SIGSTKFLT = 16, // Stack fault on coprocessor (unused)
SIGCHLD = 17, // Child stopped or terminated
SIGCONT = 18, // Continue if stopped
SIGSTOP = 19, // Stop process
SIGTSTP = 20, // Stop typed at terminal
SIGTTIN = 21, // Terminal input for background process
SIGTTOU = 22, // Terminal output for background process
SIGURG = 23, // Urgent condition on socket (4.2BSD)
SIGXCPU = 24, // CPU time limit exceeded (4.2BSD); see setrlimit(2)
SIGXFSZ = 25, // File size limit exceeded (4.2BSD); see setrlimit(2)
SIGVTALRM = 26, // Virtual alarm clock (4.2BSD)
SIGPROF = 27, // Profiling timer expired
SIGWINCH = 28, // Window resize signal (4.3BSD, Sun)
SIGIO = 29, // I/O now possible (4.2BSD)
SIGPWR = 30, // Power failure (System V)
SIGSYS = 31, // Bad system call (SVr4); see also seccomp(2)
}
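For reference, each entry of the invocation above expands to a plain constant; the first one becomes:
```rust
pub const SIGHUP: SigNum = SigNum::from_u8(1);
```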
pub const SI_ASYNCNL: i32 = -60;
pub const SI_TKILL: i32 = -6;
pub const SI_SIGIO: i32 = -5;
pub const SI_ASYNCIO: i32 = -4;
pub const SI_MESGQ: i32 = -3;
pub const SI_TIMER: i32 = -2;
pub const SI_QUEUE: i32 = -1;
pub const SI_USER: i32 = 0;
pub const SI_KERNEL: i32 = 128;
pub const FPE_INTDIV: i32 = 1;
pub const FPE_INTOVF: i32 = 2;
pub const FPE_FLTDIV: i32 = 3;
pub const FPE_FLTOVF: i32 = 4;
pub const FPE_FLTUND: i32 = 5;
pub const FPE_FLTRES: i32 = 6;
pub const FPE_FLTINV: i32 = 7;
pub const FPE_FLTSUB: i32 = 8;
pub const ILL_ILLOPC: i32 = 1;
pub const ILL_ILLOPN: i32 = 2;
pub const ILL_ILLADR: i32 = 3;
pub const ILL_ILLTRP: i32 = 4;
pub const ILL_PRVOPC: i32 = 5;
pub const ILL_PRVREG: i32 = 6;
pub const ILL_COPROC: i32 = 7;
pub const ILL_BADSTK: i32 = 8;
pub const SEGV_MAPERR: i32 = 1;
pub const SEGV_ACCERR: i32 = 2;
pub const SEGV_BNDERR: i32 = 3;
pub const SEGV_PKUERR: i32 = 4;
pub const BUS_ADRALN: i32 = 1;
pub const BUS_ADRERR: i32 = 2;
pub const BUS_OBJERR: i32 = 3;
pub const BUS_MCEERR_AR: i32 = 4;
pub const BUS_MCEERR_AO: i32 = 5;
pub const CLD_EXITED: i32 = 1;
pub const CLD_KILLED: i32 = 2;
pub const CLD_DUMPED: i32 = 3;
pub const CLD_TRAPPED: i32 = 4;
pub const CLD_STOPPED: i32 = 5;
pub const CLD_CONTINUED: i32 = 6;

View File

@ -0,0 +1,33 @@
// SPDX-License-Identifier: MPL-2.0
use super::{sig_mask::SigMask, sig_num::SigNum};
use crate::{
events::{Events, EventsFilter},
prelude::*,
};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigEvents(SigNum);
impl SigEvents {
pub fn new(sig_num: SigNum) -> Self {
Self(sig_num)
}
}
impl Events for SigEvents {}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigEventsFilter(SigMask);
impl SigEventsFilter {
pub fn new(mask: SigMask) -> Self {
Self(mask)
}
}
impl EventsFilter<SigEvents> for SigEventsFilter {
fn filter(&self, event: &SigEvents) -> bool {
!self.0.contains(event.0)
}
}
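A short sketch of the filter semantics (the demo function is made up; `SIGINT` comes from the sibling `constants` module): a filter built from a mask rejects exactly the signals contained in that mask.
```rust
fn filter_demo() {
    let mut mask = SigMask::new_empty();
    mask += SIGINT; // block SIGINT
    let filter = SigEventsFilter::new(mask);
    // A masked signal does not pass the filter.
    assert!(!filter.filter(&SigEvents::new(SIGINT)));
}
```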

View File

@ -0,0 +1,275 @@
// SPDX-License-Identifier: MPL-2.0
pub mod c_types;
pub mod constants;
mod events;
mod pauser;
mod poll;
pub mod sig_action;
pub mod sig_disposition;
pub mod sig_mask;
pub mod sig_num;
pub mod sig_queues;
mod sig_stack;
pub mod signals;
use core::{mem, sync::atomic::Ordering};
use align_ext::AlignExt;
use c_types::{siginfo_t, ucontext_t};
pub use events::{SigEvents, SigEventsFilter};
use ostd::{cpu::UserContext, user::UserContextApi};
pub use pauser::Pauser;
pub use poll::{Pollable, Pollee, Poller};
use sig_action::{SigAction, SigActionFlags, SigDefaultAction};
use sig_mask::SigMask;
use sig_num::SigNum;
pub use sig_stack::{SigStack, SigStackFlags};
use super::posix_thread::{PosixThread, PosixThreadExt};
use crate::{
prelude::*,
process::{do_exit_group, TermStatus},
thread::{status::ThreadStatus, Thread},
};
pub trait SignalContext {
/// Set signal handler arguments
fn set_arguments(&mut self, sig_num: SigNum, siginfo_addr: usize, ucontext_addr: usize);
}
// TODO: The interface of this method is error-prone.
// The method takes the current thread as an argument for efficiency.
/// Handle pending signal for current process.
pub fn handle_pending_signal(
context: &mut UserContext,
current_thread: &Arc<Thread>,
) -> Result<()> {
// We first deal with signals in the current thread, then signals in the current process.
let posix_thread = current_thread.as_posix_thread().unwrap();
let signal = {
let sig_mask = posix_thread.sig_mask().load(Ordering::Relaxed);
if let Some(signal) = posix_thread.dequeue_signal(&sig_mask) {
signal
} else {
return Ok(());
}
};
let sig_num = signal.num();
trace!("sig_num = {:?}, sig_name = {}", sig_num, sig_num.sig_name());
let current = posix_thread.process();
let mut sig_dispositions = current.sig_dispositions().lock();
let sig_action = sig_dispositions.get(sig_num);
trace!("sig action: {:x?}", sig_action);
match sig_action {
SigAction::Ign => {
trace!("Ignore signal {:?}", sig_num);
}
SigAction::User {
handler_addr,
flags,
restorer_addr,
mask,
} => {
if flags.contains(SigActionFlags::SA_RESETHAND) {
// In Linux, SA_RESETHAND corresponds to SA_ONESHOT,
// which means the user handler will be executed only once and then reset to the default.
// Refer to https://elixir.bootlin.com/linux/v6.0.9/source/kernel/signal.c#L2761.
sig_dispositions.set_default(sig_num);
}
drop(sig_dispositions);
handle_user_signal(
posix_thread,
sig_num,
handler_addr,
flags,
restorer_addr,
mask,
context,
signal.to_info(),
)?
}
SigAction::Dfl => {
drop(sig_dispositions);
let sig_default_action = SigDefaultAction::from_signum(sig_num);
trace!("sig_default_action: {:?}", sig_default_action);
match sig_default_action {
SigDefaultAction::Core | SigDefaultAction::Term => {
warn!(
"{:?}: terminating on signal {}",
current.executable_path(),
sig_num.sig_name()
);
// We should exit the current process here, since we cannot restore a valid status from the trap now.
do_exit_group(TermStatus::Killed(sig_num));
}
SigDefaultAction::Ign => {}
SigDefaultAction::Stop => {
let _ = current_thread.atomic_status().compare_exchange(
ThreadStatus::Running,
ThreadStatus::Stopped,
Ordering::AcqRel,
Ordering::Relaxed,
);
}
SigDefaultAction::Cont => {
let _ = current_thread.atomic_status().compare_exchange(
ThreadStatus::Stopped,
ThreadStatus::Running,
Ordering::AcqRel,
Ordering::Relaxed,
);
}
}
}
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn handle_user_signal(
posix_thread: &PosixThread,
sig_num: SigNum,
handler_addr: Vaddr,
flags: SigActionFlags,
restorer_addr: Vaddr,
mut mask: SigMask,
context: &mut UserContext,
sig_info: siginfo_t,
) -> Result<()> {
debug!("sig_num = {:?}, signame = {}", sig_num, sig_num.sig_name());
debug!("handler_addr = 0x{:x}", handler_addr);
debug!("flags = {:?}", flags);
debug!("restorer_addr = 0x{:x}", restorer_addr);
// FIXME: How to respect flags?
if flags.contains_unsupported_flag() {
warn!("Unsupported Signal flags: {:?}", flags);
}
if !flags.contains(SigActionFlags::SA_NODEFER) {
// add current signal to mask
mask += sig_num;
}
// block signals in sigmask when running signal handler
let old_mask = posix_thread.sig_mask().load(Ordering::Relaxed);
posix_thread
.sig_mask()
.store(old_mask + mask, Ordering::Relaxed);
// Set up signal stack.
let mut stack_pointer = if let Some(sp) = use_alternate_signal_stack(posix_thread) {
sp as u64
} else {
// just use user stack
context.stack_pointer() as u64
};
// To avoid corrupting the signal stack, we first subtract 128 bytes (skipping the red zone).
stack_pointer -= 128;
let user_space = CurrentUserSpace::get();
// 1. write siginfo_t
stack_pointer -= mem::size_of::<siginfo_t>() as u64;
user_space.write_val(stack_pointer as _, &sig_info)?;
let siginfo_addr = stack_pointer;
// 2. write ucontext_t.
stack_pointer = alloc_aligned_in_user_stack(stack_pointer, mem::size_of::<ucontext_t>(), 16)?;
let mut ucontext = ucontext_t {
uc_sigmask: mask.into(),
..Default::default()
};
ucontext
.uc_mcontext
.inner
.gp_regs
.copy_from_raw(context.general_regs());
let mut sig_context = posix_thread.sig_context().lock();
if let Some(sig_context_addr) = *sig_context {
ucontext.uc_link = sig_context_addr;
} else {
ucontext.uc_link = 0;
}
// TODO: store fp regs in ucontext
user_space.write_val(stack_pointer as _, &ucontext)?;
let ucontext_addr = stack_pointer;
// Store the ucontext addr in sig context of current thread.
*sig_context = Some(ucontext_addr as Vaddr);
// 3. Set the address of the trampoline code.
if flags.contains(SigActionFlags::SA_RESTORER) {
// If contains SA_RESTORER flag, trampoline code is provided by libc in restorer_addr.
// We just store restorer_addr on the user stack so that, when the handler returns, control transfers to the trampoline code.
stack_pointer = write_u64_to_user_stack(stack_pointer, restorer_addr as u64)?;
trace!("After set restorer addr: user_rsp = 0x{:x}", stack_pointer);
} else {
// Otherwise we create a trampoline.
// FIXME: This may cause problems if we read old_context from rsp.
const TRAMPOLINE: &[u8] = &[
0xb8, 0x0f, 0x00, 0x00, 0x00, // mov eax, 15(syscall number of rt_sigreturn)
0x0f, 0x05, // syscall (call rt_sigreturn)
0x90, // nop (for alignment)
];
stack_pointer -= TRAMPOLINE.len() as u64;
let trampoline_rip = stack_pointer;
user_space.write_bytes(stack_pointer as Vaddr, &mut VmReader::from(TRAMPOLINE))?;
stack_pointer = write_u64_to_user_stack(stack_pointer, trampoline_rip)?;
}
// 4. Set correct register values
context.set_instruction_pointer(handler_addr as _);
context.set_stack_pointer(stack_pointer as usize);
// parameters of signal handler
if flags.contains(SigActionFlags::SA_SIGINFO) {
context.set_arguments(sig_num, siginfo_addr as usize, ucontext_addr as usize);
} else {
context.set_arguments(sig_num, 0, 0);
}
Ok(())
}
/// Uses an alternate signal stack, which was installed by sigaltstack.
/// If the stack is already active, we just increase the handler counter and return None, since
/// the stack pointer can be read from the context.
/// If the stack is not used by any handler, we will return the new sp in the alternate signal stack.
fn use_alternate_signal_stack(posix_thread: &PosixThread) -> Option<usize> {
let mut sig_stack = posix_thread.sig_stack().lock();
let sig_stack = (*sig_stack).as_mut()?;
if sig_stack.is_disabled() {
return None;
}
if sig_stack.is_active() {
// The stack is already active, so we just use sp in context.
sig_stack.increase_handler_counter();
return None;
}
sig_stack.increase_handler_counter();
// Align sp to 16 bytes. FIXME: is this required?
let stack_pointer = (sig_stack.base() + sig_stack.size()).align_down(16);
Some(stack_pointer)
}
fn write_u64_to_user_stack(rsp: u64, value: u64) -> Result<u64> {
let rsp = rsp - 8;
CurrentUserSpace::get().write_val(rsp as Vaddr, &value)?;
Ok(rsp)
}
/// Allocates `size` bytes of memory on the user stack; the returned address respects the `align` argument.
fn alloc_aligned_in_user_stack(rsp: u64, size: usize, align: usize) -> Result<u64> {
if !align.is_power_of_two() {
return_errno_with_message!(Errno::EINVAL, "align must be power of two");
}
let start = (rsp - size as u64).align_down(align as u64);
Ok(start)
}
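To illustrate the arithmetic with made-up numbers (the demo function is hypothetical): reserving 24 bytes below `rsp = 0x7fff_f000` with 16-byte alignment first subtracts the size, then rounds down to the alignment boundary.
```rust
fn align_demo() -> Result<()> {
    let rsp: u64 = 0x7fff_f000;
    let start = alloc_aligned_in_user_stack(rsp, 24, 16)?;
    // 0x7fff_f000 - 24 = 0x7fff_efe8; aligned down to 16 => 0x7fff_efe0.
    assert_eq!(start, 0x7fff_efe0);
    Ok(())
}
```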

View File

@ -0,0 +1,297 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(unused_variables)]
use core::{
sync::atomic::{AtomicBool, Ordering},
time::Duration,
};
use ostd::sync::WaitQueue;
use super::{sig_mask::SigMask, SigEvents, SigEventsFilter};
use crate::{
events::Observer,
prelude::*,
process::posix_thread::{PosixThread, PosixThreadExt},
thread::Thread,
time::wait::WaitTimeout,
};
/// A `Pauser` allows pausing the execution of the current thread until certain conditions are reached.
///
/// Behind the scene, `Pauser` is implemented with [`Waiter`] and [`WaitQueue`].
/// But unlike its [`Waiter`] relatives, `Pauser` is aware of POSIX signals:
/// if a thread paused by a `Pauser` receives a signal, then the thread will resume its execution.
///
/// Another key difference is that `Pauser` combines the two roles of [`Waiter`] and [`WaitQueue`]
/// into one. Both putting the current thread to sleep and waking it up can be done through the
/// same `Pauser` object, using its `pause`- and `resume`-family methods.
///
/// [`Waiter`]: ostd::sync::Waiter
///
/// # Example
///
/// Here is how the current thread can be put to sleep with a `Pauser`.
///
/// ```no_run
/// let pauser = Pauser::new_with_mask(SigMask::new_full());
/// // Pause the execution of the current thread until a user-given condition is met
/// // or the current thread is interrupted by a signal.
/// let res = pauser.pause_until(|| {
/// if cond() {
/// Some(())
/// } else {
/// None
/// }
/// });
/// match res {
/// Ok(_) => {
/// // The user-given condition is met...
/// }
/// Err(err) if err.error() == Errno::EINTR => {
/// // A signal is received...
/// }
/// _ => unreachable!()
/// }
/// ```
///
/// Let's assume that another thread has access to the same object of `Arc<Pauser>`.
/// Then, this second thread can resume the execution of the first thread
/// even when `cond()` does not return `true`.
///
/// ```no_run
/// pauser.resume_all();
/// ```
pub struct Pauser {
wait_queue: WaitQueue,
sig_mask: SigMask,
}
impl Pauser {
/// Creates a new `Pauser`.
///
/// The `Pauser` can be interrupted by all signals
/// except those blocked by the current thread.
pub fn new() -> Arc<Self> {
Self::new_with_mask(SigMask::new_empty())
}
/// Creates a new `Pauser` with specified `sig_mask`.
///
/// The `Pauser` will ignore signals that are in `sig_mask`
/// or blocked by the current thread.
pub fn new_with_mask(sig_mask: SigMask) -> Arc<Self> {
let wait_queue = WaitQueue::new();
Arc::new(Self {
wait_queue,
sig_mask,
})
}
/// Pauses the execution of the current thread until the `cond` is met (i.e., `cond()`
/// returns `Some(_)`), or some signal is received by the current thread or process.
///
/// # Errors
///
/// If some signal is received before `cond` is met, this method returns `Err(EINTR)`.
pub fn pause_until<F, R>(self: &Arc<Self>, cond: F) -> Result<R>
where
F: FnMut() -> Option<R>,
{
self.do_pause(cond, None)
}
/// Pauses the execution of the current thread until the `cond` is met (i.e., `cond()` returns
/// `Some(_)`), or some signal is received by the current thread or process, or the given
/// `timeout` expires.
///
/// # Errors
///
/// If `timeout` expires before the `cond` is met or some signal is received,
/// this method returns [`ETIME`].
///
/// [`ETIME`]: crate::error::Errno::ETIME
pub fn pause_until_or_timeout<F, R>(self: &Arc<Self>, cond: F, timeout: &Duration) -> Result<R>
where
F: FnMut() -> Option<R>,
{
self.do_pause(cond, Some(timeout))
}
fn do_pause<F, R>(self: &Arc<Self>, mut cond: F, timeout: Option<&Duration>) -> Result<R>
where
F: FnMut() -> Option<R>,
{
let current_thread = Thread::current();
let sig_queue_waiter =
SigObserverRegistrar::new(current_thread.as_ref(), self.sig_mask, self.clone());
let cond = || {
if let Some(res) = cond() {
return Some(Ok(res));
}
if sig_queue_waiter.is_interrupted() {
return Some(Err(Error::with_message(
Errno::EINTR,
"the current thread is interrupted by a signal",
)));
}
None
};
if let Some(timeout) = timeout {
self.wait_queue
.wait_until_or_timeout(cond, timeout)
.ok_or_else(|| Error::with_message(Errno::ETIME, "the time limit is reached"))?
} else {
self.wait_queue.wait_until(cond)
}
}
/// Resumes all paused threads on this pauser.
pub fn resume_all(&self) {
self.wait_queue.wake_all();
}
/// Resumes one paused thread on this pauser.
pub fn resume_one(&self) {
self.wait_queue.wake_one();
}
}
enum SigObserverRegistrar<'a> {
// A POSIX thread may be interrupted by a signal if the signal is not masked.
PosixThread {
thread: &'a PosixThread,
old_mask: SigMask,
observer: Arc<SigQueueObserver>,
},
// A kernel thread ignores all signals. It is not necessary to wait for them.
KernelThread,
}
impl<'a> SigObserverRegistrar<'a> {
fn new(
current_thread: Option<&'a Arc<Thread>>,
sig_mask: SigMask,
pauser: Arc<Pauser>,
) -> Self {
let Some(thread) = current_thread.and_then(|thread| thread.as_posix_thread()) else {
return Self::KernelThread;
};
// Block `sig_mask`.
let (old_mask, filter) = {
let old_mask = thread.sig_mask().load(Ordering::Relaxed);
let new_mask = old_mask + sig_mask;
thread.sig_mask().store(new_mask, Ordering::Relaxed);
(old_mask, SigEventsFilter::new(new_mask))
};
// Register `SigQueueObserver`.
let observer = SigQueueObserver::new(pauser);
thread.register_sigqueue_observer(Arc::downgrade(&observer) as _, filter);
// Check pending signals after registering the observer to avoid race conditions.
if thread.has_pending() {
observer.set_interrupted();
}
Self::PosixThread {
thread,
old_mask,
observer,
}
}
fn is_interrupted(&self) -> bool {
match self {
Self::PosixThread { observer, .. } => observer.is_interrupted(),
Self::KernelThread => false,
}
}
}
impl<'a> Drop for SigObserverRegistrar<'a> {
fn drop(&mut self) {
let Self::PosixThread {
thread,
old_mask,
observer,
} = self
else {
return;
};
// Restore the state, assuming no one else can modify the current thread's signal mask
// during the pause.
thread.unregiser_sigqueue_observer(&(Arc::downgrade(observer) as _));
thread.sig_mask().store(*old_mask, Ordering::Relaxed);
}
}
struct SigQueueObserver {
is_interrupted: AtomicBool,
pauser: Arc<Pauser>,
}
impl SigQueueObserver {
fn new(pauser: Arc<Pauser>) -> Arc<Self> {
Arc::new(Self {
is_interrupted: AtomicBool::new(false),
pauser,
})
}
fn is_interrupted(&self) -> bool {
self.is_interrupted.load(Ordering::Acquire)
}
fn set_interrupted(&self) {
self.is_interrupted.store(true, Ordering::Release);
}
}
impl Observer<SigEvents> for SigQueueObserver {
fn on_events(&self, _: &SigEvents) {
self.set_interrupted();
self.pauser.wait_queue.wake_all();
}
}
#[cfg(ktest)]
mod test {
use ostd::prelude::*;
use super::*;
use crate::thread::{
kernel_thread::{KernelThreadExt, ThreadOptions},
Thread,
};
#[ktest]
fn test_pauser() {
let pauser = Pauser::new();
let pauser_cloned = pauser.clone();
let boolean = Arc::new(AtomicBool::new(false));
let boolean_cloned = boolean.clone();
let thread = Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
boolean_cloned.store(true, Ordering::Relaxed);
pauser_cloned.resume_all();
}));
pauser
.pause_until(|| boolean.load(Ordering::Relaxed).then_some(()))
.unwrap();
thread.join();
}
}

View File

@ -0,0 +1,282 @@
// SPDX-License-Identifier: MPL-2.0
use core::{
sync::atomic::{AtomicU32, AtomicUsize, Ordering},
time::Duration,
};
use crate::{
events::{IoEvents, Observer, Subject},
prelude::*,
process::signal::Pauser,
};
/// A pollee maintains a set of active events, which can be polled with
/// pollers or be monitored with observers.
#[derive(Clone)]
pub struct Pollee {
inner: Arc<PolleeInner>,
}
struct PolleeInner {
// A subject which is monitored with pollers.
subject: Subject<IoEvents, IoEvents>,
// For efficient manipulation, we use AtomicU32 instead of RwLock<IoEvents>.
events: AtomicU32,
}
impl Pollee {
/// Creates a new instance of pollee.
pub fn new(init_events: IoEvents) -> Self {
let inner = PolleeInner {
subject: Subject::new(),
events: AtomicU32::new(init_events.bits()),
};
Self {
inner: Arc::new(inner),
}
}
/// Returns the current events of the pollee given an event mask.
///
/// If no interesting events are polled and a poller is provided, then
/// the poller will start monitoring the pollee and receive event
/// notification once the pollee gets any interesting events.
///
/// This operation is _atomic_ in the sense that either some interesting
/// events are returned or the poller is registered (if a poller is provided).
pub fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents {
let mask = mask | IoEvents::ALWAYS_POLL;
// Fast path: return events immediately
let revents = self.events() & mask;
if !revents.is_empty() || poller.is_none() {
return revents;
}
// Register the provided poller.
self.register_poller(poller.unwrap(), mask);
// It is important to check events again to handle race conditions
self.events() & mask
}
fn register_poller(&self, poller: &mut Poller, mask: IoEvents) {
self.inner
.subject
.register_observer(poller.observer(), mask);
poller.pollees.push(Arc::downgrade(&self.inner));
}
/// Register an IoEvents observer.
///
/// A registered observer will get notified (through its `on_events` method)
/// every time new events specified by the `mask` argument happen on the
/// pollee (through the `add_events` method).
///
/// If the given observer has already been registered, then its registered
/// event mask will be updated.
///
/// Note that the observer will always get notified of the events in
/// `IoEvents::ALWAYS_POLL` regardless of the value of `mask`.
pub fn register_observer(&self, observer: Weak<dyn Observer<IoEvents>>, mask: IoEvents) {
let mask = mask | IoEvents::ALWAYS_POLL;
self.inner.subject.register_observer(observer, mask);
}
/// Unregister an IoEvents observer.
///
/// If such an observer is found, then the registered observer will be
/// removed from the pollee and returned as the return value. Otherwise,
/// a `None` will be returned.
pub fn unregister_observer(
&self,
observer: &Weak<dyn Observer<IoEvents>>,
) -> Option<Weak<dyn Observer<IoEvents>>> {
self.inner.subject.unregister_observer(observer)
}
/// Add some events to the pollee's state.
///
/// This method wakes up all registered pollers that are interested in
/// the added events.
pub fn add_events(&self, events: IoEvents) {
self.inner.events.fetch_or(events.bits(), Ordering::Release);
self.inner.subject.notify_observers(&events);
}
/// Remove some events from the pollee's state.
///
/// This method will not wake up registered pollers even when
/// the pollee still has some interesting events to the pollers.
pub fn del_events(&self, events: IoEvents) {
self.inner
.events
.fetch_and(!events.bits(), Ordering::Release);
}
/// Reset the pollee's state.
///
/// Reset means removing all events on the pollee.
pub fn reset_events(&self) {
self.inner
.events
.fetch_and(!IoEvents::all().bits(), Ordering::Release);
}
fn events(&self) -> IoEvents {
let event_bits = self.inner.events.load(Ordering::Acquire);
IoEvents::from_bits(event_bits).unwrap()
}
}
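A minimal usage sketch (assuming an `IoEvents::IN` flag, as in epoll-style event sets; the demo function is made up): events added to a `Pollee` show up in a subsequent `poll`, and no poller needs to be registered when the fast path already finds something.
```rust
fn pollee_demo() {
    let pollee = Pollee::new(IoEvents::empty());
    // Nothing interesting yet; without a poller this returns immediately.
    assert!(pollee.poll(IoEvents::IN, None).is_empty());
    // A producer marks the pollee readable...
    pollee.add_events(IoEvents::IN);
    // ...and a consumer now observes the event.
    assert!(pollee.poll(IoEvents::IN, None).contains(IoEvents::IN));
}
```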
/// A poller gets notified when its associated pollees have interesting events.
pub struct Poller {
// Use event counter to wait or wake up a poller
event_counter: Arc<EventCounter>,
// All pollees that are interesting to this poller
pollees: Vec<Weak<PolleeInner>>,
}
impl Default for Poller {
fn default() -> Self {
Self::new()
}
}
impl Poller {
/// Constructs a new `Poller`.
pub fn new() -> Self {
Self {
event_counter: Arc::new(EventCounter::new()),
pollees: Vec::new(),
}
}
/// Waits until any interesting events have happened since the last `wait`. The wait
/// can be interrupted by a signal.
pub fn wait(&self) -> Result<()> {
self.event_counter.read(None)?;
Ok(())
}
/// Waits until any interesting events have happened since the last `wait` or the given timeout
/// expires. This method can be interrupted by a signal.
pub fn wait_timeout(&self, timeout: &Duration) -> Result<()> {
self.event_counter.read(Some(timeout))?;
Ok(())
}
fn observer(&self) -> Weak<dyn Observer<IoEvents>> {
Arc::downgrade(&self.event_counter) as _
}
}
impl Drop for Poller {
fn drop(&mut self) {
let observer = self.observer();
self.pollees
.iter()
.filter_map(Weak::upgrade)
.for_each(|pollee| {
pollee.subject.unregister_observer(&observer);
});
}
}
/// A counter for wait and wakeup.
struct EventCounter {
counter: AtomicUsize,
pauser: Arc<Pauser>,
}
impl EventCounter {
pub fn new() -> Self {
let pauser = Pauser::new();
Self {
counter: AtomicUsize::new(0),
pauser,
}
}
pub fn read(&self, timeout: Option<&Duration>) -> Result<usize> {
let cond = || {
let val = self.counter.swap(0, Ordering::Relaxed);
if val > 0 {
Some(val)
} else {
None
}
};
if let Some(timeout) = timeout {
self.pauser.pause_until_or_timeout(cond, timeout)
} else {
self.pauser.pause_until(cond)
}
}
pub fn write(&self) {
self.counter.fetch_add(1, Ordering::Relaxed);
self.pauser.resume_one();
}
}
impl Observer<IoEvents> for EventCounter {
fn on_events(&self, _events: &IoEvents) {
self.write();
}
}
/// The `Pollable` trait allows for waiting for events and performing event-based operations.
///
/// Implementors are required to provide a method, [`Pollable::poll`], which is usually implemented
/// by simply calling [`Pollee::poll`] on the internal [`Pollee`]. This trait provides another
/// method, [`Pollable::wait_events`], to allow waiting for events and performing operations
/// according to the events.
///
/// This trait is added instead of creating a new method in [`Pollee`] because sometimes we do not
/// have access to the internal [`Pollee`], but there is a method that provides the same semantics
/// as [`Pollee::poll`] and we need to perform event-based operations using that method.
pub trait Pollable {
/// Returns the interesting events if there are any, or waits for them to happen if there are
/// none.
///
/// This method has the same semantics as [`Pollee::poll`].
fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents;
/// Waits for events and performs event-based operations.
///
/// If a call to `cond()` succeeds or fails with an error code other than `EAGAIN`, the method
/// will return whatever the call to `cond()` returns. Otherwise, the method will wait for some
/// interesting events specified in `mask` to happen and try again.
///
/// The user must ensure that a call to `cond()` does not fail with `EAGAIN` when the
/// interesting events occur. However, it is allowed to have spurious `EAGAIN` failures due to
/// race conditions where the events are consumed by another thread.
fn wait_events<F, R>(&self, mask: IoEvents, mut cond: F) -> Result<R>
where
Self: Sized,
F: FnMut() -> Result<R>,
{
let mut poller = Poller::new();
loop {
match cond() {
Err(err) if err.error() == Errno::EAGAIN => (),
result => return result,
};
let events = self.poll(mask, Some(&mut poller));
if !events.is_empty() {
continue;
}
// TODO: Support timeout
poller.wait()?;
}
}
}
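As a sketch of a typical implementor (the `Channel` type and its `try_read` are hypothetical, and `IoEvents::IN` is assumed): `poll` forwards to an internal `Pollee`, and `wait_events` then turns a non-blocking operation into a blocking one.
```rust
// A hypothetical pollable byte source; only the forwarding pattern matters.
struct Channel {
    pollee: Pollee,
    // ... buffer, etc. ...
}

impl Pollable for Channel {
    fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents {
        self.pollee.poll(mask, poller)
    }
}

impl Channel {
    /// Blocking read built on a non-blocking `try_read` that fails
    /// with `EAGAIN` when no data is available.
    fn read(&self, buf: &mut [u8]) -> Result<usize> {
        self.wait_events(IoEvents::IN, || self.try_read(buf))
    }

    fn try_read(&self, _buf: &mut [u8]) -> Result<usize> {
        // Stub for the sketch: a real channel would copy buffered bytes.
        return_errno_with_message!(Errno::EAGAIN, "no data yet")
    }
}
```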

View File

@ -0,0 +1,149 @@
// SPDX-License-Identifier: MPL-2.0
use bitflags::bitflags;
use super::{c_types::sigaction_t, constants::*, sig_mask::SigMask, sig_num::SigNum};
use crate::prelude::*;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub enum SigAction {
#[default]
Dfl, // Default action
Ign, // Ignore this signal
User {
// User-given handler
handler_addr: usize,
flags: SigActionFlags,
restorer_addr: usize,
mask: SigMask,
},
}
impl TryFrom<sigaction_t> for SigAction {
type Error = Error;
fn try_from(input: sigaction_t) -> Result<Self> {
let action = match input.handler_ptr {
SIG_DFL => SigAction::Dfl,
SIG_IGN => SigAction::Ign,
_ => {
let flags = SigActionFlags::from_bits_truncate(input.flags);
let mask = input.mask.into();
SigAction::User {
handler_addr: input.handler_ptr,
flags,
restorer_addr: input.restorer_ptr,
mask,
}
}
};
Ok(action)
}
}
impl SigAction {
pub fn as_c_type(&self) -> sigaction_t {
match self {
SigAction::Dfl => sigaction_t {
handler_ptr: SIG_DFL,
flags: 0,
restorer_ptr: 0,
mask: 0,
},
SigAction::Ign => sigaction_t {
handler_ptr: SIG_IGN,
flags: 0,
restorer_ptr: 0,
mask: 0,
},
SigAction::User {
handler_addr,
flags,
restorer_addr,
mask,
} => sigaction_t {
handler_ptr: *handler_addr,
flags: flags.as_u32(),
restorer_ptr: *restorer_addr,
mask: (*mask).into(),
},
}
}
}
bitflags! {
pub struct SigActionFlags: u32 {
const SA_NOCLDSTOP = 1;
const SA_NOCLDWAIT = 2;
const SA_SIGINFO = 4;
const SA_ONSTACK = 0x08000000;
const SA_RESTART = 0x10000000;
const SA_NODEFER = 0x40000000;
const SA_RESETHAND = 0x80000000;
const SA_RESTORER = 0x04000000;
}
}
impl TryFrom<u32> for SigActionFlags {
type Error = Error;
fn try_from(bits: u32) -> Result<Self> {
let flags = SigActionFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid sig action flag"))?;
if flags.contains(SigActionFlags::SA_RESTART) {
warn!("SA_RESTART is not supported");
}
Ok(flags)
}
}
impl SigActionFlags {
pub fn as_u32(&self) -> u32 {
self.bits()
}
pub fn contains_unsupported_flag(&self) -> bool {
self.intersects(SigActionFlags::SA_NOCLDSTOP | SigActionFlags::SA_NOCLDWAIT)
}
}
/// The default action to signals
#[derive(Debug, Copy, Clone)]
pub enum SigDefaultAction {
Term, // Default action is to terminate the process.
Ign, // Default action is to ignore the signal.
Core, // Default action is to terminate the process and dump core (see core(5)).
Stop, // Default action is to stop the process.
Cont, // Default action is to continue the process if it is currently stopped.
}
impl SigDefaultAction {
pub fn from_signum(num: SigNum) -> SigDefaultAction {
match num {
SIGABRT | // = SIGIOT
SIGBUS |
SIGFPE |
SIGILL |
SIGQUIT |
SIGSEGV |
SIGSYS | // = SIGUNUSED
SIGTRAP |
SIGXCPU |
SIGXFSZ
=> SigDefaultAction::Core,
SIGCHLD |
SIGURG |
SIGWINCH
=> SigDefaultAction::Ign,
SIGCONT
=> SigDefaultAction::Cont,
SIGSTOP |
SIGTSTP |
SIGTTIN |
SIGTTOU
=> SigDefaultAction::Stop,
_
=> SigDefaultAction::Term,
}
}
}

View File

@ -0,0 +1,54 @@
// SPDX-License-Identifier: MPL-2.0
use super::{constants::*, sig_action::SigAction, sig_num::SigNum};
#[derive(Copy, Clone)]
pub struct SigDispositions {
// SigNum -> SigAction
map: [SigAction; COUNT_ALL_SIGS],
}
impl Default for SigDispositions {
fn default() -> Self {
Self::new()
}
}
impl SigDispositions {
pub fn new() -> Self {
Self {
map: [SigAction::default(); COUNT_ALL_SIGS],
}
}
pub fn get(&self, num: SigNum) -> SigAction {
let idx = Self::num_to_idx(num);
self.map[idx]
}
pub fn set(&mut self, num: SigNum, sa: SigAction) {
let idx = Self::num_to_idx(num);
self.map[idx] = sa;
}
pub fn set_default(&mut self, num: SigNum) {
let idx = Self::num_to_idx(num);
self.map[idx] = SigAction::Dfl;
}
/// According to man 7 signal:
/// upon execve, handled signals are reset to the default, while the dispositions of
/// ignored signals are left unchanged.
/// This function should be called upon execve.
pub fn inherit(&mut self) {
for sigaction in &mut self.map {
if let SigAction::User { .. } = sigaction {
*sigaction = SigAction::Dfl;
}
}
}
fn num_to_idx(num: SigNum) -> usize {
(num.as_u8() - MIN_STD_SIG_NUM) as usize
}
}
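A quick sketch of the execve semantics described above (constants and types from the sibling modules; the handler address is made up): user handlers fall back to the default, while an explicit `Ign` survives.
```rust
fn execve_disposition_demo() {
    let mut dispositions = SigDispositions::new();
    dispositions.set(SIGINT, SigAction::Ign);
    dispositions.set(
        SIGTERM,
        SigAction::User {
            handler_addr: 0x1000, // hypothetical handler address
            flags: SigActionFlags::empty(),
            restorer_addr: 0,
            mask: SigMask::new_empty(),
        },
    );
    dispositions.inherit();
    assert_eq!(dispositions.get(SIGINT), SigAction::Ign); // unchanged
    assert_eq!(dispositions.get(SIGTERM), SigAction::Dfl); // reset
}
```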

View File

@ -0,0 +1,225 @@
// SPDX-License-Identifier: MPL-2.0
//! Signal sets and atomic masks.
//!
//! A signal set is a bit-set of signals. A signal mask is a set of signals
//! that are blocked from delivery to a thread. An atomic signal mask
//! implementation is provided for shared access to signal masks.
use core::{
fmt::LowerHex,
ops,
sync::atomic::{AtomicU64, Ordering},
};
use super::{constants::MIN_STD_SIG_NUM, sig_num::SigNum};
use crate::prelude::*;
/// A signal mask.
///
/// This is an alias of [`SigSet`]. All the signals in the set are blocked
/// from delivery to a thread.
pub type SigMask = SigSet;
/// A bit-set of signals.
///
/// Because all signal numbers are in the range of 1 to 64, converting
/// a `u64` to a `SigSet` always succeeds.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Pod)]
#[repr(C)]
pub struct SigSet {
bits: u64,
}
impl From<SigNum> for SigSet {
fn from(signum: SigNum) -> Self {
let idx = signum.as_u8() - MIN_STD_SIG_NUM;
Self { bits: 1_u64 << idx }
}
}
impl From<u64> for SigSet {
fn from(bits: u64) -> Self {
SigSet { bits }
}
}
impl From<SigSet> for u64 {
fn from(set: SigSet) -> u64 {
set.bits
}
}
impl<T: Into<SigSet>> ops::BitAnd<T> for SigSet {
type Output = Self;
fn bitand(self, rhs: T) -> Self {
SigSet {
bits: self.bits & rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::BitAndAssign<T> for SigSet {
fn bitand_assign(&mut self, rhs: T) {
self.bits &= rhs.into().bits;
}
}
impl<T: Into<SigSet>> ops::BitOr<T> for SigSet {
type Output = Self;
fn bitor(self, rhs: T) -> Self {
SigSet {
bits: self.bits | rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::BitOrAssign<T> for SigSet {
fn bitor_assign(&mut self, rhs: T) {
self.bits |= rhs.into().bits;
}
}
#[allow(clippy::suspicious_arithmetic_impl)]
impl<T: Into<SigSet>> ops::Add<T> for SigSet {
type Output = Self;
fn add(self, rhs: T) -> Self {
SigSet {
bits: self.bits | rhs.into().bits,
}
}
}
#[allow(clippy::suspicious_op_assign_impl)]
impl<T: Into<SigSet>> ops::AddAssign<T> for SigSet {
fn add_assign(&mut self, rhs: T) {
self.bits |= rhs.into().bits;
}
}
impl<T: Into<SigSet>> ops::Sub<T> for SigSet {
type Output = Self;
fn sub(self, rhs: T) -> Self {
SigSet {
bits: self.bits & !rhs.into().bits,
}
}
}
impl<T: Into<SigSet>> ops::SubAssign<T> for SigSet {
fn sub_assign(&mut self, rhs: T) {
self.bits &= !rhs.into().bits;
}
}
impl SigSet {
pub fn new_empty() -> Self {
SigSet { bits: 0 }
}
pub fn new_full() -> Self {
SigSet { bits: !0 }
}
pub const fn is_empty(&self) -> bool {
self.bits == 0
}
pub const fn is_full(&self) -> bool {
self.bits == !0
}
pub fn count(&self) -> usize {
self.bits.count_ones() as usize
}
pub fn contains(&self, set: impl Into<Self>) -> bool {
let set = set.into();
self.bits & set.bits == set.bits
}
}
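A short sketch of the set arithmetic defined above (the demo function is made up; signal constants come from the `constants` module): `+` and `|` both union, `-` removes, and `contains` tests subset membership.
```rust
fn sigset_demo() {
    let mut set = SigSet::new_empty();
    set += SIGINT; // add one signal
    set |= SIGQUIT; // `|=` unions, just like `+=`
    assert!(set.contains(SIGINT) && set.contains(SIGQUIT));
    assert_eq!(set.count(), 2);
    set -= SIGINT; // remove it again
    assert!(!set.contains(SIGINT));
}
```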
// This is to allow hexadecimally formatting a `SigSet` when debug printing it.
impl LowerHex for SigSet {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
LowerHex::fmt(&self.bits, f) // delegate to u64's implementation
}
}
/// An atomic signal mask.
///
/// This is an alias of [`AtomicSigSet`]. All the signals in the set are
/// blocked from delivery to a thread.
pub type AtomicSigMask = AtomicSigSet;
/// An atomic signal set.
pub struct AtomicSigSet(AtomicU64);
impl From<SigSet> for AtomicSigSet {
fn from(set: SigSet) -> Self {
AtomicSigSet(AtomicU64::new(set.bits))
}
}
impl AtomicSigSet {
pub fn new_empty() -> Self {
AtomicSigSet(AtomicU64::new(0))
}
pub fn new_full() -> Self {
AtomicSigSet(AtomicU64::new(!0))
}
pub fn load(&self, ordering: Ordering) -> SigSet {
SigSet {
bits: self.0.load(ordering),
}
}
pub fn store(&self, new_mask: impl Into<SigMask>, ordering: Ordering) {
self.0.store(new_mask.into().bits, ordering);
}
pub fn swap(&self, new_mask: impl Into<SigMask>, ordering: Ordering) -> SigSet {
let bits = self.0.swap(new_mask.into().bits, ordering);
SigSet { bits }
}
pub fn contains(&self, signals: impl Into<SigSet>, ordering: Ordering) -> bool {
SigSet {
bits: self.0.load(ordering),
}
.contains(signals.into())
}
/// Applies an update to the signal set.
///
/// This is the same as [`AtomicU64::fetch_update`], but the closure `f`
/// operates on a [`SigMask`] instead of a `u64`.
///
/// It can be a bit slow, since it checks whether the value has been written by
/// another thread while the closure `f` is being evaluated. If you are confident
/// that no such race can occur, don't use this method.
pub fn fetch_update<F>(
&self,
set_order: Ordering,
fetch_order: Ordering,
mut f: F,
) -> core::result::Result<SigMask, SigMask>
where
F: FnMut(SigMask) -> Option<SigMask>,
{
self.0
.fetch_update(set_order, fetch_order, |bits| {
f(SigMask { bits }).map(|set| set.bits)
})
.map(SigMask::from)
.map_err(SigMask::from)
}
}
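For example, atomically blocking one more signal in a shared mask could look like this sketch (the function name is made up):
```rust
fn block_one_more(mask: &AtomicSigMask, num: SigNum) {
    // The retry loop lives inside `fetch_update`; the closure only
    // describes the transformation from the old mask to the new one.
    mask.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |old| Some(old + num))
        .expect("the closure never returns `None`, so this cannot fail");
}
```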

View File

@ -0,0 +1,128 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use core::sync::atomic::AtomicU8;
use atomic::Ordering;
use super::constants::*;
use crate::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigNum {
sig_num: u8,
}
impl TryFrom<u8> for SigNum {
type Error = Error;
fn try_from(sig_num: u8) -> Result<Self> {
if !(MIN_STD_SIG_NUM..=MAX_RT_SIG_NUM).contains(&sig_num) {
return_errno_with_message!(Errno::EINVAL, "invalid signal number");
}
Ok(SigNum { sig_num })
}
}
impl SigNum {
/// The caller must ensure that `sig_num` is valid. Otherwise, use `try_from`, which checks `sig_num` and does not panic.
pub const fn from_u8(sig_num: u8) -> Self {
if sig_num > MAX_RT_SIG_NUM || sig_num < MIN_STD_SIG_NUM {
panic!("invalid signal number")
}
SigNum { sig_num }
}
pub const fn as_u8(&self) -> u8 {
self.sig_num
}
pub fn is_std(&self) -> bool {
self.sig_num <= MAX_STD_SIG_NUM
}
pub fn is_real_time(&self) -> bool {
self.sig_num >= MIN_RT_SIG_NUM
}
pub const fn sig_name(&self) -> &'static str {
match *self {
SIGHUP => "SIGHUP",
SIGINT => "SIGINT",
SIGQUIT => "SIGQUIT",
SIGILL => "SIGILL",
SIGTRAP => "SIGTRAP",
SIGABRT => "SIGABRT",
SIGBUS => "SIGBUS",
SIGFPE => "SIGFPE",
SIGKILL => "SIGKILL",
SIGUSR1 => "SIGUSR1",
SIGSEGV => "SIGSEGV",
SIGUSR2 => "SIGUSR2",
SIGPIPE => "SIGPIPE",
SIGALRM => "SIGALRM",
SIGTERM => "SIGTERM",
SIGSTKFLT => "SIGSTKFLT",
SIGCHLD => "SIGCHLD",
SIGCONT => "SIGCONT",
SIGSTOP => "SIGSTOP",
SIGTSTP => "SIGTSTP",
SIGTTIN => "SIGTTIN",
SIGTTOU => "SIGTTOU",
SIGURG => "SIGURG",
SIGXCPU => "SIGXCPU",
SIGXFSZ => "SIGXFSZ",
SIGVTALRM => "SIGVTALRM",
SIGPROF => "SIGPROF",
SIGWINCH => "SIGWINCH",
SIGIO => "SIGIO",
SIGPWR => "SIGPWR",
SIGSYS => "SIGSYS",
_ => "Realtime Signal",
}
}
}
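Both constructors enforce the 1..=64 range; the difference is the failure mode, as this sketch shows (the `SIGRTMAX` name is made up for the demo):
```rust
fn signum_demo() {
    // Fallible conversion: out-of-range numbers yield `EINVAL`.
    assert!(SigNum::try_from(0u8).is_err());
    assert!(SigNum::try_from(64u8).is_ok());
    // Const construction: fine for valid numbers, panics otherwise.
    const SIGRTMAX: SigNum = SigNum::from_u8(64);
    assert!(SIGRTMAX.is_real_time());
}
```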
/// Atomic signal number.
///
/// This struct represents a signal number and is different from [`SigNum`]
/// in that it allows for an empty signal number.
pub struct AtomicSigNum(AtomicU8);
impl AtomicSigNum {
/// Creates a new empty atomic signal number
pub const fn new_empty() -> Self {
Self(AtomicU8::new(0))
}
/// Creates a new signal number with the specified value
pub const fn new(sig_num: SigNum) -> Self {
Self(AtomicU8::new(sig_num.as_u8()))
}
/// Determines whether the signal number is empty
pub fn is_empty(&self) -> bool {
self.0.load(Ordering::Relaxed) == 0
}
/// Returns the corresponding [`SigNum`]
pub fn as_sig_num(&self) -> Option<SigNum> {
let sig_num = self.0.load(Ordering::Relaxed);
if sig_num == 0 {
return None;
}
Some(SigNum::from_u8(sig_num))
}
/// Sets the new `sig_num`
pub fn set(&self, sig_num: SigNum) {
self.0.store(sig_num.as_u8(), Ordering::Relaxed)
}
/// Clears the signal number
pub fn clear(&self) {
self.0.store(0, Ordering::Relaxed)
}
}

View File

@ -0,0 +1,238 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::{AtomicUsize, Ordering};
use super::{
constants::*,
sig_mask::{SigMask, SigSet},
sig_num::SigNum,
signals::Signal,
SigEvents, SigEventsFilter,
};
use crate::{
events::{Observer, Subject},
prelude::*,
};
pub struct SigQueues {
// The number of pending signals.
// Useful for quickly determining if any signals are pending without locking `queues`.
count: AtomicUsize,
queues: Mutex<Queues>,
subject: Subject<SigEvents, SigEventsFilter>,
}
impl SigQueues {
pub fn new() -> Self {
Self {
count: AtomicUsize::new(0),
queues: Mutex::new(Queues::new()),
subject: Subject::new(),
}
}
pub fn is_empty(&self) -> bool {
self.count.load(Ordering::Relaxed) == 0
}
pub fn enqueue(&self, signal: Box<dyn Signal>) {
let signum = signal.num();
let mut queues = self.queues.lock();
if queues.enqueue(signal) {
self.count.fetch_add(1, Ordering::Relaxed);
// Avoid holding lock when notifying observers
drop(queues);
self.subject.notify_observers(&SigEvents::new(signum));
}
}
pub fn dequeue(&self, blocked: &SigMask) -> Option<Box<dyn Signal>> {
// Fast path for the common case of no pending signals
if self.is_empty() {
return None;
}
let mut queues = self.queues.lock();
let signal = queues.dequeue(blocked);
if signal.is_some() {
self.count.fetch_sub(1, Ordering::Relaxed);
}
signal
}
/// Returns the pending signals
pub fn sig_pending(&self) -> SigSet {
let queues = self.queues.lock();
queues.sig_pending()
}
/// Returns whether there are pending signals that are not blocked
pub fn has_pending(&self, blocked: SigMask) -> bool {
self.queues.lock().has_pending(blocked)
}
pub fn register_observer(
&self,
observer: Weak<dyn Observer<SigEvents>>,
filter: SigEventsFilter,
) {
self.subject.register_observer(observer, filter);
}
pub fn unregister_observer(&self, observer: &Weak<dyn Observer<SigEvents>>) {
self.subject.unregister_observer(observer);
}
}
impl Default for SigQueues {
fn default() -> Self {
Self::new()
}
}
struct Queues {
std_queues: Vec<Option<Box<dyn Signal>>>,
rt_queues: Vec<VecDeque<Box<dyn Signal>>>,
}
impl Queues {
fn new() -> Self {
let std_queues = (0..COUNT_STD_SIGS).map(|_| None).collect();
let rt_queues = (0..COUNT_RT_SIGS).map(|_| Default::default()).collect();
Self {
std_queues,
rt_queues,
}
}
fn enqueue(&mut self, signal: Box<dyn Signal>) -> bool {
let signum = signal.num();
if signum.is_std() {
// Standard signals
//
// From signal(7):
//
// Standard signals do not queue. If multiple instances of a standard
// signal are generated while that signal is blocked, then only one
// instance of the signal is marked as pending (and the signal will be
// delivered just once when it is unblocked). In the case where a
// standard signal is already pending, the siginfo_t structure (see
// sigaction(2)) associated with that signal is not overwritten on
// arrival of subsequent instances of the same signal. Thus, the
// process will receive the information associated with the first
// instance of the signal.
let queue = self.get_std_queue_mut(signum);
if queue.is_some() {
// If there is already a signal pending, just ignore all subsequent signals
return false;
}
*queue = Some(signal);
} else {
// Real-time signals
let queue = self.get_rt_queue_mut(signum);
queue.push_back(signal);
}
true
}
fn dequeue(&mut self, blocked: &SigMask) -> Option<Box<dyn Signal>> {
// Deliver standard signals.
//
// According to signal(7):
// If both standard and real-time signals are pending for a process,
// POSIX leaves it unspecified which is delivered first. Linux, like
// many other implementations, gives priority to standard signals in
// this case.
// POSIX leaves unspecified which to deliver first if there are multiple
// pending standard signals. So we are free to define our own. The
// principle is to give more urgent signals higher priority (like SIGKILL).
// FIXME: the gvisor pty_test JobControlTest::ReleaseTTY requires that
// the SIGHUP signal should be handled before SIGCONT.
const ORDERED_STD_SIGS: [SigNum; COUNT_STD_SIGS] = [
SIGKILL, SIGTERM, SIGSTOP, SIGSEGV, SIGILL, SIGHUP, SIGCONT, SIGINT, SIGQUIT, SIGTRAP,
SIGABRT, SIGBUS, SIGFPE, SIGUSR1, SIGUSR2, SIGPIPE, SIGALRM, SIGSTKFLT, SIGCHLD,
SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
SIGIO, SIGPWR, SIGSYS,
];
for &signum in &ORDERED_STD_SIGS {
if blocked.contains(signum) {
continue;
}
let queue = self.get_std_queue_mut(signum);
let signal = queue.take();
if signal.is_some() {
return signal;
}
}
// If no standard signals, then deliver real-time signals.
//
// According to signal (7):
// Real-time signals are delivered in a guaranteed order. Multiple
// real-time signals of the same type are delivered in the order
// they were sent. If different real-time signals are sent to a
// process, they are delivered starting with the lowest-numbered
// signal. (I.e., low-numbered signals have highest priority.)
for signum in MIN_RT_SIG_NUM..=MAX_RT_SIG_NUM {
let signum = SigNum::try_from(signum).unwrap();
if blocked.contains(signum) {
continue;
}
let queue = self.get_rt_queue_mut(signum);
let signal = queue.pop_front();
if signal.is_some() {
return signal;
}
}
// There must be pending but blocked signals
None
}
/// Returns whether the queues contain pending signals that are not blocked
fn has_pending(&self, blocked: SigMask) -> bool {
self.std_queues.iter().any(|signal| {
signal
.as_ref()
.is_some_and(|signal| !blocked.contains(signal.num()))
}) || self.rt_queues.iter().enumerate().any(|(idx, rt_queue)| {
// Also respect `blocked` for real-time signals, mirroring the
// check for standard signals above.
let signum = SigNum::from_u8(idx as u8 + MIN_RT_SIG_NUM);
!rt_queue.is_empty() && !blocked.contains(signum)
})
}
fn get_std_queue_mut(&mut self, signum: SigNum) -> &mut Option<Box<dyn Signal>> {
debug_assert!(signum.is_std());
let idx = (signum.as_u8() - MIN_STD_SIG_NUM) as usize;
&mut self.std_queues[idx]
}
fn get_rt_queue_mut(&mut self, signum: SigNum) -> &mut VecDeque<Box<dyn Signal>> {
debug_assert!(signum.is_real_time());
let idx = (signum.as_u8() - MIN_RT_SIG_NUM) as usize;
&mut self.rt_queues[idx]
}
fn sig_pending(&self) -> SigSet {
let mut pending = SigSet::new_empty();
// Process standard signal queues
for (idx, signal) in self.std_queues.iter().enumerate() {
if signal.is_some() {
pending += SigNum::from_u8(idx as u8 + MIN_STD_SIG_NUM);
}
}
// Process real-time signal queues
for (idx, signals) in self.rt_queues.iter().enumerate() {
if !signals.is_empty() {
pending += SigNum::from_u8(idx as u8 + MIN_RT_SIG_NUM);
}
}
pending
}
}
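A sketch of the coalescing rule for standard signals, using the `KernelSignal` type defined later in this commit (the demo function is made up): enqueuing the same standard signal twice leaves only one pending instance, whereas a real-time signal would queue both.
```rust
fn coalescing_demo() {
    let queues = SigQueues::new();
    queues.enqueue(Box::new(KernelSignal::new(SIGINT)));
    queues.enqueue(Box::new(KernelSignal::new(SIGINT))); // coalesced away
    let none_blocked = SigMask::new_empty();
    assert!(queues.dequeue(&none_blocked).is_some()); // the single SIGINT
    assert!(queues.dequeue(&none_blocked).is_none()); // nothing left
}
```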

View File

@ -0,0 +1,95 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
/// User-provided signal stack. `SigStack` is per-thread, and each thread can have
/// at most one `SigStack`. If a signal handler specifies the `SA_ONSTACK` flag,
/// the handler is executed on the `SigStack` instead of on the default stack.
///
/// A `SigStack` can be registered and unregistered via the `sigaltstack` syscall.
#[derive(Debug, Clone)]
pub struct SigStack {
base: Vaddr,
flags: SigStackFlags,
size: usize,
/// The number of handlers that are currently using the stack
handler_counter: usize,
}
bitflags! {
pub struct SigStackFlags: u32 {
const SS_AUTODISARM = 1 << 31;
}
}
#[repr(u8)]
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SigStackStatus {
#[default]
SS_INACTIVE = 0,
// The thread is currently executing on the alternate signal stack
SS_ONSTACK = 1,
// The stack is currently disabled.
SS_DISABLE = 2,
}
impl SigStack {
pub fn new(base: Vaddr, flags: SigStackFlags, size: usize) -> Self {
Self {
base,
flags,
size,
handler_counter: 0,
}
}
pub fn base(&self) -> Vaddr {
self.base
}
pub fn flags(&self) -> SigStackFlags {
self.flags
}
pub fn size(&self) -> usize {
self.size
}
pub fn status(&self) -> SigStackStatus {
if self.handler_counter == 0 {
return SigStackStatus::SS_INACTIVE;
}
// Following the [sigaltstack doc](https://man7.org/linux/man-pages/man2/sigaltstack.2.html):
// if the stack is currently being executed on,
// 1. if the stack was established with the SS_AUTODISARM flag, the stack status is SS_DISABLE;
// 2. otherwise, the stack status is SS_ONSTACK.
if self.flags.contains(SigStackFlags::SS_AUTODISARM) {
SigStackStatus::SS_DISABLE
} else {
SigStackStatus::SS_ONSTACK
}
}
/// Marks that the stack is currently used by a signal handler.
pub fn increase_handler_counter(&mut self) {
self.handler_counter += 1;
}
/// Marks that the stack has been released by the current handler.
pub fn decrease_handler_counter(&mut self) {
// FIXME: deal with SS_AUTODISARM flag
self.handler_counter -= 1
}
/// Determines whether the stack is being executed on by any signal handler
pub fn is_active(&self) -> bool {
// FIXME: can DISABLE stack be used?
self.handler_counter != 0 && !self.flags.contains(SigStackFlags::SS_AUTODISARM)
}
pub fn is_disabled(&self) -> bool {
self.handler_counter != 0 && self.flags.contains(SigStackFlags::SS_AUTODISARM)
}
}
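A sketch of the status transitions (addresses and sizes made up): without `SS_AUTODISARM`, a stack in use reports `SS_ONSTACK`; with the flag, it would report `SS_DISABLE` while a handler runs.
```rust
fn sigstack_demo() {
    let mut stack = SigStack::new(0x7000_0000, SigStackFlags::empty(), 8192);
    assert_eq!(stack.status(), SigStackStatus::SS_INACTIVE);
    stack.increase_handler_counter(); // a handler starts running on it
    assert_eq!(stack.status(), SigStackStatus::SS_ONSTACK);
    stack.decrease_handler_counter(); // the handler returns
    assert_eq!(stack.status(), SigStackStatus::SS_INACTIVE);
}
```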

View File

@ -0,0 +1,60 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::cpu::{
CpuException, CpuExceptionInfo, ALIGNMENT_CHECK, BOUND_RANGE_EXCEEDED, DIVIDE_BY_ZERO,
GENERAL_PROTECTION_FAULT, INVALID_OPCODE, PAGE_FAULT, SIMD_FLOATING_POINT_EXCEPTION,
X87_FLOATING_POINT_EXCEPTION,
};
use super::Signal;
use crate::{
prelude::*,
process::signal::{c_types::siginfo_t, constants::*, sig_num::SigNum},
};
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FaultSignal {
num: SigNum,
code: i32,
addr: Option<u64>,
}
impl FaultSignal {
pub fn new(trap_info: &CpuExceptionInfo) -> FaultSignal {
debug!("Trap id: {}", trap_info.id);
let exception = CpuException::to_cpu_exception(trap_info.id as u16).unwrap();
let (num, code, addr) = match *exception {
DIVIDE_BY_ZERO => (SIGFPE, FPE_INTDIV, None),
X87_FLOATING_POINT_EXCEPTION | SIMD_FLOATING_POINT_EXCEPTION => {
(SIGFPE, FPE_FLTDIV, None)
}
BOUND_RANGE_EXCEEDED => (SIGSEGV, SEGV_BNDERR, None),
ALIGNMENT_CHECK => (SIGBUS, BUS_ADRALN, None),
INVALID_OPCODE => (SIGILL, ILL_ILLOPC, None),
GENERAL_PROTECTION_FAULT => (SIGBUS, BUS_ADRERR, None),
PAGE_FAULT => {
const PF_ERR_FLAG_PRESENT: usize = 1usize << 0;
let code = if trap_info.error_code & PF_ERR_FLAG_PRESENT != 0 {
SEGV_ACCERR
} else {
SEGV_MAPERR
};
let addr = Some(trap_info.page_fault_addr as u64);
(SIGSEGV, code, addr)
}
_ => panic!("Exception cannnot be a signal"),
};
FaultSignal { num, code, addr }
}
}
impl Signal for FaultSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
siginfo_t::new(self.num, self.code)
// info.set_si_addr(self.addr.unwrap_or_default() as *const c_void);
// info
}
}

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: MPL-2.0
use super::Signal;
use crate::process::signal::{c_types::siginfo_t, constants::SI_KERNEL, sig_num::SigNum};
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct KernelSignal {
num: SigNum,
}
impl KernelSignal {
pub const fn new(num: SigNum) -> Self {
Self { num }
}
}
impl Signal for KernelSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
siginfo_t::new(self.num, SI_KERNEL)
}
}

View File

@ -0,0 +1,16 @@
// SPDX-License-Identifier: MPL-2.0
pub mod fault;
pub mod kernel;
pub mod user;
use core::{any::Any, fmt::Debug};
use super::{c_types::siginfo_t, sig_num::SigNum};
pub trait Signal: Send + Sync + Debug + Any {
/// Returns the number of the signal.
fn num(&self) -> SigNum;
/// Returns the siginfo_t that gives more details about a signal.
fn to_info(&self) -> siginfo_t;
}

View File

@ -0,0 +1,68 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::Signal;
use crate::process::{
signal::{
c_types::siginfo_t,
constants::{SI_QUEUE, SI_TKILL, SI_USER},
sig_num::SigNum,
},
Pid, Uid,
};
#[derive(Debug, Clone, Copy)]
pub struct UserSignal {
num: SigNum,
pid: Pid,
uid: Uid,
kind: UserSignalKind,
}
#[derive(Debug, Copy, Clone)]
pub enum UserSignalKind {
Kill,
Tkill,
Sigqueue,
}
impl UserSignal {
pub fn new(num: SigNum, kind: UserSignalKind, pid: Pid, uid: Uid) -> Self {
Self {
num,
kind,
pid,
uid,
}
}
pub fn pid(&self) -> Pid {
self.pid
}
pub fn kind(&self) -> UserSignalKind {
self.kind
}
}
impl Signal for UserSignal {
fn num(&self) -> SigNum {
self.num
}
fn to_info(&self) -> siginfo_t {
let code = match self.kind {
UserSignalKind::Kill => SI_USER,
UserSignalKind::Tkill => SI_TKILL,
UserSignalKind::Sigqueue => SI_QUEUE,
};
siginfo_t::new(self.num, code)
// info.set_si_pid(self.pid);
// info.set_si_uid(self.uid);
// if let UserSignalKind::Sigqueue(val) = self.kind {
// info.set_si_value(val);
// }
}
}

View File

@ -0,0 +1,35 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
//! The process status
use super::TermStatus;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessStatus {
// Not ready to run
Uninit,
/// Can be scheduled to run
Runnable,
/// Exit while not reaped by parent
Zombie(TermStatus),
}
impl ProcessStatus {
pub fn set_zombie(&mut self, term_status: TermStatus) {
*self = ProcessStatus::Zombie(term_status);
}
pub fn is_zombie(&self) -> bool {
matches!(self, ProcessStatus::Zombie(_))
}
pub fn set_runnable(&mut self) {
*self = ProcessStatus::Runnable;
}
pub fn is_runnable(&self) -> bool {
*self == ProcessStatus::Runnable
}
}

View File

@ -0,0 +1,366 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
#![allow(unused_variables)]
use alloc::sync::Arc;
use core::time::Duration;
use ostd::sync::{MutexGuard, SpinLock, WaitQueue};
use crate::time::wait::WaitTimeout;
/// Represents potential errors during lock operations on synchronization primitives,
/// specifically for operations associated with a `Condvar` (Condition Variable).
pub enum LockErr<Guard> {
Timeout(Guard),
Unknown(Guard),
}
/// The result of a lock operation.
/// Note that this differs from `LockResult` in the Rust standard library.
pub type LockResult<Guard> = Result<Guard, LockErr<Guard>>;
impl<Guard> LockErr<Guard> {
pub fn into_guard(self) -> Guard {
match self {
LockErr::Timeout(guard) => guard,
LockErr::Unknown(guard) => guard,
}
}
}
/// A `Condvar` (Condition Variable) is a synchronization primitive that can block threads
/// until a certain condition becomes true.
///
/// Although a `Condvar` can block threads, it is primarily used to achieve thread synchronization.
/// Threads waiting on a `Condvar` must acquire a mutex before proceeding. This setup is commonly
/// used with a shared mutable state to ensure safe concurrent access. A typical use involves one
/// or more threads waiting for a condition to become true to proceed with their operations.
///
/// # Usage
///
/// Pair a `Condvar` with a `Mutex` to allow threads to wait for certain conditions safely.
/// A waiting thread will sleep and atomically release the associated mutex.
/// Another thread can then update the shared state and notify the `Condvar`, allowing the
/// waiting thread to reacquire the mutex and proceed.
///
/// ## Example
///
/// This example demonstrates how a `Condvar` can synchronize threads:
///
/// ```rust
/// use alloc::sync::Arc;
/// use ostd::sync::Mutex;
/// use crate::{process::sync::Condvar, thread::{kernel_thread::{KernelThreadExt, ThreadOptions}, Thread}};
///
/// // Initializing a shared condition between threads
/// let pair = Arc::new((Mutex::new(false), Condvar::new()));
/// let pair2 = Arc::clone(&pair);
///
/// // Spawning a new kernel thread to change a shared state and notify the Condvar
/// Thread::spawn_kernel_thread(ThreadOptions::new(move || {
/// let (lock, cvar) = &*pair2;
/// Thread::yield_now();
/// let mut started = lock.lock();
/// *started = true; // Modifying the shared state
/// cvar.notify_one(); // Notifying one waiting thread
/// }));
///
/// // Main thread waiting for the shared state to be set to true
/// {
/// let (lock, cvar) = &*pair;
/// let mut started = lock.lock();
/// while !*started {
/// started = cvar.wait(started).unwrap_or_else(|err| err.into_guard());
/// }
/// }
/// ```
///
/// In this example, the main thread and a child thread synchronize access to a boolean flag
/// using a `Mutex` and a `Condvar`.
/// The main thread waits for the flag to be set to `true`,
/// utilizing the `Condvar` to sleep efficiently until the condition is met.
pub struct Condvar {
waitqueue: Arc<WaitQueue>,
counter: SpinLock<Inner>,
}
struct Inner {
waiter_count: u64,
notify_count: u64,
}
impl Condvar {
/// Creates a new condition variable.
pub fn new() -> Self {
Condvar {
waitqueue: Arc::new(WaitQueue::new()),
counter: SpinLock::new(Inner {
waiter_count: 0,
notify_count: 0,
}),
}
}
/// Atomically releases the given `MutexGuard`,
/// blocking the current thread until the condition variable
/// is notified, after which the mutex will be reacquired.
///
/// Returns a new `MutexGuard` if the operation is successful,
/// or returns the provided guard
/// within a `LockErr` if the waiting operation fails.
pub fn wait<'a, T>(&self, guard: MutexGuard<'a, T>) -> LockResult<MutexGuard<'a, T>> {
let cond = || {
// Check if the notify counter is greater than 0.
let mut counter = self.counter.lock();
if counter.notify_count > 0 {
// Decrement the notify counter.
counter.notify_count -= 1;
Some(())
} else {
None
}
};
{
let mut counter = self.counter.lock();
counter.waiter_count += 1;
}
let lock = MutexGuard::get_lock(&guard);
drop(guard);
self.waitqueue.wait_until(cond);
Ok(lock.lock())
}
/// Waits for the condition variable to be signaled or broadcast,
/// or for a timeout to elapse.
///
/// The function returns a tuple containing a `MutexGuard`
/// and a boolean that is true if the timeout elapsed
/// before the condition variable was notified.
pub fn wait_timeout<'a, T>(
&self,
guard: MutexGuard<'a, T>,
timeout: Duration,
) -> LockResult<(MutexGuard<'a, T>, bool)> {
let cond = || {
// Check if the notify counter is greater than 0.
let mut counter = self.counter.lock();
if counter.notify_count > 0 {
// Decrement the notify counter.
counter.notify_count -= 1;
Some(())
} else {
None
}
};
{
let mut counter = self.counter.lock();
counter.waiter_count += 1;
}
let lock = MutexGuard::get_lock(&guard);
drop(guard);
// Wait until the condition becomes true, we're explicitly woken up, or the timeout elapses.
let res = self.waitqueue.wait_until_or_timeout(cond, &timeout);
match res {
Some(_) => Ok((lock.lock(), false)),
None => {
let mut counter = self.counter.lock();
counter.waiter_count -= 1;
Err(LockErr::Timeout((lock.lock(), true)))
}
}
}
/// Waits while the given condition holds,
/// until the timeout elapses
/// or the thread is explicitly woken up.
///
/// Similar to `wait_timeout`,
/// it returns a tuple containing the `MutexGuard`
/// and a boolean value indicating
/// whether the wait operation terminated due to a timeout.
pub fn wait_timeout_while<'a, T, F>(
&self,
mut guard: MutexGuard<'a, T>,
timeout: Duration,
mut condition: F,
) -> LockResult<(MutexGuard<'a, T>, bool)>
where
F: FnMut(&mut T) -> bool,
{
loop {
if !condition(&mut *guard) {
return Ok((guard, false));
}
guard = match self.wait_timeout(guard, timeout) {
Ok((guard, timeout_flag)) => guard,
Err(LockErr::Timeout((guard, timeout_flag))) => {
return Err(LockErr::Timeout((guard, timeout_flag)))
}
Err(LockErr::Unknown(guard)) => return Err(LockErr::Unknown(guard)),
}
}
}
/// Waits while the given condition holds,
/// until the condition variable is explicitly notified or the wait is interrupted.
///
/// This function blocks until either the condition becomes false
/// or the condition variable is explicitly notified.
/// Returns the `MutexGuard` if the operation completes successfully.
pub fn wait_while<'a, T, F>(
&self,
mut guard: MutexGuard<'a, T>,
mut condition: F,
) -> LockResult<MutexGuard<'a, T>>
where
F: FnMut(&mut T) -> bool,
{
loop {
if !condition(&mut *guard) {
return Ok(guard);
}
guard = match self.wait(guard) {
Ok(guard) => guard,
Err(LockErr::Unknown(guard)) => return Err(LockErr::Unknown(guard)),
_ => unreachable!(),
}
}
}
/// Wakes up one blocked thread waiting on this condition variable.
///
/// If there is a waiting thread, it will be unblocked
/// and allowed to reacquire the associated mutex.
/// If no threads are waiting, this function is a no-op.
pub fn notify_one(&self) {
let mut counter = self.counter.lock();
if counter.waiter_count == 0 {
return;
}
counter.notify_count += 1;
self.waitqueue.wake_one();
counter.waiter_count -= 1;
}
/// Wakes up all blocked threads waiting on this condition variable.
///
/// This method will unblock all waiting threads
/// and they will be allowed to reacquire the associated mutex.
/// If no threads are waiting, this function is a no-op.
pub fn notify_all(&self) {
let mut counter = self.counter.lock();
if counter.waiter_count == 0 {
return;
}
// Use `+=` rather than `=` so that tickets already granted by
// `notify_one` (whose waiters are no longer counted) are not lost.
counter.notify_count += counter.waiter_count;
self.waitqueue.wake_all();
counter.waiter_count = 0;
}
}
#[cfg(ktest)]
mod test {
use ostd::{prelude::*, sync::Mutex};
use super::*;
use crate::thread::{
kernel_thread::{KernelThreadExt, ThreadOptions},
Thread,
};
#[ktest]
fn test_condvar_wait() {
let pair = Arc::new((Mutex::new(false), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = true;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let mut started = lock.lock();
while !*started {
started = cvar.wait(started).unwrap_or_else(|err| err.into_guard());
}
assert!(*started);
}
}
#[ktest]
fn test_condvar_wait_timeout() {
let pair = Arc::new((Mutex::new(false), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = true;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let mut started = lock.lock();
while !*started {
(started, _) = cvar
.wait_timeout(started, Duration::from_secs(1))
.unwrap_or_else(|err| err.into_guard());
}
assert!(*started);
}
}
#[ktest]
fn test_condvar_wait_while() {
let pair = Arc::new((Mutex::new(true), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = false;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let started = cvar
.wait_while(lock.lock(), |started| *started)
.unwrap_or_else(|err| err.into_guard());
assert!(!*started);
}
}
#[ktest]
fn test_condvar_wait_timeout_while() {
let pair = Arc::new((Mutex::new(true), Condvar::new()));
let pair2 = Arc::clone(&pair);
Thread::spawn_kernel_thread(ThreadOptions::new(move || {
Thread::yield_now();
let (lock, cvar) = &*pair2;
let mut started = lock.lock();
*started = false;
cvar.notify_one();
}));
{
let (lock, cvar) = &*pair;
let (started, _) = cvar
.wait_timeout_while(lock.lock(), Duration::from_secs(1), |started| *started)
.unwrap_or_else(|err| err.into_guard());
assert!(!*started);
}
}
}
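
The waiter_count/notify_count pair is the heart of this Condvar: notify_one posts at most one wakeup "ticket" per registered waiter, and each waiter consumes exactly one ticket in its wake condition, so notifications that race with waiters are neither lost nor multiplied. A minimal user-space sketch of the same bookkeeping may help; it is illustrative only (plain Rust with std, names invented here, not the kernel API):

use std::sync::Mutex;

// Illustrative model of the Condvar bookkeeping above.
struct Counter {
    waiter_count: usize,
    notify_count: usize,
}

fn main() {
    let counter = Mutex::new(Counter { waiter_count: 0, notify_count: 0 });

    // A thread about to wait registers itself first (cf. `wait`).
    counter.lock().unwrap().waiter_count += 1;

    // notify_one: post a ticket only if someone is actually waiting.
    {
        let mut c = counter.lock().unwrap();
        if c.waiter_count > 0 {
            c.notify_count += 1;
            c.waiter_count -= 1;
        }
    }

    // The waiter's wake condition (cf. the `cond` closure): consume a ticket.
    let woke = {
        let mut c = counter.lock().unwrap();
        if c.notify_count > 0 {
            c.notify_count -= 1;
            true
        } else {
            false
        }
    };
    assert!(woke);
}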


@ -0,0 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
mod condvar;
#[allow(unused_imports)]
pub use self::condvar::{Condvar, LockErr};


@ -0,0 +1,19 @@
// SPDX-License-Identifier: MPL-2.0
use super::signal::sig_num::SigNum;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TermStatus {
Exited(u8),
Killed(SigNum),
}
impl TermStatus {
/// Returns the status as a 32-bit integer, encoded as specified in the wait(2) man page.
pub fn as_u32(&self) -> u32 {
match self {
TermStatus::Exited(status) => (*status as u32) << 8,
TermStatus::Killed(signum) => signum.as_u8() as u32,
}
}
}
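
A quick worked example of this encoding (a standalone sketch; the value 9 for SIGKILL is the usual Linux signal number, an assumption not taken from the code above):

fn main() {
    // TermStatus::Exited(42).as_u32(): the exit status occupies bits 8..16,
    // matching what the WIFEXITED/WEXITSTATUS macros in wait(2) expect.
    let exited = (42u8 as u32) << 8;
    assert_eq!(exited, 0x2a00);
    assert_eq!((exited >> 8) & 0xff, 42);

    // TermStatus::Killed(SIGKILL).as_u32(): the signal number sits in the
    // low bits, matching WIFSIGNALED/WTERMSIG.
    let killed = 9u32; // SIGKILL is 9 on Linux
    assert_eq!(killed & 0x7f, 9);
}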

113
kernel/src/process/wait.rs Normal file

@ -0,0 +1,113 @@
// SPDX-License-Identifier: MPL-2.0
#![allow(dead_code)]
use super::{process_filter::ProcessFilter, ExitCode, Pid, Process};
use crate::{prelude::*, process::process_table, thread::thread_table};
// The definition of WaitOptions is from Occlum
bitflags! {
pub struct WaitOptions: u32 {
const WNOHANG = 0x1;
// Note: the flags below are not supported yet
const WSTOPPED = 0x2; // Same as WUNTRACED
const WEXITED = 0x4;
const WCONTINUED = 0x8;
const WNOWAIT = 0x01000000;
}
}
impl WaitOptions {
pub fn supported(&self) -> bool {
let unsupported_flags = WaitOptions::all() - WaitOptions::WNOHANG;
!self.intersects(unsupported_flags)
}
}
pub fn wait_child_exit(
child_filter: ProcessFilter,
wait_options: WaitOptions,
) -> Result<Option<Arc<Process>>> {
let current = current!();
let zombie_child = current.children_pauser().pause_until(|| {
let unwaited_children = current
.children()
.lock()
.values()
.filter(|child| match child_filter {
ProcessFilter::Any => true,
ProcessFilter::WithPid(pid) => child.pid() == pid,
ProcessFilter::WithPgid(pgid) => child.pgid() == pgid,
})
.cloned()
.collect::<Vec<_>>();
if unwaited_children.is_empty() {
return Some(Err(Error::with_message(
Errno::ECHILD,
"the process has no child to wait",
)));
}
// Return immediately if we find a zombie child.
let zombie_child = unwaited_children.iter().find(|child| child.is_zombie());
if let Some(zombie_child) = zombie_child {
// With WNOWAIT, report the zombie child but do not reap it.
if !wait_options.contains(WaitOptions::WNOWAIT) {
reap_zombie_child(&current, zombie_child.pid());
}
return Some(Ok(Some(zombie_child.clone())));
}
if wait_options.contains(WaitOptions::WNOHANG) {
return Some(Ok(None));
}
// wait
None
})??;
Ok(zombie_child)
}
/// Frees the zombie child with the given pid and returns its exit code.
fn reap_zombie_child(process: &Process, pid: Pid) -> ExitCode {
let child_process = process.children().lock().remove(&pid).unwrap();
assert!(child_process.is_zombie());
for thread in &*child_process.threads().lock() {
thread_table::remove_thread(thread.tid());
}
// Lock order: session table -> group table -> process table -> group of process
// -> group inner -> session inner
let mut session_table_mut = process_table::session_table_mut();
let mut group_table_mut = process_table::group_table_mut();
let mut process_table_mut = process_table::process_table_mut();
let mut child_group_mut = child_process.process_group.lock();
let process_group = child_group_mut.upgrade().unwrap();
let mut group_inner = process_group.inner.lock();
let session = group_inner.session.upgrade().unwrap();
let mut session_inner = session.inner.lock();
group_inner.remove_process(&child_process.pid());
session_inner.remove_process(&child_process);
*child_group_mut = Weak::new();
if group_inner.is_empty() {
group_table_mut.remove(&process_group.pgid());
session_inner.remove_process_group(&process_group.pgid());
if session_inner.is_empty() {
session_table_mut.remove(&session.sid());
}
}
process_table_mut.remove(&child_process.pid());
child_process.exit_code().unwrap()
}