Reorganize the codebase

This commit is contained in:
Jianfeng Jiang
2023-04-09 23:12:42 -04:00
committed by Tate, Hongliang Tian
parent 888853a6de
commit 271a16d492
416 changed files with 67 additions and 53 deletions

View File

@ -0,0 +1,30 @@
pub mod tty;
use jinux_input::INPUT_COMPONENT;
use log::info;
/// Initializes the drivers owned by this module.
///
/// Logs the name of every input device reported by `INPUT_COMPONENT` and then
/// initializes the tty driver.
///
/// # Panics
///
/// Panics if `INPUT_COMPONENT.get()` has nothing to unwrap.
pub fn init() {
    // Walk over all input devices so that the input crate is really used
    // (which makes sure it gets compiled).
    for input_device in INPUT_COMPONENT.get().unwrap().get_input_device() {
        let device_name = input_device.name();
        info!("input device name:{}", device_name);
    }

    tty::init();
}
// Manual smoke test that is not wired into `init`, hence the `allow`.
#[allow(unused)]
/// Writes a distinct byte pattern to each of the first 512 blocks, reads every
/// block back and asserts that the data round-trips unchanged.
///
/// # Panics
///
/// Panics if `BLK_COMPONENT.get()` has nothing to unwrap or if a block read
/// back differs from what was written.
fn block_device_test() {
    let block_device = jinux_block::BLK_COMPONENT.get().unwrap().get_device();
    let mut write_buffer = [0u8; 512];
    let mut read_buffer = [0u8; 512];
    info!("write_buffer address:{:x}", write_buffer.as_ptr() as usize);
    info!("read_buffer address:{:x}", read_buffer.as_ptr() as usize);

    for block_id in 0..512 {
        // The pattern is the block id truncated to a byte, so it wraps after 255.
        write_buffer.fill(block_id as u8);
        block_device.write_block(block_id as usize, &write_buffer);
        block_device.read_block(block_id as usize, &mut read_buffer);
        assert_eq!(write_buffer, read_buffer);
    }

    info!("block device test passed!");
}

View File

@ -0,0 +1,78 @@
pub use jinux_frame::arch::x86::device::serial::register_serial_input_callback;
use crate::{
prelude::*,
tty::{get_n_tty, Tty},
};
// The global tty driver instance. It is created lazily on first access and
// starts out with the tty returned by `get_n_tty()` already installed.
lazy_static! {
pub static ref TTY_DRIVER: Arc<TtyDriver> = {
let tty_driver = Arc::new(TtyDriver::new());
// FIXME: install n_tty into tty_driver?
let n_tty = get_n_tty();
// `install` also stores a weak back-reference to the driver inside the tty.
tty_driver.install(n_tty.clone());
tty_driver
};
}
/// A driver that keeps a table of tty devices and forwards received
/// characters to them.
pub struct TtyDriver {
// The installed ttys; a tty's index in this vector is the index used by
// `lookup` and `remove`.
ttys: Mutex<Vec<Arc<Tty>>>,
}
impl TtyDriver {
    /// Creates a driver whose tty table is empty.
    pub fn new() -> Self {
        let ttys = Mutex::new(Vec::new());
        Self { ttys }
    }

    /// Returns the tty stored at `index` in the driver's internal table.
    ///
    /// # Errors
    ///
    /// Fails with `ENODEV` if no tty is stored at `index`.
    pub fn lookup(&self, index: usize) -> Result<Arc<Tty>> {
        // The lock guard is a temporary, so it is released before the result is inspected.
        let found = self.ttys.lock().get(index).cloned();
        match found {
            Some(tty) => Ok(tty),
            None => return_errno_with_message!(Errno::ENODEV, "lookup failed. No tty device"),
        }
    }

    /// Installs a new tty into the driver's internal table.
    ///
    /// The tty receives a weak reference back to this driver before it is
    /// appended to the table.
    pub fn install(self: &Arc<Self>, tty: Arc<Tty>) {
        let weak_driver = Arc::downgrade(self);
        tty.set_driver(weak_driver);
        let mut table = self.ttys.lock();
        table.push(tty);
    }

    /// Removes the tty stored at `index` from the driver's internal table.
    ///
    /// The removed tty has its driver back-reference reset to an empty `Weak`.
    /// Ttys stored after `index` move down by one position.
    ///
    /// # Errors
    ///
    /// Fails with `ENODEV` if no tty is stored at `index`.
    pub fn remove(&self, index: usize) -> Result<()> {
        let mut table = self.ttys.lock();
        if table.len() <= index {
            return_errno_with_message!(Errno::ENODEV, "lookup failed. No tty device");
        }
        let detached = table.remove(index);
        detached.set_driver(Weak::new());
        // Release the table before `detached` goes out of scope.
        drop(table);
        Ok(())
    }

    /// Delivers the received byte `item` to every installed tty.
    pub fn receive_char(&self, item: u8) {
        // FIXME: should the char send to all ttys?
        self.ttys
            .lock()
            .iter()
            .for_each(|tty| tty.receive_char(item));
    }
}
/// Serial input hook: hands the received byte `item` to the global tty driver.
fn serial_input_callback(item: u8) {
    get_tty_driver().receive_char(item);
}
/// Returns a reference to the global tty driver (`TTY_DRIVER`).
fn get_tty_driver() -> &'static TtyDriver {
    let driver: &'static TtyDriver = &TTY_DRIVER;
    driver
}
/// Initializes the tty driver by registering `serial_input_callback` as a
/// serial input callback.
pub fn init() {
register_serial_input_callback(serial_input_callback);
}

View File

@ -0,0 +1,271 @@
/// Error numbers (errno values) used to describe why an operation failed.
///
/// Copied from Occlum. Because of `#[repr(i32)]`, the discriminant of each
/// variant is the numeric error code itself.
///
/// The values 41 and 58 have no variant of their own; the commented-out
/// entries below indicate that `EWOULDBLOCK` and `EDEADLOCK` are aliases of
/// `EAGAIN` and `EDEADLK` respectively.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Errno {
EPERM = 1, /* Operation not permitted */
ENOENT = 2, /* No such file or directory */
ESRCH = 3, /* No such process */
EINTR = 4, /* Interrupted system call */
EIO = 5, /* I/O error */
ENXIO = 6, /* No such device or address */
E2BIG = 7, /* Argument list too long */
ENOEXEC = 8, /* Exec format error */
EBADF = 9, /* Bad file number */
ECHILD = 10, /* No child processes */
EAGAIN = 11, /* Try again */
ENOMEM = 12, /* Out of memory */
EACCES = 13, /* Permission denied */
EFAULT = 14, /* Bad address */
ENOTBLK = 15, /* Block device required */
EBUSY = 16, /* Device or resource busy */
EEXIST = 17, /* File exists */
EXDEV = 18, /* Cross-device link */
ENODEV = 19, /* No such device */
ENOTDIR = 20, /* Not a directory */
EISDIR = 21, /* Is a directory */
EINVAL = 22, /* Invalid argument */
ENFILE = 23, /* File table overflow */
EMFILE = 24, /* Too many open files */
ENOTTY = 25, /* Not a typewriter */
ETXTBSY = 26, /* Text file busy */
EFBIG = 27, /* File too large */
ENOSPC = 28, /* No space left on device */
ESPIPE = 29, /* Illegal seek */
EROFS = 30, /* Read-only file system */
EMLINK = 31, /* Too many links */
EPIPE = 32, /* Broken pipe */
EDOM = 33, /* Math argument out of domain of func */
ERANGE = 34, /* Math result not representable */
EDEADLK = 35, /* Resource deadlock would occur */
ENAMETOOLONG = 36, /* File name too long */
ENOLCK = 37, /* No record locks available */
/*
* This error code is special: arch syscall entry code will return
* -ENOSYS if users try to call a syscall that doesn't exist. To keep
* failures of syscalls that really do exist distinguishable from
* failures due to attempts to use a nonexistent syscall, syscall
* implementations should refrain from returning -ENOSYS.
*/
ENOSYS = 38, /* Invalid system call number */
ENOTEMPTY = 39, /* Directory not empty */
ELOOP = 40, /* Too many symbolic links encountered */
// EWOULDBLOCK EAGAIN /* Operation would block */
ENOMSG = 42, /* No message of desired type */
EIDRM = 43, /* Identifier removed */
ECHRNG = 44, /* Channel number out of range */
EL2NSYNC = 45, /* Level 2 not synchronized */
EL3HLT = 46, /* Level 3 halted */
EL3RST = 47, /* Level 3 reset */
ELNRNG = 48, /* Link number out of range */
EUNATCH = 49, /* Protocol driver not attached */
ENOCSI = 50, /* No CSI structure available */
EL2HLT = 51, /* Level 2 halted */
EBADE = 52, /* Invalid exchange */
EBADR = 53, /* Invalid request descriptor */
EXFULL = 54, /* Exchange full */
ENOANO = 55, /* No anode */
EBADRQC = 56, /* Invalid request code */
EBADSLT = 57, /* Invalid slot */
// EDEADLOCK EDEADLK
EBFONT = 59, /* Bad font file format */
ENOSTR = 60, /* Device not a stream */
ENODATA = 61, /* No data available */
ETIME = 62, /* Timer expired */
ENOSR = 63, /* Out of streams resources */
ENONET = 64, /* Machine is not on the network */
ENOPKG = 65, /* Package not installed */
EREMOTE = 66, /* Object is remote */
ENOLINK = 67, /* Link has been severed */
EADV = 68, /* Advertise error */
ESRMNT = 69, /* Srmount error */
ECOMM = 70, /* Communication error on send */
EPROTO = 71, /* Protocol error */
EMULTIHOP = 72, /* Multihop attempted */
EDOTDOT = 73, /* RFS specific error */
EBADMSG = 74, /* Not a data message */
EOVERFLOW = 75, /* Value too large for defined data type */
ENOTUNIQ = 76, /* Name not unique on network */
EBADFD = 77, /* File descriptor in bad state */
EREMCHG = 78, /* Remote address changed */
ELIBACC = 79, /* Can not access a needed shared library */
ELIBBAD = 80, /* Accessing a corrupted shared library */
ELIBSCN = 81, /* .lib section in a.out corrupted */
ELIBMAX = 82, /* Attempting to link in too many shared libraries */
ELIBEXEC = 83, /* Cannot exec a shared library directly */
EILSEQ = 84, /* Illegal byte sequence */
ERESTART = 85, /* Interrupted system call should be restarted */
ESTRPIPE = 86, /* Streams pipe error */
EUSERS = 87, /* Too many users */
ENOTSOCK = 88, /* Socket operation on non-socket */
EDESTADDRREQ = 89, /* Destination address required */
EMSGSIZE = 90, /* Message too long */
EPROTOTYPE = 91, /* Protocol wrong type for socket */
ENOPROTOOPT = 92, /* Protocol not available */
EPROTONOSUPPORT = 93, /* Protocol not supported */
ESOCKTNOSUPPORT = 94, /* Socket type not supported */
EOPNOTSUPP = 95, /* Operation not supported on transport endpoint */
EPFNOSUPPORT = 96, /* Protocol family not supported */
EAFNOSUPPORT = 97, /* Address family not supported by protocol */
EADDRINUSE = 98, /* Address already in use */
EADDRNOTAVAIL = 99, /* Cannot assign requested address */
ENETDOWN = 100, /* Network is down */
ENETUNREACH = 101, /* Network is unreachable */
ENETRESET = 102, /* Network dropped connection because of reset */
ECONNABORTED = 103, /* Software caused connection abort */
ECONNRESET = 104, /* Connection reset by peer */
ENOBUFS = 105, /* No buffer space available */
EISCONN = 106, /* Transport endpoint is already connected */
ENOTCONN = 107, /* Transport endpoint is not connected */
ESHUTDOWN = 108, /* Cannot send after transport endpoint shutdown */
ETOOMANYREFS = 109, /* Too many references: cannot splice */
ETIMEDOUT = 110, /* Connection timed out */
ECONNREFUSED = 111, /* Connection refused */
EHOSTDOWN = 112, /* Host is down */
EHOSTUNREACH = 113, /* No route to host */
EALREADY = 114, /* Operation already in progress */
EINPROGRESS = 115, /* Operation now in progress */
ESTALE = 116, /* Stale file handle */
EUCLEAN = 117, /* Structure needs cleaning */
ENOTNAM = 118, /* Not a XENIX named type file */
ENAVAIL = 119, /* No XENIX semaphores available */
EISNAM = 120, /* Is a named type file */
EREMOTEIO = 121, /* Remote I/O error */
EDQUOT = 122, /* Quota exceeded */
ENOMEDIUM = 123, /* No medium found */
EMEDIUMTYPE = 124, /* Wrong medium type */
ECANCELED = 125, /* Operation Canceled */
ENOKEY = 126, /* Required key not available */
EKEYEXPIRED = 127, /* Key has expired */
EKEYREVOKED = 128, /* Key has been revoked */
EKEYREJECTED = 129, /* Key was rejected by service */
/* for robust mutexes */
EOWNERDEAD = 130, /* Owner died */
ENOTRECOVERABLE = 131, /* State not recoverable */
ERFKILL = 132, /* Operation not possible due to RF-kill */
EHWPOISON = 133, /* Memory page has hardware error */
}
/// The error type used in this crate: an `Errno` plus an optional static
/// message giving more detail about the failure.
#[derive(Debug, Clone)]
pub struct Error {
// The error number classifying the failure.
errno: Errno,
// Optional human-readable detail; `None` when built via `Error::new`.
msg: Option<&'static str>,
}
impl Error {
pub const fn new(errno: Errno) -> Self {
Error { errno, msg: None }
}
pub const fn with_message(errno: Errno, msg: &'static str) -> Self {
Error {
errno,
msg: Some(msg),
}
}
pub fn error(&self) -> Errno {
self.errno
}
}
impl From<Errno> for Error {
fn from(errno: Errno) -> Self {
Error::new(errno)
}
}
impl From<jinux_frame::Error> for Error {
fn from(frame_error: jinux_frame::Error) -> Self {
match frame_error {
jinux_frame::Error::AccessDenied => Error::new(Errno::EFAULT),
jinux_frame::Error::NoMemory => Error::new(Errno::ENOMEM),
jinux_frame::Error::InvalidArgs => Error::new(Errno::EINVAL),
jinux_frame::Error::IoError => Error::new(Errno::EIO),
jinux_frame::Error::NotEnoughResources => Error::new(Errno::EBUSY),
jinux_frame::Error::PageFault => Error::new(Errno::EFAULT),
jinux_frame::Error::InvalidVmpermBits => Error::new(Errno::EINVAL),
jinux_frame::Error::NoChild => Error::new(Errno::ECHILD),
}
}
}
impl From<core::str::Utf8Error> for Error {
    /// Reports any UTF-8 decoding failure as `EINVAL`.
    fn from(_utf8_error: core::str::Utf8Error) -> Self {
        Self::with_message(Errno::EINVAL, "Invalid utf-8 string")
    }
}
impl From<core::ffi::FromBytesUntilNulError> for Error {
fn from(_: core::ffi::FromBytesUntilNulError) -> Self {
Error::with_message(Errno::E2BIG, "Cannot find null in cstring")
}
}
impl From<core::ffi::FromBytesWithNulError> for Error {
fn from(_: core::ffi::FromBytesWithNulError) -> Self {
Error::with_message(Errno::E2BIG, "Cannot find null in cstring")
}
}
impl From<cpio_decoder::error::Error> for Error {
fn from(cpio_error: cpio_decoder::error::Error) -> Self {
match cpio_error {
cpio_decoder::error::Error::MagicError => {
Error::with_message(Errno::EINVAL, "CPIO invalid magic number")
}
cpio_decoder::error::Error::Utf8Error => {
Error::with_message(Errno::EINVAL, "CPIO invalid utf-8 string")
}
cpio_decoder::error::Error::ParseIntError => {
Error::with_message(Errno::EINVAL, "CPIO parse int error")
}
cpio_decoder::error::Error::FileTypeError => {
Error::with_message(Errno::EINVAL, "CPIO invalid file type")
}
cpio_decoder::error::Error::FileNameError => {
Error::with_message(Errno::EINVAL, "CPIO invalid file name")
}
cpio_decoder::error::Error::BufferShortError => {
Error::with_message(Errno::EINVAL, "CPIO buffer is too short")
}
}
}
}
impl From<Error> for jinux_frame::Error {
fn from(error: Error) -> Self {
match error.errno {
Errno::EACCES => jinux_frame::Error::AccessDenied,
Errno::EIO => jinux_frame::Error::IoError,
Errno::ENOMEM => jinux_frame::Error::NoMemory,
Errno::EFAULT => jinux_frame::Error::PageFault,
Errno::EINVAL => jinux_frame::Error::InvalidArgs,
Errno::EBUSY => jinux_frame::Error::NotEnoughResources,
_ => jinux_frame::Error::InvalidArgs,
}
}
}
impl From<alloc::ffi::NulError> for Error {
fn from(_: alloc::ffi::NulError) -> Self {
Error::with_message(Errno::E2BIG, "Cannot find null in cstring")
}
}
/// Returns early from the enclosing function with `Err(Error::new($errno))`.
///
/// The paths are written with `$crate` and a leading `::` so that the macro
/// expands to the same items regardless of the crate or module it is invoked
/// from (a plain `crate::` path would refer to the *invoking* crate).
#[macro_export]
macro_rules! return_errno {
    ($errno:expr) => {
        return ::core::result::Result::Err($crate::error::Error::new($errno))
    };
}
/// Returns early from the enclosing function with
/// `Err(Error::with_message($errno, $message))`.
///
/// The paths are written with `$crate` and a leading `::` so that the macro
/// expands to the same items regardless of the crate or module it is invoked
/// from (a plain `crate::` path would refer to the *invoking* crate).
#[macro_export]
macro_rules! return_errno_with_message {
    ($errno:expr, $message:expr) => {
        return ::core::result::Result::Err($crate::error::Error::with_message(
            $errno, $message,
        ))
    };
}

View File

@ -0,0 +1,2 @@
/// A marker trait for event types that can be delivered to observers.
///
/// It has no methods; it only requires event values to be `Copy`, shareable
/// across threads (`Send + Sync`) and free of non-static borrows.
pub trait Events: Copy + Clone + Send + Sync + 'static {}

View File

@ -0,0 +1,7 @@
mod events;
mod observer;
mod subject;
pub use self::events::Events;
pub use self::observer::Observer;
pub use self::subject::Subject;

View File

@ -0,0 +1,11 @@
use super::Events;
/// An observer for events of type `E`.
///
/// In a sense, event observers are just a fancy form of callback functions.
/// An observer's `on_events` method is supposed to be called when
/// some events that are interesting to the observer happen.
pub trait Observer<E: Events>: Send + Sync {
/// Notifies the observer that the interesting `events` have happened.
fn on_events(&self, events: &E);
}

View File

@ -0,0 +1,43 @@
use crate::prelude::*;
use super::{Events, Observer};
/// A subject notifies its registered observers about interesting events.
pub struct Subject<E: Events> {
// Observers are held weakly, so registering does not keep an observer alive.
observers: Mutex<Vec<Weak<dyn Observer<E>>>>,
}
impl<E: Events> Subject<E> {
    /// Creates a subject without any registered observer.
    pub fn new() -> Self {
        Self {
            observers: Mutex::new(Vec::new()),
        }
    }

    /// Registers an observer.
    ///
    /// The observer is appended to the list; no check for duplicates is made.
    pub fn register_observer(&self, observer: Weak<dyn Observer<E>>) {
        let mut observers = self.observers.lock();
        observers.push(observer);
    }

    /// Unregisters an observer.
    ///
    /// Every registered entry that points to the same allocation as
    /// `observer` is removed.
    pub fn unregister_observer(&self, observer: Weak<dyn Observer<E>>) {
        let mut observers = self.observers.lock();
        observers.retain(|e| !Weak::ptr_eq(e, &observer));
    }

    /// Notifies all registered observers of `events`, in registration order.
    ///
    /// Observers that have already been freed are removed from the list.
    /// The observer list stays locked while the observers are being called.
    pub fn notify_observers(&self, events: &E) {
        let mut observers = self.observers.lock();
        // `retain` visits every entry exactly once in order, so notifying and
        // pruning dead entries happen in a single linear pass.
        observers.retain(|observer| match observer.upgrade() {
            Some(observer) => {
                observer.on_events(events);
                true
            }
            None => false,
        });
    }
}

View File

@ -0,0 +1,43 @@
use crate::fs::utils::{IoEvents, IoctlCmd, Metadata, SeekFrom};
use crate::prelude::*;
use crate::tty::get_n_tty;
use core::any::Any;
/// The basic operations defined on a file.
///
/// Every method has a default implementation, so implementors only override
/// what they support. Several defaults panic instead of returning an error.
pub trait File: Send + Sync + Any {
    /// Reads data into `buf`. The default implementation panics.
    fn read(&self, buf: &mut [u8]) -> Result<usize> {
        panic!("read unsupported");
    }

    /// Writes the data in `buf`. The default implementation panics.
    fn write(&self, buf: &[u8]) -> Result<usize> {
        panic!("write unsupported");
    }

    /// Handles an ioctl request.
    ///
    /// By default `TCGETS` is forwarded to the tty returned by `get_n_tty()`;
    /// every other command panics.
    fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32> {
        if let IoctlCmd::TCGETS = cmd {
            // FIXME: only a work around
            get_n_tty().ioctl(cmd, arg)
        } else {
            panic!("Ioctl unsupported")
        }
    }

    /// Returns the I/O events that are currently ready; none by default.
    fn poll(&self) -> IoEvents {
        IoEvents::empty()
    }

    /// Flushes pending data; the default implementation does nothing.
    fn flush(&self) -> Result<()> {
        Ok(())
    }

    /// Returns the metadata of the file. The default implementation panics.
    fn metadata(&self) -> Metadata {
        panic!("metadata unsupported");
    }

    /// Moves the file offset. The default implementation panics.
    fn seek(&self, seek_from: SeekFrom) -> Result<usize> {
        panic!("seek unsupported");
    }
}

View File

@ -0,0 +1,72 @@
use crate::prelude::*;
use crate::rights::{Rights, TRights};
use super::*;
impl InodeHandle<Rights> {
pub fn new(
dentry: Arc<Dentry>,
access_mode: AccessMode,
status_flags: StatusFlags,
) -> Result<Self> {
let vnode = dentry.vnode();
if access_mode.is_readable() && !vnode.inode_mode().is_readable() {
return_errno_with_message!(Errno::EACCES, "File is not readable");
}
if access_mode.is_writable() && !vnode.inode_mode().is_writable() {
return_errno_with_message!(Errno::EACCES, "File is not writable");
}
if access_mode.is_writable() && vnode.inode_type() == InodeType::Dir {
return_errno_with_message!(Errno::EISDIR, "Directory cannot open to write");
}
let inner = Arc::new(InodeHandle_ {
dentry,
offset: Mutex::new(0),
access_mode,
status_flags: Mutex::new(status_flags),
});
Ok(Self(inner, Rights::from(access_mode)))
}
pub fn to_static<R1: TRights>(self) -> Result<InodeHandle<R1>> {
let rights = Rights::from_bits(R1::BITS).ok_or(Error::new(Errno::EBADF))?;
if !self.1.contains(rights) {
return_errno_with_message!(Errno::EBADF, "check rights failed");
}
Ok(InodeHandle(self.0, R1::new()))
}
pub fn read(&self, buf: &mut [u8]) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
}
self.0.read(buf)
}
pub fn read_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
}
self.0.read_to_end(buf)
}
pub fn write(&self, buf: &[u8]) -> Result<usize> {
if !self.1.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EBADF, "File is not writable");
}
self.0.write(buf)
}
pub fn readdir(&self, visitor: &mut dyn DirentVisitor) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
}
self.0.readdir(visitor)
}
}
impl Clone for InodeHandle<Rights> {
fn clone(&self) -> Self {
Self(self.0.clone(), self.1.clone())
}
}

View File

@ -0,0 +1,151 @@
//! Opend Inode-backed File Handle
mod dyn_cap;
mod static_cap;
use crate::fs::utils::{AccessMode, Dentry, DirentVisitor, InodeType, SeekFrom, StatusFlags};
use crate::prelude::*;
use crate::rights::Rights;
/// A handle to an opened inode-backed file.
///
/// Field `0` is the state shared between clones of the handle; field `1`
/// holds the rights, either a dynamic `Rights` value (the default) or a
/// static rights type.
pub struct InodeHandle<R = Rights>(Arc<InodeHandle_>, R);
/// The state behind an `InodeHandle`, independent of the rights it carries.
struct InodeHandle_ {
// The dentry this handle was opened on.
dentry: Arc<Dentry>,
// The current file offset, advanced by `read`, `write` and `readdir`.
offset: Mutex<usize>,
// The access mode given when the handle was created.
access_mode: AccessMode,
// The status flags; partially changeable via `set_status_flags`.
status_flags: Mutex<StatusFlags>,
}
impl InodeHandle_ {
    /// Reads from the current offset into `buf` and advances the offset by
    /// the number of bytes read.
    ///
    /// Uses the direct read path when `O_DIRECT` is set.
    pub fn read(&self, buf: &mut [u8]) -> Result<usize> {
        let mut offset = self.offset.lock();
        let len = if self.status_flags.lock().contains(StatusFlags::O_DIRECT) {
            self.dentry.vnode().read_direct_at(*offset, buf)?
        } else {
            self.dentry.vnode().read_at(*offset, buf)?
        };

        *offset += len;
        Ok(len)
    }

    /// Writes `buf` at the current offset and advances the offset by the
    /// number of bytes written.
    ///
    /// With `O_APPEND` the offset is first moved to the end of the file;
    /// with `O_DIRECT` the direct write path is used.
    pub fn write(&self, buf: &[u8]) -> Result<usize> {
        let mut offset = self.offset.lock();
        // Take one snapshot of the flags so that both decisions below are
        // based on the same value even if the flags change concurrently.
        let status_flags = *self.status_flags.lock();
        if status_flags.contains(StatusFlags::O_APPEND) {
            *offset = self.dentry.vnode().len();
        }
        let len = if status_flags.contains(StatusFlags::O_DIRECT) {
            self.dentry.vnode().write_direct_at(*offset, buf)?
        } else {
            self.dentry.vnode().write_at(*offset, buf)?
        };

        *offset += len;
        Ok(len)
    }

    /// Reads the file content into `buf` and returns the number of bytes read.
    ///
    /// The handle's offset is neither consulted nor modified here.
    pub fn read_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
        let len = if self.status_flags.lock().contains(StatusFlags::O_DIRECT) {
            self.dentry.vnode().read_direct_to_end(buf)?
        } else {
            self.dentry.vnode().read_to_end(buf)?
        };
        Ok(len)
    }

    /// Moves the offset as described by `pos` and returns the new offset.
    ///
    /// # Errors
    ///
    /// * `EINVAL` if the resulting offset would be negative or a
    ///   `SeekFrom::Start` offset exceeds `isize::MAX`.
    /// * `EOVERFLOW` if computing the new offset overflows.
    ///
    /// # Panics
    ///
    /// Panics if the file length does not fit into an `isize`.
    pub fn seek(&self, pos: SeekFrom) -> Result<usize> {
        let mut offset = self.offset.lock();
        let new_offset: isize = match pos {
            SeekFrom::Start(off /* as usize */) => {
                if off > isize::MAX as usize {
                    return_errno_with_message!(Errno::EINVAL, "file offset is too large");
                }
                off as isize
            }
            SeekFrom::End(off /* as isize */) => {
                let file_size = self.dentry.vnode().len() as isize;
                assert!(file_size >= 0);
                file_size
                    .checked_add(off)
                    .ok_or_else(|| Error::with_message(Errno::EOVERFLOW, "file offset overflow"))?
            }
            SeekFrom::Current(off /* as isize */) => (*offset as isize)
                .checked_add(off)
                .ok_or_else(|| Error::with_message(Errno::EOVERFLOW, "file offset overflow"))?,
        };
        if new_offset < 0 {
            return_errno_with_message!(Errno::EINVAL, "file offset must not be negative");
        }
        // Invariant: 0 <= new_offset <= isize::MAX
        let new_offset = new_offset as usize;

        *offset = new_offset;
        Ok(new_offset)
    }

    /// Returns the current file offset.
    pub fn offset(&self) -> usize {
        let offset = self.offset.lock();
        *offset
    }

    /// Returns the length reported by the underlying vnode.
    pub fn len(&self) -> usize {
        self.dentry.vnode().len()
    }

    /// Returns the access mode the handle was created with.
    pub fn access_mode(&self) -> AccessMode {
        self.access_mode
    }

    /// Returns the current status flags.
    pub fn status_flags(&self) -> StatusFlags {
        let status_flags = self.status_flags.lock();
        *status_flags
    }

    /// Updates the changeable status flags from `new_status_flags`.
    ///
    /// Bits outside the changeable set are left as they are.
    pub fn set_status_flags(&self, new_status_flags: StatusFlags) {
        let mut status_flags = self.status_flags.lock();

        // Can change only the O_APPEND, O_ASYNC, O_NOATIME, and O_NONBLOCK flags
        let valid_flags_mask = StatusFlags::O_APPEND
            | StatusFlags::O_ASYNC
            | StatusFlags::O_NOATIME
            | StatusFlags::O_NONBLOCK;

        status_flags.remove(valid_flags_mask);
        status_flags.insert(new_status_flags & valid_flags_mask);
    }

    /// Passes directory entries starting at the current offset to `visitor`
    /// and advances the offset by the value returned from `readdir_at`.
    pub fn readdir(&self, visitor: &mut dyn DirentVisitor) -> Result<usize> {
        let mut offset = self.offset.lock();
        let read_cnt = self.dentry.vnode().readdir_at(*offset, visitor)?;
        *offset += read_cnt;
        Ok(read_cnt)
    }
}
/// Methods for both dyn and static
///
/// None of these methods checks the rights held by the handle; they all
/// delegate to the shared inner state.
impl<R> InodeHandle<R> {
/// Moves the file offset as described by `pos` and returns the new offset.
pub fn seek(&self, pos: SeekFrom) -> Result<usize> {
self.0.seek(pos)
}
/// Returns the current file offset.
pub fn offset(&self) -> usize {
self.0.offset()
}
/// Returns the length of the underlying file.
pub fn len(&self) -> usize {
self.0.len()
}
/// Returns the access mode the handle was created with.
pub fn access_mode(&self) -> AccessMode {
self.0.access_mode()
}
/// Returns the current status flags.
pub fn status_flags(&self) -> StatusFlags {
self.0.status_flags()
}
/// Updates the changeable status flags from `new_status_flags`.
pub fn set_status_flags(&self, new_status_flags: StatusFlags) {
self.0.set_status_flags(new_status_flags)
}
/// Returns the dentry this handle was opened on.
pub fn dentry(&self) -> &Arc<Dentry> {
&self.0.dentry
}
}

View File

@ -0,0 +1,27 @@
use crate::prelude::*;
use crate::rights::*;
use jinux_rights_proc::require;
use super::*;
// Operations of handles with static rights. Each method carries a
// `#[require(...)]` attribute naming the right it depends on; the bodies
// simply delegate to the shared inner state.
impl<R: TRights> InodeHandle<R> {
/// Reads from the current offset into `buf`; annotated to require `Read`.
#[require(R > Read)]
pub fn read(&self, buf: &mut [u8]) -> Result<usize> {
self.0.read(buf)
}
/// Reads the file content into `buf`; annotated to require `Read`.
#[require(R > Read)]
pub fn read_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
self.0.read_to_end(buf)
}
/// Writes `buf` at the current offset; annotated to require `Write`.
#[require(R > Write)]
pub fn write(&self, buf: &[u8]) -> Result<usize> {
self.0.write(buf)
}
/// Passes directory entries to `visitor`; annotated to require `Read`.
#[require(R > Read)]
pub fn readdir(&self, visitor: &mut dyn DirentVisitor) -> Result<usize> {
self.0.readdir(visitor)
}
}

View File

@ -0,0 +1,93 @@
//! Opend File Handle
mod file;
mod inode_handle;
use crate::fs::utils::{Metadata, SeekFrom};
use crate::prelude::*;
use crate::rights::{ReadOp, WriteOp};
use alloc::sync::Arc;
pub use self::file::File;
pub use self::inode_handle::InodeHandle;
/// A handle to an opened file, backed either by a `File` trait object or by
/// an `InodeHandle`.
#[derive(Clone)]
pub struct FileHandle {
// Which kind of file backs this handle.
inner: Inner,
}
/// The two kinds of backing objects a `FileHandle` can wrap.
#[derive(Clone)]
enum Inner {
// A file implemented through the `File` trait.
File(Arc<dyn File>),
// An inode-backed file with dynamically checked rights.
Inode(InodeHandle),
}
impl FileHandle {
    /// Wraps a `File` trait object into a handle.
    pub fn new_file(file: Arc<dyn File>) -> Self {
        Self {
            inner: Inner::File(file),
        }
    }

    /// Wraps an inode handle into a handle.
    pub fn new_inode_handle(inode_handle: InodeHandle) -> Self {
        Self {
            inner: Inner::Inode(inode_handle),
        }
    }

    /// Returns the wrapped `File`, or `None` if the handle is inode-backed.
    pub fn as_file(&self) -> Option<&Arc<dyn File>> {
        if let Inner::File(file) = &self.inner {
            Some(file)
        } else {
            None
        }
    }

    /// Returns the wrapped inode handle, or `None` if the handle wraps a `File`.
    pub fn as_inode_handle(&self) -> Option<&InodeHandle> {
        if let Inner::Inode(inode_handle) = &self.inner {
            Some(inode_handle)
        } else {
            None
        }
    }

    /// Reads into `buf` and returns the number of bytes read.
    ///
    /// For inode-backed handles the read right is verified first.
    pub fn read(&self, buf: &mut [u8]) -> Result<usize> {
        match &self.inner {
            Inner::File(file) => file.read(buf),
            Inner::Inode(inode_handle) => inode_handle
                .clone()
                .to_static::<ReadOp>()?
                .read(buf),
        }
    }

    /// Writes `buf` and returns the number of bytes written.
    ///
    /// For inode-backed handles the write right is verified first.
    pub fn write(&self, buf: &[u8]) -> Result<usize> {
        match &self.inner {
            Inner::File(file) => file.write(buf),
            Inner::Inode(inode_handle) => inode_handle
                .clone()
                .to_static::<WriteOp>()?
                .write(buf),
        }
    }

    /// Returns the metadata of the underlying file or inode.
    pub fn metadata(&self) -> Metadata {
        match &self.inner {
            Inner::Inode(inode_handle) => inode_handle.dentry().vnode().metadata(),
            Inner::File(file) => file.metadata(),
        }
    }

    /// Moves the file offset and returns the new offset.
    pub fn seek(&self, seek_from: SeekFrom) -> Result<usize> {
        match &self.inner {
            Inner::Inode(inode_handle) => inode_handle.seek(seek_from),
            Inner::File(file) => file.seek(seek_from),
        }
    }

    /// Performs the clean-up needed before the handle is closed.
    ///
    /// `File`-backed handles are flushed. Inode-backed handles need nothing:
    /// close does not guarantee that the data has been successfully saved to disk.
    pub fn clean_for_close(&self) -> Result<()> {
        if let Inner::File(file) = &self.inner {
            file.flush()?;
        }
        Ok(())
    }
}

View File

@ -0,0 +1,128 @@
use crate::events::{Events, Observer, Subject};
use crate::prelude::*;
use super::{
file_handle::FileHandle,
stdio::{Stderr, Stdin, Stdout, FD_STDERR, FD_STDIN, FD_STDOUT},
};
/// The integer type used for file descriptors, i.e. the keys of a `FileTable`.
pub type FileDescripter = i32;
/// A table that maps file descriptors to opened files and notifies
/// registered observers when descriptors are closed.
pub struct FileTable {
// The opened files, ordered by file descriptor.
table: BTreeMap<FileDescripter, FileHandle>,
// Delivers `FdEvents` to the registered observers.
subject: Subject<FdEvents>,
}
impl FileTable {
    /// Creates an empty file table without observers.
    pub fn new() -> Self {
        Self {
            table: BTreeMap::new(),
            subject: Subject::new(),
        }
    }

    /// Creates a file table whose stdin, stdout and stderr descriptors are
    /// connected to the default console.
    pub fn new_with_stdio() -> Self {
        let mut table = BTreeMap::new();
        let stdin = Stdin::new_with_default_console();
        let stdout = Stdout::new_with_default_console();
        let stderr = Stderr::new_with_default_console();
        table.insert(FD_STDIN, FileHandle::new_file(Arc::new(stdin)));
        table.insert(FD_STDOUT, FileHandle::new_file(Arc::new(stdout)));
        table.insert(FD_STDERR, FileHandle::new_file(Arc::new(stderr)));
        Self {
            table,
            subject: Subject::new(),
        }
    }

    /// Duplicates `fd` onto `new_fd`, or onto a freshly chosen descriptor
    /// when `new_fd` is `None`.
    ///
    /// # Errors
    ///
    /// * `ENOENT` if `fd` is not in the table.
    /// * `EBADF` if the target descriptor is already in use.
    pub fn dup(&mut self, fd: FileDescripter, new_fd: Option<FileDescripter>) -> Result<()> {
        let file = match self.table.get(&fd) {
            Some(file) => file.clone(),
            None => return_errno_with_message!(Errno::ENOENT, "No such file"),
        };
        let new_fd = new_fd.unwrap_or_else(|| self.next_fd());
        if self.table.contains_key(&new_fd) {
            return_errno_with_message!(Errno::EBADF, "Fd exists");
        }
        self.table.insert(new_fd, file);

        Ok(())
    }

    /// Returns the descriptor following the largest one in use, or `0` when
    /// the table is empty.
    fn next_fd(&self) -> FileDescripter {
        // The keys of a `BTreeMap` are sorted, so the last key is the largest.
        // An empty table must not panic: a table created by `new` starts
        // empty and a table may also become empty after closing every file.
        self.table
            .keys()
            .next_back()
            .map_or(0, |max_fd| max_fd + 1)
    }

    /// Inserts `item` under a freshly chosen descriptor and returns that descriptor.
    pub fn insert(&mut self, item: FileHandle) -> FileDescripter {
        let fd = self.next_fd();
        self.table.insert(fd, item);
        fd
    }

    /// Stores `item` under `fd` and returns the file previously stored there.
    ///
    /// If a file is replaced, observers receive a close event for `fd`.
    pub fn insert_at(&mut self, fd: FileDescripter, item: FileHandle) -> Option<FileHandle> {
        let file = self.table.insert(fd, item);
        if file.is_some() {
            self.notify_close_fd_event(fd);
        }
        file
    }

    /// Removes `fd` from the table and returns the file stored under it.
    ///
    /// If a file is removed, observers receive a close event for `fd`.
    pub fn close_file(&mut self, fd: FileDescripter) -> Option<FileHandle> {
        let file = self.table.remove(&fd);
        if file.is_some() {
            self.notify_close_fd_event(fd);
        }
        file
    }

    /// Returns the file stored under `fd`.
    ///
    /// # Errors
    ///
    /// Fails with `EBADF` if `fd` is not in the table.
    pub fn get_file(&self, fd: FileDescripter) -> Result<&FileHandle> {
        self.table
            .get(&fd)
            .ok_or(Error::with_message(Errno::EBADF, "fd not exits"))
    }

    /// Iterates over all descriptors and their files in ascending descriptor order.
    pub fn fds_and_files(&self) -> impl Iterator<Item = (&'_ FileDescripter, &'_ FileHandle)> {
        self.table.iter()
    }

    /// Registers an observer for the events of this table.
    pub fn register_observer(&self, observer: Weak<dyn Observer<FdEvents>>) {
        self.subject.register_observer(observer);
    }

    /// Unregisters a previously registered observer.
    pub fn unregister_observer(&self, observer: Weak<dyn Observer<FdEvents>>) {
        self.subject.unregister_observer(observer);
    }

    /// Tells all observers that `fd` has been closed.
    fn notify_close_fd_event(&self, fd: FileDescripter) {
        let events = FdEvents::Close(fd);
        self.subject.notify_observers(&events);
    }
}
impl Clone for FileTable {
    /// Copies the descriptor table but not the observers: the clone starts
    /// with a fresh subject that has no registered observer.
    fn clone(&self) -> Self {
        let table = self.table.clone();
        let subject = Subject::new();
        Self { table, subject }
    }
}
impl Drop for FileTable {
    /// Tells all observers that the file table is going away.
    fn drop(&mut self) {
        self.subject.notify_observers(&FdEvents::DropFileTable);
    }
}
/// The events a `FileTable` reports to its observers.
#[derive(Copy, Clone)]
pub enum FdEvents {
// The file stored under the given descriptor was closed or replaced.
Close(FileDescripter),
// The whole file table is being dropped.
DropFileTable,
}
// Marks `FdEvents` as an event type that a `Subject` can deliver.
impl Events for FdEvents {}

View File

@ -0,0 +1,423 @@
use crate::prelude::*;
use alloc::str;
use alloc::string::String;
use super::file_handle::InodeHandle;
use super::file_table::FileDescripter;
use super::procfs::ProcFS;
use super::ramfs::RamFS;
use super::utils::{
AccessMode, CreationFlags, Dentry, FileSystem, InodeMode, InodeType, StatusFlags, Vnode,
PATH_MAX, SYMLINKS_MAX,
};
// Lazily created global file systems together with the root dentries that
// path resolution starts from.
lazy_static! {
// The RAM file system providing the root directory.
static ref RAM_FS: Arc<dyn FileSystem> = RamFS::new();
// The dentry of "/", built on the root inode of `RAM_FS`.
// Initialization panics if the vnode cannot be created.
static ref ROOT_DENTRY: Arc<Dentry> = {
fn init() -> Result<Arc<Dentry>> {
let root_vnode = Vnode::new(RAM_FS.root_inode())?;
Ok(Dentry::new_root(root_vnode))
}
init().unwrap()
};
// The proc file system; reached through the "/proc" special case in `lookup_inner`.
static ref PROC_FS: Arc<dyn FileSystem> = ProcFS::new();
// The root dentry of `PROC_FS`. Initialization panics if the vnode cannot be created.
static ref PROC_DENTRY: Arc<Dentry> = {
let vnode = Vnode::new(PROC_FS.root_inode()).unwrap();
Dentry::new_root(vnode)
};
}
/// Resolves paths to dentries relative to a root directory and a current
/// working directory.
pub struct FsResolver {
// The directory that absolute paths start from.
root: Arc<Dentry>,
// The directory that cwd-relative paths start from.
cwd: Arc<Dentry>,
}
impl Clone for FsResolver {
fn clone(&self) -> Self {
Self {
root: self.root.clone(),
cwd: self.cwd.clone(),
}
}
}
impl FsResolver {
pub fn new() -> Self {
Self {
root: ROOT_DENTRY.clone(),
cwd: ROOT_DENTRY.clone(),
}
}
/// Returns the root directory that absolute paths are resolved against.
pub fn root(&self) -> &Arc<Dentry> {
&self.root
}
/// Returns the current working directory that relative paths are resolved against.
pub fn cwd(&self) -> &Arc<Dentry> {
&self.cwd
}
/// Sets the current working directory to `dentry`.
///
/// No check is made here that `dentry` actually is a directory.
pub fn set_cwd(&mut self, dentry: Arc<Dentry>) {
self.cwd = dentry;
}
/// Open or create a file inode handler.
pub fn open(&self, path: &FsPath, flags: u32, mode: u16) -> Result<InodeHandle> {
let creation_flags = CreationFlags::from_bits_truncate(flags);
let status_flags = StatusFlags::from_bits_truncate(flags);
let access_mode = AccessMode::from_u32(flags)?;
let inode_mode = InodeMode::from_bits_truncate(mode);
let follow_tail_link = !creation_flags.contains(CreationFlags::O_NOFOLLOW);
let dentry = match self.lookup_inner(path, follow_tail_link) {
Ok(dentry) => {
let vnode = dentry.vnode();
if vnode.inode_type() == InodeType::SymLink
&& !status_flags.contains(StatusFlags::O_PATH)
{
return_errno_with_message!(Errno::ELOOP, "file is a symlink");
}
if creation_flags.contains(CreationFlags::O_CREAT)
&& creation_flags.contains(CreationFlags::O_EXCL)
{
return_errno_with_message!(Errno::EEXIST, "file exists");
}
if creation_flags.contains(CreationFlags::O_DIRECTORY)
&& vnode.inode_type() != InodeType::Dir
{
return_errno_with_message!(
Errno::ENOTDIR,
"O_DIRECTORY is specified but file is not a directory"
);
}
dentry
}
Err(e)
if e.error() == Errno::ENOENT
&& creation_flags.contains(CreationFlags::O_CREAT) =>
{
if creation_flags.contains(CreationFlags::O_DIRECTORY) {
return_errno_with_message!(Errno::ENOTDIR, "cannot create directory");
}
let (dir_dentry, file_name) =
self.lookup_dir_and_base_name_inner(path, follow_tail_link)?;
if file_name.ends_with("/") {
return_errno_with_message!(Errno::EISDIR, "path refers to a directory");
}
if !dir_dentry.vnode().inode_mode().is_writable() {
return_errno_with_message!(Errno::EPERM, "file cannot be created");
}
let new_dentry = dir_dentry.create(&file_name, InodeType::File, inode_mode)?;
new_dentry
}
Err(e) => return Err(e),
};
let inode_handle = InodeHandle::new(dentry, access_mode, status_flags)?;
Ok(inode_handle)
}
/// Looks up the dentry that `path` refers to, always following symlinks,
/// including one in the last path component.
pub fn lookup(&self, path: &FsPath) -> Result<Arc<Dentry>> {
self.lookup_inner(path, true)
}
/// Looks up the dentry that `path` refers to without following a symlink
/// in the last path component. Symlinks in earlier components are still followed.
pub fn lookup_no_follow(&self, path: &FsPath) -> Result<Arc<Dentry>> {
self.lookup_inner(path, false)
}
/// Resolves `path` to a dentry, choosing the starting directory from the
/// kind of path.
///
/// * Absolute paths start at the root, except for `/proc` and everything
///   below it, which is resolved inside the proc file system.
/// * Cwd-relative paths start at the current working directory.
/// * Fd-relative paths start at the dentry of the given file descriptor.
///
/// `follow_tail_link` decides whether a symlink in the last component is followed.
fn lookup_inner(&self, path: &FsPath, follow_tail_link: bool) -> Result<Arc<Dentry>> {
    let dentry = match path.inner {
        FsPathInner::Absolute(path) => {
            // TODO: Mount procfs at "/proc" if mount feature is ready
            let (parent, path) = match path.strip_prefix("/proc") {
                // Only "/proc" itself and paths below "/proc/" belong to procfs.
                // A name that merely begins with "proc" (such as "/process")
                // is an ordinary entry of the root directory.
                Some(rest) if rest.is_empty() || rest.starts_with('/') => (&*PROC_DENTRY, rest),
                _ => (&self.root, path),
            };
            self.lookup_from_parent(parent, path.trim_start_matches('/'), follow_tail_link)?
        }
        FsPathInner::CwdRelative(path) => {
            self.lookup_from_parent(&self.cwd, path, follow_tail_link)?
        }
        FsPathInner::Cwd => self.cwd.clone(),
        FsPathInner::FdRelative(fd, path) => {
            let parent = self.lookup_from_fd(fd)?;
            self.lookup_from_parent(&parent, path, follow_tail_link)?
        }
        FsPathInner::Fd(fd) => self.lookup_from_fd(fd)?,
    };

    Ok(dentry)
}
/// Looks up the dentry that `relative_path` refers to, starting at `parent`.
///
/// The length of `path` cannot exceed PATH_MAX.
/// If `path` ends with `/`, then the returned inode must be a directory inode.
///
/// While looking up the dentry, symbolic links will be followed for
/// at most `SYMLINKS_MAX` times.
///
/// If `follow_tail_link` is true and the trailing component is a symlink,
/// it will be followed.
/// Symlinks in earlier components of the path will always be followed.
///
/// An empty `relative_path` yields `parent` itself.
///
/// # Errors
///
/// * `ENAMETOOLONG` if `relative_path` is longer than `PATH_MAX`.
/// * `ELOOP` if more than `SYMLINKS_MAX` symlinks would have to be followed.
/// * `ENOENT` if a symlink with an empty target is encountered.
/// * `ENOTDIR` if a component followed by `/` is not a directory.
/// * Any error returned by looking up a component or reading a symlink.
fn lookup_from_parent(
&self,
parent: &Arc<Dentry>,
relative_path: &str,
follow_tail_link: bool,
) -> Result<Arc<Dentry>> {
debug_assert!(!relative_path.starts_with("/"));
if relative_path.len() > PATH_MAX {
return_errno_with_message!(Errno::ENAMETOOLONG, "path is too long");
}
// To handle symlinks
// `link_path` owns the rewritten path after a symlink has been expanded,
// so that `relative_path` can keep borrowing from it across iterations.
let mut link_path = String::new();
let mut follows = 0;
// Initialize the first dentry and the relative path
let (mut dentry, mut relative_path) = (parent.clone(), relative_path);
while !relative_path.is_empty() {
// Split off the next component. A component followed by `/` must name
// a directory; repeated slashes are collapsed.
let (next_name, path_remain, must_be_dir) =
if let Some((prefix, suffix)) = relative_path.split_once('/') {
let suffix = suffix.trim_start_matches('/');
(prefix, suffix, true)
} else {
(relative_path, "", false)
};
// Iterate next dentry
let next_dentry = dentry.lookup(next_name)?;
let next_type = next_dentry.vnode().inode_type();
let next_is_tail = path_remain.is_empty();
// If next inode is a symlink, follow symlinks at most `SYMLINKS_MAX` times.
if next_type == InodeType::SymLink && (follow_tail_link || !next_is_tail) {
if follows >= SYMLINKS_MAX {
return_errno_with_message!(Errno::ELOOP, "too many symlinks");
}
// Build the path that remains to be resolved: the symlink target
// followed by whatever was left of the original path.
let link_path_remain = {
let mut tmp_link_path = next_dentry.vnode().read_link()?;
if tmp_link_path.is_empty() {
return_errno_with_message!(Errno::ENOENT, "empty symlink");
}
if !path_remain.is_empty() {
tmp_link_path += "/";
tmp_link_path += path_remain;
} else if must_be_dir {
// Keep the trailing slash so the target is still required to be a directory.
tmp_link_path += "/";
}
tmp_link_path
};
// Change the dentry and relative path according to symlink
// An absolute target restarts at the root; a relative target is resolved
// from the directory containing the symlink, which `dentry` still is.
if link_path_remain.starts_with("/") {
dentry = self.root.clone();
}
link_path.clear();
link_path.push_str(&link_path_remain.trim_start_matches('/'));
relative_path = &link_path;
follows += 1;
} else {
// If path ends with `/`, the inode must be a directory
if must_be_dir && next_type != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "inode is not dir");
}
dentry = next_dentry;
relative_path = path_remain;
}
}
Ok(dentry)
}
/// Lookup dentry from the giving fd
pub fn lookup_from_fd(&self, fd: FileDescripter) -> Result<Arc<Dentry>> {
let current = current!();
let file_table = current.file_table().lock();
let inode_handle = file_table
.get_file(fd)?
.as_inode_handle()
.ok_or(Error::with_message(Errno::EBADE, "not inode"))?;
Ok(inode_handle.dentry().clone())
}
/// Looks up the directory dentry and the base file name of the given path.
///
/// If the last component is a symlink, it is not dereferenced.
pub fn lookup_dir_and_base_name(&self, path: &FsPath) -> Result<(Arc<Dentry>, String)> {
self.lookup_dir_and_base_name_inner(path, false)
}
fn lookup_dir_and_base_name_inner(
&self,
path: &FsPath,
follow_tail_link: bool,
) -> Result<(Arc<Dentry>, String)> {
let (mut dir_dentry, mut base_name) = match path.inner {
FsPathInner::Absolute(path) => {
let (dir, file_name) = split_path(path);
(
self.lookup_from_parent(&self.root, dir.trim_start_matches('/'), true)?,
String::from(file_name),
)
}
FsPathInner::CwdRelative(path) => {
let (dir, file_name) = split_path(path);
(
self.lookup_from_parent(&self.cwd, dir, true)?,
String::from(file_name),
)
}
FsPathInner::FdRelative(fd, path) => {
let (dir, file_name) = split_path(path);
let parent = self.lookup_from_fd(fd)?;
(
self.lookup_from_parent(&parent, dir, true)?,
String::from(file_name),
)
}
_ => return_errno!(Errno::ENOENT),
};
if !follow_tail_link {
return Ok((dir_dentry, base_name));
}
// Dereference the tail symlinks if needed
loop {
match dir_dentry.lookup(&base_name.trim_end_matches('/')) {
Ok(dentry) if dentry.vnode().inode_type() == InodeType::SymLink => {
let link = {
let mut link = dentry.vnode().read_link()?;
if link.is_empty() {
return_errno_with_message!(Errno::ENOENT, "invalid symlink");
}
if base_name.ends_with("/") && !link.ends_with("/") {
link += "/";
}
link
};
let (dir, file_name) = split_path(&link);
if dir.starts_with("/") {
dir_dentry =
self.lookup_from_parent(&self.root, dir.trim_start_matches('/'), true)?;
base_name = String::from(file_name);
} else {
dir_dentry = self.lookup_from_parent(&dir_dentry, dir, true)?;
base_name = String::from(file_name);
}
}
_ => break,
}
}
Ok((dir_dentry, base_name))
}
}
/// Special `dirfd` value that `FsPath::new` interprets as "relative to the
/// current working directory".
pub const AT_FDCWD: FileDescripter = -100;
/// A borrowed path together with the starting point it is resolved from.
///
/// Built with `FsPath::new` or `TryFrom<&str>`.
pub struct FsPath<'a> {
// How the path is anchored (absolute, cwd-relative, fd-relative, ...).
inner: FsPathInner<'a>,
}
/// The different ways a path handed to the resolver can be anchored.
#[derive(Debug)]
enum FsPathInner<'a> {
/// An absolute path (starts with `/`).
Absolute(&'a str),
/// A non-empty path relative to the current working directory.
CwdRelative(&'a str),
/// The current working directory itself (empty path).
Cwd,
/// A non-empty path relative to the file referred to by the descriptor.
FdRelative(FileDescripter, &'a str),
/// The file referred to by the descriptor itself (empty path).
Fd(FileDescripter),
}
impl<'a> FsPath<'a> {
    /// Classifies `path` according to `dirfd`.
    ///
    /// * A path starting with `/` is absolute, whatever `dirfd` is.
    /// * A non-negative `dirfd` anchors the path at that descriptor; an empty
    ///   path designates the descriptor itself.
    /// * `AT_FDCWD` anchors the path at the current working directory; an
    ///   empty path designates the working directory itself.
    ///
    /// Fails with `ENAMETOOLONG` if `path` is longer than `PATH_MAX`, and with
    /// `EINVAL` for any other `dirfd` combined with a non-absolute path.
    pub fn new(dirfd: FileDescripter, path: &'a str) -> Result<Self> {
        if path.len() > PATH_MAX {
            return_errno_with_message!(Errno::ENAMETOOLONG, "path name too long");
        }

        let no_path = path.is_empty();
        let inner = match dirfd {
            _ if path.starts_with("/") => FsPathInner::Absolute(path),
            fd if fd >= 0 && no_path => FsPathInner::Fd(fd),
            fd if fd >= 0 => FsPathInner::FdRelative(fd, path),
            AT_FDCWD if no_path => FsPathInner::Cwd,
            AT_FDCWD => FsPathInner::CwdRelative(path),
            _ => {
                return_errno_with_message!(Errno::EINVAL, "invalid dirfd number");
            }
        };
        Ok(Self { inner })
    }
}
impl<'a> TryFrom<&'a str> for FsPath<'a> {
    type Error = crate::error::Error;

    /// Builds a path that is absolute or relative to the current working
    /// directory. An empty string is rejected with `ENOENT`.
    fn try_from(path: &'a str) -> Result<FsPath> {
        if !path.is_empty() {
            return FsPath::new(AT_FDCWD, path);
        }
        return_errno_with_message!(Errno::ENOENT, "path is an empty string");
    }
}
/// Splits a `path` into (`dir_path`, `file_name`).
///
/// The `dir_path` is everything before the last component, without trailing
/// slashes. It is "/" when the last component sits directly under the root
/// (or when the path consists only of slashes or is empty), and "." when the
/// path has a single relative component.
///
/// The `file_name` is the last component. It can be suffixed by "/". It is
/// "." when the path has no component at all.
///
/// Example:
///
/// The path "/dir/file/" will be split to ("/dir", "file/").
fn split_path(path: &str) -> (&str, &str) {
    // Last non-empty component, keeping the slash that terminates it (if any).
    let file_name = path
        .split_inclusive('/')
        .filter(|piece| *piece != "/")
        .last()
        .unwrap_or(".");

    let trimmed = path.trim_end_matches('/');
    if trimmed.is_empty() {
        return ("/", file_name);
    }

    let dir_path = match trimmed.rfind('/') {
        // No separator left: a single relative component.
        None => ".",
        Some(last_sep) => match trimmed[..last_sep].trim_end_matches('/') {
            "" => "/",
            dir => dir,
        },
    };
    (dir_path, file_name)
}

View File

@ -0,0 +1,55 @@
use crate::prelude::*;
use super::fs_resolver::{FsPath, FsResolver};
use super::utils::{InodeMode, InodeType};
use cpio_decoder::{CpioDecoder, FileType};
/// Unpacks the ramdisk CPIO buffer and creates its entries in the file system.
///
/// Regular files, directories and symlinks are created; entries of any other
/// type are skipped with a warning. An entry whose name is empty after
/// stripping surrounding slashes fails the whole unpacking with `EINVAL`.
pub fn init(ramdisk_buf: &[u8]) -> Result<()> {
    let decoder = CpioDecoder::new(ramdisk_buf);
    let fs = FsResolver::new();
    for entry_result in decoder.decode_entries() {
        let entry = entry_result?;

        // Make sure the name is a relative path, and does not end with "/".
        let entry_name = entry.name().trim_matches('/');
        match entry_name {
            "" => {
                return_errno_with_message!(Errno::EINVAL, "invalid entry name");
            }
            "." => continue,
            _ => {}
        }

        // Here we assume that the directory referred to by "prefix" has already
        // been created. The basis of this assumption is that the mkinitramfs
        // script uses the `find` command, which ensures that a directory always
        // appears before its child directories and files.
        let (parent, name) = match entry_name.rsplit_once('/') {
            Some((prefix, last)) => (fs.lookup(&FsPath::try_from(prefix)?)?, last),
            None => (fs.root().clone(), entry_name),
        };

        let metadata = entry.metadata();
        let mode = InodeMode::from_bits_truncate(metadata.permission_mode());
        match metadata.file_type() {
            FileType::File => {
                let file_dentry = parent.create(name, InodeType::File, mode)?;
                file_dentry.vnode().write_at(0, entry.data())?;
            }
            FileType::Dir => {
                parent.create(name, InodeType::Dir, mode)?;
            }
            FileType::Link => {
                let link_dentry = parent.create(name, InodeType::SymLink, mode)?;
                let target = core::str::from_utf8(entry.data())?;
                link_dentry.vnode().write_link(target)?;
            }
            other => {
                warn!("unsupported file type = {:?} in initramfs", other);
            }
        }
    }
    Ok(())
}

View File

@ -0,0 +1,8 @@
pub mod file_handle;
pub mod file_table;
pub mod fs_resolver;
pub mod initramfs;
pub mod procfs;
pub mod ramfs;
pub mod stdio;
pub mod utils;

View File

@ -0,0 +1,125 @@
use alloc::string::{String, ToString};
use core::any::Any;
use core::sync::atomic::{AtomicUsize, Ordering};
use crate::events::Observer;
use crate::fs::utils::{DirEntryVecExt, FileSystem, FsFlags, Inode, SuperBlock, NAME_MAX};
use crate::prelude::*;
use crate::process::{process_table, process_table::PidEvent, Pid};
use self::pid::PidDirOps;
use self::self_::SelfSymOps;
use self::template::{DirOps, ProcDir, ProcDirBuilder, ProcSymBuilder, SymOps};
mod pid;
mod self_;
mod template;
/// Magic number passed to the procfs `SuperBlock`.
const PROC_MAGIC: usize = 0x9fa0;
/// Root inode ID; also the first value handed out by the inode ID allocator.
const PROC_ROOT_INO: usize = 1;
/// Block size passed to the procfs `SuperBlock`.
const BLOCK_SIZE: usize = 1024;
/// The proc file system.
pub struct ProcFS {
// Super block, cloned out by `FileSystem::sb`.
sb: RwLock<SuperBlock>,
// Root inode; `None` only while `ProcFS::new` is still constructing the fs.
root: RwLock<Option<Arc<dyn Inode>>>,
// Next inode ID to hand out.
inode_allocator: AtomicUsize,
}
impl ProcFS {
    /// Creates the file system together with its root (`/proc`) inode.
    ///
    /// The fs has to exist before the root inode can be built on top of it,
    /// so the root slot starts out empty and is filled in afterwards.
    pub fn new() -> Arc<Self> {
        let sb = SuperBlock::new(PROC_MAGIC, BLOCK_SIZE, NAME_MAX);
        let procfs = Arc::new(Self {
            sb: RwLock::new(sb),
            root: RwLock::new(None),
            inode_allocator: AtomicUsize::new(PROC_ROOT_INO),
        });

        let root = RootDirOps::new_inode(&procfs);
        *procfs.root.write() = Some(root);
        procfs
    }

    /// Hands out the next inode ID and advances the allocator by one.
    pub(in crate::fs::procfs) fn alloc_id(&self) -> usize {
        self.inode_allocator.fetch_add(1, Ordering::SeqCst)
    }
}
impl FileSystem for ProcFS {
    /// Nothing to flush: always succeeds.
    fn sync(&self) -> Result<()> {
        Ok(())
    }

    /// Returns the root inode. Panics if the root has not been installed.
    fn root_inode(&self) -> Arc<dyn Inode> {
        let root = self.root.read().clone();
        root.unwrap()
    }

    /// Returns a copy of the super block.
    fn sb(&self) -> SuperBlock {
        let sb = self.sb.read();
        sb.clone()
    }

    fn flags(&self) -> FsFlags {
        FsFlags::NO_PAGECACHE
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }
}
/// Represents the inode at `/proc`.
struct RootDirOps;

impl RootDirOps {
    /// Builds the root directory inode on `fs` and registers it as an
    /// observer of the process table.
    pub fn new_inode(fs: &Arc<ProcFS>) -> Arc<dyn Inode> {
        let builder = ProcDirBuilder::new(Self).fs(fs.clone());
        let root_inode = builder.build().unwrap();

        // Only a weak pointer is registered with the process table.
        let weak_ptr = Arc::downgrade(&root_inode);
        process_table::register_observer(weak_ptr);
        root_inode
    }
}
impl Observer<PidEvent> for ProcDir<RootDirOps> {
    /// Drops the cached `/proc/[pid]` entry of a process that exited.
    fn on_events(&self, events: &PidEvent) {
        match events {
            PidEvent::Exit(pid) => {
                let mut children = self.cached_children().write();
                children.remove_entry_by_name(&pid.to_string());
            }
        }
    }
}
impl DirOps for RootDirOps {
fn lookup_child(&self, this_ptr: Weak<dyn Inode>, name: &str) -> Result<Arc<dyn Inode>> {
let child = if name == "self" {
SelfSymOps::new_inode(this_ptr.clone())
} else if let Ok(pid) = name.parse::<Pid>() {
let process_ref =
process_table::pid_to_process(pid).ok_or_else(|| Error::new(Errno::ENOENT))?;
PidDirOps::new_inode(process_ref, this_ptr.clone())
} else {
return_errno!(Errno::ENOENT);
};
Ok(child)
}
fn populate_children(&self, this_ptr: Weak<dyn Inode>) {
let this = {
let this = this_ptr.upgrade().unwrap();
this.downcast_ref::<ProcDir<RootDirOps>>().unwrap().this()
};
let mut cached_children = this.cached_children().write();
cached_children.put_entry_if_not_found("self", || SelfSymOps::new_inode(this_ptr.clone()));
let processes = process_table::get_all_processes();
for process in processes {
let pid = process.pid().to_string();
cached_children.put_entry_if_not_found(&pid, || {
PidDirOps::new_inode(process.clone(), this_ptr.clone())
});
}
}
}

View File

@ -0,0 +1,30 @@
use super::*;
/// Represents the inode at `/proc/[pid]/comm`.
pub struct CommFileOps(Arc<Process>);

impl CommFileOps {
    /// Builds the `comm` file inode for `process_ref` under `parent`.
    pub fn new_inode(process_ref: Arc<Process>, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        let builder = ProcFileBuilder::new(Self(process_ref)).parent(parent);
        builder.build().unwrap()
    }
}
impl FileOps for CommFileOps {
    /// Produces the content of the `comm` file.
    ///
    /// The content is the last `/`-separated component of the executable
    /// path, cut to at most `TASK_COMM_LEN - 1` bytes, followed by a NUL byte
    /// and a newline.
    ///
    /// The name is truncated *before* the NUL is appended, so the terminator
    /// is always present and the name plus terminator never exceeds
    /// `TASK_COMM_LEN` bytes. (Appending first and truncating afterwards
    /// dropped the terminator for names of `TASK_COMM_LEN` bytes or more.)
    fn data(&self) -> Result<Vec<u8>> {
        let exe_path = self.0.executable_path().read();
        let last_component = exe_path.rsplit('/').next().unwrap_or(&exe_path);

        let name = last_component.as_bytes();
        let name_len = name.len().min(TASK_COMM_LEN - 1);

        // name + NUL + newline
        let mut comm_output = Vec::with_capacity(name_len + 2);
        comm_output.extend_from_slice(&name[..name_len]);
        comm_output.push(b'\0');
        comm_output.push(b'\n');
        Ok(comm_output)
    }
}
/// Upper bound, in bytes, on the comm string (before the trailing newline)
/// produced by `CommFileOps::data`.
const TASK_COMM_LEN: usize = 16;

View File

@ -0,0 +1,19 @@
use super::*;
/// Represents the inode at `/proc/[pid]/exe`.
pub struct ExeSymOps(Arc<Process>);

impl ExeSymOps {
    /// Builds the `exe` symlink inode for `process_ref` under `parent`.
    pub fn new_inode(process_ref: Arc<Process>, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        let builder = ProcSymBuilder::new(Self(process_ref)).parent(parent);
        builder.build().unwrap()
    }
}
impl SymOps for ExeSymOps {
    /// The link target is a copy of the process's executable path.
    fn read_link(&self) -> Result<String> {
        let target = self.0.executable_path().read().clone();
        Ok(target)
    }
}

View File

@ -0,0 +1,88 @@
use super::*;
use crate::fs::file_handle::FileHandle;
use crate::fs::file_table::FileDescripter;
/// Represents the inode at `/proc/[pid]/fd`.
pub struct FdDirOps(Arc<Process>);

impl FdDirOps {
    /// Builds the `fd` directory inode for `process_ref` under `parent` and
    /// registers it as an observer of the process's file table.
    pub fn new_inode(process_ref: Arc<Process>, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        let builder = ProcDirBuilder::new(Self(process_ref.clone())).parent(parent);
        let fd_inode = builder.build().unwrap();

        // Only a weak pointer is registered with the file table.
        let weak_ptr = Arc::downgrade(&fd_inode);
        process_ref.file_table().lock().register_observer(weak_ptr);
        fd_inode
    }
}
impl Observer<FdEvents> for ProcDir<FdDirOps> {
    /// Drops the cached entry of a descriptor that was closed; every other
    /// event is ignored.
    fn on_events(&self, events: &FdEvents) {
        if let FdEvents::Close(fd) = events {
            let closed_name = fd.to_string();
            let mut children = self.cached_children().write();
            children.remove_entry_by_name(&closed_name);
        }
    }
}
impl DirOps for FdDirOps {
    /// Resolves a numeric name to the symlink of that open descriptor.
    ///
    /// Names that are not a valid descriptor number, or descriptors missing
    /// from the file table, yield `ENOENT`.
    fn lookup_child(&self, this_ptr: Weak<dyn Inode>, name: &str) -> Result<Arc<dyn Inode>> {
        let fd = name
            .parse::<FileDescripter>()
            .map_err(|_| Error::new(Errno::ENOENT))?;
        // The file table lock is only held while the handle is cloned out.
        let file = self
            .0
            .file_table()
            .lock()
            .get_file(fd)
            .map_err(|_| Error::new(Errno::ENOENT))?
            .clone();
        Ok(FileSymOps::new_inode(file, this_ptr.clone()))
    }

    /// Makes sure every open descriptor has an entry in the cache.
    fn populate_children(&self, this_ptr: Weak<dyn Inode>) {
        let this = this_ptr
            .upgrade()
            .unwrap()
            .downcast_ref::<ProcDir<FdDirOps>>()
            .unwrap()
            .this();

        let file_table = self.0.file_table().lock();
        let mut children = this.cached_children().write();
        for (fd, file) in file_table.fds_and_files() {
            let name = fd.to_string();
            children.put_entry_if_not_found(&name, || {
                FileSymOps::new_inode(file.clone(), this_ptr.clone())
            });
        }
    }
}
/// Represents the inode at `/proc/[pid]/fd/N`.
struct FileSymOps(FileHandle);

impl FileSymOps {
    /// Builds the symlink inode for one open `file` under `parent`.
    pub fn new_inode(file: FileHandle, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        let builder = ProcSymBuilder::new(Self(file)).parent(parent);
        builder.build().unwrap()
    }
}
impl SymOps for FileSymOps {
    /// Returns the absolute path of the dentry for inode-backed files, and
    /// the fixed string `/dev/tty` for every other file.
    fn read_link(&self) -> Result<String> {
        if let Some(inode_handle) = self.0.as_inode_handle() {
            return Ok(inode_handle.dentry().abs_path());
        }
        if self.0.as_file().is_some() {
            // TODO: get the real path for stdio
            return Ok(String::from("/dev/tty"));
        }
        unreachable!()
    }
}

View File

@ -0,0 +1,72 @@
use crate::events::Observer;
use crate::fs::file_table::FdEvents;
use crate::fs::utils::{DirEntryVecExt, Inode};
use crate::prelude::*;
use crate::process::Process;
use self::comm::CommFileOps;
use self::exe::ExeSymOps;
use self::fd::FdDirOps;
use super::template::{
DirOps, FileOps, ProcDir, ProcDirBuilder, ProcFileBuilder, ProcSymBuilder, SymOps,
};
mod comm;
mod exe;
mod fd;
/// Represents the inode at `/proc/[pid]`.
pub struct PidDirOps(Arc<Process>);

impl PidDirOps {
    /// Builds the directory inode of `process_ref` under `parent` and
    /// registers it as an observer of the process's file table.
    pub fn new_inode(process_ref: Arc<Process>, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        // The pid directories must be volatile, because each one is
        // associated with just one process.
        let builder = ProcDirBuilder::new(Self(process_ref.clone()))
            .parent(parent)
            .volatile();
        let pid_inode = builder.build().unwrap();

        // Only a weak pointer is registered with the file table.
        let weak_ptr = Arc::downgrade(&pid_inode);
        process_ref.file_table().lock().register_observer(weak_ptr);
        pid_inode
    }
}
impl Observer<FdEvents> for ProcDir<PidDirOps> {
    /// Drops the cached `fd` entry once the file table is dropped; every
    /// other event is ignored.
    fn on_events(&self, events: &FdEvents) {
        if !matches!(events, FdEvents::DropFileTable) {
            return;
        }
        let mut children = self.cached_children().write();
        children.remove_entry_by_name("fd");
    }
}
impl DirOps for PidDirOps {
    /// Resolves `exe`, `comm` or `fd`; anything else is `ENOENT`.
    fn lookup_child(&self, this_ptr: Weak<dyn Inode>, name: &str) -> Result<Arc<dyn Inode>> {
        let process = &self.0;
        match name {
            "exe" => Ok(ExeSymOps::new_inode(process.clone(), this_ptr.clone())),
            "comm" => Ok(CommFileOps::new_inode(process.clone(), this_ptr.clone())),
            "fd" => Ok(FdDirOps::new_inode(process.clone(), this_ptr.clone())),
            _ => return_errno!(Errno::ENOENT),
        }
    }

    /// Makes sure `exe`, `comm` and `fd` are present in the cache.
    fn populate_children(&self, this_ptr: Weak<dyn Inode>) {
        let this = this_ptr
            .upgrade()
            .unwrap()
            .downcast_ref::<ProcDir<PidDirOps>>()
            .unwrap()
            .this();

        let mut children = this.cached_children().write();
        children.put_entry_if_not_found("exe", || {
            ExeSymOps::new_inode(self.0.clone(), this_ptr.clone())
        });
        children.put_entry_if_not_found("comm", || {
            CommFileOps::new_inode(self.0.clone(), this_ptr.clone())
        });
        children.put_entry_if_not_found("fd", || {
            FdDirOps::new_inode(self.0.clone(), this_ptr.clone())
        });
    }
}

View File

@ -0,0 +1,16 @@
use super::*;
/// Represents the inode at `/proc/self`.
pub struct SelfSymOps;

impl SelfSymOps {
    /// Builds the `self` symlink inode under `parent`.
    pub fn new_inode(parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
        let builder = ProcSymBuilder::new(Self).parent(parent);
        builder.build().unwrap()
    }
}
impl SymOps for SelfSymOps {
    /// The link target is the pid of the current process, as a string.
    fn read_link(&self) -> Result<String> {
        let current = current!();
        Ok(current.pid().to_string())
    }
}

View File

@ -0,0 +1,175 @@
use crate::fs::utils::{FileSystem, Inode};
use crate::prelude::*;
use super::{
dir::{DirOps, ProcDir},
file::{FileOps, ProcFile},
sym::{ProcSym, SymOps},
};
pub struct ProcDirBuilder<O: DirOps> {
// Mandatory field
dir: O,
// Optional fields
optional_builder: Option<OptionalBuilder>,
}
impl<O: DirOps> ProcDirBuilder<O> {
pub fn new(dir: O) -> Self {
let optional_builder: OptionalBuilder = Default::default();
Self {
dir,
optional_builder: Some(optional_builder),
}
}
pub fn parent(self, parent: Weak<dyn Inode>) -> Self {
self.optional_builder(|ob| ob.parent(parent))
}
pub fn fs(self, fs: Arc<dyn FileSystem>) -> Self {
self.optional_builder(|ob| ob.fs(fs))
}
pub fn volatile(self) -> Self {
self.optional_builder(|ob| ob.volatile())
}
pub fn build(mut self) -> Result<Arc<ProcDir<O>>> {
let (fs, parent, is_volatile) = self.optional_builder.take().unwrap().build()?;
Ok(ProcDir::new(self.dir, fs, parent, is_volatile))
}
fn optional_builder<F>(mut self, f: F) -> Self
where
F: FnOnce(OptionalBuilder) -> OptionalBuilder,
{
let optional_builder = self.optional_builder.take().unwrap();
self.optional_builder = Some(f(optional_builder));
self
}
}
pub struct ProcFileBuilder<O: FileOps> {
// Mandatory field
file: O,
// Optional fields
optional_builder: Option<OptionalBuilder>,
}
impl<O: FileOps> ProcFileBuilder<O> {
pub fn new(file: O) -> Self {
let optional_builder: OptionalBuilder = Default::default();
Self {
file,
optional_builder: Some(optional_builder),
}
}
pub fn parent(self, parent: Weak<dyn Inode>) -> Self {
self.optional_builder(|ob| ob.parent(parent))
}
pub fn volatile(self) -> Self {
self.optional_builder(|ob| ob.volatile())
}
pub fn build(mut self) -> Result<Arc<ProcFile<O>>> {
let (fs, _, is_volatile) = self.optional_builder.take().unwrap().build()?;
Ok(ProcFile::new(self.file, fs, is_volatile))
}
fn optional_builder<F>(mut self, f: F) -> Self
where
F: FnOnce(OptionalBuilder) -> OptionalBuilder,
{
let optional_builder = self.optional_builder.take().unwrap();
self.optional_builder = Some(f(optional_builder));
self
}
}
pub struct ProcSymBuilder<O: SymOps> {
// Mandatory field
sym: O,
// Optional fields
optional_builder: Option<OptionalBuilder>,
}
impl<O: SymOps> ProcSymBuilder<O> {
pub fn new(sym: O) -> Self {
let optional_builder: OptionalBuilder = Default::default();
Self {
sym,
optional_builder: Some(optional_builder),
}
}
pub fn parent(self, parent: Weak<dyn Inode>) -> Self {
self.optional_builder(|ob| ob.parent(parent))
}
pub fn volatile(self) -> Self {
self.optional_builder(|ob| ob.volatile())
}
pub fn build(mut self) -> Result<Arc<ProcSym<O>>> {
let (fs, _, is_volatile) = self.optional_builder.take().unwrap().build()?;
Ok(ProcSym::new(self.sym, fs, is_volatile))
}
fn optional_builder<F>(mut self, f: F) -> Self
where
F: FnOnce(OptionalBuilder) -> OptionalBuilder,
{
let optional_builder = self.optional_builder.take().unwrap();
self.optional_builder = Some(f(optional_builder));
self
}
}
/// The settings shared by all procfs inode builders.
#[derive(Default)]
struct OptionalBuilder {
    parent: Option<Weak<dyn Inode>>,
    fs: Option<Arc<dyn FileSystem>>,
    is_volatile: bool,
}

impl OptionalBuilder {
    /// Sets the parent inode.
    pub fn parent(mut self, parent: Weak<dyn Inode>) -> Self {
        self.parent = Some(parent);
        self
    }

    /// Sets the file system explicitly.
    pub fn fs(mut self, fs: Arc<dyn FileSystem>) -> Self {
        self.fs = Some(fs);
        self
    }

    /// Marks the inode as volatile.
    pub fn volatile(mut self) -> Self {
        self.is_volatile = true;
        self
    }

    /// Resolves the settings into `(fs, parent, is_volatile)`.
    ///
    /// * The file system is the explicitly given one, or else the parent's.
    /// * The inode is volatile if it was marked so, or if the parent's
    ///   dentry is not cacheable (the volatile property is inherited).
    ///
    /// Fails with `EINVAL` if neither a parent nor a file system was given,
    /// or if the parent inode no longer exists. The latter used to be a
    /// panic, which does not fit a function that already reports failures
    /// through `Result`.
    pub fn build(self) -> Result<(Arc<dyn FileSystem>, Option<Weak<dyn Inode>>, bool)> {
        // Upgrade the parent once and reuse it for both decisions below.
        let parent_inode = match self.parent.as_ref() {
            Some(weak_parent) => match weak_parent.upgrade() {
                Some(inode) => Some(inode),
                None => {
                    return_errno_with_message!(Errno::EINVAL, "parent inode has been dropped");
                }
            },
            None => None,
        };

        let fs = match (self.fs, parent_inode.as_ref()) {
            (Some(fs), _) => fs,
            (None, Some(inode)) => inode.fs(),
            (None, None) => {
                return_errno_with_message!(Errno::EINVAL, "must have parent or fs");
            }
        };

        // The volatile property is inherited from parent.
        let is_volatile = self.is_volatile
            || parent_inode.map_or(false, |inode| !inode.is_dentry_cacheable());

        Ok((fs, self.parent, is_volatile))
    }
}

View File

@ -0,0 +1,225 @@
use alloc::string::String;
use core::any::Any;
use core::time::Duration;
use jinux_frame::vm::VmFrame;
use crate::fs::utils::{
DirEntryVec, DirentVisitor, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata,
};
use crate::prelude::*;
use super::{ProcFS, ProcInodeInfo};
/// A procfs directory inode whose children are supplied by a `DirOps`.
pub struct ProcDir<D: DirOps> {
    inner: D,
    // Weak pointer to the `Arc` that owns this inode.
    this: Weak<ProcDir<D>>,
    parent: Option<Weak<dyn Inode>>,
    // Children that have already been looked up or populated.
    cached_children: RwLock<DirEntryVec<(String, Arc<dyn Inode>)>>,
    info: ProcInodeInfo,
}

impl<D: DirOps> ProcDir<D> {
    /// Creates a directory inode (mode `0o555`) on `fs`.
    ///
    /// Panics if `fs` is not a `ProcFS`.
    pub fn new(
        dir: D,
        fs: Arc<dyn FileSystem>,
        parent: Option<Weak<dyn Inode>>,
        is_volatile: bool,
    ) -> Arc<Self> {
        let procfs = fs.downcast_ref::<ProcFS>().unwrap();
        let metadata = Metadata::new_dir(
            procfs.alloc_id(),
            InodeMode::from_bits_truncate(0o555),
            &fs.sb(),
        );
        let info = ProcInodeInfo::new(metadata, Arc::downgrade(&fs), is_volatile);

        Arc::new_cyclic(|weak_self| Self {
            inner: dir,
            this: weak_self.clone(),
            parent,
            cached_children: RwLock::new(DirEntryVec::new()),
            info,
        })
    }

    /// Returns a strong pointer to this inode.
    pub fn this(&self) -> Arc<ProcDir<D>> {
        self.this.upgrade().unwrap()
    }

    /// Returns the parent inode, if one was set and it still exists.
    pub fn parent(&self) -> Option<Arc<dyn Inode>> {
        self.parent.as_ref()?.upgrade()
    }

    /// Returns the cache of child entries.
    pub fn cached_children(&self) -> &RwLock<DirEntryVec<(String, Arc<dyn Inode>)>> {
        &self.cached_children
    }
}
/// `Inode` implementation for procfs directories.
///
/// The directory is read-only: data I/O fails with `EISDIR`, and every
/// operation that would modify the directory fails with `EPERM`.
/// Size and timestamps come from the metadata created with the inode; the
/// setters are no-ops.
impl<D: DirOps + 'static> Inode for ProcDir<D> {
fn len(&self) -> usize {
self.info.metadata().size
}
fn resize(&self, _new_size: usize) {}
fn metadata(&self) -> Metadata {
self.info.metadata().clone()
}
fn atime(&self) -> Duration {
self.info.metadata().atime
}
fn set_atime(&self, _time: Duration) {}
fn mtime(&self) -> Duration {
self.info.metadata().mtime
}
fn set_mtime(&self, _time: Duration) {}
fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
Err(Error::new(Errno::EISDIR))
}
fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
Err(Error::new(Errno::EISDIR))
}
fn read_at(&self, _offset: usize, _buf: &mut [u8]) -> Result<usize> {
Err(Error::new(Errno::EISDIR))
}
fn write_at(&self, _offset: usize, _buf: &[u8]) -> Result<usize> {
Err(Error::new(Errno::EISDIR))
}
fn mknod(&self, _name: &str, _type_: InodeType, _mode: InodeMode) -> Result<Arc<dyn Inode>> {
Err(Error::new(Errno::EPERM))
}
/// Visits the directory entries starting at `offset` and returns how far
/// the offset advanced.
///
/// Offset 0 is ".", offset 1 is "..", and the cached child at index `i`
/// sits at offset `i + 2`. If the visitor fails after at least one entry
/// was visited, the progress made so far is returned instead of the error.
fn readdir_at(&self, mut offset: usize, visitor: &mut dyn DirentVisitor) -> Result<usize> {
let try_readdir = |offset: &mut usize, visitor: &mut dyn DirentVisitor| -> Result<()> {
// Read the two special entries.
if *offset == 0 {
let this_inode = self.this();
visitor.visit(
".",
this_inode.info.metadata().ino as u64,
this_inode.info.metadata().type_,
*offset,
)?;
*offset += 1;
}
if *offset == 1 {
// Without a (live) parent, ".." refers to this directory itself.
let parent_inode = self.parent().unwrap_or(self.this());
visitor.visit(
"..",
parent_inode.metadata().ino as u64,
parent_inode.metadata().type_,
*offset,
)?;
*offset += 1;
}
// Read the normal child entries.
// Let the `DirOps` add missing children to the cache before it is read.
self.inner.populate_children(self.this.clone());
let cached_children = self.cached_children.read();
for (idx, (name, child)) in cached_children
.idxes_and_entries()
.map(|(idx, (name, child))| (idx + 2, (name, child)))
{
// Skip the entries located before the requested offset.
if idx < *offset {
continue;
}
visitor.visit(
name.as_ref(),
child.metadata().ino as u64,
child.metadata().type_,
idx,
)?;
*offset = idx + 1;
}
Ok(())
};
let initial_offset = offset;
// Report the error only if no entry at all could be visited.
match try_readdir(&mut offset, visitor) {
Err(e) if initial_offset == offset => Err(e),
_ => Ok(offset - initial_offset),
}
}
fn link(&self, _old: &Arc<dyn Inode>, _name: &str) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn unlink(&self, _name: &str) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn rmdir(&self, _name: &str) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
/// Looks up the child called `name`.
///
/// "." is this directory; ".." is the parent, or this directory when there
/// is no (live) parent. Any other name is served from the cache; on a miss
/// the `DirOps` is asked for the child and the result is cached.
fn lookup(&self, name: &str) -> Result<Arc<dyn Inode>> {
let inode = match name {
"." => self.this(),
".." => self.parent().unwrap_or(self.this()),
name => {
// The write lock is held across the cache check and the insertion.
let mut cached_children = self.cached_children.write();
if let Some((_, inode)) = cached_children
.iter()
.find(|(child_name, inode)| child_name.as_str() == name)
{
return Ok(inode.clone());
}
let inode = self.inner.lookup_child(self.this.clone(), name)?;
cached_children.put((String::from(name), inode.clone()));
inode
}
};
Ok(inode)
}
fn rename(&self, _old_name: &str, _target: &Arc<dyn Inode>, _new_name: &str) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn read_link(&self) -> Result<String> {
Err(Error::new(Errno::EISDIR))
}
fn write_link(&self, _target: &str) -> Result<()> {
Err(Error::new(Errno::EISDIR))
}
fn ioctl(&self, _cmd: &IoctlCmd) -> Result<()> {
Err(Error::new(Errno::EISDIR))
}
fn sync(&self) -> Result<()> {
Ok(())
}
// Panics if the file system has already been dropped.
fn fs(&self) -> Arc<dyn FileSystem> {
self.info.fs().upgrade().unwrap()
}
// Volatile inodes must not have their dentries cached.
fn is_dentry_cacheable(&self) -> bool {
!self.info.is_volatile()
}
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// The directory-specific behavior plugged into a `ProcDir`.
pub trait DirOps: Sync + Send {
/// Creates the inode of the child called `name`. `this_ptr` is the weak
/// pointer of the directory being looked up in.
///
/// The default implementation has no children and fails with `ENOENT`.
fn lookup_child(&self, this_ptr: Weak<dyn Inode>, name: &str) -> Result<Arc<dyn Inode>> {
Err(Error::new(Errno::ENOENT))
}
/// Called by `ProcDir::readdir_at` before the cached children are read, so
/// that the implementation can add entries to the directory's cache.
///
/// The default implementation does nothing.
fn populate_children(&self, this_ptr: Weak<dyn Inode>) {}
}

View File

@ -0,0 +1,136 @@
use alloc::string::String;
use core::any::Any;
use core::time::Duration;
use jinux_frame::vm::VmFrame;
use crate::fs::utils::{
DirentVisitor, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata,
};
use crate::prelude::*;
use super::{ProcFS, ProcInodeInfo};
/// A procfs regular-file inode whose content is supplied by a `FileOps`.
pub struct ProcFile<F: FileOps> {
    inner: F,
    info: ProcInodeInfo,
}

impl<F: FileOps> ProcFile<F> {
    /// Creates a file inode (mode `0o444`) on `fs`.
    ///
    /// Panics if `fs` is not a `ProcFS`.
    pub fn new(file: F, fs: Arc<dyn FileSystem>, is_volatile: bool) -> Arc<Self> {
        let procfs = fs.downcast_ref::<ProcFS>().unwrap();
        let metadata = Metadata::new_file(
            procfs.alloc_id(),
            InodeMode::from_bits_truncate(0o444),
            &fs.sb(),
        );
        let info = ProcInodeInfo::new(metadata, Arc::downgrade(&fs), is_volatile);
        Arc::new(Self { inner: file, info })
    }
}
/// `Inode` implementation for procfs regular files.
///
/// The file is read-only and its content is generated by the `FileOps` on
/// every read. Directory operations fail with `ENOTDIR`.
impl<F: FileOps + 'static> Inode for ProcFile<F> {
    fn len(&self) -> usize {
        self.info.metadata().size
    }

    fn resize(&self, _new_size: usize) {}

    fn metadata(&self) -> Metadata {
        self.info.metadata().clone()
    }

    fn atime(&self) -> Duration {
        self.info.metadata().atime
    }

    fn set_atime(&self, _time: Duration) {}

    fn mtime(&self) -> Duration {
        self.info.metadata().mtime
    }

    fn set_mtime(&self, _time: Duration) {}

    // Page-granular I/O is treated as unreachable for procfs files
    // (the file system's flags are `NO_PAGECACHE`).
    fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
        unreachable!()
    }

    fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
        unreachable!()
    }

    /// Copies the generated content starting at `offset` into `buf` and
    /// returns the number of bytes copied (0 when `offset` is at or past the
    /// end of the content).
    fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize> {
        let data = self.inner.data()?;
        let start = data.len().min(offset);
        // `offset` is caller-controlled: a saturating add keeps a huge offset
        // from overflowing, which would otherwise make `end` smaller than
        // `start` and the subtraction below underflow.
        let end = data.len().min(offset.saturating_add(buf.len()));
        let len = end - start;
        buf[0..len].copy_from_slice(&data[start..end]);
        Ok(len)
    }

    fn write_at(&self, _offset: usize, _buf: &[u8]) -> Result<usize> {
        Err(Error::new(Errno::EPERM))
    }

    fn mknod(&self, _name: &str, _type_: InodeType, _mode: InodeMode) -> Result<Arc<dyn Inode>> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn readdir_at(&self, _offset: usize, _visitor: &mut dyn DirentVisitor) -> Result<usize> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn link(&self, _old: &Arc<dyn Inode>, _name: &str) -> Result<()> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn unlink(&self, _name: &str) -> Result<()> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn rmdir(&self, _name: &str) -> Result<()> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn lookup(&self, _name: &str) -> Result<Arc<dyn Inode>> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn rename(&self, _old_name: &str, _target: &Arc<dyn Inode>, _new_name: &str) -> Result<()> {
        Err(Error::new(Errno::ENOTDIR))
    }

    fn read_link(&self) -> Result<String> {
        Err(Error::new(Errno::EINVAL))
    }

    fn write_link(&self, _target: &str) -> Result<()> {
        Err(Error::new(Errno::EINVAL))
    }

    fn ioctl(&self, _cmd: &IoctlCmd) -> Result<()> {
        Err(Error::new(Errno::EPERM))
    }

    fn sync(&self) -> Result<()> {
        Ok(())
    }

    // Panics if the file system has already been dropped.
    fn fs(&self) -> Arc<dyn FileSystem> {
        self.info.fs().upgrade().unwrap()
    }

    // Volatile inodes must not have their dentries cached.
    fn is_dentry_cacheable(&self) -> bool {
        !self.info.is_volatile()
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }
}
/// The file-specific behavior plugged into a `ProcFile`.
pub trait FileOps: Sync + Send {
/// Produces the whole content of the file. `ProcFile::read_at` calls this
/// on every read and copies the requested range out of the result.
fn data(&self) -> Result<Vec<u8>>;
}

View File

@ -0,0 +1,42 @@
use crate::fs::utils::{FileSystem, Metadata};
use crate::prelude::*;
use super::ProcFS;
pub use self::builder::{ProcDirBuilder, ProcFileBuilder, ProcSymBuilder};
pub use self::dir::{DirOps, ProcDir};
pub use self::file::FileOps;
pub use self::sym::SymOps;
mod builder;
mod dir;
mod file;
mod sym;
/// The state shared by every kind of procfs inode.
struct ProcInodeInfo {
    metadata: Metadata,
    // Weak, so that an inode does not keep its file system alive.
    fs: Weak<dyn FileSystem>,
    is_volatile: bool,
}

impl ProcInodeInfo {
    /// Bundles the given parts without any further processing.
    pub fn new(metadata: Metadata, fs: Weak<dyn FileSystem>, is_volatile: bool) -> Self {
        Self { metadata, fs, is_volatile }
    }

    /// Returns the inode metadata.
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }

    /// Returns the weak pointer to the owning file system.
    pub fn fs(&self) -> &Weak<dyn FileSystem> {
        &self.fs
    }

    /// Tells whether the inode is volatile.
    pub fn is_volatile(&self) -> bool {
        self.is_volatile
    }
}

View File

@ -0,0 +1,131 @@
use alloc::string::String;
use core::any::Any;
use core::time::Duration;
use jinux_frame::vm::VmFrame;
use crate::fs::utils::{
DirentVisitor, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata,
};
use crate::prelude::*;
use super::{ProcFS, ProcInodeInfo};
/// A procfs symlink inode whose target is supplied by a `SymOps`.
pub struct ProcSym<S: SymOps> {
    inner: S,
    info: ProcInodeInfo,
}

impl<S: SymOps> ProcSym<S> {
    /// Creates a symlink inode (mode `0o777`) on `fs`.
    ///
    /// Panics if `fs` is not a `ProcFS`.
    pub fn new(sym: S, fs: Arc<dyn FileSystem>, is_volatile: bool) -> Arc<Self> {
        let procfs = fs.downcast_ref::<ProcFS>().unwrap();
        let metadata = Metadata::new_symlink(
            procfs.alloc_id(),
            InodeMode::from_bits_truncate(0o777),
            &fs.sb(),
        );
        let info = ProcInodeInfo::new(metadata, Arc::downgrade(&fs), is_volatile);
        Arc::new(Self { inner: sym, info })
    }
}
impl<S: SymOps + 'static> Inode for ProcSym<S> {
fn len(&self) -> usize {
self.info.metadata().size
}
fn resize(&self, _new_size: usize) {}
fn metadata(&self) -> Metadata {
self.info.metadata().clone()
}
fn atime(&self) -> Duration {
self.info.metadata().atime
}
fn set_atime(&self, _time: Duration) {}
fn mtime(&self) -> Duration {
self.info.metadata().mtime
}
fn set_mtime(&self, _time: Duration) {}
fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn read_at(&self, _offset: usize, _buf: &mut [u8]) -> Result<usize> {
Err(Error::new(Errno::EPERM))
}
fn write_at(&self, _offset: usize, _buf: &[u8]) -> Result<usize> {
Err(Error::new(Errno::EPERM))
}
fn mknod(&self, _name: &str, _type_: InodeType, _mode: InodeMode) -> Result<Arc<dyn Inode>> {
Err(Error::new(Errno::ENOTDIR))
}
fn readdir_at(&self, _offset: usize, _visitor: &mut dyn DirentVisitor) -> Result<usize> {
Err(Error::new(Errno::ENOTDIR))
}
fn link(&self, _old: &Arc<dyn Inode>, _name: &str) -> Result<()> {
Err(Error::new(Errno::ENOTDIR))
}
fn unlink(&self, _name: &str) -> Result<()> {
Err(Error::new(Errno::ENOTDIR))
}
fn rmdir(&self, _name: &str) -> Result<()> {
Err(Error::new(Errno::ENOTDIR))
}
fn lookup(&self, _name: &str) -> Result<Arc<dyn Inode>> {
Err(Error::new(Errno::ENOTDIR))
}
fn rename(&self, _old_name: &str, _target: &Arc<dyn Inode>, _new_name: &str) -> Result<()> {
Err(Error::new(Errno::ENOTDIR))
}
fn read_link(&self) -> Result<String> {
self.inner.read_link()
}
fn write_link(&self, _target: &str) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn ioctl(&self, _cmd: &IoctlCmd) -> Result<()> {
Err(Error::new(Errno::EPERM))
}
fn sync(&self) -> Result<()> {
Ok(())
}
fn fs(&self) -> Arc<dyn FileSystem> {
self.info.fs().upgrade().unwrap()
}
fn is_dentry_cacheable(&self) -> bool {
!self.info.is_volatile()
}
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// The symlink-specific behavior plugged into a `ProcSym`.
pub trait SymOps: Sync + Send {
/// Produces the target of the symlink; `ProcSym::read_link` returns it as is.
fn read_link(&self) -> Result<String>;
}

View File

@ -0,0 +1,665 @@
use crate::prelude::*;
use alloc::str;
use alloc::string::String;
use core::any::Any;
use core::sync::atomic::{AtomicUsize, Ordering};
use core::time::Duration;
use jinux_frame::vm::VmFrame;
use spin::{RwLock, RwLockWriteGuard};
use super::*;
use crate::fs::utils::{
DirEntryVec, DirentVisitor, FileSystem, FsFlags, Inode, InodeMode, InodeType, IoctlCmd,
Metadata, SuperBlock,
};
/// A file system that keeps everything in memory.
pub struct RamFS {
    // Super block; its `files` counter is bumped on every inode ID allocation.
    metadata: RwLock<SuperBlock>,
    root: Arc<RamInode>,
    // Next inode ID to hand out.
    inode_allocator: AtomicUsize,
}

impl RamFS {
    /// Creates the file system together with its root directory (mode `0o755`).
    ///
    /// The root is created first with empty back-pointers; once the fs is
    /// wrapped in an `Arc`, the root's directory entry, `this` and `fs`
    /// pointers are filled in. The root is its own parent.
    pub fn new() -> Arc<Self> {
        let sb = SuperBlock::new(RAMFS_MAGIC, BLOCK_SIZE, NAME_MAX);
        let root_inode = Inode_::new_dir(ROOT_INO, InodeMode::from_bits_truncate(0o755), &sb);
        let ramfs = Arc::new(Self {
            metadata: RwLock::new(sb),
            root: Arc::new(RamInode(RwLock::new(root_inode))),
            inode_allocator: AtomicUsize::new(ROOT_INO + 1),
        });

        {
            let weak_root = Arc::downgrade(&ramfs.root);
            let mut root = ramfs.root.0.write();
            root.inner
                .as_direntry_mut()
                .unwrap()
                .init(weak_root.clone(), weak_root.clone());
            root.this = weak_root;
            root.fs = Arc::downgrade(&ramfs);
        }
        ramfs
    }

    /// Hands out the next inode ID and counts one more file in the super block.
    fn alloc_id(&self) -> usize {
        let id = self.inode_allocator.fetch_add(1, Ordering::SeqCst);
        self.metadata.write().files += 1;
        id
    }
}
impl FileSystem for RamFS {
fn sync(&self) -> Result<()> {
// do nothing
Ok(())
}
fn root_inode(&self) -> Arc<dyn Inode> {
self.root.clone()
}
fn sb(&self) -> SuperBlock {
self.metadata.read().clone()
}
fn flags(&self) -> FsFlags {
FsFlags::DENTRY_UNEVICTABLE
}
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// A ramfs inode: the inode state behind a reader-writer lock.
struct RamInode(RwLock<Inode_>);
/// The lock-protected state of a `RamInode`.
struct Inode_ {
/// Type-specific payload (directory entries, nothing for files, or a link target).
inner: Inner,
/// Inode metadata (size, blocks, link count, timestamps, ...).
metadata: Metadata,
/// Weak reference to the `RamInode` wrapping this state.
this: Weak<RamInode>,
/// Weak reference to the owning file system.
fs: Weak<RamFS>,
}
impl Inode_ {
    /// Builds the state of an empty directory inode.
    ///
    /// The `this` and `fs` back references start out dangling and must be
    /// filled in by the caller.
    pub fn new_dir(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
        let metadata = Metadata::new_dir(ino, mode, sb);
        Self {
            inner: Inner::Dir(DirEntry::new()),
            metadata,
            this: Weak::new(),
            fs: Weak::new(),
        }
    }

    /// Builds the state of a regular file inode (back references dangling).
    pub fn new_file(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
        let metadata = Metadata::new_file(ino, mode, sb);
        Self {
            inner: Inner::File,
            metadata,
            this: Weak::new(),
            fs: Weak::new(),
        }
    }

    /// Builds the state of a symlink inode with an empty target
    /// (back references dangling).
    pub fn new_symlink(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
        let metadata = Metadata::new_symlink(ino, mode, sb);
        Self {
            inner: Inner::SymLink(Str256::from("")),
            metadata,
            this: Weak::new(),
            fs: Weak::new(),
        }
    }

    /// Grows the size by one and recomputes the block count.
    pub fn inc_size(&mut self) {
        let grown = self.metadata.size + 1;
        self.resize(grown);
    }

    /// Shrinks the size by one and recomputes the block count.
    pub fn dec_size(&mut self) {
        debug_assert!(self.metadata.size > 0);
        let shrunk = self.metadata.size - 1;
        self.resize(shrunk);
    }

    /// Sets the size and derives the block count by rounding up to whole blocks.
    pub fn resize(&mut self, new_size: usize) {
        let whole_blocks = (new_size + BLOCK_SIZE - 1) / BLOCK_SIZE;
        self.metadata.size = new_size;
        self.metadata.blocks = whole_blocks;
    }
}
/// Type-specific payload of a ramfs inode.
enum Inner {
/// A directory and its entries.
Dir(DirEntry),
/// A regular file; no payload is kept here.
File,
/// A symbolic link and its target.
SymLink(Str256),
}
impl Inner {
    /// Borrows the directory payload, or `None` if this is not a directory.
    fn as_direntry(&self) -> Option<&DirEntry> {
        if let Inner::Dir(dir) = self {
            Some(dir)
        } else {
            None
        }
    }

    /// Mutably borrows the directory payload, or `None` if this is not a directory.
    fn as_direntry_mut(&mut self) -> Option<&mut DirEntry> {
        if let Inner::Dir(dir) = self {
            Some(dir)
        } else {
            None
        }
    }

    /// Borrows the link target as a string, or `None` if this is not a symlink.
    fn as_symlink(&self) -> Option<&str> {
        if let Inner::SymLink(target) = self {
            Some(target.as_ref())
        } else {
            None
        }
    }

    /// Mutably borrows the link target buffer, or `None` if this is not a symlink.
    fn as_symlink_mut(&mut self) -> Option<&mut Str256> {
        if let Inner::SymLink(target) = self {
            Some(target)
        } else {
            None
        }
    }
}
/// The contents of a directory inode.
struct DirEntry {
/// Named children, excluding the implicit "." and ".." entries.
children: DirEntryVec<(Str256, Arc<RamInode>)>,
/// The directory itself (the "." entry).
this: Weak<RamInode>,
/// The parent directory (the ".." entry).
parent: Weak<RamInode>,
}
impl DirEntry {
    /// Creates an empty directory whose "." and ".." references are not set yet.
    fn new() -> Self {
        Self {
            children: DirEntryVec::new(),
            this: Weak::default(),
            parent: Weak::default(),
        }
    }

    /// Sets the "." (`this`) and ".." (`parent`) references.
    fn init(&mut self, this: Weak<RamInode>, parent: Weak<RamInode>) {
        self.this = this;
        self.set_parent(parent);
    }

    /// Replaces the ".." reference.
    fn set_parent(&mut self, parent: Weak<RamInode>) {
        self.parent = parent;
    }

    /// Returns `true` if `name` is ".", ".." or the name of a child.
    fn contains_entry(&self, name: &str) -> bool {
        if name == "." || name == ".." {
            return true;
        }
        // Build the fixed-size key once instead of once per compared child.
        let key = Str256::from(name);
        self.children.iter().any(|(child, _)| child == &key)
    }

    /// Looks up an entry by name.
    ///
    /// Returns the entry's directory index together with its inode. Index 0 is
    /// ".", index 1 is "..", and children start at index 2.
    fn get_entry(&self, name: &str) -> Option<(usize, Arc<RamInode>)> {
        if name == "." {
            Some((0, self.this.upgrade().unwrap()))
        } else if name == ".." {
            Some((1, self.parent.upgrade().unwrap()))
        } else {
            // Build the fixed-size key once instead of once per compared child.
            let key = Str256::from(name);
            self.children
                .idxes_and_entries()
                .find(|(_, (child, _))| child == &key)
                .map(|(idx, (_, inode))| (idx + 2, inode.clone()))
        }
    }

    /// Adds a child entry under `name`.
    fn append_entry(&mut self, name: &str, inode: Arc<RamInode>) {
        self.children.put((Str256::from(name), inode))
    }

    /// Removes the child at directory index `idx` (which must be >= 2) and
    /// returns it, or `None` if that slot is empty or out of range.
    fn remove_entry(&mut self, idx: usize) -> Option<(Str256, Arc<RamInode>)> {
        assert!(idx >= 2);
        self.children.remove(idx - 2)
    }

    /// Stores `new_entry` at directory index `idx` (which must be >= 2) and
    /// returns the entry that was there before, if any.
    fn substitute_entry(
        &mut self,
        idx: usize,
        new_entry: (Str256, Arc<RamInode>),
    ) -> Option<(Str256, Arc<RamInode>)> {
        assert!(idx >= 2);
        self.children.put_at(idx - 2, new_entry)
    }

    /// Feeds entries to `visitor`, starting at directory index `idx`.
    ///
    /// Returns how far the index advanced. An error from the visitor is only
    /// reported if no entry was visited at all; otherwise the partial progress
    /// is returned as success.
    fn visit_entry(&self, mut idx: usize, visitor: &mut dyn DirentVisitor) -> Result<usize> {
        let try_visit = |idx: &mut usize, visitor: &mut dyn DirentVisitor| -> Result<()> {
            // Read the two special entries("." and "..").
            if *idx == 0 {
                let this_inode = self.this.upgrade().unwrap();
                visitor.visit(
                    ".",
                    this_inode.metadata().ino as u64,
                    this_inode.metadata().type_,
                    *idx,
                )?;
                *idx += 1;
            }
            if *idx == 1 {
                let parent_inode = self.parent.upgrade().unwrap();
                visitor.visit(
                    "..",
                    parent_inode.metadata().ino as u64,
                    parent_inode.metadata().type_,
                    *idx,
                )?;
                *idx += 1;
            }
            // Read the normal child entries.
            for (offset, (name, child)) in self
                .children
                .idxes_and_entries()
                .map(|(offset, (name, child))| (offset + 2, (name, child)))
            {
                // Skip entries before the requested start position.
                if offset < *idx {
                    continue;
                }
                visitor.visit(
                    name.as_ref(),
                    child.metadata().ino as u64,
                    child.metadata().type_,
                    offset,
                )?;
                *idx = offset + 1;
            }
            Ok(())
        };
        let initial_idx = idx;
        match try_visit(&mut idx, visitor) {
            Err(e) if idx == initial_idx => Err(e),
            _ => Ok(idx - initial_idx),
        }
    }

    /// Returns `true` if the directory has no children besides "." and "..".
    fn is_empty_children(&self) -> bool {
        self.children.is_empty()
    }
}
/// A fixed-size, NUL-padded buffer holding a UTF-8 string of at most 255 bytes.
///
/// The last byte is always left as a NUL terminator, so the usable capacity
/// equals the file system's `NAME_MAX` (255).
#[repr(C)]
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)]
pub struct Str256([u8; 256]);

impl AsRef<str> for Str256 {
    /// Returns the stored string, i.e. the bytes before the first NUL.
    fn as_ref(&self) -> &str {
        let len = self
            .0
            .iter()
            .position(|&b| b == 0)
            .unwrap_or(self.0.len());
        str::from_utf8(&self.0[..len]).unwrap()
    }
}

impl<'a> From<&'a str> for Str256 {
    /// Copies `s` into a new buffer, truncating it to at most 255 bytes.
    ///
    /// Truncation never splits a multi-byte character: the cut is moved back
    /// to the nearest character boundary so the stored bytes stay valid UTF-8.
    fn from(s: &'a str) -> Self {
        let mut inner = [0u8; 256];
        // Keep the final byte free so a NUL terminator is always present.
        let max_len = inner.len() - 1;
        let mut len = s.len().min(max_len);
        // `is_char_boundary(0)` is always true, so this loop terminates.
        while !s.is_char_boundary(len) {
            len -= 1;
        }
        inner[..len].copy_from_slice(&s.as_bytes()[..len]);
        Str256(inner)
    }
}

impl core::fmt::Debug for Str256 {
    /// Formats the stored string without quoting or padding.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(f, "{}", self.as_ref())
    }
}
impl Inode for RamInode {
fn read_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
// do nothing
Ok(())
}
fn write_page(&self, _idx: usize, _frame: &VmFrame) -> Result<()> {
// do nothing
Ok(())
}
fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize> {
return_errno_with_message!(Errno::EOPNOTSUPP, "direct read is not supported");
}
fn write_at(&self, offset: usize, buf: &[u8]) -> Result<usize> {
return_errno_with_message!(Errno::EOPNOTSUPP, "direct write is not supported");
}
fn len(&self) -> usize {
self.0.read().metadata.size
}
fn resize(&self, new_size: usize) {
self.0.write().resize(new_size)
}
fn atime(&self) -> Duration {
self.0.read().metadata.atime
}
fn set_atime(&self, time: Duration) {
self.0.write().metadata.atime = time;
}
fn mtime(&self) -> Duration {
self.0.read().metadata.mtime
}
fn set_mtime(&self, time: Duration) {
self.0.write().metadata.mtime = time;
}
fn mknod(&self, name: &str, type_: InodeType, mode: InodeMode) -> Result<Arc<dyn Inode>> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
let mut self_inode = self.0.write();
if self_inode.inner.as_direntry().unwrap().contains_entry(name) {
return_errno_with_message!(Errno::EEXIST, "entry exists");
}
let fs = self_inode.fs.upgrade().unwrap();
let new_inode = match type_ {
InodeType::File => {
let file_inode = Arc::new(RamInode(RwLock::new(Inode_::new_file(
fs.alloc_id(),
mode,
&fs.sb(),
))));
file_inode.0.write().fs = self_inode.fs.clone();
file_inode
}
InodeType::Dir => {
let dir_inode = Arc::new(RamInode(RwLock::new(Inode_::new_dir(
fs.alloc_id(),
mode,
&fs.sb(),
))));
dir_inode.0.write().fs = self_inode.fs.clone();
dir_inode.0.write().inner.as_direntry_mut().unwrap().init(
Arc::downgrade(&dir_inode),
self_inode.inner.as_direntry().unwrap().this.clone(),
);
self_inode.metadata.nlinks += 1;
dir_inode
}
InodeType::SymLink => {
let sym_inode = Arc::new(RamInode(RwLock::new(Inode_::new_symlink(
fs.alloc_id(),
mode,
&fs.sb(),
))));
sym_inode.0.write().fs = self_inode.fs.clone();
sym_inode
}
_ => {
panic!("unsupported inode type");
}
};
new_inode.0.write().this = Arc::downgrade(&new_inode);
self_inode
.inner
.as_direntry_mut()
.unwrap()
.append_entry(name, new_inode.clone());
self_inode.inc_size();
Ok(new_inode)
}
fn readdir_at(&self, offset: usize, visitor: &mut dyn DirentVisitor) -> Result<usize> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
let self_inode = self.0.read();
let cnt = self_inode
.inner
.as_direntry()
.unwrap()
.visit_entry(offset, visitor)?;
Ok(cnt)
}
fn link(&self, old: &Arc<dyn Inode>, name: &str) -> Result<()> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
let old = old
.downcast_ref::<RamInode>()
.ok_or(Error::new(Errno::EXDEV))?;
if old.0.read().metadata.type_ == InodeType::Dir {
return_errno_with_message!(Errno::EPERM, "old is a dir");
}
let mut self_inode = self.0.write();
if self_inode.inner.as_direntry().unwrap().contains_entry(name) {
return_errno_with_message!(Errno::EEXIST, "entry exist");
}
self_inode
.inner
.as_direntry_mut()
.unwrap()
.append_entry(name, old.0.read().this.upgrade().unwrap());
self_inode.inc_size();
drop(self_inode);
old.0.write().metadata.nlinks += 1;
Ok(())
}
fn unlink(&self, name: &str) -> Result<()> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
if name == "." || name == ".." {
return_errno_with_message!(Errno::EISDIR, "unlink . or ..");
}
let mut self_inode = self.0.write();
let self_dir = self_inode.inner.as_direntry_mut().unwrap();
let (idx, target) = self_dir.get_entry(name).ok_or(Error::new(Errno::ENOENT))?;
if target.0.read().metadata.type_ == InodeType::Dir {
return_errno_with_message!(Errno::EISDIR, "unlink on dir");
}
self_dir.remove_entry(idx);
self_inode.dec_size();
drop(self_inode);
target.0.write().metadata.nlinks -= 1;
Ok(())
}
fn rmdir(&self, name: &str) -> Result<()> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
if name == "." || name == ".." {
return_errno_with_message!(Errno::EISDIR, "rmdir on . or ..");
}
let mut self_inode = self.0.write();
let self_dir = self_inode.inner.as_direntry_mut().unwrap();
let (idx, target) = self_dir.get_entry(name).ok_or(Error::new(Errno::ENOENT))?;
if target.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "rmdir on not dir");
}
if !target
.0
.read()
.inner
.as_direntry()
.unwrap()
.is_empty_children()
{
return_errno_with_message!(Errno::ENOTEMPTY, "dir not empty");
}
self_dir.remove_entry(idx);
self_inode.dec_size();
self_inode.metadata.nlinks -= 1;
drop(self_inode);
target.0.write().metadata.nlinks -= 2;
Ok(())
}
fn lookup(&self, name: &str) -> Result<Arc<dyn Inode>> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
let (_, inode) = self
.0
.read()
.inner
.as_direntry()
.unwrap()
.get_entry(name)
.ok_or(Error::new(Errno::ENOENT))?;
Ok(inode as _)
}
fn rename(&self, old_name: &str, target: &Arc<dyn Inode>, new_name: &str) -> Result<()> {
if self.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "self is not dir");
}
let target = target
.downcast_ref::<RamInode>()
.ok_or(Error::new(Errno::EXDEV))?;
if target.0.read().metadata.type_ != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "target is not dir");
}
if old_name == "." || old_name == ".." {
return_errno_with_message!(Errno::EISDIR, "old_name is . or ..");
}
if new_name == "." || new_name == ".." {
return_errno_with_message!(Errno::EISDIR, "new_name is . or ..");
}
let src_inode = self.lookup(old_name)?;
if src_inode.metadata().ino == target.metadata().ino {
return_errno_with_message!(Errno::EINVAL, "target is a descendant of old");
}
if let Ok(dst_inode) = target.lookup(new_name) {
if src_inode.metadata().ino == dst_inode.metadata().ino {
return Ok(());
}
match (src_inode.metadata().type_, dst_inode.metadata().type_) {
(InodeType::Dir, InodeType::Dir) => {
let dst_inode = dst_inode.downcast_ref::<RamInode>().unwrap();
if !dst_inode
.0
.read()
.inner
.as_direntry()
.unwrap()
.is_empty_children()
{
return_errno_with_message!(Errno::ENOTEMPTY, "dir not empty");
}
}
(InodeType::Dir, _) => {
return_errno_with_message!(Errno::ENOTDIR, "old is not dir");
}
(_, InodeType::Dir) => {
return_errno_with_message!(Errno::EISDIR, "new is dir");
}
_ => {}
}
}
if self.metadata().ino == target.metadata().ino {
let mut self_inode = self.0.write();
let self_dir = self_inode.inner.as_direntry_mut().unwrap();
let (idx, inode) = self_dir
.get_entry(old_name)
.ok_or(Error::new(Errno::ENOENT))?;
self_dir.substitute_entry(idx, (Str256::from(new_name), inode));
} else {
let (mut self_inode, mut target_inode) = write_lock_two_inodes(self, target);
let self_dir = self_inode.inner.as_direntry_mut().unwrap();
let (idx, src_inode) = self_dir
.get_entry(old_name)
.ok_or(Error::new(Errno::ENOENT))?;
self_dir.remove_entry(idx);
target_inode
.inner
.as_direntry_mut()
.unwrap()
.append_entry(new_name, src_inode.clone());
self_inode.dec_size();
target_inode.inc_size();
if src_inode.0.read().metadata.type_ == InodeType::Dir {
self_inode.metadata.nlinks -= 1;
target_inode.metadata.nlinks += 1;
}
drop(self_inode);
drop(target_inode);
if src_inode.0.read().metadata.type_ == InodeType::Dir {
src_inode
.0
.write()
.inner
.as_direntry_mut()
.unwrap()
.set_parent(target.0.read().this.clone());
}
}
Ok(())
}
fn read_link(&self) -> Result<String> {
if self.0.read().metadata.type_ != InodeType::SymLink {
return_errno_with_message!(Errno::EINVAL, "self is not symlink");
}
let self_inode = self.0.read();
let link = self_inode.inner.as_symlink().unwrap();
Ok(String::from(link))
}
fn write_link(&self, target: &str) -> Result<()> {
if self.0.read().metadata.type_ != InodeType::SymLink {
return_errno_with_message!(Errno::EINVAL, "self is not symlink");
}
let mut self_inode = self.0.write();
let link = self_inode.inner.as_symlink_mut().unwrap();
*link = Str256::from(target);
// Symlink's metadata.blocks should be 0, so just set the size.
self_inode.metadata.size = target.len();
Ok(())
}
fn metadata(&self) -> Metadata {
self.0.read().metadata.clone()
}
fn sync(&self) -> Result<()> {
// do nothing
Ok(())
}
fn fs(&self) -> Arc<dyn FileSystem> {
Weak::upgrade(&self.0.read().fs).unwrap()
}
fn ioctl(&self, cmd: &IoctlCmd) -> Result<()> {
return_errno!(Errno::ENOSYS);
}
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Write-locks two inodes, always taking the lock of the inode with the
/// smaller inode number first.
///
/// The guards are returned in argument order: `(this, other)`.
fn write_lock_two_inodes<'a>(
    this: &'a RamInode,
    other: &'a RamInode,
) -> (RwLockWriteGuard<'a, Inode_>, RwLockWriteGuard<'a, Inode_>) {
    // The read guards are released at the end of each statement, before any
    // write lock is requested.
    let this_ino = this.0.read().metadata.ino;
    let other_ino = other.0.read().metadata.ino;
    if this_ino < other_ino {
        let this_guard = this.0.write();
        let other_guard = other.0.write();
        return (this_guard, other_guard);
    }
    let other_guard = other.0.write();
    let this_guard = this.0.write();
    (this_guard, other_guard)
}

View File

@ -0,0 +1,10 @@
//! Ramfs based on PageCache
pub use fs::RamFS;
mod fs;
/// Magic number recorded in the ramfs superblock.
const RAMFS_MAGIC: usize = 0x0102_1994;
/// Block size in bytes; used for the superblock and to derive block counts.
const BLOCK_SIZE: usize = 4096;
/// Maximum length of a file name in bytes.
const NAME_MAX: usize = 255;
/// Inode number of the root directory.
const ROOT_INO: usize = 1;

View File

@ -0,0 +1,188 @@
use crate::prelude::*;
use crate::tty::{get_n_tty, Tty};
use super::file_handle::File;
use super::file_table::FileDescripter;
use super::utils::{InodeMode, InodeType, IoEvents, Metadata, SeekFrom};
/// File descriptor number of standard input.
pub const FD_STDIN: FileDescripter = 0;
/// File descriptor number of standard output.
pub const FD_STDOUT: FileDescripter = 1;
/// File descriptor number of standard error.
pub const FD_STDERR: FileDescripter = 2;
/// The standard input file, optionally backed by a tty console.
pub struct Stdin {
/// The console that serves poll/read/ioctl requests, if any.
console: Option<Arc<Tty>>,
}
/// The standard output file, optionally backed by a tty console.
pub struct Stdout {
/// The console that serves write/ioctl requests, if any.
console: Option<Arc<Tty>>,
}
/// The standard error file, optionally backed by a tty console.
pub struct Stderr {
/// The console that serves write/ioctl requests, if any.
console: Option<Arc<Tty>>,
}
impl File for Stdin {
    /// Polls the console for I/O events. Not implemented without a console.
    fn poll(&self) -> IoEvents {
        match self.console.as_ref() {
            Some(tty) => tty.poll(),
            None => todo!(),
        }
    }

    /// Reads from the console into `buf`. Not implemented without a console.
    fn read(&self, buf: &mut [u8]) -> Result<usize> {
        match self.console.as_ref() {
            Some(tty) => tty.read(buf),
            None => todo!(),
        }
    }

    /// Forwards the ioctl to the console. Not implemented without a console.
    fn ioctl(&self, cmd: super::utils::IoctlCmd, arg: usize) -> Result<i32> {
        match self.console.as_ref() {
            Some(tty) => tty.ioctl(cmd, arg),
            None => todo!(),
        }
    }

    /// Seeking is not implemented yet; always reports position 0.
    fn seek(&self, _seek_from: SeekFrom) -> Result<usize> {
        // TODO: do real seek
        Ok(0)
    }

    /// Returns fixed metadata describing a character device with mode `0o620`.
    fn metadata(&self) -> Metadata {
        Metadata {
            type_: InodeType::CharDevice,
            mode: InodeMode::from_bits_truncate(0o620),
            dev: 0,
            rdev: 0,
            ino: 0,
            nlinks: 1,
            uid: 0,
            gid: 0,
            size: 0,
            blk_size: 1024,
            blocks: 0,
            atime: Default::default(),
            mtime: Default::default(),
            ctime: Default::default(),
        }
    }
}
impl File for Stdout {
    /// Forwards the ioctl to the console. Not implemented without a console.
    fn ioctl(&self, cmd: super::utils::IoctlCmd, arg: usize) -> Result<i32> {
        match self.console.as_ref() {
            Some(tty) => tty.ioctl(cmd, arg),
            None => todo!(),
        }
    }

    /// Writes `buf` to the console. Not implemented without a console.
    fn write(&self, buf: &[u8]) -> Result<usize> {
        match self.console.as_ref() {
            Some(tty) => tty.write(buf),
            None => todo!(),
        }
    }

    /// Seeking is not implemented yet; always reports position 0.
    fn seek(&self, _seek_from: SeekFrom) -> Result<usize> {
        // TODO: do real seek
        Ok(0)
    }

    /// Returns fixed metadata describing a character device with mode `0o620`.
    fn metadata(&self) -> Metadata {
        Metadata {
            type_: InodeType::CharDevice,
            mode: InodeMode::from_bits_truncate(0o620),
            dev: 0,
            rdev: 0,
            ino: 0,
            nlinks: 1,
            uid: 0,
            gid: 0,
            size: 0,
            blk_size: 1024,
            blocks: 0,
            atime: Default::default(),
            mtime: Default::default(),
            ctime: Default::default(),
        }
    }
}
impl File for Stderr {
    /// Forwards the ioctl to the console. Not implemented without a console.
    fn ioctl(&self, cmd: super::utils::IoctlCmd, arg: usize) -> Result<i32> {
        match self.console.as_ref() {
            Some(tty) => tty.ioctl(cmd, arg),
            None => todo!(),
        }
    }

    /// Writes `buf` to the console. Not implemented without a console.
    fn write(&self, buf: &[u8]) -> Result<usize> {
        match self.console.as_ref() {
            Some(tty) => tty.write(buf),
            None => todo!(),
        }
    }

    /// Seeking is not implemented yet; always reports position 0.
    fn seek(&self, _seek_from: SeekFrom) -> Result<usize> {
        // TODO: do real seek
        Ok(0)
    }

    /// Returns fixed metadata describing a character device with mode `0o620`.
    fn metadata(&self) -> Metadata {
        Metadata {
            type_: InodeType::CharDevice,
            mode: InodeMode::from_bits_truncate(0o620),
            dev: 0,
            rdev: 0,
            ino: 0,
            nlinks: 1,
            uid: 0,
            gid: 0,
            size: 0,
            blk_size: 1024,
            blocks: 0,
            atime: Default::default(),
            mtime: Default::default(),
            ctime: Default::default(),
        }
    }
}
impl Stdin {
    /// Creates a stdin backed by the tty returned by `get_n_tty`.
    ///
    /// FIXME: console should be file under devfs.
    /// reimplement the function when devfs is enabled.
    pub fn new_with_default_console() -> Self {
        Self {
            console: Some(get_n_tty().clone()),
        }
    }
}
impl Stdout {
    /// Creates a stdout backed by the tty returned by `get_n_tty`.
    ///
    /// FIXME: console should be file under devfs.
    /// reimplement the function when devfs is enabled.
    pub fn new_with_default_console() -> Self {
        Self {
            console: Some(get_n_tty().clone()),
        }
    }
}
impl Stderr {
    /// Creates a stderr backed by the tty returned by `get_n_tty`.
    ///
    /// FIXME: console should be file under devfs.
    /// reimplement the function when devfs is enabled.
    pub fn new_with_default_console() -> Self {
        Self {
            console: Some(get_n_tty().clone()),
        }
    }
}

View File

@ -0,0 +1,69 @@
use crate::prelude::*;
use crate::rights::Rights;
/// The access mode a file is opened with.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum AccessMode {
    /// read only
    O_RDONLY = 0,
    /// write only
    O_WRONLY = 1,
    /// read write
    O_RDWR = 2,
}

impl AccessMode {
    /// Returns `true` if the mode permits reading.
    pub fn is_readable(&self) -> bool {
        matches!(*self, AccessMode::O_RDONLY | AccessMode::O_RDWR)
    }

    /// Returns `true` if the mode permits writing.
    pub fn is_writable(&self) -> bool {
        matches!(*self, AccessMode::O_WRONLY | AccessMode::O_RDWR)
    }
}
impl AccessMode {
    /// Decodes the access mode from the two lowest bits of `flags`.
    ///
    /// Fails with `EINVAL` if those bits do not name a valid mode.
    pub fn from_u32(flags: u32) -> Result<Self> {
        match flags & 0b11 {
            0 => Ok(Self::O_RDONLY),
            1 => Ok(Self::O_WRONLY),
            2 => Ok(Self::O_RDWR),
            _ => return_errno_with_message!(Errno::EINVAL, "invalid bits for access mode"),
        }
    }
}
impl From<Rights> for AccessMode {
    /// Derives the access mode from the `READ` and `WRITE` rights.
    ///
    /// # Panics
    ///
    /// Panics if `rights` contains neither `READ` nor `WRITE`.
    fn from(rights: Rights) -> AccessMode {
        let can_read = rights.contains(Rights::READ);
        let can_write = rights.contains(Rights::WRITE);
        match (can_read, can_write) {
            (true, true) => AccessMode::O_RDWR,
            (true, false) => AccessMode::O_RDONLY,
            (false, true) => AccessMode::O_WRONLY,
            (false, false) => panic!("invalid rights"),
        }
    }
}
impl From<AccessMode> for Rights {
    /// Maps an access mode onto the matching `READ` / `WRITE` rights.
    fn from(access_mode: AccessMode) -> Rights {
        let rights = match access_mode {
            AccessMode::O_RDWR => Rights::READ | Rights::WRITE,
            AccessMode::O_WRONLY => Rights::WRITE,
            AccessMode::O_RDONLY => Rights::READ,
        };
        rights
    }
}

View File

@ -0,0 +1,23 @@
use bitflags::bitflags;
bitflags! {
/// Flags that affect how a file is created or opened.
pub struct CreationFlags: u32 {
/// Create the file if it does not exist.
const O_CREAT = 1 << 6;
/// Fail if `O_CREAT` is given and the file already exists.
const O_EXCL = 1 << 7;
/// Do not make the opened terminal the process's controlling terminal.
const O_NOCTTY = 1 << 8;
/// Truncate the file upon open.
const O_TRUNC = 1 << 9;
/// The path must refer to a directory.
const O_DIRECTORY = 1 << 16;
/// Do not follow the path if it is a symbolic link.
const O_NOFOLLOW = 1 << 17;
/// Close the file descriptor on exec.
const O_CLOEXEC = 1 << 19;
/// Create an unnamed temporary regular file.
/// O_TMPFILE is (_O_TMPFILE | O_DIRECTORY)
const _O_TMPFILE = 1 << 22;
}
}

View File

@ -0,0 +1,349 @@
use crate::prelude::*;
use alloc::string::String;
use core::time::Duration;
use super::{InodeMode, InodeType, Metadata, Vnode, NAME_MAX};
// Global table of cached dentries, keyed by (name, parent pointer).
// It holds strong references; the per-directory `Children` maps only hold weak ones.
lazy_static! {
static ref DCACHE: Mutex<BTreeMap<DentryKey, Arc<Dentry>>> = Mutex::new(BTreeMap::new());
}
/// The dentry cache to accelerate path lookup
pub struct Dentry {
/// The vnode this dentry refers to.
vnode: Vnode,
/// The entry name and the parent dentry (`None` for the root).
name_and_parent: RwLock<(String, Option<Arc<Dentry>>)>,
/// Weak reference to this dentry itself.
this: Weak<Dentry>,
/// Weak references to the child dentries created so far.
children: Mutex<Children>,
}
impl Dentry {
    /// Create a new dentry cache with root inode
    pub fn new_root(root_vnode: Vnode) -> Arc<Self> {
        let root = Self::new("/", None, root_vnode);
        DCACHE.lock().insert(root.key(), root.clone());
        root
    }

    /// Internal constructor
    fn new(name: &str, parent: Option<Arc<Dentry>>, vnode: Vnode) -> Arc<Self> {
        Arc::new_cyclic(|weak_self| Self {
            vnode,
            name_and_parent: RwLock::new((String::from(name), parent)),
            this: weak_self.clone(),
            children: Mutex::new(Children::new()),
        })
    }

    /// Get the name of Dentry.
    pub fn name(&self) -> String {
        self.name_and_parent.read().0.clone()
    }

    /// Get the parent dentry.
    ///
    /// Returns None if it is root dentry.
    pub fn parent(&self) -> Option<Arc<Dentry>> {
        self.name_and_parent.read().1.clone()
    }

    /// Replace both the name and the parent in one step.
    fn set_name_and_parent(&self, name: &str, parent: Option<Arc<Dentry>>) {
        let mut name_and_parent = self.name_and_parent.write();
        name_and_parent.0 = String::from(name);
        name_and_parent.1 = parent;
    }

    /// Get a strong reference to this dentry.
    fn this(&self) -> Arc<Dentry> {
        self.this.upgrade().unwrap()
    }

    /// Build the `DCACHE` key of this dentry.
    ///
    /// A dentry without a parent (the root) uses itself as the parent.
    fn key(&self) -> DentryKey {
        // Only upgrade the self reference when there is no parent.
        let parent = self.parent().unwrap_or_else(|| self.this());
        DentryKey::new(&self.name_and_parent.read().0, &parent)
    }

    /// Get the vnode this dentry refers to.
    pub fn vnode(&self) -> &Vnode {
        &self.vnode
    }

    /// Create a dentry by making inode.
    ///
    /// Fails with `ENOTDIR` if this dentry is not a directory and with
    /// `EEXIST` if a child called `name` is already cached; errors from
    /// `mknod` are passed through.
    pub fn create(&self, name: &str, type_: InodeType, mode: InodeMode) -> Result<Arc<Self>> {
        if self.vnode.inode_type() != InodeType::Dir {
            return_errno!(Errno::ENOTDIR);
        }
        let mut children = self.children.lock();
        if children.find_dentry(name).is_some() {
            return_errno!(Errno::EEXIST);
        }
        let child = {
            let vnode = self.vnode.mknod(name, type_, mode)?;
            let dentry = Dentry::new(name, Some(self.this()), vnode);
            children.insert_dentry(&dentry);
            dentry
        };
        Ok(child)
    }

    /// Lookup a dentry.
    ///
    /// "." resolves to this dentry and ".." to the parent (or to this dentry
    /// for the root). Other names are served from the children cache, falling
    /// back to a vnode lookup whose result is then cached.
    pub fn lookup(&self, name: &str) -> Result<Arc<Self>> {
        if self.vnode.inode_type() != InodeType::Dir {
            return_errno!(Errno::ENOTDIR);
        }
        if name.len() > NAME_MAX {
            return_errno!(Errno::ENAMETOOLONG);
        }
        let dentry = match name {
            "." => self.this(),
            ".." => self.parent().unwrap_or_else(|| self.this()),
            name => {
                let mut children = self.children.lock();
                match children.find_dentry(name) {
                    Some(dentry) => dentry.clone(),
                    None => {
                        let vnode = self.vnode.lookup(name)?;
                        let dentry = Dentry::new(name, Some(self.this()), vnode);
                        children.insert_dentry(&dentry);
                        dentry
                    }
                }
            }
        };
        Ok(dentry)
    }

    /// Link a new name for the dentry by linking inode.
    pub fn link(&self, old: &Arc<Self>, name: &str) -> Result<()> {
        if self.vnode.inode_type() != InodeType::Dir {
            return_errno!(Errno::ENOTDIR);
        }
        let mut children = self.children.lock();
        if children.find_dentry(name).is_some() {
            return_errno!(Errno::EEXIST);
        }
        let old_vnode = old.vnode();
        self.vnode.link(old_vnode, name)?;
        let dentry = Dentry::new(name, Some(self.this()), old_vnode.clone());
        children.insert_dentry(&dentry);
        Ok(())
    }

    /// Delete a dentry by unlinking inode.
    pub fn unlink(&self, name: &str) -> Result<()> {
        if self.vnode.inode_type() != InodeType::Dir {
            return_errno!(Errno::ENOTDIR);
        }
        let mut children = self.children.lock();
        self.vnode.unlink(name)?;
        children.delete_dentry(name);
        Ok(())
    }

    /// Delete a directory dentry by rmdiring inode.
    pub fn rmdir(&self, name: &str) -> Result<()> {
        if self.vnode.inode_type() != InodeType::Dir {
            return_errno!(Errno::ENOTDIR);
        }
        let mut children = self.children.lock();
        self.vnode.rmdir(name)?;
        children.delete_dentry(name);
        Ok(())
    }

    /// Read symbolic link.
    pub fn read_link(&self) -> Result<String> {
        if self.vnode.inode_type() != InodeType::SymLink {
            return_errno!(Errno::EINVAL);
        }
        self.vnode.read_link()
    }

    /// Write symbolic link.
    pub fn write_link(&self, target: &str) -> Result<()> {
        if self.vnode.inode_type() != InodeType::SymLink {
            return_errno!(Errno::EINVAL);
        }
        self.vnode.write_link(target)
    }

    /// Rename a dentry to the new dentry by renaming inode.
    ///
    /// Fails with `EISDIR` if either name is "." or ".." and with `ENOTDIR`
    /// if either side is not a directory; errors from the vnode rename are
    /// passed through.
    pub fn rename(&self, old_name: &str, new_dir: &Arc<Self>, new_name: &str) -> Result<()> {
        if old_name == "." || old_name == ".." || new_name == "." || new_name == ".." {
            return_errno_with_message!(Errno::EISDIR, "old_name or new_name is a directory");
        }
        if self.vnode.inode_type() != InodeType::Dir || new_dir.vnode.inode_type() != InodeType::Dir
        {
            return_errno!(Errno::ENOTDIR);
        }
        // Self and new_dir are same Dentry, just modify name
        if Arc::ptr_eq(&self.this(), new_dir) {
            if old_name == new_name {
                return Ok(());
            }
            let mut children = self.children.lock();
            self.vnode.rename(old_name, &self.vnode, new_name)?;
            match children.find_dentry(old_name) {
                Some(dentry) => {
                    // Re-register the cached dentry under its new name.
                    children.delete_dentry(old_name);
                    dentry.set_name_and_parent(new_name, Some(self.this()));
                    children.insert_dentry(&dentry);
                }
                None => {
                    // The source was not cached; drop any cached dentry for
                    // the destination name.
                    children.delete_dentry(new_name);
                }
            }
        } else {
            // Self and new_dir are different Dentry
            let (mut self_children, mut new_dir_children) =
                write_lock_children_on_two_dentries(self, new_dir);
            self.vnode.rename(old_name, &new_dir.vnode, new_name)?;
            match self_children.find_dentry(old_name) {
                Some(dentry) => {
                    // Move the cached dentry to the new parent.
                    self_children.delete_dentry(old_name);
                    dentry.set_name_and_parent(new_name, Some(new_dir.this()));
                    new_dir_children.insert_dentry(&dentry);
                }
                None => {
                    // The source was not cached; drop any cached dentry for
                    // the destination name.
                    new_dir_children.delete_dentry(new_name);
                }
            }
        }
        Ok(())
    }

    /// Get the inode metadata
    pub fn inode_metadata(&self) -> Metadata {
        self.vnode.metadata()
    }

    /// Get the inode type
    pub fn inode_type(&self) -> InodeType {
        self.vnode.inode_type()
    }

    /// Get the inode permission mode
    pub fn inode_mode(&self) -> InodeMode {
        self.vnode.inode_mode()
    }

    /// Get the inode length
    pub fn inode_len(&self) -> usize {
        self.vnode.len()
    }

    /// Get the access timestamp
    pub fn atime(&self) -> Duration {
        self.vnode.atime()
    }

    /// Set the access timestamp
    pub fn set_atime(&self, time: Duration) {
        self.vnode.set_atime(time)
    }

    /// Get the modified timestamp
    pub fn mtime(&self) -> Duration {
        self.vnode.mtime()
    }

    /// Set the modified timestamp
    pub fn set_mtime(&self, time: Duration) {
        self.vnode.set_mtime(time)
    }

    /// Get the absolute path.
    ///
    /// The path is built by walking the parent chain and prepending each
    /// ancestor's name.
    pub fn abs_path(&self) -> String {
        let mut path = self.name();
        let mut dentry = self.this();
        loop {
            match dentry.parent() {
                None => break,
                Some(parent_dentry) => {
                    path = {
                        let parent_name = parent_dentry.name();
                        // The root is already named "/", so no separator is needed.
                        if parent_name != "/" {
                            parent_name + "/" + &path
                        } else {
                            parent_name + &path
                        }
                    };
                    dentry = parent_dentry;
                }
            }
        }
        debug_assert!(path.starts_with('/'));
        path
    }
}
/// The child dentries of one directory dentry, indexed by name.
struct Children {
/// Weak references, so that a child can go away while its map entry remains.
inner: BTreeMap<String, Weak<Dentry>>,
}
impl Children {
    /// Creates an empty set of children.
    pub fn new() -> Self {
        Self {
            inner: BTreeMap::new(),
        }
    }

    /// Registers `dentry` under its current name.
    ///
    /// If the vnode allows dentry caching, a strong reference is also stored
    /// in the global `DCACHE`.
    pub fn insert_dentry(&mut self, dentry: &Arc<Dentry>) {
        let cacheable = dentry.vnode().is_dentry_cacheable();
        if cacheable {
            DCACHE.lock().insert(dentry.key(), Arc::clone(dentry));
        }
        let child_name = dentry.name();
        self.inner.insert(child_name, Arc::downgrade(dentry));
    }

    /// Forgets the child called `name` and removes it from `DCACHE`.
    ///
    /// Returns the dentry that was removed from `DCACHE`, if any.
    pub fn delete_dentry(&mut self, name: &str) -> Option<Arc<Dentry>> {
        let weak_child = self.inner.remove(name)?;
        let child = weak_child.upgrade()?;
        let removed = DCACHE.lock().remove(&child.key());
        removed
    }

    /// Returns the live child called `name`.
    ///
    /// A map entry whose dentry has already been dropped is pruned and
    /// reported as `None`.
    pub fn find_dentry(&mut self, name: &str) -> Option<Arc<Dentry>> {
        let found = self.inner.get(name)?.upgrade();
        if found.is_none() {
            self.inner.remove(name);
        }
        found
    }
}
/// Key of a dentry in `DCACHE`: the entry name plus the identity of its parent.
#[derive(Clone, Hash, PartialOrd, Ord, Eq, PartialEq)]
struct DentryKey {
/// The entry name.
name: String,
/// The address of the parent dentry, used as its identity.
parent_ptr: usize,
}
impl DentryKey {
    /// Builds a key from an entry name and the address of its parent dentry.
    pub fn new(name: &str, parent: &Arc<Dentry>) -> Self {
        let parent_ptr = Arc::as_ptr(parent) as usize;
        Self {
            name: name.into(),
            parent_ptr,
        }
    }
}
/// Locks the children of two dentries, always locking the dentry with the
/// smaller key first.
///
/// The guards are returned in argument order: `(this, other)`.
fn write_lock_children_on_two_dentries<'a>(
    this: &'a Dentry,
    other: &'a Dentry,
) -> (MutexGuard<'a, Children>, MutexGuard<'a, Children>) {
    let this_goes_first = {
        let this_key = this.key();
        let other_key = other.key();
        this_key < other_key
    };
    if this_goes_first {
        let this_guard = this.children.lock();
        let other_guard = other.children.lock();
        return (this_guard, other_guard);
    }
    let other_guard = other.children.lock();
    let this_guard = this.children.lock();
    (this_guard, other_guard)
}

View File

@ -0,0 +1,32 @@
use super::InodeType;
use crate::prelude::*;
/// A visitor for dir entries.
pub trait DirentVisitor {
/// Visit a dir entry.
///
/// The entry is described by its `name`, its inode number `ino`, its inode
/// type `type_` and its `offset` within the directory.
///
/// If the visitor succeeds in visiting the given inode, an `Ok(())` is returned;
/// Otherwise, an error is returned. Different implementations for `DirentVisitor`
/// may choose to report errors for different reasons. Regardless of the exact
/// errors and reasons, `readdir`-family methods shall stop feeding the visitor
/// with the next inode as long as an error is returned by the visitor.
///
/// # Example
///
/// `DirentVisitor` is implemented for `Vec<String>` so that the file names
/// under a dir can be easily collected, which is convenient for testing purposes.
///
/// ```no_run
/// let mut all_dirents = Vec::new();
/// let dir_inode = todo!("create an inode");
/// dir_inode.readdir_at(0, &mut all_dirents).unwrap();
/// ```
fn visit(&mut self, name: &str, ino: u64, type_: InodeType, offset: usize) -> Result<()>;
}
impl DirentVisitor for Vec<String> {
    /// Collects the entry name; the inode number, type and offset are ignored.
    fn visit(&mut self, name: &str, _ino: u64, _type_: InodeType, _offset: usize) -> Result<()> {
        self.push(String::from(name));
        Ok(())
    }
}

View File

@ -0,0 +1,118 @@
use super::Inode;
use crate::prelude::*;
/// DirEntryVec is used to store the entries of a directory.
/// It can guarantee that the index of one dir entry remains unchanged during
/// adding or deleting other dir entries of it.
pub struct DirEntryVec<T> {
    // The slots to store dir entries.
    slots: Vec<Option<T>>,
    // The number of occupied slots.
    // The i-th slot is occupied if `self.slots[i].is_some()`.
    num_occupied: usize,
}

impl<T> DirEntryVec<T> {
    /// New an empty vec.
    pub fn new() -> Self {
        Self {
            slots: Vec::new(),
            num_occupied: 0,
        }
    }

    /// Returns `true` if the vec contains no entries.
    pub fn is_empty(&self) -> bool {
        self.num_occupied == 0
    }

    /// Put a dir entry into the vec.
    /// It is put into the first empty slot if there is one, otherwise it is
    /// appended to the back of the vec.
    pub fn put(&mut self, entry: T) {
        if self.num_occupied == self.slots.len() {
            // Every slot is in use: grow the vec.
            self.slots.push(Some(entry));
        } else {
            // Fewer occupied slots than slots means a free one must exist.
            let idx = self.slots.iter().position(|x| x.is_none()).unwrap();
            self.slots[idx] = Some(entry);
        }
        self.num_occupied += 1;
    }

    /// Removes and returns the entry at position `idx`.
    /// Returns `None` if `idx` is out of bounds or the entry has been removed.
    pub fn remove(&mut self, idx: usize) -> Option<T> {
        let removed = self.slots.get_mut(idx)?.take();
        if removed.is_some() {
            debug_assert!(self.num_occupied > 0);
            self.num_occupied -= 1;
        }
        removed
    }

    /// Stores `entry` at position `idx` and returns the entry it replaced.
    ///
    /// Returns `None` if the slot was empty (the entry is still stored) or if
    /// `idx` is out of bounds (the entry is dropped and nothing is stored).
    pub fn put_at(&mut self, idx: usize, entry: T) -> Option<T> {
        let previous = self.slots.get_mut(idx)?.replace(entry);
        if previous.is_none() {
            // An empty slot has just become occupied.
            self.num_occupied += 1;
        }
        previous
    }

    /// Creates an iterator which gives both of the index and the dir entry.
    /// The index may not be continuous.
    pub fn idxes_and_entries(&self) -> impl Iterator<Item = (usize, &'_ T)> {
        self.slots
            .iter()
            .enumerate()
            .filter_map(|(idx, slot)| slot.as_ref().map(|entry| (idx, entry)))
    }

    /// Creates an iterator which gives the dir entry.
    pub fn iter(&self) -> impl Iterator<Item = &'_ T> {
        self.slots.iter().filter_map(|x| x.as_ref())
    }
}
/// Name-based helpers for a `DirEntryVec` whose entries are `(name, inode)` pairs.
pub trait DirEntryVecExt {
/// If the entry is not found by `name`, use `f` to get the inode, then put the entry into vec.
/// `f` is not called when an entry with that name already exists.
fn put_entry_if_not_found(&mut self, name: &str, f: impl Fn() -> Arc<dyn Inode>);
/// Removes and returns the entry by name.
/// Returns `None` if there is no entry with that name.
fn remove_entry_by_name(&mut self, name: &str) -> Option<(String, Arc<dyn Inode>)>;
}
impl DirEntryVecExt for DirEntryVec<(String, Arc<dyn Inode>)> {
    /// Adds `(name, f())` unless an entry called `name` is already present.
    fn put_entry_if_not_found(&mut self, name: &str, f: impl Fn() -> Arc<dyn Inode>) {
        let already_present = self.iter().any(|(child_name, _)| child_name == name);
        if already_present {
            return;
        }
        let inode = f();
        self.put((String::from(name), inode));
    }

    /// Removes the first entry called `name` and returns it, if there is one.
    fn remove_entry_by_name(&mut self, name: &str) -> Option<(String, Arc<dyn Inode>)> {
        let (slot, _) = self
            .idxes_and_entries()
            .find(|(_, (child_name, _))| child_name == name)?;
        self.remove(slot)
    }
}

View File

@ -0,0 +1,33 @@
use crate::prelude::*;
// Generates, from a comma-separated list of `NAME = value` pairs:
// - the `FcntlCmd` enum (`#[repr(i32)]`) with one variant per name,
// - a `pub const NAME: i32` for every name, and
// - `TryFrom<i32> for FcntlCmd`, which fails with `EINVAL` for any unlisted value.
macro_rules! define_fcntl_cmd {
($($name: ident = $value: expr),*) => {
#[repr(i32)]
#[derive(Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum FcntlCmd {
$($name = $value,)*
}
$(
pub const $name: i32 = $value;
)*
impl TryFrom<i32> for FcntlCmd {
type Error = Error;
fn try_from(value: i32) -> Result<Self> {
match value {
// `$name` resolves to the constant generated above, so each arm compares
// `value` against that constant instead of binding a new variable.
$($name => Ok(FcntlCmd::$name),)*
_ => return_errno_with_message!(Errno::EINVAL, "Unknown fcntl cmd"),
}
}
}
}
}
// The fcntl commands currently recognised; any other value is rejected with `EINVAL`.
// The numeric values are the same as the Linux fcntl command numbers of these names.
define_fcntl_cmd! {
F_DUPFD = 0,
F_GETFD = 1,
F_SETFD = 2,
F_DUPFD_CLOEXEC = 1030
}

View File

@ -0,0 +1,30 @@
/// A mask for the file mode of a newly-created file or directory.
///
/// Only the permission bits are kept: the stored value is always a subset of `0o777`.
pub struct FileCreationMask(u16);

impl FileCreationMask {
    /// Creates a mask from `val`, discarding every bit outside `0o777`.
    pub fn new(val: u16) -> Self {
        Self(val & 0o777)
    }

    /// Returns the current mask value.
    pub fn get(&self) -> u16 {
        self.0
    }

    /// Replaces the mask with `new_mask` (restricted to `0o777`) and returns the previous value.
    pub fn set(&mut self, new_mask: u16) -> u16 {
        core::mem::replace(&mut self.0, new_mask & 0o777)
    }
}

impl Default for FileCreationMask {
    /// The default mask has all of `0o777` set.
    fn default() -> Self {
        Self(0o777)
    }
}

View File

@ -0,0 +1,66 @@
use alloc::sync::Arc;
use bitflags::bitflags;
use core::any::Any;
use super::Inode;
use crate::prelude::*;
/// Summary information about a file system, as returned by `FileSystem::sb`.
#[derive(Debug, Clone)]
pub struct SuperBlock {
    pub magic: usize,
    pub bsize: usize,
    pub blocks: usize,
    pub bfree: usize,
    pub bavail: usize,
    pub files: usize,
    pub ffree: usize,
    pub fsid: usize,
    pub namelen: usize,
    pub frsize: usize,
    pub flags: usize,
}

impl SuperBlock {
    /// Creates a super block with the given magic number, block size and
    /// maximum file-name length.
    ///
    /// `block_size` is used for both `bsize` and `frsize`, and `name_len` is
    /// stored in `namelen`. All counters, the file system ID and the flags
    /// start at zero.
    pub fn new(magic: usize, block_size: usize, name_len: usize) -> Self {
        Self {
            magic,
            bsize: block_size,
            blocks: 0,
            bfree: 0,
            bavail: 0,
            files: 0,
            ffree: 0,
            fsid: 0,
            // Previously hard-coded to 255, which silently ignored the argument.
            namelen: name_len,
            frsize: block_size,
            flags: 0,
        }
    }
}
// Per-file-system behaviour flags, reported by `FileSystem::flags`.
bitflags! {
pub struct FsFlags: u32 {
/// Disable page cache.
const NO_PAGECACHE = 1 << 0;
/// Dentry cannot be evicted.
const DENTRY_UNEVICTABLE = 1 << 1;
}
}
/// The interface every file system implementation exposes to the VFS layer.
pub trait FileSystem: Any + Sync + Send {
/// Synchronizes the file system (presumably flushes pending state to its backing store — confirm per implementation).
fn sync(&self) -> Result<()>;
/// Returns the root inode of this file system.
fn root_inode(&self) -> Arc<dyn Inode>;
/// Returns a copy of the file system's super block information.
fn sb(&self) -> SuperBlock;
/// Returns the behaviour flags of this file system (e.g. whether the page cache is disabled).
fn flags(&self) -> FsFlags;
/// Returns `self` as `&dyn Any` so that callers can downcast to the concrete type.
fn as_any_ref(&self) -> &dyn Any;
}
impl dyn FileSystem {
/// Returns a reference to the concrete file system if it is of type `T`, or `None` otherwise.
pub fn downcast_ref<T: FileSystem>(&self) -> Option<&T> {
self.as_any_ref().downcast_ref::<T>()
}
}

View File

@ -0,0 +1,229 @@
use alloc::string::String;
use alloc::sync::Arc;
use bitflags::bitflags;
use core::any::Any;
use core::time::Duration;
use jinux_frame::vm::VmFrame;
use super::{DirentVisitor, FileSystem, IoctlCmd, SuperBlock};
use crate::prelude::*;
/// The kind of object an inode represents.
///
/// The discriminants are the same octal values as the POSIX `S_IF*` file-type bits of `st_mode`.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum InodeType {
NamedPipe = 0o010000,
CharDevice = 0o020000,
Dir = 0o040000,
BlockDevice = 0o060000,
File = 0o100000,
SymLink = 0o120000,
Socket = 0o140000,
}
// Permission and special-mode bits of an inode (the low 12 bits of a Unix file mode).
bitflags! {
pub struct InodeMode: u16 {
/// set-user-ID
const S_ISUID = 0o4000;
/// set-group-ID
const S_ISGID = 0o2000;
/// sticky bit
const S_ISVTX = 0o1000;
/// read by owner
const S_IRUSR = 0o0400;
/// write by owner
const S_IWUSR = 0o0200;
/// execute/search by owner
const S_IXUSR = 0o0100;
/// read by group
const S_IRGRP = 0o0040;
/// write by group
const S_IWGRP = 0o0020;
/// execute/search by group
const S_IXGRP = 0o0010;
/// read by others
const S_IROTH = 0o0004;
/// write by others
const S_IWOTH = 0o0002;
/// execute/search by others
const S_IXOTH = 0o0001;
}
}
impl InodeMode {
/// Returns whether the owner-read bit (`S_IRUSR`) is set.
///
/// Group and other permission bits are not consulted.
pub fn is_readable(&self) -> bool {
self.contains(Self::S_IRUSR)
}
/// Returns whether the owner-write bit (`S_IWUSR`) is set.
///
/// Group and other permission bits are not consulted.
pub fn is_writable(&self) -> bool {
self.contains(Self::S_IWUSR)
}
/// Returns whether the owner-execute bit (`S_IXUSR`) is set.
///
/// Group and other permission bits are not consulted.
pub fn is_executable(&self) -> bool {
self.contains(Self::S_IXUSR)
}
/// Returns whether the sticky bit (`S_ISVTX`) is set.
pub fn has_sticky_bit(&self) -> bool {
self.contains(Self::S_ISVTX)
}
/// Returns whether the set-user-ID bit (`S_ISUID`) is set.
pub fn has_set_uid(&self) -> bool {
self.contains(Self::S_ISUID)
}
/// Returns whether the set-group-ID bit (`S_ISGID`) is set.
pub fn has_set_gid(&self) -> bool {
self.contains(Self::S_ISGID)
}
}
/// Attributes of an inode, as returned by `Inode::metadata`.
#[derive(Debug, Clone)]
pub struct Metadata {
pub dev: usize,
// Inode number.
pub ino: usize,
pub size: usize,
// Taken from the super block's `bsize` by the constructors below.
pub blk_size: usize,
pub blocks: usize,
// Access / modification / change times (presumably relative to some epoch — confirm).
pub atime: Duration,
pub mtime: Duration,
pub ctime: Duration,
// What kind of object the inode is.
pub type_: InodeType,
// Permission and special-mode bits.
pub mode: InodeMode,
// Link count.
pub nlinks: usize,
pub uid: usize,
pub gid: usize,
pub rdev: usize,
}
impl Metadata {
pub fn new_dir(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
Self {
dev: 0,
ino,
size: 2,
blk_size: sb.bsize,
blocks: 1,
atime: Default::default(),
mtime: Default::default(),
ctime: Default::default(),
type_: InodeType::Dir,
mode,
nlinks: 2,
uid: 0,
gid: 0,
rdev: 0,
}
}
pub fn new_file(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
Self {
dev: 0,
ino,
size: 0,
blk_size: sb.bsize,
blocks: 0,
atime: Default::default(),
mtime: Default::default(),
ctime: Default::default(),
type_: InodeType::File,
mode,
nlinks: 1,
uid: 0,
gid: 0,
rdev: 0,
}
}
pub fn new_symlink(ino: usize, mode: InodeMode, sb: &SuperBlock) -> Self {
Self {
dev: 0,
ino,
size: 0,
blk_size: sb.bsize,
blocks: 0,
atime: Default::default(),
mtime: Default::default(),
ctime: Default::default(),
type_: InodeType::SymLink,
mode,
nlinks: 1,
uid: 0,
gid: 0,
rdev: 0,
}
}
}
/// The file-system-specific inode interface that the VFS layer (`Vnode`, `PageCache`) is built on.
pub trait Inode: Any + Sync + Send {
/// Returns the current length of the inode's data; `Vnode` treats this as the file length.
fn len(&self) -> usize;
/// Sets the length of the inode's data; `Vnode::write_at` calls this after a write grows the file.
fn resize(&self, new_size: usize);
/// Returns a snapshot of the inode's attributes.
fn metadata(&self) -> Metadata;
// Access- and modification-time accessors.
fn atime(&self) -> Duration;
fn set_atime(&self, time: Duration);
fn mtime(&self) -> Duration;
fn set_mtime(&self, time: Duration);
// Page-granular I/O, used by the page cache to fill a frame from page `idx`
// and to write a dirty frame back to page `idx`.
fn read_page(&self, idx: usize, frame: &VmFrame) -> Result<()>;
fn write_page(&self, idx: usize, frame: &VmFrame) -> Result<()>;
// Byte-granular I/O at `offset`; the returned value is presumably the number of
// bytes transferred (`Vnode` forwards it unchanged as its own result).
fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize>;
fn write_at(&self, offset: usize, buf: &[u8]) -> Result<usize>;
// Directory operations; `self` is the directory being operated on.
fn mknod(&self, name: &str, type_: InodeType, mode: InodeMode) -> Result<Arc<dyn Inode>>;
fn readdir_at(&self, offset: usize, visitor: &mut dyn DirentVisitor) -> Result<usize>;
fn link(&self, old: &Arc<dyn Inode>, name: &str) -> Result<()>;
fn unlink(&self, name: &str) -> Result<()>;
fn rmdir(&self, name: &str) -> Result<()>;
fn lookup(&self, name: &str) -> Result<Arc<dyn Inode>>;
fn rename(&self, old_name: &str, target: &Arc<dyn Inode>, new_name: &str) -> Result<()>;
// Symbolic-link target accessors.
fn read_link(&self) -> Result<String>;
fn write_link(&self, target: &str) -> Result<()>;
fn ioctl(&self, cmd: &IoctlCmd) -> Result<()>;
fn sync(&self) -> Result<()>;
/// Returns the file system this inode belongs to.
fn fs(&self) -> Arc<dyn FileSystem>;
/// Returns `self` as `&dyn Any` so that callers can downcast to the concrete type.
fn as_any_ref(&self) -> &dyn Any;
/// Returns whether a VFS dentry for this inode should be put into the dentry cache.
///
/// The dentry cache in the VFS layer can accelerate the lookup of inodes. So usually,
/// it is preferable to use the dentry cache. And thus, the default return value of this method
/// is `true`.
///
/// But this caching can raise consistency issues in certain use cases. Specifically, the dentry
/// cache works on the assumption that all FS operations go through the dentry layer first.
/// This is why the dentry cache can reflect the up-to-date FS state. Yet, this assumption
/// may be broken. For example, an inode in procfs (say, `/proc/1/fd/2`) can "disappear" without
/// notice from the perspective of the dentry cache. Such inodes are incompatible
/// with the dentry cache, and for them this method returns `false`.
///
/// Note that if this method returns `false` for any ancestor directory of an inode, then
/// the inode is not cached by the dentry cache, even when this method returns `true`
/// for the inode itself.
fn is_dentry_cacheable(&self) -> bool {
true
}
}
impl dyn Inode {
/// Returns a reference to the concrete inode if it is of type `T`, or `None` otherwise.
pub fn downcast_ref<T: Inode>(&self) -> Option<&T> {
self.as_any_ref().downcast_ref::<T>()
}
}

View File

@ -0,0 +1,11 @@
// I/O readiness events used by poll. The bit values are the same as the Linux
// `POLL*` constants of the same names, so they convert directly to and from
// the `events`/`revents` fields of `c_pollfd`.
crate::bitflags! {
pub struct IoEvents: u32 {
const POLLIN = 0x0001;
const POLLPRI = 0x0002;
const POLLOUT = 0x0004;
const POLLERR = 0x0008;
const POLLHUP = 0x0010;
const POLLNVAL = 0x0020;
const POLLRDHUP = 0x2000;
}
}

View File

@ -0,0 +1,38 @@
use crate::prelude::*;
// Generates, from a comma-separated list of `NAME = value` pairs:
// - the `IoctlCmd` enum (`#[repr(u32)]`) with one variant per name,
// - a `pub const NAME: u32` for every name, and
// - `TryFrom<u32> for IoctlCmd`, which fails with `EINVAL` for any unlisted value.
macro_rules! define_ioctl_cmd {
($($name: ident = $value: expr),*) => {
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
pub enum IoctlCmd {
$($name = $value,)*
}
$(
pub const $name: u32 = $value;
)*
impl TryFrom<u32> for IoctlCmd {
type Error = Error;
fn try_from(value: u32) -> Result<Self> {
match value {
// `$name` resolves to the constant generated above, so each arm compares
// `value` against that constant instead of binding a new variable.
$($name => Ok(IoctlCmd::$name),)*
_ => return_errno!(Errno::EINVAL),
}
}
}
}
}
// The ioctl commands currently recognised; any other value is rejected with `EINVAL`.
define_ioctl_cmd! {
// Get terminal attributes
TCGETS = 0x5401,
// Set terminal attributes
TCSETS = 0x5402,
// Get the process group ID of the foreground process group on this terminal
TIOCGPGRP = 0x540f,
// Set the foreground process group ID of this terminal.
TIOCSPGRP = 0x5410,
// Get window size
TIOCGWINSZ = 0x5413,
// Set window size
TIOCSWINSZ = 0x5414
}

View File

@ -0,0 +1,49 @@
//! VFS components
pub use access_mode::AccessMode;
pub use creation_flags::CreationFlags;
pub use dentry_cache::Dentry;
pub use dirent_visitor::DirentVisitor;
pub use direntry_vec::{DirEntryVec, DirEntryVecExt};
pub use fcntl::FcntlCmd;
pub use file_creation_mask::FileCreationMask;
pub use fs::{FileSystem, FsFlags, SuperBlock};
pub use inode::{Inode, InodeMode, InodeType, Metadata};
pub use io_events::IoEvents;
pub use ioctl::IoctlCmd;
pub use page_cache::PageCache;
pub use poll::{c_nfds, c_pollfd, PollFd};
pub use status_flags::StatusFlags;
pub use vnode::Vnode;
mod access_mode;
mod creation_flags;
mod dentry_cache;
mod dirent_visitor;
mod direntry_vec;
mod fcntl;
mod file_creation_mask;
mod fs;
mod inode;
mod io_events;
mod ioctl;
mod page_cache;
mod poll;
mod status_flags;
mod vnode;
/// A position to seek to within a file, expressed relative to one of three origins.
#[derive(Copy, PartialEq, Eq, Clone, Debug)]
pub enum SeekFrom {
/// An absolute offset from the start of the file.
Start(usize),
/// A signed offset relative to the end of the file.
End(isize),
/// A signed offset relative to the current position.
Current(isize),
}
/// Maximum number of bytes in a path (the same value as Linux's `PATH_MAX`).
pub const PATH_MAX: usize = 4096;
/// Maximum number of bytes in a file name (the same value as Linux's `NAME_MAX`).
pub const NAME_MAX: usize = 255;
/// Upper limit on the number of symbolic links followed while resolving a path.
pub const SYMLINKS_MAX: usize = 40;

View File

@ -0,0 +1,161 @@
use super::Inode;
use crate::prelude::*;
use crate::rights::Full;
use crate::vm::vmo::{Pager, Vmo, VmoFlags, VmoOptions};
use core::ops::Range;
use jinux_frame::vm::{VmAllocOptions, VmFrame, VmFrameVec};
use lru::LruCache;
/// A per-inode page cache: a resizable VMO whose pages are supplied on demand
/// by a `PageCacheManager` backed by the inode.
pub struct PageCache {
// The VMO through which cached file contents are read and written.
pages: Vmo<Full>,
// The pager behind `pages`; kept here as well as being handed to the VMO.
manager: Arc<PageCacheManager>,
}
impl PageCache {
/// Creates the page cache for `inode`.
///
/// The VMO is created with the inode's current length and is resizable.
/// The manager only holds a weak reference to the inode.
pub fn new(inode: &Arc<dyn Inode>) -> Result<Self> {
let manager = Arc::new(PageCacheManager::new(Arc::downgrade(inode)));
let pages = VmoOptions::<Full>::new(inode.len())
.flags(VmoFlags::RESIZABLE)
.pager(manager.clone())
.alloc()?;
Ok(Self { pages, manager })
}
/// Returns the VMO holding the cached pages.
pub fn pages(&self) -> &Vmo<Full> {
&self.pages
}
/// Evict the data within a specified range from the page cache and persist
/// them to the disk.
///
/// NOTE: not implemented yet. The call only logs a warning and leaves the cache untouched.
pub fn evict_range(&self, range: Range<usize>) {
// TODO: Implement this method.
warn!("pagecache: evict_range is not implemented");
}
}
/// The pager behind a `PageCache`: tracks the cached pages and moves page
/// contents between them and the backing inode.
struct PageCacheManager {
// Cached pages keyed by page index. The LRU cache is created unbounded,
// so it never evicts entries on its own.
pages: Mutex<LruCache<usize, Page>>,
// Weak reference to the inode that supplies and receives the page contents.
backed_inode: Weak<dyn Inode>,
}
impl PageCacheManager {
/// Creates a manager with no cached pages for the given inode.
pub fn new(inode: Weak<dyn Inode>) -> Self {
Self {
pages: Mutex::new(LruCache::unbounded()),
backed_inode: inode,
}
}
}
impl Pager for PageCacheManager {
    /// Returns the frame caching the page that contains `offset`.
    ///
    /// A page that is not cached yet is allocated zero-filled. If `offset`
    /// lies within the inode's current size the page is then filled from the
    /// inode and marked up to date; otherwise it stays zeroed.
    ///
    /// Panics if the backing inode has already been dropped.
    fn commit_page(&self, offset: usize) -> Result<VmFrame> {
        let page_idx = offset / PAGE_SIZE;
        let mut pages = self.pages.lock();
        if let Some(cached) = pages.get(&page_idx) {
            return Ok(cached.frame());
        }

        let inode = self.backed_inode.upgrade().unwrap();
        let page = if offset < inode.metadata().size {
            let mut fresh = Page::alloc_zero()?;
            inode.read_page(page_idx, &fresh.frame())?;
            fresh.set_state(PageState::UpToDate);
            fresh
        } else {
            Page::alloc_zero()?
        };
        let frame = page.frame();
        pages.put(page_idx, page);
        Ok(frame)
    }

    /// Marks the cached page containing `offset` as dirty.
    ///
    /// Panics if that page is not in the cache.
    fn update_page(&self, offset: usize) -> Result<()> {
        let page_idx = offset / PAGE_SIZE;
        let mut pages = self.pages.lock();
        match pages.get_mut(&page_idx) {
            Some(page) => page.set_state(PageState::Dirty),
            None => {
                error!("page {} is not in page cache", page_idx);
                panic!();
            }
        }
        Ok(())
    }

    /// Removes the page containing `offset` from the cache, writing it back
    /// to the inode first if it is dirty. A page that is not cached is ignored
    /// apart from a warning.
    ///
    /// Panics if a dirty page must be written back but the backing inode has
    /// already been dropped.
    fn decommit_page(&self, offset: usize) -> Result<()> {
        let page_idx = offset / PAGE_SIZE;
        let mut pages = self.pages.lock();
        match pages.pop(&page_idx) {
            Some(page) => {
                if let PageState::Dirty = page.state() {
                    self.backed_inode
                        .upgrade()
                        .unwrap()
                        .write_page(page_idx, &page.frame())?;
                }
            }
            None => {
                warn!("page {} is not in page cache, do nothing", page_idx);
            }
        }
        Ok(())
    }
}
/// One cached page: a physical frame plus its cache state.
struct Page {
    frame: VmFrame,
    state: PageState,
}

impl Page {
    /// Allocates a page whose content is not initialized (state `Uninit`).
    pub fn alloc() -> Result<Self> {
        let frame = Self::alloc_frame(false)?;
        Ok(Self {
            frame,
            state: PageState::Uninit,
        })
    }

    /// Allocates a zero-filled page; it starts in the `Dirty` state.
    pub fn alloc_zero() -> Result<Self> {
        let frame = Self::alloc_frame(true)?;
        Ok(Self {
            frame,
            state: PageState::Dirty,
        })
    }

    /// Returns a handle to the underlying frame (a clone of the stored `VmFrame`).
    pub fn frame(&self) -> VmFrame {
        self.frame.clone()
    }

    /// Returns the current cache state of the page.
    pub fn state(&self) -> &PageState {
        &self.state
    }

    /// Overwrites the cache state of the page.
    pub fn set_state(&mut self, new_state: PageState) {
        self.state = new_state;
    }

    /// Allocates a single frame, zeroing it first when `zeroed` is set.
    fn alloc_frame(zeroed: bool) -> Result<VmFrame> {
        let options = VmAllocOptions::new(1);
        let mut frames = VmFrameVec::allocate(&options)?;
        if zeroed {
            frames.zero();
        }
        Ok(frames.pop().unwrap())
    }
}
/// The cache state of a `Page`.
enum PageState {
/// `Uninit` indicates a newly allocated page whose content has not been initialized.
/// The page is available to write, not available to read.
Uninit,
/// `UpToDate` indicates a page whose content is consistent with the corresponding disk content.
/// The page is available to read and write.
UpToDate,
/// `Dirty` indicates a page whose content has been updated and not yet written back to the underlying disk.
/// The page is available to read and write.
Dirty,
}

View File

@ -0,0 +1,46 @@
#![allow(non_camel_case_types)]
use super::IoEvents;
use crate::fs::file_table::FileDescripter;
use crate::prelude::*;
/// The integer type used for the number of poll file descriptors.
pub type c_nfds = u64;
/// The C-layout `pollfd` record exchanged with user space.
///
/// `events` and `revents` hold raw event bits; see `PollFd` for the typed form.
// https://github.com/torvalds/linux/blob/master/include/uapi/asm-generic/poll.h
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct c_pollfd {
fd: FileDescripter,
events: i16,
revents: i16,
}
/// The typed counterpart of `c_pollfd`, with the event bits decoded into `IoEvents`.
#[derive(Debug, Clone, Copy)]
pub struct PollFd {
pub fd: FileDescripter,
pub events: IoEvents,
pub revents: IoEvents,
}
impl From<c_pollfd> for PollFd {
    /// Decodes the raw event bits of `raw`; bits that do not correspond to a
    /// known `IoEvents` flag are dropped.
    fn from(raw: c_pollfd) -> Self {
        Self {
            fd: raw.fd,
            events: IoEvents::from_bits_truncate(raw.events as _),
            revents: IoEvents::from_bits_truncate(raw.revents as _),
        }
    }
}
impl From<PollFd> for c_pollfd {
    /// Encodes the typed event sets back into the 16-bit fields of the C record.
    fn from(raw: PollFd) -> Self {
        Self {
            fd: raw.fd,
            events: raw.events.bits() as i16,
            revents: raw.revents.bits() as i16,
        }
    }
}

View File

@ -0,0 +1,23 @@
use bitflags::bitflags;
// File status flags (the `O_*` flags that describe how an open file behaves).
bitflags! {
pub struct StatusFlags: u32 {
/// append on each write
const O_APPEND = 1 << 10;
/// non block
const O_NONBLOCK = 1 << 11;
/// synchronized I/O, data
const O_DSYNC = 1 << 12;
/// signal-driven I/O
const O_ASYNC = 1 << 13;
/// direct I/O
const O_DIRECT = 1 << 14;
// On x86_64, O_LARGEFILE is 0, so no flag is defined for it here.
/// not update st_atime
const O_NOATIME = 1 << 18;
/// synchronized I/O, data and metadata
const O_SYNC = 1 << 20;
/// equivalent of POSIX.1's O_EXEC
const O_PATH = 1 << 21;
}
}

View File

@ -0,0 +1,222 @@
use super::{DirentVisitor, FsFlags, Inode, InodeMode, InodeType, Metadata, PageCache};
use crate::prelude::*;
use crate::rights::Full;
use crate::vm::vmo::Vmo;
use alloc::string::String;
use core::time::Duration;
use jinux_frame::vm::VmIo;
/// VFS-level representation of an inode
///
/// Cloning a `Vnode` is cheap: clones share the same inner state through an `Arc`.
#[derive(Clone)]
pub struct Vnode {
// The RwLock is to maintain the correct file length for concurrent read or write.
inner: Arc<RwLock<Inner>>,
}
/// The lock-protected state of a `Vnode`.
struct Inner {
// The file-system-specific inode that all operations are forwarded to.
inode: Arc<dyn Inode>,
// `None` when the inode's file system sets `FsFlags::NO_PAGECACHE`.
page_cache: Option<PageCache>,
}
impl Vnode {
/// Returns a duplicate handle to the page-cache VMO, or `None` if this vnode
/// has no page cache.
///
/// Panics if duplicating the VMO fails.
pub fn page_cache(&self) -> Option<Vmo<Full>> {
    let inner = self.inner.read();
    let page_cache = inner.page_cache.as_ref()?;
    Some(page_cache.pages().dup().unwrap())
}
/// Wraps `inode` in a vnode.
///
/// A page cache is created unless the inode's file system sets
/// `FsFlags::NO_PAGECACHE`; failure to create it is returned as an error.
pub fn new(inode: Arc<dyn Inode>) -> Result<Self> {
    let page_cache_disabled = inode.fs().flags().contains(FsFlags::NO_PAGECACHE);
    let page_cache = if page_cache_disabled {
        None
    } else {
        Some(PageCache::new(&inode)?)
    };
    let inner = Inner { inode, page_cache };
    Ok(Self {
        inner: Arc::new(RwLock::new(inner)),
    })
}
/// Writes `buf` at `offset`, going through the page cache when there is one.
///
/// Only regular files and sockets are accepted; other inode types fail with
/// `EINVAL`. With a page cache, a write that extends past the current file
/// length first grows the cache, then writes, then grows the inode, and the
/// call returns `buf.len()`. Without one the write is forwarded to the inode.
pub fn write_at(&self, offset: usize, buf: &[u8]) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EINVAL);
    }
    let inner = self.inner.write();
    if let Some(page_cache) = &inner.page_cache {
        let file_len = inner.inode.len();
        let write_end = offset + buf.len();
        let grows_file = write_end > file_len;
        if grows_file {
            page_cache.pages().resize(write_end)?;
        }
        page_cache.pages().write_bytes(offset, buf)?;
        if grows_file {
            inner.inode.resize(write_end);
        }
        Ok(buf.len())
    } else {
        inner.inode.write_at(offset, buf)
    }
}
/// Writes `buf` at `offset` directly to the inode, bypassing the page cache.
///
/// Only regular files and sockets are accepted; other inode types fail with
/// `EINVAL`. If a page cache exists, the written range is evicted from it
/// before the write.
pub fn write_direct_at(&self, offset: usize, buf: &[u8]) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EINVAL);
    }
    let inner = self.inner.write();
    match inner.page_cache.as_ref() {
        Some(page_cache) => page_cache.evict_range(offset..offset + buf.len()),
        None => (),
    }
    inner.inode.write_at(offset, buf)
}
/// Reads into `buf` starting at `offset`, going through the page cache when
/// there is one, and returns the number of bytes read.
///
/// Only regular files and sockets are accepted; other inode types fail with
/// `EISDIR`. With a page cache the read is clamped to the current file
/// length, so reading at or past the end yields `Ok(0)`.
pub fn read_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EISDIR);
    }
    let inner = self.inner.read();
    if let Some(page_cache) = &inner.page_cache {
        let file_len = inner.inode.len();
        let start = file_len.min(offset);
        let end = file_len.min(offset + buf.len());
        let read_len = end - start;
        page_cache
            .pages()
            .read_bytes(start, &mut buf[..read_len])?;
        Ok(read_len)
    } else {
        inner.inode.read_at(offset, buf)
    }
}
/// Reads into `buf` starting at `offset` directly from the inode, bypassing
/// the page cache.
///
/// Only regular files and sockets are accepted; other inode types fail with
/// `EISDIR`. If a page cache exists, the requested range is evicted from it
/// before the read.
pub fn read_direct_at(&self, offset: usize, buf: &mut [u8]) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EISDIR);
    }
    let inner = self.inner.read();
    match inner.page_cache.as_ref() {
        Some(page_cache) => page_cache.evict_range(offset..offset + buf.len()),
        None => (),
    }
    inner.inode.read_at(offset, buf)
}
/// Reads the whole file into the front of `buf`, going through the page cache
/// when there is one.
///
/// `buf` is grown (zero-filled) to the file length if it is shorter; existing
/// content is overwritten from index 0 rather than appended to. Only regular
/// files and sockets are accepted; other inode types fail with `EISDIR`.
pub fn read_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EISDIR);
    }
    let inner = self.inner.read();
    let file_len = inner.inode.len();
    if buf.len() < file_len {
        buf.resize(file_len, 0);
    }
    let dst = &mut buf[..file_len];
    if let Some(page_cache) = &inner.page_cache {
        page_cache.pages().read_bytes(0, dst)?;
        Ok(file_len)
    } else {
        inner.inode.read_at(0, dst)
    }
}
/// Reads the whole file into the front of `buf` directly from the inode,
/// bypassing the page cache.
///
/// `buf` is grown (zero-filled) to the file length if it is shorter. If a page
/// cache exists, the whole file range is evicted from it before the read.
/// Only regular files and sockets are accepted; other inode types fail with
/// `EISDIR`.
pub fn read_direct_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
    if !matches!(self.inode_type(), InodeType::File | InodeType::Socket) {
        return_errno!(Errno::EISDIR);
    }
    let inner = self.inner.read();
    let file_len = inner.inode.len();
    if buf.len() < file_len {
        buf.resize(file_len, 0);
    }
    match inner.page_cache.as_ref() {
        Some(page_cache) => page_cache.evict_range(0..file_len),
        None => (),
    }
    inner.inode.read_at(0, &mut buf[..file_len])
}
/// Creates a child named `name` via the inode's `mknod` and wraps the new inode in a `Vnode`.
pub fn mknod(&self, name: &str, type_: InodeType, mode: InodeMode) -> Result<Self> {
let inode = self.inner.read().inode.mknod(name, type_, mode)?;
Self::new(inode)
}
/// Looks up the child named `name` via the inode's `lookup` and wraps the result in a new `Vnode`.
///
/// Every call builds a fresh `Vnode` (and page cache, if enabled) for the returned inode.
pub fn lookup(&self, name: &str) -> Result<Self> {
let inode = self.inner.read().inode.lookup(name)?;
Self::new(inode)
}
/// Forwards to the inode's `link`, passing the inode behind `old` and the new `name`.
///
/// Read locks on both vnodes are held for the duration of the call.
pub fn link(&self, old: &Vnode, name: &str) -> Result<()> {
self.inner.read().inode.link(&old.inner.read().inode, name)
}
/// Forwards to the inode's `unlink` for the entry `name`.
pub fn unlink(&self, name: &str) -> Result<()> {
self.inner.read().inode.unlink(name)
}
/// Forwards to the inode's `rmdir` for the entry `name`.
pub fn rmdir(&self, name: &str) -> Result<()> {
self.inner.read().inode.rmdir(name)
}
/// Forwards to the inode's `rename`, passing the inode behind `target` as the destination.
///
/// Read locks on both vnodes are held for the duration of the call.
pub fn rename(&self, old_name: &str, target: &Vnode, new_name: &str) -> Result<()> {
self.inner
.read()
.inode
.rename(old_name, &target.inner.read().inode, new_name)
}
/// Forwards to the inode's `read_link`.
pub fn read_link(&self) -> Result<String> {
self.inner.read().inode.read_link()
}
/// Forwards to the inode's `write_link`, holding the write lock during the call.
pub fn write_link(&self, target: &str) -> Result<()> {
self.inner.write().inode.write_link(target)
}
/// Forwards to the inode's `readdir_at`, starting at `offset` and reporting entries to `visitor`.
pub fn readdir_at(&self, offset: usize, visitor: &mut dyn DirentVisitor) -> Result<usize> {
self.inner.read().inode.readdir_at(offset, visitor)
}
/// Returns the inode's metadata.
pub fn metadata(&self) -> Metadata {
self.inner.read().inode.metadata()
}
/// Returns the inode's type, taken from a fresh metadata snapshot.
pub fn inode_type(&self) -> InodeType {
self.inner.read().inode.metadata().type_
}
/// Returns the inode's mode bits, taken from a fresh metadata snapshot.
pub fn inode_mode(&self) -> InodeMode {
self.inner.read().inode.metadata().mode
}
/// Returns the inode's current length.
pub fn len(&self) -> usize {
self.inner.read().inode.len()
}
/// Returns the inode's access time.
pub fn atime(&self) -> Duration {
self.inner.read().inode.atime()
}
/// Sets the inode's access time. Only the read lock is taken; the inode takes `&self` here.
pub fn set_atime(&self, time: Duration) {
self.inner.read().inode.set_atime(time)
}
/// Returns the inode's modification time.
pub fn mtime(&self) -> Duration {
self.inner.read().inode.mtime()
}
/// Sets the inode's modification time. Only the read lock is taken; the inode takes `&self` here.
pub fn set_mtime(&self, time: Duration) {
self.inner.read().inode.set_mtime(time)
}
/// Returns whether a dentry for this vnode may be kept in the dentry cache, as reported by the inode.
pub fn is_dentry_cacheable(&self) -> bool {
self.inner.read().inode.is_dentry_cacheable()
}
}

View File

@ -0,0 +1,128 @@
//! The std library of jinux
#![no_std]
#![forbid(unsafe_code)]
#![allow(dead_code)]
#![allow(incomplete_features)]
#![allow(unused_variables)]
#![feature(cstr_from_bytes_until_nul)]
#![feature(exclusive_range_pattern)]
#![feature(btree_drain_filter)]
#![feature(const_option)]
#![feature(extend_one)]
#![feature(let_chains)]
// FIXME: This feature is currently used as a workaround to support VM capabilities.
// Since it is an incomplete feature, using it is unsafe.
// We should find a proper way to replace it with min_specialization, which is a sound feature.
#![feature(specialization)]
#![feature(fn_traits)]
#![feature(linked_list_remove)]
#![feature(register_tool)]
#![register_tool(component_access_control)]
use crate::{
prelude::*,
thread::{kernel_thread::KernelThreadExt, Thread},
};
use process::Process;
extern crate alloc;
extern crate lru;
#[macro_use]
extern crate controlled;
pub mod driver;
pub mod error;
pub mod events;
pub mod fs;
pub mod prelude;
mod process;
pub mod rights;
pub mod syscall;
pub mod thread;
pub mod time;
pub mod tty;
mod util;
pub mod vm;
/// Initializes the jinux-std subsystems: the drivers, the FIFO scheduler and
/// the initramfs (populated from the ramdisk image embedded in the binary).
///
/// # Panics
///
/// Panics if initializing the initramfs from the ramdisk fails.
pub fn init() {
driver::init();
process::fifo_scheduler::init();
fs::initramfs::init(read_ramdisk_content()).unwrap();
}
/// Body of the init kernel thread.
///
/// It spawns a short test kernel thread and joins it, prints the banner,
/// starts the BusyBox shell as a user process, and then yields forever.
///
/// # Panics
///
/// Panics if the BusyBox process cannot be spawned.
fn init_thread() {
println!(
"[kernel] Spawn init thread, tid = {}",
current_thread!().tid()
);
// driver::pci::virtio::block::block_device_test();
// Smoke test: a kernel thread that only prints and logs its own tid.
let thread = Thread::spawn_kernel_thread(|| {
println!("[kernel] Hello world from kernel!");
let current = current_thread!();
let tid = current.tid();
debug!("current tid = {}", tid);
});
thread.join();
// Logged only after the test thread has been joined.
info!(
"[jinux-std/lib.rs] spawn kernel thread, tid = {}",
thread.tid()
);
print_banner();
run_busybox().expect("run busybox fails");
loop {
// We don't have preemptive scheduler now.
// The long running init thread should yield its own execution to allow other tasks to go on.
Thread::yield_now();
}
}
/// Returns the ramdisk CPIO archive, which is embedded into the binary at compile time.
fn read_ramdisk_content() -> &'static [u8] {
include_bytes!("../../../../regression/ramdisk/build/ramdisk.cpio")
}
/// Spawns the init kernel thread (`init_thread`). This function never returns.
///
/// NOTE(review): the `unreachable!()` relies on `spawn_kernel_thread` not
/// handing control back to this point — confirm against its implementation.
#[controlled]
pub fn run_first_process() -> ! {
Thread::spawn_kernel_thread(init_thread);
unreachable!()
}
/// Spawns `/busybox/busybox` as a user process running a login shell
/// (`sh -l`) with a minimal root environment, after printing the shell header.
fn run_busybox() -> Result<Arc<Process>> {
    let executable_path = "/busybox/busybox";
    let argv = ["sh", "-l"]
        .into_iter()
        .map(|arg| CString::new(arg).unwrap())
        .collect();
    let envp = [
        "SHELL=/bin/sh",
        "LOGNAME=root",
        "HOME=/",
        "USER=root",
        "PATH=/bin",
    ]
    .into_iter()
    .map(|env| CString::new(env).unwrap())
    .collect();
    println!("");
    println!("BusyBox v1.35.0 built-in shell (ash)\n");
    Process::spawn_user_process(executable_path, argv, envp)
}
/// Prints the ASCII-art banner, wrapped in ANSI escape sequences that switch
/// the text colour to cyan and then reset it.
fn print_banner() {
// `ESC[36m`: cyan foreground.
println!("\x1B[36m");
println!(
r"
__ __ .__ __. __ __ ___ ___
| | | | | \ | | | | | | \ \ / /
| | | | | \| | | | | | \ V /
.--. | | | | | . ` | | | | | > <
| `--' | | | | |\ | | `--' | / . \
\______/ |__| |__| \__| \______/ /__/ \__\
"
);
// `ESC[0m`: reset all text attributes.
println!("\x1B[0m");
}

View File

@ -0,0 +1,46 @@
#![allow(unused)]
pub(crate) use alloc::boxed::Box;
pub(crate) use alloc::collections::BTreeMap;
pub(crate) use alloc::collections::BTreeSet;
pub(crate) use alloc::collections::LinkedList;
pub(crate) use alloc::collections::VecDeque;
pub(crate) use alloc::ffi::CString;
pub(crate) use alloc::string::String;
pub(crate) use alloc::string::ToString;
pub(crate) use alloc::sync::Arc;
pub(crate) use alloc::sync::Weak;
pub(crate) use alloc::vec;
pub(crate) use alloc::vec::Vec;
pub(crate) use bitflags::bitflags;
pub(crate) use core::ffi::CStr;
pub(crate) use jinux_frame::config::PAGE_SIZE;
pub(crate) use jinux_frame::sync::{Mutex, MutexGuard};
pub(crate) use jinux_frame::vm::Vaddr;
pub(crate) use jinux_frame::{print, println};
pub(crate) use log::{debug, error, info, trace, warn};
pub(crate) use pod::Pod;
pub(crate) use spin::RwLock;
/// Returns the current process, i.e. expands to `crate::process::Process::current()`.
#[macro_export]
macro_rules! current {
() => {
crate::process::Process::current()
};
}
/// Returns the current thread, i.e. expands to `crate::thread::Thread::current()`.
#[macro_export]
macro_rules! current_thread {
() => {
crate::thread::Thread::current()
};
}
pub(crate) use crate::current;
pub(crate) use crate::current_thread;
pub(crate) use crate::error::{Errno, Error};
pub(crate) use lazy_static::lazy_static;
/// The crate-wide result type: `core::result::Result` with the crate's `Error` as the error type.
pub(crate) type Result<T> = core::result::Result<T, Error>;
pub(crate) use crate::{return_errno, return_errno_with_message};

View File

@ -0,0 +1,412 @@
use jinux_frame::{
cpu::UserContext,
user::UserSpace,
vm::{VmIo, VmSpace},
};
use crate::{
current_thread,
fs::file_table::FileTable,
fs::{fs_resolver::FsResolver, utils::FileCreationMask},
prelude::*,
process::{
posix_thread::{
builder::PosixThreadBuilder, name::ThreadName, posix_thread_ext::PosixThreadExt,
},
process_table,
},
rights::Full,
thread::{allocate_tid, thread_table, Thread, Tid},
util::write_val_to_user,
vm::vmar::Vmar,
};
use super::{posix_thread::PosixThread, signal::sig_disposition::SigDispositions, Process};
// Flags accepted by the clone operation. Only a subset is actually supported;
// see `CloneFlags::check_unsupported_flags`.
bitflags! {
pub struct CloneFlags: u32 {
const CLONE_VM = 0x00000100; /* Set if VM shared between processes. */
const CLONE_FS = 0x00000200; /* Set if fs info shared between processes. */
const CLONE_FILES = 0x00000400; /* Set if open files shared between processes. */
const CLONE_SIGHAND = 0x00000800; /* Set if signal handlers shared. */
const CLONE_PIDFD = 0x00001000; /* Set if a pidfd should be placed in parent. */
const CLONE_PTRACE = 0x00002000; /* Set if tracing continues on the child. */
const CLONE_VFORK = 0x00004000; /* Set if the parent wants the child to wake it up on mm_release. */
const CLONE_PARENT = 0x00008000; /* Set if we want to have the same parent as the cloner. */
const CLONE_THREAD = 0x00010000; /* Set to add to same thread group. */
const CLONE_NEWNS = 0x00020000; /* Set to create new namespace. */
const CLONE_SYSVSEM = 0x00040000; /* Set to shared SVID SEM_UNDO semantics. */
const CLONE_SETTLS = 0x00080000; /* Set TLS info. */
const CLONE_PARENT_SETTID = 0x00100000; /* Store TID in userlevel buffer before MM copy. */
const CLONE_CHILD_CLEARTID = 0x00200000;/* Register exit futex and memory location to clear. */
const CLONE_DETACHED = 0x00400000; /* Create clone detached. */
const CLONE_UNTRACED = 0x00800000; /* Set if the tracing process can't force CLONE_PTRACE on this clone. */
const CLONE_CHILD_SETTID = 0x01000000; /* Store TID in userlevel buffer in the child. */
const CLONE_NEWCGROUP = 0x02000000; /* New cgroup namespace. */
const CLONE_NEWUTS = 0x04000000; /* New utsname group. */
const CLONE_NEWIPC = 0x08000000; /* New ipcs. */
const CLONE_NEWUSER = 0x10000000; /* New user namespace. */
const CLONE_NEWPID = 0x20000000; /* New pid namespace. */
const CLONE_NEWNET = 0x40000000; /* New network namespace. */
const CLONE_IO = 0x80000000; /* Clone I/O context. */
}
}
/// The arguments of a clone request: the child's stack pointer and TLS value,
/// the user addresses for the parent/child TID, and the clone flags.
#[derive(Debug, Clone, Copy)]
pub struct CloneArgs {
    new_sp: u64,
    parent_tidptr: Vaddr,
    child_tidptr: Vaddr,
    tls: u64,
    clone_flags: CloneFlags,
}

impl CloneArgs {
    /// Arguments with every value zero and no flags set.
    pub const fn default() -> Self {
        Self::new(0, 0, 0, 0, CloneFlags::empty())
    }

    /// Bundles the given values into a `CloneArgs`.
    pub const fn new(
        new_sp: u64,
        parent_tidptr: Vaddr,
        child_tidptr: Vaddr,
        tls: u64,
        clone_flags: CloneFlags,
    ) -> Self {
        Self {
            new_sp,
            parent_tidptr,
            child_tidptr,
            tls,
            clone_flags,
        }
    }
}
impl From<u64> for CloneFlags {
fn from(flags: u64) -> Self {
// We use the lower 32 bits
let clone_flags = (flags & 0xffff_ffff) as u32;
CloneFlags::from_bits_truncate(clone_flags)
}
}
impl CloneFlags {
fn check_unsupported_flags(&self) -> Result<()> {
let supported_flags = CloneFlags::CLONE_VM
| CloneFlags::CLONE_FS
| CloneFlags::CLONE_FILES
| CloneFlags::CLONE_SIGHAND
| CloneFlags::CLONE_THREAD
| CloneFlags::CLONE_SYSVSEM
| CloneFlags::CLONE_SETTLS
| CloneFlags::CLONE_PARENT_SETTID
| CloneFlags::CLONE_CHILD_SETTID
| CloneFlags::CLONE_CHILD_CLEARTID;
let unsupported_flags = *self - supported_flags;
if !unsupported_flags.is_empty() {
panic!("contains unsupported clone flags: {:?}", unsupported_flags);
}
Ok(())
}
}
/// Clones a child of the current thread and makes it runnable.
///
/// With `CLONE_THREAD` a new thread is created in the current process;
/// otherwise a new process is created. In both cases the child's `run()` is
/// called before returning, and the child's TID (thread case) or PID (process
/// case) is returned.
///
/// # Errors
///
/// Propagates any error from the flag check or from creating the child.
pub fn clone_child(parent_context: UserContext, clone_args: CloneArgs) -> Result<Tid> {
clone_args.clone_flags.check_unsupported_flags()?;
if clone_args.clone_flags.contains(CloneFlags::CLONE_THREAD) {
let child_thread = clone_child_thread(parent_context, clone_args)?;
let child_tid = child_thread.tid();
debug!(
"*********schedule child thread, current tid = {}, child pid = {}**********",
current_thread!().tid(),
child_tid
);
child_thread.run();
debug!(
"*********return to parent thread, current tid = {}, child pid = {}*********",
current_thread!().tid(),
child_tid
);
Ok(child_tid)
} else {
let child_process = clone_child_process(parent_context, clone_args)?;
let child_pid = child_process.pid();
debug!(
"*********schedule child process, current pid = {}, child pid = {}**********",
current!().pid(),
child_pid
);
child_process.run();
debug!(
"*********return to parent process, current pid = {}, child pid = {}*********",
current!().pid(),
child_pid
);
Ok(child_pid)
}
}
/// Creates a new thread inside the current process.
///
/// The child shares the process's root VMAR (and therefore its VM space),
/// gets its CPU context from `parent_context` adjusted by `clone_args`, and
/// inherits the signal mask of the calling thread. The new thread is added to
/// the process's thread list but is not started here.
///
/// # Errors
///
/// Propagates errors from the System V semaphore handling and from the
/// parent/child TID bookkeeping.
fn clone_child_thread(parent_context: UserContext, clone_args: CloneArgs) -> Result<Arc<Thread>> {
    let clone_flags = clone_args.clone_flags;
    let current = current!();
    debug_assert!(clone_flags.contains(CloneFlags::CLONE_VM));
    debug_assert!(clone_flags.contains(CloneFlags::CLONE_FILES));
    debug_assert!(clone_flags.contains(CloneFlags::CLONE_SIGHAND));
    let child_root_vmar = current.root_vmar();
    let child_vm_space = child_root_vmar.vm_space().clone();
    let child_cpu_context = clone_cpu_context(
        parent_context,
        clone_args.new_sp,
        clone_args.tls,
        clone_flags,
    );
    let child_user_space = Arc::new(UserSpace::new(child_vm_space, child_cpu_context));
    clone_sysvsem(clone_flags)?;
    let child_tid = allocate_tid();
    // inherit sigmask from current thread
    let current_thread = current_thread!();
    let current_posix_thread = current_thread.as_posix_thread().unwrap();
    let sig_mask = current_posix_thread.sig_mask().lock().clone();
    let is_main_thread = child_tid == current.pid();
    // The mask was previously computed but never handed to the builder, so the
    // child did not actually inherit it (unlike in `clone_child_process`).
    let thread_builder = PosixThreadBuilder::new(child_tid, child_user_space)
        .process(Arc::downgrade(&current))
        .sig_mask(sig_mask)
        .is_main_thread(is_main_thread);
    let child_thread = thread_builder.build();
    current.threads.lock().push(child_thread.clone());
    let child_posix_thread = child_thread.as_posix_thread().unwrap();
    clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
    clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
    clone_child_settid(
        child_root_vmar,
        child_tid,
        clone_args.child_tidptr,
        clone_flags,
    )?;
    Ok(child_thread)
}
/// Creates a new child process of the current process.
///
/// The child's VM, file table, fs info and signal dispositions are produced by
/// the `clone_*` helpers according to `clone_flags`. The umask is copied, and
/// the signal mask is inherited from the calling thread. The child gets a
/// single thread whose TID is also used as the child's PID. The child is
/// registered in the parent's process group, in the parent's children, and in
/// the process table, but is not started here.
///
/// # Errors
///
/// Propagates errors from the `clone_*` helpers and from building the thread name.
///
/// # Panics
///
/// Panics if the parent's process group is gone, or if the freshly built
/// child thread cannot be found in the thread table.
fn clone_child_process(parent_context: UserContext, clone_args: CloneArgs) -> Result<Arc<Process>> {
let current = current!();
let parent = Arc::downgrade(&current);
let clone_flags = clone_args.clone_flags;
// clone vm
let parent_root_vmar = current.root_vmar();
let child_root_vmar = clone_vm(parent_root_vmar, clone_flags)?;
let child_user_vm = current.user_vm().clone();
// clone user space
let child_cpu_context = clone_cpu_context(
parent_context,
clone_args.new_sp,
clone_args.tls,
clone_flags,
);
let child_vm_space = child_root_vmar.vm_space().clone();
let child_user_space = Arc::new(UserSpace::new(child_vm_space, child_cpu_context));
// clone file table
let child_file_table = clone_files(current.file_table(), clone_flags);
// clone fs
let child_fs = clone_fs(current.fs(), clone_flags);
// clone umask
let parent_umask = current.umask.read().get();
let child_umask = Arc::new(RwLock::new(FileCreationMask::new(parent_umask)));
// clone sig dispositions
let child_sig_dispositions = clone_sighand(current.sig_dispositions(), clone_flags);
// clone system V semaphore
clone_sysvsem(clone_flags)?;
let child_elf_path = current.executable_path().read().clone();
let child_thread_name = ThreadName::new_from_executable_path(&child_elf_path)?;
// inherit parent's sig mask
let current_thread = current_thread!();
let posix_thread = current_thread.as_posix_thread().unwrap();
let child_sig_mask = posix_thread.sig_mask().lock().clone();
let child_tid = allocate_tid();
let mut child_thread_builder = PosixThreadBuilder::new(child_tid, child_user_space)
.thread_name(Some(child_thread_name))
.sig_mask(child_sig_mask);
// The process and its first thread refer to each other, so the thread is built
// inside `new_cyclic`, where a weak reference to the process is already available.
let child = Arc::new_cyclic(|child_process_ref| {
let weak_child_process = child_process_ref.clone();
let child_pid = child_tid;
child_thread_builder = child_thread_builder.process(weak_child_process);
let child_thread = child_thread_builder.build();
Process::new(
child_pid,
parent,
vec![child_thread],
child_elf_path,
child_user_vm,
child_root_vmar.clone(),
Weak::new(),
child_file_table,
child_fs,
child_umask,
child_sig_dispositions,
)
});
// Inherit parent's process group
let parent_process_group = current.process_group().lock().upgrade().unwrap();
parent_process_group.add_process(child.clone());
child.set_process_group(Arc::downgrade(&parent_process_group));
current!().add_child(child.clone());
process_table::add_process(child.clone());
let child_thread = thread_table::tid_to_thread(child_tid).unwrap();
let child_posix_thread = child_thread.as_posix_thread().unwrap();
clone_parent_settid(child_tid, clone_args.parent_tidptr, clone_flags)?;
clone_child_cleartid(child_posix_thread, clone_args.child_tidptr, clone_flags)?;
clone_child_settid(
&child_root_vmar,
child_tid,
clone_args.child_tidptr,
clone_flags,
)?;
Ok(child)
}
/// Records `child_tidptr` as the child's clear-child-tid address when
/// `CLONE_CHILD_CLEARTID` is set; otherwise does nothing.
fn clone_child_cleartid(
    child_posix_thread: &PosixThread,
    child_tidptr: Vaddr,
    clone_flags: CloneFlags,
) -> Result<()> {
    if !clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
        return Ok(());
    }
    *child_posix_thread.clear_child_tid().lock() = child_tidptr;
    Ok(())
}
/// Writes `child_tid` to `child_tidptr` through the child's root vmar when
/// `CLONE_CHILD_SETTID` is set; otherwise does nothing.
fn clone_child_settid(
    child_root_vmar: &Vmar<Full>,
    child_tid: Tid,
    child_tidptr: Vaddr,
    clone_flags: CloneFlags,
) -> Result<()> {
    let wants_settid = clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID);
    if !wants_settid {
        return Ok(());
    }
    child_root_vmar.write_val(child_tidptr, &child_tid)?;
    Ok(())
}
/// Writes `child_tid` to the user address `parent_tidptr` (via
/// `write_val_to_user`) when `CLONE_PARENT_SETTID` is set; otherwise does nothing.
fn clone_parent_settid(
    child_tid: Tid,
    parent_tidptr: Vaddr,
    clone_flags: CloneFlags,
) -> Result<()> {
    let wants_settid = clone_flags.contains(CloneFlags::CLONE_PARENT_SETTID);
    if !wants_settid {
        return Ok(());
    }
    write_val_to_user(parent_tidptr, &child_tid)?;
    Ok(())
}
/// Produces the root vmar for the child.
///
/// With `CLONE_VM` the child shares the parent's root vmar (the `Arc` is
/// cloned). Without it, the parent's vmar is forked via `fork_vmar` and the
/// child gets the fork.
fn clone_vm(
    parent_root_vmar: &Arc<Vmar<Full>>,
    clone_flags: CloneFlags,
) -> Result<Arc<Vmar<Full>>> {
    let child_root_vmar = if clone_flags.contains(CloneFlags::CLONE_VM) {
        Arc::clone(parent_root_vmar)
    } else {
        let forked = parent_root_vmar.fork_vmar()?;
        Arc::new(forked)
    };
    Ok(child_root_vmar)
}
/// Derives the child's CPU context from the parent's.
///
/// The child sees 0 in `rax`. A non-zero `new_sp` replaces the stack pointer,
/// and `tls` is installed as the fs base when `CLONE_SETTLS` is set.
fn clone_cpu_context(
    parent_context: UserContext,
    new_sp: u64,
    tls: u64,
    clone_flags: CloneFlags,
) -> UserContext {
    let shares_vm = clone_flags.contains(CloneFlags::CLONE_VM);
    let sets_tls = clone_flags.contains(CloneFlags::CLONE_SETTLS);

    let mut ctx = parent_context.clone();
    // The child observes a return value of zero.
    ctx.set_rax(0);
    if shares_vm {
        // Sharing the address space with the parent requires a separate stack.
        debug_assert!(new_sp != 0);
    }
    if new_sp != 0 {
        ctx.set_rsp(new_sp as usize);
    }
    if sets_tls {
        // x86_64 specific: the TLS pointer lives in the fsbase register.
        ctx.set_fsbase(tls as usize);
    }
    ctx
}
/// Returns the fs resolver for the child: the parent's own `Arc` when
/// `CLONE_FS` is set, otherwise a new lock wrapping a clone of the resolver.
fn clone_fs(
    parent_fs: &Arc<RwLock<FsResolver>>,
    clone_flags: CloneFlags,
) -> Arc<RwLock<FsResolver>> {
    if !clone_flags.contains(CloneFlags::CLONE_FS) {
        let fs_copy = parent_fs.read().clone();
        return Arc::new(RwLock::new(fs_copy));
    }
    Arc::clone(parent_fs)
}
/// Returns the file table for the child.
///
/// With `CLONE_FILES` the parent's table is shared. Otherwise the child gets a
/// new mutex wrapping a clone of the table.
/// FIXME: the clone may not be a deep copy.
fn clone_files(
    parent_file_table: &Arc<Mutex<FileTable>>,
    clone_flags: CloneFlags,
) -> Arc<Mutex<FileTable>> {
    if !clone_flags.contains(CloneFlags::CLONE_FILES) {
        let table_copy = parent_file_table.lock().clone();
        return Arc::new(Mutex::new(table_copy));
    }
    Arc::clone(parent_file_table)
}
/// Returns the signal dispositions for the child: shared with the parent when
/// `CLONE_SIGHAND` is set, otherwise a new mutex wrapping a clone.
fn clone_sighand(
    parent_sig_dispositions: &Arc<Mutex<SigDispositions>>,
    clone_flags: CloneFlags,
) -> Arc<Mutex<SigDispositions>> {
    if !clone_flags.contains(CloneFlags::CLONE_SIGHAND) {
        let dispositions_copy = parent_sig_dispositions.lock().clone();
        return Arc::new(Mutex::new(dispositions_copy));
    }
    Arc::clone(parent_sig_dispositions)
}
/// Placeholder for `CLONE_SYSVSEM`: logs a warning when the flag is set and
/// always returns `Ok(())`.
fn clone_sysvsem(clone_flags: CloneFlags) -> Result<()> {
    let requested = clone_flags.contains(CloneFlags::CLONE_SYSVSEM);
    if requested {
        warn!("CLONE_SYSVSEM is not supported now");
    }
    Ok(())
}
/// Debug helper: checks that a cloned vm space is correct.
///
/// Reads 0x78 bytes at the hard-coded address 0x401000 from both vm spaces and
/// asserts that they are byte-for-byte equal.
///
/// # Panics
///
/// Panics if either read fails or if any byte differs.
fn debug_check_clone_vm_space(parent_vm_space: &VmSpace, child_vm_space: &VmSpace) {
    const CHECK_ADDR: usize = 0x401000;
    const CHECK_LEN: usize = 0x78;

    let mut buffer1 = vec![0u8; CHECK_LEN];
    let mut buffer2 = vec![0u8; CHECK_LEN];
    parent_vm_space
        .read_bytes(CHECK_ADDR, &mut buffer1)
        .expect("read buffer1 failed");
    child_vm_space
        .read_bytes(CHECK_ADDR, &mut buffer2)
        .expect("read buffer2 failed");
    for (parent_byte, child_byte) in buffer1.iter().zip(buffer2.iter()) {
        assert_eq!(parent_byte, child_byte);
    }
    debug!("check clone vm space succeed.");
}

View File

@ -0,0 +1,32 @@
use crate::prelude::*;
use jinux_frame::task::{set_scheduler, Scheduler, Task, TaskAdapter};
use intrusive_collections::LinkedList;
/// A scheduler that hands tasks out in the order they were enqueued.
pub struct FifoScheduler {
    /// Queue of tasks, protected by a mutex.
    tasks: Mutex<LinkedList<TaskAdapter>>,
}

impl FifoScheduler {
    /// Creates a scheduler with an empty task queue.
    pub fn new() -> Self {
        let empty_queue = LinkedList::new(TaskAdapter::new());
        Self {
            tasks: Mutex::new(empty_queue),
        }
    }
}
impl Scheduler for FifoScheduler {
    /// Appends `task` to the back of the queue.
    fn enqueue(&self, task: Arc<Task>) {
        // `task` is already owned, so it is moved into the queue directly
        // instead of being cloned and then dropped.
        self.tasks.lock().push_back(task);
    }

    /// Removes and returns the task at the front of the queue, if any.
    fn dequeue(&self) -> Option<Arc<Task>> {
        self.tasks.lock().pop_front()
    }
}
pub fn init() {
let fifo_scheduler = Box::new(FifoScheduler::new());
let scheduler = Box::<FifoScheduler>::leak(fifo_scheduler);
set_scheduler(scheduler);
}

View File

@ -0,0 +1,394 @@
use core::sync::atomic::{AtomicI32, Ordering};
use self::posix_thread::posix_thread_ext::PosixThreadExt;
use self::process_group::ProcessGroup;
use self::process_vm::user_heap::UserHeap;
use self::process_vm::UserVm;
use self::rlimit::ResourceLimits;
use self::signal::constants::SIGCHLD;
use self::signal::sig_disposition::SigDispositions;
use self::signal::sig_queues::SigQueues;
use self::signal::signals::kernel::KernelSignal;
use self::signal::signals::Signal;
use self::status::ProcessStatus;
use crate::fs::file_table::FileTable;
use crate::fs::fs_resolver::FsResolver;
use crate::fs::utils::FileCreationMask;
use crate::prelude::*;
use crate::rights::Full;
use crate::thread::{allocate_tid, thread_table, Thread};
use crate::tty::get_n_tty;
use crate::vm::vmar::Vmar;
use jinux_frame::sync::WaitQueue;
pub mod clone;
pub mod fifo_scheduler;
pub mod posix_thread;
pub mod process_filter;
pub mod process_group;
pub mod process_table;
pub mod process_vm;
pub mod program_loader;
pub mod rlimit;
pub mod signal;
pub mod status;
pub mod wait;
/// Process id.
pub type Pid = i32;
/// Process group id.
pub type Pgid = i32;
/// Exit code of a process.
pub type ExitCode = i32;
/// The pid that `get_init_process` looks up in the process table.
const INIT_PROCESS_PID: Pid = 1;
/// Process stands for a set of threads that shares the same userspace.
pub struct Process {
// Immutable Part
/// The process id.
pid: Pid,
/// User-space VM state; the user heap is reached through it.
user_vm: UserVm,
/// The root vmar of the process.
root_vmar: Arc<Vmar<Full>>,
/// Wait queue that is woken when a child of this process exits.
waiting_children: WaitQueue,
/// Wait queue for I/O events.
poll_queue: WaitQueue,
// Mutable Part
/// The executable path.
executable_path: RwLock<String>,
/// The threads that belong to this process.
threads: Mutex<Vec<Arc<Thread>>>,
/// The exit code, stored by `exit_group`.
exit_code: AtomicI32,
/// Process status
status: Mutex<ProcessStatus>,
/// Parent process (weak, so a child does not keep its parent alive).
parent: Mutex<Weak<Process>>,
/// Children processes, keyed by pid.
children: Mutex<BTreeMap<Pid, Arc<Process>>>,
/// Process group
process_group: Mutex<Weak<ProcessGroup>>,
/// File table
file_table: Arc<Mutex<FileTable>>,
/// FsResolver
fs: Arc<RwLock<FsResolver>>,
/// File creation mask (umask).
umask: Arc<RwLock<FileCreationMask>>,
/// Resource limits.
resource_limits: Mutex<ResourceLimits>,
// Signal
/// Signal dispositions.
sig_dispositions: Arc<Mutex<SigDispositions>>,
/// Process-level signal queues
sig_queues: Mutex<SigQueues>,
}
impl Process {
    /// Returns the process that the current thread belongs to.
    ///
    /// # Panics
    ///
    /// Panics if the current thread is not a posix thread.
    pub fn current() -> Arc<Process> {
        let current_thread = Thread::current();
        if let Some(posix_thread) = current_thread.as_posix_thread() {
            posix_thread.process()
        } else {
            panic!("[Internal error]The current thread does not belong to a process");
        }
    }

    /// Creates a new process without scheduling it.
    ///
    /// The new process starts with no children, exit code 0, status
    /// `Runnable`, empty signal queues and default resource limits.
    pub fn new(
        pid: Pid,
        parent: Weak<Process>,
        threads: Vec<Arc<Thread>>,
        executable_path: String,
        user_vm: UserVm,
        root_vmar: Arc<Vmar<Full>>,
        process_group: Weak<ProcessGroup>,
        file_table: Arc<Mutex<FileTable>>,
        fs: Arc<RwLock<FsResolver>>,
        umask: Arc<RwLock<FileCreationMask>>,
        sig_dispositions: Arc<Mutex<SigDispositions>>,
    ) -> Self {
        let children = BTreeMap::new();
        let waiting_children = WaitQueue::new();
        let poll_queue = WaitQueue::new();
        let resource_limits = ResourceLimits::default();
        Self {
            pid,
            threads: Mutex::new(threads),
            executable_path: RwLock::new(executable_path),
            user_vm,
            root_vmar,
            waiting_children,
            poll_queue,
            exit_code: AtomicI32::new(0),
            status: Mutex::new(ProcessStatus::Runnable),
            parent: Mutex::new(parent),
            children: Mutex::new(children),
            process_group: Mutex::new(process_group),
            file_table,
            fs,
            umask,
            sig_dispositions,
            sig_queues: Mutex::new(SigQueues::new()),
            resource_limits: Mutex::new(resource_limits),
        }
    }

    /// Returns the wait queue that is woken when a child exits.
    pub fn waiting_children(&self) -> &WaitQueue {
        &self.waiting_children
    }

    /// Returns the wait queue for I/O events.
    pub fn poll_queue(&self) -> &WaitQueue {
        &self.poll_queue
    }

    /// Creates a user process from `executable_path`, makes its process group
    /// the foreground group of the n_tty, and runs it.
    ///
    /// `executable_path` is expected to be absolute (checked in debug builds).
    pub fn spawn_user_process(
        executable_path: &str,
        argv: Vec<CString>,
        envp: Vec<CString>,
    ) -> Result<Arc<Self>> {
        // spawn user process should give an absolute path
        debug_assert!(executable_path.starts_with('/'));
        let process = Process::create_user_process(executable_path, argv, envp)?;
        // FIXME: How to determine the fg process group?
        let pgid = process.pgid();
        // FIXME: tty should be a parameter?
        let tty = get_n_tty();
        tty.set_fg(pgid);
        process.run();
        Ok(process)
    }

    /// Creates a user process with a fresh root vmar, fs resolver, umask, stdio
    /// file table and signal dispositions, loads the executable into its first
    /// thread, gives it its own process group and registers it in the process
    /// table. The process is not run here.
    fn create_user_process(
        executable_path: &str,
        argv: Vec<CString>,
        envp: Vec<CString>,
    ) -> Result<Arc<Self>> {
        let root_vmar = Vmar::<Full>::new_root()?;
        let fs = FsResolver::new();
        let umask = FileCreationMask::default();
        // The pid doubles as the tid of the first thread.
        let pid = allocate_tid();
        let parent = Weak::new();
        let process_group = Weak::new();
        let user_vm = UserVm::new(&root_vmar)?;
        let file_table = FileTable::new_with_stdio();
        let sig_dispositions = SigDispositions::new();
        let user_process = Arc::new(Process::new(
            pid,
            parent,
            vec![],
            executable_path.to_string(),
            user_vm,
            Arc::new(root_vmar),
            process_group,
            Arc::new(Mutex::new(file_table)),
            Arc::new(RwLock::new(fs)),
            Arc::new(RwLock::new(umask)),
            Arc::new(Mutex::new(sig_dispositions)),
        ));
        let thread = Thread::new_posix_thread_from_executable(
            pid,
            &user_process.root_vmar(),
            &user_process.fs().read(),
            executable_path,
            Arc::downgrade(&user_process),
            argv,
            envp,
        )?;
        user_process.threads().lock().push(thread);
        // Set process group
        user_process.create_and_set_process_group();
        process_table::add_process(user_process.clone());
        Ok(user_process)
    }

    /// Returns the pid of the process.
    pub fn pid(&self) -> Pid {
        self.pid
    }

    /// Returns the process group id of the process, or 0 if the process
    /// currently has no (live) process group.
    pub fn pgid(&self) -> Pgid {
        if let Some(process_group) = self.process_group.lock().upgrade() {
            process_group.pgid()
        } else {
            0
        }
    }

    /// Returns the lock guarding the (weak) process group reference.
    pub fn process_group(&self) -> &Mutex<Weak<ProcessGroup>> {
        &self.process_group
    }

    /// Adds a child process, keyed by the child's pid.
    pub fn add_child(&self, child: Arc<Process>) {
        let child_pid = child.pid();
        self.children.lock().insert(child_pid, child);
    }

    /// Replaces the parent of this process.
    pub fn set_parent(&self, parent: Weak<Process>) {
        *self.parent.lock() = parent;
    }

    /// Sets the process group of this process. If an old process group exists,
    /// this process is removed from it first.
    pub fn set_process_group(&self, process_group: Weak<ProcessGroup>) {
        if let Some(old_process_group) = self.process_group.lock().upgrade() {
            old_process_group.remove_process(self.pid());
        }
        *self.process_group.lock() = process_group;
    }

    /// Returns the file table.
    pub fn file_table(&self) -> &Arc<Mutex<FileTable>> {
        &self.file_table
    }

    /// Returns the fs resolver.
    pub fn fs(&self) -> &Arc<RwLock<FsResolver>> {
        &self.fs
    }

    /// Returns the file creation mask.
    pub fn umask(&self) -> &Arc<RwLock<FileCreationMask>> {
        &self.umask
    }

    /// Creates a new process group for this process, makes it the process
    /// group of this process and adds it to the global table.
    fn create_and_set_process_group(self: &Arc<Self>) {
        let process_group = Arc::new(ProcessGroup::new(self.clone()));
        self.set_process_group(Arc::downgrade(&process_group));
        process_table::add_process_group(process_group);
    }

    /// Returns the parent process, if it is still alive.
    pub fn parent(&self) -> Option<Arc<Process>> {
        self.parent.lock().upgrade()
    }

    /// Exits the thread group (the process).
    ///
    /// Marks the process as zombie, stores the exit code, exits all threads,
    /// moves all children to the init process (if one is registered), then
    /// enqueues SIGCHLD on the parent and wakes the parent's child wait queue.
    pub fn exit_group(&self, exit_code: i32) {
        debug!("exit group was called");
        self.status.lock().set_zombie();
        self.exit_code.store(exit_code, Ordering::Relaxed);
        for thread in &*self.threads.lock() {
            thread.exit();
        }
        // move children to the init process
        if !self.is_init_process() {
            if let Some(init_process) = get_init_process() {
                // Take the whole map out first so that our own `children` lock
                // is not held while the init process's `children` lock is taken.
                let orphans = core::mem::take(&mut *self.children.lock());
                for (_, child_process) in orphans {
                    child_process.set_parent(Arc::downgrade(&init_process));
                    init_process.add_child(child_process);
                }
            }
        }
        if let Some(parent) = self.parent() {
            // set parent sig child
            let signal = Box::new(KernelSignal::new(SIGCHLD));
            parent.sig_queues().lock().enqueue(signal);
            // wake up parent waiting children, if any
            parent.waiting_children().wake_all();
        }
    }

    /// Returns whether this process is the init process, i.e. the process
    /// that `get_init_process` looks up.
    pub fn is_init_process(&self) -> bool {
        // Must agree with `get_init_process`; otherwise the init process would
        // try to re-parent its children onto itself in `exit_group`.
        self.pid == INIT_PROCESS_PID
    }

    /// Starts running the process by running its only thread.
    pub fn run(&self) {
        let threads = self.threads.lock();
        // when run the process, the process should has only one thread
        debug_assert!(threads.len() == 1);
        let thread = threads[0].clone();
        // should not hold the lock when run thread
        drop(threads);
        thread.run();
    }

    /// Returns the lock guarding the thread list.
    pub fn threads(&self) -> &Mutex<Vec<Arc<Thread>>> {
        &self.threads
    }

    /// Returns the user vm.
    pub fn user_vm(&self) -> &UserVm {
        &self.user_vm
    }

    /// Returns the root vmar.
    pub fn root_vmar(&self) -> &Arc<Vmar<Full>> {
        &self.root_vmar
    }

    /// Returns the user heap.
    pub fn user_heap(&self) -> &UserHeap {
        self.user_vm.user_heap()
    }

    /// Frees the zombie child with the given pid and returns its exit code.
    ///
    /// Destroys the child's root vmar, removes its threads from the thread
    /// table, removes it from the process table and from its process group.
    ///
    /// # Panics
    ///
    /// Panics if `pid` is not a child of this process, if the child is not a
    /// zombie, or if destroying the child's vmar fails.
    pub fn reap_zombie_child(&self, pid: Pid) -> i32 {
        let child_process = self.children.lock().remove(&pid).unwrap();
        assert!(child_process.status().lock().is_zombie());
        child_process.root_vmar().destroy_all().unwrap();
        for thread in &*child_process.threads.lock() {
            thread_table::remove_thread(thread.tid());
        }
        process_table::remove_process(child_process.pid());
        if let Some(process_group) = child_process.process_group().lock().upgrade() {
            process_group.remove_process(child_process.pid);
        }
        child_process.exit_code().load(Ordering::SeqCst)
    }

    /// Returns the lock guarding the children map.
    pub fn children(&self) -> &Mutex<BTreeMap<Pid, Arc<Process>>> {
        &self.children
    }

    /// Returns the exit code cell.
    pub fn exit_code(&self) -> &AtomicI32 {
        &self.exit_code
    }

    /// Returns whether the process has at least one child process.
    pub fn has_child(&self) -> bool {
        !self.children.lock().is_empty()
    }

    /// Returns the lock guarding the executable path.
    pub fn executable_path(&self) -> &RwLock<String> {
        &self.executable_path
    }

    /// Returns the lock guarding the process status.
    pub fn status(&self) -> &Mutex<ProcessStatus> {
        &self.status
    }

    /// Returns the lock guarding the resource limits.
    pub fn resource_limits(&self) -> &Mutex<ResourceLimits> {
        &self.resource_limits
    }

    /// Returns the signal dispositions.
    pub fn sig_dispositions(&self) -> &Arc<Mutex<SigDispositions>> {
        &self.sig_dispositions
    }

    /// Returns the lock guarding the process-level signal queues.
    pub fn sig_queues(&self) -> &Mutex<SigQueues> {
        &self.sig_queues
    }

    /// Enqueues a signal on the process-level queues, unless the process is
    /// already a zombie (in which case the signal is dropped).
    pub fn enqueue_signal(&self, signal: Box<dyn Signal>) {
        if !self.status().lock().is_zombie() {
            self.sig_queues.lock().enqueue(signal);
        }
    }
}
/// Looks up the process registered under `INIT_PROCESS_PID` in the process
/// table and returns whatever `pid_to_process` yields for it.
pub fn get_init_process() -> Option<Arc<Process>> {
    let init_pid = INIT_PROCESS_PID;
    process_table::pid_to_process(init_pid)
}

View File

@ -0,0 +1,109 @@
use jinux_frame::user::UserSpace;
use crate::{
prelude::*,
process::{
posix_thread::name::ThreadName,
signal::{sig_mask::SigMask, sig_queues::SigQueues},
Process,
},
thread::{status::ThreadStatus, task::create_new_user_task, thread_table, Thread, Tid},
};
use super::PosixThread;
/// The builder to build a posix thread
pub struct PosixThreadBuilder {
// The essential part
/// Tid of the thread to build.
tid: Tid,
/// User space passed to the task created for the thread.
user_space: Arc<UserSpace>,
/// The owning process; an empty `Weak` until `process()` is called.
process: Weak<Process>,
// Optional part
/// Thread name; `None` by default.
thread_name: Option<ThreadName>,
/// Initial set_child_tid address; 0 by default.
set_child_tid: Vaddr,
/// Initial clear_child_tid address; 0 by default.
clear_child_tid: Vaddr,
/// Initial signal mask; empty by default.
sig_mask: SigMask,
/// Thread-level signal queues; always created fresh by `new`.
sig_queues: SigQueues,
/// Whether the thread is the main thread; `true` by default.
is_main_thread: bool,
}
impl PosixThreadBuilder {
    /// Starts a builder for the thread `tid` running in `user_space`.
    ///
    /// Defaults: no process, no name, both tid addresses 0, an empty signal
    /// mask, fresh signal queues, and `is_main_thread = true`.
    pub fn new(tid: Tid, user_space: Arc<UserSpace>) -> Self {
        Self {
            tid,
            user_space,
            process: Weak::new(),
            thread_name: None,
            set_child_tid: 0,
            clear_child_tid: 0,
            sig_mask: SigMask::new_empty(),
            sig_queues: SigQueues::new(),
            is_main_thread: true,
        }
    }

    /// Sets the process the thread belongs to.
    pub fn process(self, process: Weak<Process>) -> Self {
        Self { process, ..self }
    }

    /// Sets the thread name.
    pub fn thread_name(self, thread_name: Option<ThreadName>) -> Self {
        Self {
            thread_name,
            ..self
        }
    }

    /// Sets the initial set_child_tid address.
    pub fn set_child_tid(self, set_child_tid: Vaddr) -> Self {
        Self {
            set_child_tid,
            ..self
        }
    }

    /// Sets the initial clear_child_tid address.
    pub fn clear_child_tid(self, clear_child_tid: Vaddr) -> Self {
        Self {
            clear_child_tid,
            ..self
        }
    }

    /// Sets whether the thread is the main thread of its process.
    pub fn is_main_thread(self, is_main_thread: bool) -> Self {
        Self {
            is_main_thread,
            ..self
        }
    }

    /// Sets the initial signal mask.
    pub fn sig_mask(self, sig_mask: SigMask) -> Self {
        Self { sig_mask, ..self }
    }

    /// Builds the thread and registers it in the thread table.
    ///
    /// The thread starts in `ThreadStatus::Init`, with no signal context and
    /// no robust list.
    pub fn build(self) -> Arc<Thread> {
        let builder = self;
        let new_thread = Arc::new_cyclic(move |weak_thread| {
            // The task keeps a weak back-reference to the thread being built.
            let task = create_new_user_task(builder.user_space, weak_thread.clone());
            let status = ThreadStatus::Init;
            let posix_thread = PosixThread {
                process: builder.process,
                is_main_thread: builder.is_main_thread,
                name: Mutex::new(builder.thread_name),
                set_child_tid: Mutex::new(builder.set_child_tid),
                clear_child_tid: Mutex::new(builder.clear_child_tid),
                sig_mask: Mutex::new(builder.sig_mask),
                sig_queues: Mutex::new(builder.sig_queues),
                sig_context: Mutex::new(None),
                robust_list: Mutex::new(None),
            };
            Thread::new(builder.tid, task, posix_thread, status)
        });
        thread_table::add_thread(new_thread.clone());
        new_thread
    }
}

View File

@ -0,0 +1,438 @@
use core::sync::atomic::{AtomicBool, Ordering};
use jinux_frame::cpu::num_cpus;
use crate::{
prelude::*,
thread::{Thread, Tid},
util::read_val_from_user,
};
/// Bit mask attached to a futex waiter; a wake only matches waiters whose
/// bitset shares at least one bit with the wake's bitset.
type FutexBitSet = u32;
/// Shared, lockable handle to one futex bucket.
type FutexBucketRef = Arc<Mutex<FutexBucket>>;
/// Bits of the futex op argument that select the operation.
const FUTEX_OP_MASK: u32 = 0x0000_000F;
/// Bits of the futex op argument that carry flags.
const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0;
/// Bitset with every bit set; used by the non-bitset wait/wake entry points.
const FUTEX_BITSET_MATCH_ANY: FutexBitSet = 0xFFFF_FFFF;
/// Futex wait: delegates to `futex_wait_bitset` with a bitset that matches
/// any wake.
pub fn futex_wait(futex_addr: u64, futex_val: i32, timeout: &Option<FutexTimeout>) -> Result<()> {
    let addr = futex_addr as _;
    futex_wait_bitset(addr, futex_val, timeout, FUTEX_BITSET_MATCH_ANY)
}
/// Futex wait with a bitset.
///
/// Blocks the calling thread on the futex word at `futex_addr` if the word
/// still equals `futex_val`. The waiter is only woken by wakes whose bitset
/// shares at least one bit with `bitset`.
///
/// NOTE: `timeout` is currently only logged; the wait is not bounded by it.
///
/// # Errors
///
/// * `EINVAL` if `bitset` is zero (such a waiter could never be woken).
/// * `EAGAIN` if the futex word does not equal `futex_val`.
pub fn futex_wait_bitset(
    futex_addr: Vaddr,
    futex_val: i32,
    timeout: &Option<FutexTimeout>,
    bitset: FutexBitSet,
) -> Result<()> {
    debug!(
        "futex_wait_bitset addr: {:#x}, val: {}, timeout: {:?}, bitset: {:#x}",
        futex_addr, futex_val, timeout, bitset
    );
    if bitset == 0 {
        return_errno_with_message!(Errno::EINVAL, "futex bitset must not be zero");
    }
    let futex_key = FutexKey::new(futex_addr);
    let (_, futex_bucket_ref) = FUTEX_BUCKETS.get_bucket(futex_key);
    // Hold the bucket lock across the value check and the enqueue so that a
    // concurrent wake cannot slip in between them.
    let mut futex_bucket = futex_bucket_ref.lock();
    if futex_key.load_val() != futex_val {
        // futex(2): a value mismatch is reported as EAGAIN, so that user space
        // re-reads the word and retries instead of treating it as a usage error.
        return_errno_with_message!(Errno::EAGAIN, "futex value does not match");
    }
    let futex_item = FutexItem::new(futex_key, bitset);
    futex_bucket.enqueue_item(futex_item.clone());
    // Release the bucket lock before waiting, otherwise no waker could get in.
    drop(futex_bucket);
    // Wait on the futex item
    futex_item.wait();
    Ok(())
}
/// Futex wake: delegates to `futex_wake_bitset` with a bitset that matches
/// every waiter, and returns its result.
pub fn futex_wake(futex_addr: Vaddr, max_count: usize) -> Result<usize> {
    let match_any = FUTEX_BITSET_MATCH_ANY;
    futex_wake_bitset(futex_addr, max_count, match_any)
}
/// Do futex wake with bitset
pub fn futex_wake_bitset(
futex_addr: Vaddr,
max_count: usize,
bitset: FutexBitSet,
) -> Result<usize> {
debug!(
"futex_wake_bitset addr: {:#x}, max_count: {}, bitset: {:#x}",
futex_addr as usize, max_count, bitset
);
let futex_key = FutexKey::new(futex_addr);
let (_, futex_bucket_ref) = FUTEX_BUCKETS.get_bucket(futex_key);
let mut futex_bucket = futex_bucket_ref.lock();
let res = futex_bucket.dequeue_and_wake_items(futex_key, max_count, bitset);
// debug!("futex wake bitset succeeds, res = {}", res);
drop(futex_bucket);
// for _ in 0..res {
// Thread::yield_now();
// }
Ok(res)
}
/// Do futex requeue
///
/// Wakes at most `max_nwakes` waiters on `futex_addr`, then moves at most
/// `max_nrequeues` of the remaining waiters on `futex_addr` so that they wait
/// on `futex_new_addr` instead. Returns the number of waiters woken (requeued
/// waiters are not counted).
///
/// If both addresses are equal this is just a `futex_wake`.
pub fn futex_requeue(
futex_addr: Vaddr,
max_nwakes: usize,
max_nrequeues: usize,
futex_new_addr: Vaddr,
) -> Result<usize> {
if futex_new_addr == futex_addr {
return futex_wake(futex_addr, max_nwakes);
}
let futex_key = FutexKey::new(futex_addr);
let futex_new_key = FutexKey::new(futex_new_addr);
let (bucket_idx, futex_bucket_ref) = FUTEX_BUCKETS.get_bucket(futex_key);
let (new_bucket_idx, futex_new_bucket_ref) = FUTEX_BUCKETS.get_bucket(futex_new_key);
let nwakes = {
if bucket_idx == new_bucket_idx {
// Both keys hash to the same bucket: one lock is enough, and requeueing
// only has to rewrite the key of the items in place.
let mut futex_bucket = futex_bucket_ref.lock();
let nwakes =
futex_bucket.dequeue_and_wake_items(futex_key, max_nwakes, FUTEX_BITSET_MATCH_ANY);
futex_bucket.update_item_keys(futex_key, futex_new_key, max_nrequeues);
drop(futex_bucket);
nwakes
} else {
// Two different buckets: always lock the bucket with the smaller index
// first, so that two concurrent requeues cannot deadlock on each other.
let (mut futex_bucket, mut futex_new_bucket) = {
if bucket_idx < new_bucket_idx {
let futex_bucket = futex_bucket_ref.lock();
let futext_new_bucket = futex_new_bucket_ref.lock();
(futex_bucket, futext_new_bucket)
} else {
// bucket_idx > new_bucket_idx
let futex_new_bucket = futex_new_bucket_ref.lock();
let futex_bucket = futex_bucket_ref.lock();
(futex_bucket, futex_new_bucket)
}
};
let nwakes =
futex_bucket.dequeue_and_wake_items(futex_key, max_nwakes, FUTEX_BITSET_MATCH_ANY);
// Move the remaining matching waiters to the other bucket under the new key.
futex_bucket.requeue_items_to_another_bucket(
futex_key,
&mut futex_new_bucket,
futex_new_key,
max_nrequeues,
);
nwakes
}
};
Ok(nwakes)
}
lazy_static! {
// Use the same count as linux kernel to keep the same performance:
// 256 buckets per CPU, rounded up to a power of two.
static ref BUCKET_COUNT: usize = ((1<<8)* num_cpus()).next_power_of_two() as _;
// BUCKET_COUNT is a power of two, so `& BUCKET_MASK` yields a valid bucket index.
static ref BUCKET_MASK: usize = *BUCKET_COUNT - 1;
// The global table of futex buckets.
static ref FUTEX_BUCKETS: FutexBucketVec = FutexBucketVec::new(*BUCKET_COUNT);
}
/// Timeout for a futex wait. Currently an empty placeholder type.
#[derive(Debug, Clone)]
pub struct FutexTimeout {}
impl FutexTimeout {
/// Not implemented yet: calling this panics via `todo!()`.
pub fn new() -> Self {
todo!()
}
}
/// A fixed-size table of futex buckets.
struct FutexBucketVec {
    vec: Vec<FutexBucketRef>,
}

impl FutexBucketVec {
    /// Creates a table holding `size` empty buckets.
    pub fn new(size: usize) -> FutexBucketVec {
        let mut vec = Vec::with_capacity(size);
        vec.extend((0..size).map(|_| Arc::new(Mutex::new(FutexBucket::new()))));
        FutexBucketVec { vec }
    }

    /// Returns the index of the bucket that `key` hashes to, together with a
    /// handle to that bucket.
    pub fn get_bucket(&self, key: FutexKey) -> (usize, FutexBucketRef) {
        // The addr is the multiples of 4, so we ignore the last 2 bits
        let word_index = key.addr() >> 2;
        // simple hash
        let hash = word_index / self.size();
        let index = *BUCKET_MASK & hash;
        let bucket = self.vec[index].clone();
        (index, bucket)
    }

    /// Number of buckets in the table.
    fn size(&self) -> usize {
        self.vec.len()
    }
}
/// One bucket of the futex table: the queue of waiters whose keys hash here.
struct FutexBucket {
    queue: VecDeque<FutexItem>,
}

impl FutexBucket {
    /// Creates an empty bucket.
    pub fn new() -> FutexBucket {
        let queue = VecDeque::new();
        FutexBucket { queue }
    }

    /// Appends a waiter to the back of the queue.
    pub fn enqueue_item(&mut self, item: FutexItem) {
        self.queue.push_back(item);
    }

    /// Removes the first queued waiter equal to `item`, if there is one.
    pub fn dequeue_item(&mut self, item: &FutexItem) {
        let found = self.queue.iter().position(|queued| *queued == *item);
        if let Some(index) = found {
            self.queue.remove(index).unwrap();
        }
    }

    /// Removes and wakes, in queue order, up to `max_count` waiters that wait
    /// on `key` and whose bitset intersects `bitset`. Returns how many waiters
    /// were woken.
    pub fn dequeue_and_wake_items(
        &mut self,
        key: FutexKey,
        max_count: usize,
        bitset: FutexBitSet,
    ) -> usize {
        let mut woken = 0;
        let mut to_wake = Vec::new();
        self.queue.retain(|item| {
            let selected = woken < max_count && item.key == key && (bitset & item.bitset) != 0;
            if selected {
                to_wake.push(item.clone());
                woken += 1;
            }
            // Keep everything that was not selected for wake-up.
            !selected
        });
        FutexItem::batch_wake(&to_wake);
        woken
    }

    /// Rewrites the key of up to `max_count` waiters from `key` to `new_key`,
    /// in queue order. The waiters stay in this bucket.
    pub fn update_item_keys(&mut self, key: FutexKey, new_key: FutexKey, max_count: usize) {
        let matching = self.queue.iter_mut().filter(|item| item.key == key);
        for item in matching.take(max_count) {
            item.key = new_key;
        }
    }

    /// Moves up to `max_nrequeues` waiters on `key` from this bucket to the
    /// back of `another`, re-keyed to `new_key`, in queue order.
    pub fn requeue_items_to_another_bucket(
        &mut self,
        key: FutexKey,
        another: &mut Self,
        new_key: FutexKey,
        max_nrequeues: usize,
    ) {
        let mut moved = 0;
        self.queue.retain(|item| {
            let selected = moved < max_nrequeues && item.key == key;
            if selected {
                let mut requeued = item.clone();
                requeued.key = new_key;
                another.enqueue_item(requeued);
                moved += 1;
            }
            !selected
        });
    }
}
/// One queued waiter: the futex it waits on, its wake bitset, and the shared
/// waiter state that is flipped on wake-up.
#[derive(Debug, PartialEq, Clone)]
struct FutexItem {
    key: FutexKey,
    bitset: FutexBitSet,
    waiter: FutexWaiterRef,
}

impl FutexItem {
    /// Creates an item for `key` and `bitset` with a freshly created waiter.
    pub fn new(key: FutexKey, bitset: FutexBitSet) -> Self {
        let waiter = Arc::new(FutexWaiter::new());
        FutexItem {
            key,
            bitset,
            waiter,
        }
    }

    /// Wakes the waiter of this item.
    pub fn wake(&self) {
        self.waiter.wake();
    }

    /// Waits on the waiter of this item.
    pub fn wait(&self) {
        self.waiter.wait();
    }

    /// Returns the shared waiter of this item.
    pub fn waiter(&self) -> &FutexWaiterRef {
        &self.waiter
    }

    /// Wakes the waiters of all `items`, in order.
    pub fn batch_wake(items: &[FutexItem]) {
        let waiters: Vec<&FutexWaiterRef> = items.iter().map(FutexItem::waiter).collect();
        FutexWaiter::batch_wake(&waiters);
    }
}
/// The address of a futex word, used to tell different futexes apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct FutexKey(Vaddr);

impl FutexKey {
    /// Wraps the address of a futex word.
    pub fn new(futex_addr: Vaddr) -> Self {
        Self(futex_addr as _)
    }

    /// Reads the current value of the futex word from user memory.
    ///
    /// # Panics
    ///
    /// Panics if the read from user memory fails.
    pub fn load_val(&self) -> i32 {
        // FIXME: how to implement a atomic load?
        warn!("implement an atomic load");
        let value: i32 = read_val_from_user(self.0).unwrap();
        value
    }

    /// Returns the wrapped address.
    pub fn addr(&self) -> Vaddr {
        let FutexKey(addr) = *self;
        addr
    }
}
// The implementation is from occlum
/// The futex operations, with discriminants equal to their numeric op codes.
#[derive(PartialEq, Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum FutexOp {
    FUTEX_WAIT = 0,
    FUTEX_WAKE = 1,
    FUTEX_FD = 2,
    FUTEX_REQUEUE = 3,
    FUTEX_CMP_REQUEUE = 4,
    FUTEX_WAKE_OP = 5,
    FUTEX_LOCK_PI = 6,
    FUTEX_UNLOCK_PI = 7,
    FUTEX_TRYLOCK_PI = 8,
    FUTEX_WAIT_BITSET = 9,
    FUTEX_WAKE_BITSET = 10,
}

impl FutexOp {
    /// Maps a numeric op code (0..=10) to its `FutexOp`.
    ///
    /// Any other value is rejected with `EINVAL`.
    pub fn from_u32(bits: u32) -> Result<FutexOp> {
        let op = match bits {
            0 => FutexOp::FUTEX_WAIT,
            1 => FutexOp::FUTEX_WAKE,
            2 => FutexOp::FUTEX_FD,
            3 => FutexOp::FUTEX_REQUEUE,
            4 => FutexOp::FUTEX_CMP_REQUEUE,
            5 => FutexOp::FUTEX_WAKE_OP,
            6 => FutexOp::FUTEX_LOCK_PI,
            7 => FutexOp::FUTEX_UNLOCK_PI,
            8 => FutexOp::FUTEX_TRYLOCK_PI,
            9 => FutexOp::FUTEX_WAIT_BITSET,
            10 => FutexOp::FUTEX_WAKE_BITSET,
            _ => return_errno_with_message!(Errno::EINVAL, "Unknown futex op"),
        };
        Ok(op)
    }
}
bitflags! {
pub struct FutexFlags : u32 {
const FUTEX_PRIVATE = 128;
const FUTEX_CLOCK_REALTIME = 256;
}
}
impl FutexFlags {
pub fn from_u32(bits: u32) -> Result<FutexFlags> {
FutexFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown futex flags"))
}
}
/// Splits a raw futex op argument into its operation (the bits under
/// `FUTEX_OP_MASK`) and its flags (the bits under `FUTEX_FLAGS_MASK`).
///
/// Fails if either part cannot be parsed; the operation is parsed first.
pub fn futex_op_and_flags_from_u32(bits: u32) -> Result<(FutexOp, FutexFlags)> {
    let op = FutexOp::from_u32(bits & FUTEX_OP_MASK)?;
    let flags = FutexFlags::from_u32(bits & FUTEX_FLAGS_MASK)?;
    Ok((op, flags))
}
type FutexWaiterRef = Arc<FutexWaiter>;

/// The wake-up state of one thread waiting on a futex.
#[derive(Debug)]
struct FutexWaiter {
    /// Set to `true` by `wake`; `wait` spins (yielding) until it is set.
    is_woken: AtomicBool,
    /// Tid of the thread that created the waiter, i.e. the thread that waits.
    tid: Tid,
}

impl PartialEq for FutexWaiter {
    /// Two waiters are equal if they belong to the same thread.
    fn eq(&self, other: &Self) -> bool {
        self.tid == other.tid
    }
}

impl FutexWaiter {
    /// Creates a not-yet-woken waiter for the current thread.
    pub fn new() -> Self {
        Self {
            is_woken: AtomicBool::new(false),
            tid: current_thread!().tid(),
        }
    }

    /// Yields the current thread until the waiter has been woken.
    ///
    /// Returns immediately if called from a thread other than the one that
    /// created the waiter.
    pub fn wait(&self) {
        let current_thread = current_thread!();
        if current_thread.tid() != self.tid {
            return;
        }
        // `is_woken` already starts out as `false` in `new`. It must not be
        // reset here: the waiter is enqueued before `wait` is called, so a
        // wake may already have happened, and resetting the flag would lose it.
        while !self.is_woken() {
            Thread::yield_now();
        }
    }

    /// Marks the waiter as woken.
    pub fn wake(&self) {
        self.is_woken.store(true, Ordering::SeqCst);
    }

    /// Returns whether the waiter has been woken.
    pub fn is_woken(&self) -> bool {
        self.is_woken.load(Ordering::SeqCst)
    }

    /// Wakes all `waiters`, in order.
    pub fn batch_wake(waiters: &[&FutexWaiterRef]) {
        for waiter in waiters {
            waiter.wake();
        }
    }
}

View File

@ -0,0 +1,150 @@
use crate::{
prelude::*,
process::posix_thread::{futex::futex_wake, robust_list::wake_robust_futex},
thread::{thread_table, Tid},
util::write_val_to_user,
};
use self::{name::ThreadName, robust_list::RobustListHead};
use super::{
signal::{sig_mask::SigMask, sig_queues::SigQueues},
Process,
};
pub mod builder;
pub mod futex;
pub mod name;
pub mod posix_thread_ext;
pub mod robust_list;
/// The posix-specific part of a thread.
pub struct PosixThread {
// Immutable part
/// The process this thread belongs to.
process: Weak<Process>,
/// Whether this thread is the main thread of its process.
is_main_thread: bool,
// Mutable part
/// The thread name, if one has been set.
name: Mutex<Option<ThreadName>>,
// Linux specific attributes.
// https://man7.org/linux/man-pages/man2/set_tid_address.2.html
/// The set_child_tid address (0 by default in the builder).
set_child_tid: Mutex<Vaddr>,
/// The clear_child_tid address; if non-zero at exit, it is zeroed and a
/// futex wake is performed on it.
clear_child_tid: Mutex<Vaddr>,
/// Head of the robust futex list, if one has been registered.
robust_list: Mutex<Option<RobustListHead>>,
// signal
/// blocked signals
sig_mask: Mutex<SigMask>,
/// thread-directed sigqueue
sig_queues: Mutex<SigQueues>,
/// Signal handler ucontext address
/// FIXME: This field may be removed. For glibc applications with RESTORER flag set, the sig_context is always equals with rsp.
sig_context: Mutex<Option<Vaddr>>,
}
impl PosixThread {
    /// Returns the process this thread belongs to.
    ///
    /// # Panics
    ///
    /// Panics if the process has already been dropped.
    pub fn process(&self) -> Arc<Process> {
        self.process.upgrade().unwrap()
    }

    /// Returns the lock guarding the thread name.
    pub fn thread_name(&self) -> &Mutex<Option<ThreadName>> {
        &self.name
    }

    /// Returns the lock guarding the set_child_tid address.
    pub fn set_child_tid(&self) -> &Mutex<Vaddr> {
        &self.set_child_tid
    }

    /// Returns the lock guarding the clear_child_tid address.
    pub fn clear_child_tid(&self) -> &Mutex<Vaddr> {
        &self.clear_child_tid
    }

    /// Returns the lock guarding the signal mask.
    pub fn sig_mask(&self) -> &Mutex<SigMask> {
        &self.sig_mask
    }

    /// Returns the lock guarding the thread-directed signal queues.
    pub fn sig_queues(&self) -> &Mutex<SigQueues> {
        &self.sig_queues
    }

    /// Returns the lock guarding the signal context address.
    pub fn sig_context(&self) -> &Mutex<Option<Vaddr>> {
        &self.sig_context
    }

    /// Returns the lock guarding the robust list head.
    pub fn robust_list(&self) -> &Mutex<Option<RobustListHead>> {
        &self.robust_list
    }

    /// Whether the thread is main thread. For Posix thread, If a thread's tid is equal to pid, it's main thread.
    pub fn is_main_thread(&self) -> bool {
        self.is_main_thread
    }

    /// Whether no thread of the process is still running, i.e. every thread
    /// of the process has exited.
    ///
    /// # Panics
    ///
    /// Panics if the process has already been dropped.
    pub fn is_last_thread(&self) -> bool {
        let process = self.process.upgrade().unwrap();
        let threads = process.threads().lock();
        threads
            .iter()
            .all(|thread| thread.status().lock().is_exited())
    }

    /// Walks the robust futex list, marking futex dead and wake waiters.
    /// It corresponds to Linux's exit_robust_list(), errors are silently ignored.
    pub fn wake_robust_list(&self, tid: Tid) {
        let mut robust_list = self.robust_list.lock();
        let list_head = match *robust_list {
            None => {
                return;
            }
            Some(robust_list_head) => robust_list_head,
        };
        debug!("wake the rubust_list: {:?}", list_head);
        for futex_addr in list_head.futexes() {
            // The list lives in user memory and may be bogus. A failure on one
            // entry must not bring the kernel down, so it is only logged.
            if wake_robust_futex(futex_addr, tid).is_err() {
                debug!("wake robust futex failed, ignored");
            }
        }
        debug!("wake robust list finished");
        *robust_list = None;
    }

    /// Exits the thread.
    ///
    /// If a clear_child_tid address is set, writes zero to it and wakes one
    /// waiter on it. Then wakes the robust futex list, removes the thread from
    /// the thread table unless `tid` is the pid of the process, and exits the
    /// whole process if this is the main thread or the last running thread.
    ///
    /// Posix thread does not contains tid info. So we require tid as a parameter.
    pub fn exit(&self, tid: Tid, exit_code: i32) -> Result<()> {
        {
            let mut clear_ctid = self.clear_child_tid().lock();
            // If clear_ctid != 0, write zero to the clear_ctid addr and do a futex wake.
            debug!("wake up ctid");
            if *clear_ctid != 0 {
                debug!("write ctid");
                // FIXME: the correct write length?
                debug!("ctid = 0x{:x}", *clear_ctid);
                // The zero must be visible BEFORE the wake (set_tid_address(2)):
                // a woken waiter re-reads the word and goes back to sleep if it
                // is still non-zero. The address is user-controlled, so a failed
                // write is ignored rather than allowed to panic the kernel.
                if write_val_to_user(*clear_ctid, &0u32).is_err() {
                    debug!("write ctid failed, ignored");
                }
                debug!("futex wake");
                futex_wake(*clear_ctid, 1)?;
                debug!("clear ctid");
                *clear_ctid = 0;
            }
            debug!("wake up ctid succeeds");
            // The lock is released here, before the rest of the exit path runs.
        }
        // exit the robust list: walk the robust list; mark futex words as dead and do futex wake
        self.wake_robust_list(tid);
        if tid != self.process().pid {
            // Only non-main threads are removed here. The main thread is
            // removed when the whole process is reaped.
            thread_table::remove_thread(tid);
        }
        if self.is_main_thread() || self.is_last_thread() {
            // exit current process.
            debug!("self is main thread or last thread");
            debug!("main thread: {}", self.is_main_thread());
            debug!("last thread: {}", self.is_last_thread());
            current!().exit_group(exit_code);
        }
        debug!("perform futex wake");
        futex_wake(Arc::as_ptr(&self.process()) as Vaddr, 1)?;
        Ok(())
    }
}

View File

@ -0,0 +1,48 @@
use crate::prelude::*;
pub const MAX_THREAD_NAME_LEN: usize = 256;
#[derive(Debug)]
pub struct ThreadName {
inner: [u8; MAX_THREAD_NAME_LEN],
count: usize,
}
impl ThreadName {
pub fn new() -> Self {
ThreadName {
inner: [0; MAX_THREAD_NAME_LEN],
count: 0,
}
}
pub fn new_from_executable_path(executable_path: &str) -> Result<Self> {
let mut thread_name = ThreadName::new();
let executable_file_name = executable_path
.split('/')
.last()
.ok_or(Error::with_message(Errno::EINVAL, "invalid elf path"))?;
let name = CString::new(executable_file_name)?;
thread_name.set_name(&name)?;
Ok(thread_name)
}
pub fn set_name(&mut self, name: &CStr) -> Result<()> {
let bytes = name.to_bytes_with_nul();
let bytes_len = bytes.len();
if bytes_len > MAX_THREAD_NAME_LEN {
// if len > MAX_THREAD_NAME_LEN, truncate it.
self.count = MAX_THREAD_NAME_LEN;
self.inner[..MAX_THREAD_NAME_LEN].clone_from_slice(&bytes[..MAX_THREAD_NAME_LEN]);
self.inner[MAX_THREAD_NAME_LEN - 1] = 0;
return Ok(());
}
self.count = bytes_len;
self.inner[..bytes_len].clone_from_slice(bytes);
Ok(())
}
pub fn name(&self) -> Result<Option<&CStr>> {
Ok(Some(&(CStr::from_bytes_until_nul(&self.inner)?)))
}
}

View File

@ -0,0 +1,61 @@
use jinux_frame::{cpu::UserContext, user::UserSpace};
use crate::{
fs::fs_resolver::FsResolver,
prelude::*,
process::{program_loader::load_program_to_root_vmar, Process},
rights::Full,
thread::{Thread, Tid},
vm::vmar::Vmar,
};
use super::{builder::PosixThreadBuilder, name::ThreadName, PosixThread};
/// Extension methods that relate a thread to its `PosixThread` data.
pub trait PosixThreadExt {
/// Returns the `PosixThread` data attached to this thread, if any.
fn as_posix_thread(&self) -> Option<&PosixThread>;
/// Loads the executable at `executable_path` into `root_vmar` and builds a
/// new posix thread with the given `tid` that starts at the program's
/// entry point.
///
/// `argv` and `envp` are handed to the program loader; `process` is the
/// process the new thread is attached to.
fn new_posix_thread_from_executable(
tid: Tid,
root_vmar: &Vmar<Full>,
fs_resolver: &FsResolver,
executable_path: &str,
process: Weak<Process>,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Result<Arc<Self>>;
}
impl PosixThreadExt for Thread {
    /// Downcasts the thread's data to `PosixThread`; `None` if the data is
    /// of another type.
    fn as_posix_thread(&self) -> Option<&PosixThread> {
        self.data().downcast_ref::<PosixThread>()
    }

    /// This function should only be called when launch shell()
    ///
    /// Loads the program into `root_vmar`, points a fresh user context at
    /// the loaded entry point and user stack top, names the thread after the
    /// executable file and builds the thread.
    fn new_posix_thread_from_executable(
        tid: Tid,
        root_vmar: &Vmar<Full>,
        fs_resolver: &FsResolver,
        executable_path: &str,
        process: Weak<Process>,
        argv: Vec<CString>,
        envp: Vec<CString>,
    ) -> Result<Arc<Self>> {
        let path = executable_path.to_string();
        // NOTE(review): the meaning of the trailing `1` is defined by
        // `load_program_to_root_vmar` and is not visible here.
        let (_, load_info) = load_program_to_root_vmar(root_vmar, path, argv, envp, fs_resolver, 1)?;

        let mut user_ctx = UserContext::default();
        user_ctx.set_rip(load_info.entry_point() as _);
        user_ctx.set_rsp(load_info.user_stack_top() as _);
        let user_space = Arc::new(UserSpace::new(root_vmar.vm_space().clone(), user_ctx));

        let name = ThreadName::new_from_executable_path(executable_path)?;
        let thread = PosixThreadBuilder::new(tid, user_space)
            .thread_name(Some(name))
            .process(process)
            .build();
        Ok(thread)
    }
}

View File

@ -0,0 +1,152 @@
//! The implementation of robust list is from occlum.
use crate::{
prelude::*,
process::{posix_thread::futex::futex_wake, Pid},
util::{read_val_from_user, write_val_to_user},
};
/// One link of the robust list as laid out in user memory.
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
struct RobustList {
    /// Address of the next entry.
    next: Vaddr, // *const Robust list
}

/// The robust list head as laid out in user memory.
#[repr(C)]
#[derive(Clone, Copy, Debug, Pod)]
pub struct RobustListHead {
    /// Linked list of lock entries
    ///
    /// If it points to the head of the list, then it is the end of the list.
    /// If it is an invalid user space pointer or a null pointer, stop iterating
    /// the list.
    list: RobustList,
    /// Specifies the offset from the address of the lock entry to the address
    /// of the futex.
    futex_offset: isize,
    /// Contains transient copy of the address of the lock entry, during list
    /// insertion and removal.
    list_op_pending: Vaddr, // *const RobustList
}

impl RobustListHead {
    /// Return an iterator for all futexes in the robust list.
    ///
    /// The futex referred to by `list_op_pending`, if any, will be returned as
    /// the last item.
    pub fn futexes(&self) -> FutexIter<'_> {
        FutexIter::new(self)
    }

    /// Return the pending futex address if it exists, i.e. when
    /// `list_op_pending` is non-zero.
    fn pending_futex_addr(&self) -> Option<Vaddr> {
        if self.list_op_pending == 0 {
            None
        } else {
            Some(self.futex_addr(self.list_op_pending))
        }
    }

    /// Get the futex address that belongs to the lock entry at `entry_ptr`.
    ///
    /// Both `entry_ptr` and `futex_offset` originate from user memory, so the
    /// addition wraps instead of tripping an arithmetic-overflow panic. A
    /// bogus result is rejected later, when the address is actually accessed.
    fn futex_addr(&self, entry_ptr: Vaddr) -> Vaddr {
        entry_ptr.wrapping_add_signed(self.futex_offset)
    }
}
/// Iterator over the futex addresses reachable from a [`RobustListHead`].
pub struct FutexIter<'a> {
    /// The list head being walked.
    robust_list: &'a RobustListHead,
    /// User address of the entry to visit next.
    entry_ptr: Vaddr,
    /// Number of entries visited so far; negative once iteration has ended.
    count: isize,
}

impl<'a> FutexIter<'a> {
    /// Starts iterating at the first entry the head points to.
    pub fn new(robust_list: &'a RobustListHead) -> Self {
        Self {
            robust_list,
            entry_ptr: robust_list.list.next,
            count: 0,
        }
    }

    // The `self.count` is normally a positive value used to iterate the list
    // to avoid excessively long or circular list, we use a special value -1
    // to represent the end of the Iterator.
    fn set_end(&mut self) {
        self.count = -1;
    }

    fn is_end(&self) -> bool {
        self.count < 0
    }
}

/// Upper bound on the number of list entries visited by one iteration.
const ROBUST_LIST_LIMIT: isize = 2048;

impl<'a> Iterator for FutexIter<'a> {
    type Item = Vaddr;

    /// Yields the futex address of each list entry, skipping the entry that
    /// equals `list_op_pending`; once the list is exhausted (or the entry
    /// limit is hit) the pending futex address, if any, is yielded last.
    ///
    /// A null entry pointer, or one that cannot be read from user space,
    /// stops the iteration.
    fn next(&mut self) -> Option<Self::Item> {
        if self.is_end() {
            return None;
        }
        // NOTE(review): this compares a user-space entry address with the
        // address of the `RobustListHead` value referenced by this iterator.
        // If that value is a kernel-side copy of the user's head, the two can
        // never be equal and a well-formed circular list only terminates via
        // `ROBUST_LIST_LIMIT` -- confirm against the caller.
        while self.entry_ptr != &self.robust_list.list as *const _ as usize {
            if self.count == ROBUST_LIST_LIMIT {
                break;
            }
            if self.entry_ptr == 0 {
                return None;
            }
            let futex_addr = if self.entry_ptr != self.robust_list.list_op_pending {
                Some(self.robust_list.futex_addr(self.entry_ptr))
            } else {
                None
            };
            // The entry pointer is user-controlled: an unreadable pointer ends
            // the iteration instead of panicking the kernel.
            let robust_list = match read_val_from_user::<RobustList>(self.entry_ptr) {
                Ok(entry) => entry,
                Err(_) => {
                    self.set_end();
                    return None;
                }
            };
            self.entry_ptr = robust_list.next;
            self.count += 1;
            if futex_addr.is_some() {
                return futex_addr;
            }
        }
        self.set_end();
        self.robust_list.pending_futex_addr()
    }
}
// Bit 31 of a futex word; tested below to decide whether a waiter is woken.
const FUTEX_WAITERS: u32 = 0x8000_0000;
// Bit 30 of a futex word; set below when the owning thread has exited.
const FUTEX_OWNER_DIED: u32 = 0x4000_0000;
// Low 30 bits of a futex word; compared against the owner's tid below.
const FUTEX_TID_MASK: u32 = 0x3FFF_FFFF;
/// Wakes up one robust futex owned by the thread `tid`.
///
/// If the tid bits of the futex word at `futex_addr` equal `tid`, the word is
/// rewritten to keep only the `FUTEX_WAITERS` bit and to set
/// `FUTEX_OWNER_DIED`; if the waiters bit is set, one waiter is woken.
/// A futex whose tid bits do not match is left untouched.
///
/// # Errors
///
/// Returns `EINVAL` if `futex_addr` is zero, and propagates errors from the
/// user-memory accesses and from `futex_wake`.
///
/// FIXME: requires atomic operations here
pub fn wake_robust_futex(futex_addr: Vaddr, tid: Pid) -> Result<()> {
let futex_val = {
if futex_addr == 0 {
return_errno_with_message!(Errno::EINVAL, "invalid futext addr");
}
read_val_from_user::<u32>(futex_addr)?
};
let mut old_val = futex_val;
loop {
// This futex may be held by another thread, do nothing
if old_val & FUTEX_TID_MASK != tid as u32 {
break;
}
let new_val = (old_val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
let cur_val = read_val_from_user(futex_addr)?;
if cur_val != new_val {
// The word does not hold the new value yet: remember what is there,
// store the new value and go around again to re-check it.
old_val = cur_val;
write_val_to_user(futex_addr, &new_val)?;
continue;
}
// Wakeup one waiter
if cur_val & FUTEX_WAITERS != 0 {
debug!("wake robust futex addr: {:?}", futex_addr);
futex_wake(futex_addr, 1)?;
}
break;
}
Ok(())
}

View File

@ -0,0 +1,50 @@
use super::{Pgid, Pid, Process};
/// Selects the set of processes an operation applies to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessFilter {
    /// Every process.
    Any,
    /// The process with the given pid.
    WithPid(Pid),
    /// The processes of the process group with the given pgid.
    WithPgid(Pgid),
}

impl ProcessFilter {
    /// Builds a filter from the `which`/`id` pair used by waitid.
    ///
    /// `which` is 0 for any process, 1 for a pid and 2 for a pgid.
    ///
    /// # Panics
    ///
    /// Panics on any other `which` value.
    // used for waitid
    pub fn from_which_and_id(which: u64, id: u64) -> Self {
        // Does not support PID_FD now(which = 3)
        // https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/wait.h#L20
        match which {
            0 => ProcessFilter::Any,
            1 => ProcessFilter::WithPid(id as Pid),
            2 => ProcessFilter::WithPgid(id as Pgid),
            _ => panic!("Unknown id type"),
        }
    }

    /// Builds a filter from the pid argument used by wait4 and kill.
    ///
    /// * `< -1`: the process group whose id is the absolute value of the argument
    /// * `-1`: any process
    /// * `0`: the process group of the current process
    /// * `> 0`: the process with that pid
    // used for wait4 and kill
    pub fn from_id(wait_pid: i32) -> Self {
        // https://man7.org/linux/man-pages/man2/waitpid.2.html
        // https://man7.org/linux/man-pages/man2/kill.2.html
        match wait_pid {
            // wait for any child process
            -1 => ProcessFilter::Any,
            // wait for any child process with same process group ID
            0 => ProcessFilter::WithPgid(Process::current().pgid()),
            // pid > 0. wait for the child whose process ID is equal to the value of pid.
            pid if pid > 0 => ProcessFilter::WithPid(pid as Pid),
            // process group ID is equal to the absolute value of pid.
            // `unsigned_abs` cannot overflow, unlike `-pgid` for `i32::MIN`.
            pgid => ProcessFilter::WithPgid(pgid.unsigned_abs() as Pgid),
        }
    }

    /// Returns whether the process `pid` is selected by this filter.
    ///
    /// Not implemented for `WithPgid` yet.
    pub fn contains_pid(&self, pid: Pid) -> bool {
        match self {
            ProcessFilter::Any => true,
            ProcessFilter::WithPid(filter_pid) => *filter_pid == pid,
            ProcessFilter::WithPgid(_) => todo!(),
        }
    }
}

View File

@ -0,0 +1,93 @@
use super::{
process_table,
signal::signals::{kernel::KernelSignal, user::UserSignal},
Pgid, Pid, Process,
};
use crate::prelude::*;
pub struct ProcessGroup {
inner: Mutex<ProcessGroupInner>,
}
struct ProcessGroupInner {
pgid: Pgid,
processes: BTreeMap<Pid, Arc<Process>>,
leader_process: Option<Arc<Process>>,
}
impl ProcessGroup {
fn default() -> Self {
ProcessGroup {
inner: Mutex::new(ProcessGroupInner {
pgid: 0,
processes: BTreeMap::new(),
leader_process: None,
}),
}
}
pub fn new(process: Arc<Process>) -> Self {
let process_group = ProcessGroup::default();
let pid = process.pid();
process_group.set_pgid(pid);
process_group.add_process(process.clone());
process_group.set_leader_process(process);
process_group
}
pub fn set_pgid(&self, pgid: Pgid) {
self.inner.lock().pgid = pgid;
}
pub fn set_leader_process(&self, leader_process: Arc<Process>) {
self.inner.lock().leader_process = Some(leader_process);
}
pub fn add_process(&self, process: Arc<Process>) {
self.inner.lock().processes.insert(process.pid(), process);
}
pub fn contains_process(&self, pid: Pid) -> bool {
self.inner.lock().processes.contains_key(&pid)
}
/// remove a process from this process group.
/// If this group contains no processes now, the group itself will be deleted from global table.
pub fn remove_process(&self, pid: Pid) {
let mut inner_lock = self.inner.lock();
inner_lock.processes.remove(&pid);
let len = inner_lock.processes.len();
let pgid = inner_lock.pgid;
// if self contains no process, remove self from table
if len == 0 {
// this must be the last statement
process_table::remove_process_group(pgid);
}
}
pub fn pgid(&self) -> Pgid {
self.inner.lock().pgid
}
/// Wake up all processes waiting on polling queue
pub fn wake_all_polling_procs(&self) {
let inner = self.inner.lock();
for (_, process) in &inner.processes {
process.poll_queue().wake_all();
}
}
/// send kernel signal to all processes in the group
pub fn kernel_signal(&self, signal: KernelSignal) {
for (_, process) in &self.inner.lock().processes {
process.enqueue_signal(Box::new(signal.clone()));
}
}
/// send user signal to all processes in the group
pub fn user_signal(&self, signal: UserSignal) {
for (_, process) in &self.inner.lock().processes {
process.enqueue_signal(Box::new(signal.clone()));
}
}
}

View File

@ -0,0 +1,80 @@
//! A global table that stores the pid-to-process mapping.
//! This table can be used to look up a process by its pid.
//! TODO: process groups and threads all need a similar mapping
use crate::events::{Events, Observer, Subject};
use crate::prelude::*;
use super::{process_group::ProcessGroup, Pgid, Pid, Process};
lazy_static! {
// All registered processes, keyed by pid.
static ref PROCESS_TABLE: Mutex<BTreeMap<Pid, Arc<Process>>> = Mutex::new(BTreeMap::new());
// All registered process groups, keyed by pgid.
static ref PROCESS_GROUP_TABLE: Mutex<BTreeMap<Pgid, Arc<ProcessGroup>>> =
Mutex::new(BTreeMap::new());
// Notifies registered observers of `PidEvent`s (sent when a process is removed).
static ref PROCESS_TABLE_SUBJECT: Subject<PidEvent> = Subject::new();
}
/// Registers `process` in the global process table under its pid.
pub fn add_process(process: Arc<Process>) {
    let key = process.pid();
    let mut table = PROCESS_TABLE.lock();
    table.insert(key, process);
}
/// Removes the process `pid` from the global table, then notifies the
/// observers with `PidEvent::Exit(pid)`.
pub fn remove_process(pid: Pid) {
    {
        // Scope the guard so the table is unlocked before observers run.
        let mut table = PROCESS_TABLE.lock();
        table.remove(&pid);
    }
    PROCESS_TABLE_SUBJECT.notify_observers(&PidEvent::Exit(pid));
}
/// Looks up the process registered under `pid`.
pub fn pid_to_process(pid: Pid) -> Option<Arc<Process>> {
    let table = PROCESS_TABLE.lock();
    table.get(&pid).cloned()
}
/// Returns every registered process, in ascending pid order.
pub fn get_all_processes() -> Vec<Arc<Process>> {
    let table = PROCESS_TABLE.lock();
    table.values().cloned().collect()
}
/// Registers `process_group` in the global table under its pgid.
pub fn add_process_group(process_group: Arc<ProcessGroup>) {
    let key = process_group.pgid();
    let mut table = PROCESS_GROUP_TABLE.lock();
    table.insert(key, process_group);
}
/// Removes the process group `pgid` from the global table, if present.
pub fn remove_process_group(pgid: Pgid) {
    let mut table = PROCESS_GROUP_TABLE.lock();
    table.remove(&pgid);
}
/// Looks up the process group registered under `pgid`.
pub fn pgid_to_process_group(pgid: Pgid) -> Option<Arc<ProcessGroup>> {
    let table = PROCESS_GROUP_TABLE.lock();
    table.get(&pgid).cloned()
}
pub fn register_observer(observer: Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.register_observer(observer);
}
pub fn unregister_observer(observer: Weak<dyn Observer<PidEvent>>) {
PROCESS_TABLE_SUBJECT.unregister_observer(observer);
}
/// Events sent to the observers of the process table.
#[derive(Copy, Clone)]
pub enum PidEvent {
/// The process with this pid was removed from the process table.
Exit(Pid),
}
// Marker impl so that `PidEvent` can be carried by `Subject`/`Observer`.
impl Events for PidEvent {}

View File

@ -0,0 +1,34 @@
use crate::prelude::*;
// The definition of MMapFlags is from occlum
// Flag bits accepted in the `flags` argument of mmap.
// NOTE(review): the values are presumably the Linux x86-64 `MAP_*` constants -- confirm.
bitflags! {
pub struct MMapFlags : u32 {
// The low four bits (covered by MAP_TYPE) hold the mapping type.
const MAP_FILE = 0x0;
const MAP_SHARED = 0x1;
const MAP_PRIVATE = 0x2;
const MAP_SHARED_VALIDATE = 0x3;
const MAP_TYPE = 0xf;
// The remaining entries are independent single-bit flags.
const MAP_FIXED = 0x10;
const MAP_ANONYMOUS = 0x20;
const MAP_GROWSDOWN = 0x100;
const MAP_DENYWRITE = 0x800;
const MAP_EXECUTABLE = 0x1000;
const MAP_LOCKED = 0x2000;
const MAP_NORESERVE = 0x4000;
const MAP_POPULATE = 0x8000;
const MAP_NONBLOCK = 0x10000;
const MAP_STACK = 0x20000;
const MAP_HUGETLB = 0x40000;
const MAP_SYNC = 0x80000;
const MAP_FIXED_NOREPLACE = 0x100000;
}
}
impl TryFrom<u64> for MMapFlags {
type Error = Error;
fn try_from(value: u64) -> Result<Self> {
MMapFlags::from_bits(value as u32)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown mmap flags"))
}
}

View File

@ -0,0 +1,63 @@
//! This module defines the UserVm of a process.
//! The UserSpace of a process only contains the virtual-to-physical memory mapping,
//! so it cannot tell which virtual addresses belong to the user heap and which to mmap areas.
//! The UserVm struct is defined to store such information.
//! Briefly, it records the exact usage of each segment of the virtual address space.
pub mod mmap_flags;
pub mod user_heap;
use crate::prelude::*;
use user_heap::UserHeap;
use crate::{rights::Full, vm::vmar::Vmar};
/*
* The user vm space layout looks like below.
* |-----------------------|-------The highest user vm address
* | |
* | Mmap Areas |
* | |
* | |
* --------------------------------The init stack base
* | |
* | User Stack(Init Stack)|
* | |
* | || |
* ----------||----------------------The user stack top, grows down
* | \/ |
* | |
* | Unmapped Areas |
* | |
* | /\ |
* ----------||---------------------The user heap top, grows up
* | || |
* | |
* | User Heap |
* | |
* ----------------------------------The user heap base
*/
/// The virtual space usage.
/// This struct is used to control brk and mmap now.
#[derive(Debug, Clone)]
pub struct UserVm {
    /// Bookkeeping of the user heap.
    user_heap: UserHeap,
}

impl UserVm {
    /// Creates the user vm bookkeeping and maps the user heap into `root_vmar`.
    ///
    /// # Errors
    ///
    /// Propagates the error when the heap cannot be initialized.
    pub fn new(root_vmar: &Vmar<Full>) -> Result<Self> {
        let user_heap = UserHeap::new();
        user_heap.init(root_vmar)?;
        Ok(UserVm { user_heap })
    }

    /// Returns the user heap bookkeeping.
    pub fn user_heap(&self) -> &UserHeap {
        &self.user_heap
    }

    /// Set user vm to the init status
    pub fn set_default(&self) -> Result<()> {
        self.user_heap.set_default()
    }
}

View File

@ -0,0 +1,93 @@
use core::sync::atomic::{AtomicUsize, Ordering};
use crate::rights::Full;
use crate::vm::perms::VmPerms;
use crate::vm::vmar::Vmar;
use crate::{
prelude::*,
rights::Rights,
vm::vmo::{VmoFlags, VmoOptions},
};
use align_ext::AlignExt;
/// The lowest address of the user heap.
pub const USER_HEAP_BASE: Vaddr = 0x0000_0000_1000_0000;
/// The maximum size of the user heap (1000 pages).
pub const USER_HEAP_SIZE_LIMIT: usize = PAGE_SIZE * 1000;

/// Bookkeeping of the user heap: where it starts, how large it may grow and
/// where it currently ends.
#[derive(Debug)]
pub struct UserHeap {
    /// the low address of user heap
    heap_base: Vaddr,
    /// the max heap size
    heap_size_limit: usize,
    /// the current end (program break) of the heap
    current_heap_end: AtomicUsize,
}

impl UserHeap {
    /// Creates an empty heap located at `USER_HEAP_BASE`.
    pub const fn new() -> Self {
        UserHeap {
            heap_base: USER_HEAP_BASE,
            heap_size_limit: USER_HEAP_SIZE_LIMIT,
            current_heap_end: AtomicUsize::new(USER_HEAP_BASE),
        }
    }

    /// Maps a resizable, initially empty VMO of `heap_size_limit` bytes at
    /// `heap_base` in `root_vmar` with read/write permissions.
    ///
    /// Returns the current heap end.
    ///
    /// # Errors
    ///
    /// Propagates failures of the VMO allocation and of the mapping instead
    /// of panicking.
    pub fn init(&self, root_vmar: &Vmar<Full>) -> Result<Vaddr> {
        let perms = VmPerms::READ | VmPerms::WRITE;
        let vmo_options = VmoOptions::<Rights>::new(0).flags(VmoFlags::RESIZABLE);
        let heap_vmo = vmo_options.alloc()?;
        let vmar_map_options = root_vmar
            .new_map(heap_vmo, perms)?
            .offset(self.heap_base)
            .size(self.heap_size_limit);
        vmar_map_options.build()?;
        Ok(self.current_heap_end.load(Ordering::Relaxed))
    }

    /// Queries or moves the heap end.
    ///
    /// With `None` the current heap end is returned. With `Some(addr)` the
    /// heap is grown so that it ends at `addr`; a request below the current
    /// end leaves the heap unchanged and returns the current end.
    ///
    /// # Errors
    ///
    /// Returns `ENOMEM` when `addr` lies beyond the heap size limit, and
    /// propagates errors from looking up or resizing the heap mapping.
    pub fn brk(&self, new_heap_end: Option<Vaddr>) -> Result<Vaddr> {
        let current = current!();
        let root_vmar = current.root_vmar();
        match new_heap_end {
            None => Ok(self.current_heap_end.load(Ordering::Relaxed)),
            Some(new_heap_end) => {
                if new_heap_end > self.heap_base + self.heap_size_limit {
                    return_errno_with_message!(Errno::ENOMEM, "heap size limit was met.");
                }
                let current_heap_end = self.current_heap_end.load(Ordering::Acquire);
                if new_heap_end < current_heap_end {
                    // FIXME: should we allow shrink current user heap?
                    return Ok(current_heap_end);
                }
                // The backing VMO is resized in whole pages.
                let new_size = (new_heap_end - self.heap_base).align_up(PAGE_SIZE);
                let heap_mapping = root_vmar.get_vm_mapping(self.heap_base)?;
                let heap_vmo = heap_mapping.vmo();
                heap_vmo.resize(new_size)?;
                self.current_heap_end.store(new_heap_end, Ordering::Release);
                Ok(new_heap_end)
            }
        }
    }

    /// Set heap to the default status. i.e., point the heap end to heap base.
    /// This function will be called in execve.
    pub fn set_default(&self) -> Result<()> {
        self.current_heap_end
            .store(self.heap_base, Ordering::Relaxed);
        let current = current!();
        self.init(current.root_vmar())?;
        Ok(())
    }
}

impl Clone for UserHeap {
    /// Copies the heap bookkeeping, snapshotting the current heap end.
    fn clone(&self) -> Self {
        let current_heap_end = self.current_heap_end.load(Ordering::Relaxed);
        Self {
            heap_base: self.heap_base,
            heap_size_limit: self.heap_size_limit,
            current_heap_end: AtomicUsize::new(current_heap_end),
        }
    }
}

View File

@ -0,0 +1,93 @@
use crate::prelude::*;
/// This implementation is from occlum.
/// Auxiliary Vector.
///
/// # What is Auxiliary Vector?
///
/// Here is a concise description of Auxiliary Vector from GNU's manual:
///
/// > When a program is executed, it receives information from the operating system
/// about the environment in which it is operating. The form of this information
/// is a table of key-value pairs, where the keys are from the set of AT_
/// values in elf.h.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum AuxKey {
    AT_NULL = 0,      // end of vector
    AT_IGNORE = 1,    // entry should be ignored
    AT_EXECFD = 2,    // file descriptor of program
    AT_PHDR = 3,      // program headers for program
    AT_PHENT = 4,     // size of program header entry
    AT_PHNUM = 5,     // number of program headers
    AT_PAGESZ = 6,    // system page size
    AT_BASE = 7,      // base address of interpreter
    AT_FLAGS = 8,     // flags
    AT_ENTRY = 9,     // entry point of program
    AT_NOTELF = 10,   // program is not ELF
    AT_UID = 11,      // real uid
    AT_EUID = 12,     // effective uid
    AT_GID = 13,      // real gid
    AT_EGID = 14,     // effective gid
    AT_PLATFORM = 15, // string identifying CPU for optimizations
    AT_HWCAP = 16,    // arch dependent hints at CPU capabilities
    AT_CLKTCK = 17,   // frequency at which times() increments
    // 18...22 not used
    AT_SECURE = 23, // secure mode boolean
    // string identifying real platform, may differ from AT_PLATFORM.
    AT_BASE_PLATFORM = 24,
    AT_RANDOM = 25, // address of 16 random bytes
    AT_HWCAP2 = 26, // extension of AT_HWCAP
    // 27...30 not used
    AT_EXECFN = 31, // filename of program
    AT_SYSINFO = 32,
}

impl AuxKey {
    /// Returns the numeric value of the key, widened to `u64`.
    pub fn as_u64(&self) -> u64 {
        u64::from(*self as u8)
    }
}
/// The auxiliary vector: a map from [`AuxKey`] to its 64-bit value.
#[derive(Clone, Default, Debug)]
pub struct AuxVec {
    table: BTreeMap<AuxKey, u64>,
}

impl AuxVec {
    /// Creates an empty auxiliary vector.
    pub const fn new() -> AuxVec {
        AuxVec {
            table: BTreeMap::new(),
        }
    }

    /// Sets `key` to `val`, replacing any earlier value.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` for `AT_NULL` and `AT_IGNORE`, which cannot be set.
    pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
        if matches!(key, AuxKey::AT_NULL | AuxKey::AT_IGNORE) {
            return_errno_with_message!(Errno::EINVAL, "Illegal key");
        }
        self.table.insert(key, val);
        Ok(())
    }

    /// Returns the value stored for `key`, if any.
    pub fn get(&self, key: AuxKey) -> Option<u64> {
        self.table.get(&key).copied()
    }

    /// Removes `key` and returns the value it had, if any.
    pub fn del(&mut self, key: AuxKey) -> Option<u64> {
        self.table.remove(&key)
    }

    /// Returns the underlying key/value table.
    pub fn table(&self) -> &BTreeMap<AuxKey, u64> {
        &self.table
    }
}

View File

@ -0,0 +1,211 @@
//! A wrapper around xmas_elf's ELF parsing.
use xmas_elf::{
header::{self, Header, HeaderPt1, HeaderPt2, HeaderPt2_, Machine_, Type_},
program::{self, ProgramHeader64},
};
use crate::prelude::*;
pub struct Elf {
pub elf_header: ElfHeader,
pub program_headers: Vec<ProgramHeader64>,
}
impl Elf {
pub fn parse_elf(input: &[u8]) -> Result<Self> {
// first parse elf header
// The elf header is usually 64 bytes. pt1 is 16bytes and pt2 is 48 bytes.
// We require 128 bytes here is to keep consistency with linux implementations.
debug_assert!(input.len() >= 128);
let header = xmas_elf::header::parse_header(input)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse elf header fails"))?;
let elf_header = ElfHeader::parse_elf_header(header)?;
check_elf_header(&elf_header)?;
// than parse the program headers table
// FIXME: we should acquire enough pages before parse
let ph_offset = elf_header.pt2.ph_offset;
let ph_count = elf_header.pt2.ph_count;
let ph_entry_size = elf_header.pt2.ph_entry_size;
debug_assert!(
input.len() >= ph_offset as usize + ph_count as usize * ph_entry_size as usize
);
let mut program_headers = Vec::with_capacity(ph_count as usize);
for index in 0..ph_count {
let program_header = xmas_elf::program::parse_program_header(input, header, index)
.map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header fails"))?;
let ph64 = match program_header {
xmas_elf::program::ProgramHeader::Ph64(ph64) => ph64.clone(),
xmas_elf::program::ProgramHeader::Ph32(_) => {
return_errno_with_message!(Errno::ENOEXEC, "Not 64 byte executable")
}
};
program_headers.push(ph64);
}
Ok(Self {
elf_header,
program_headers,
})
}
// The following info is used to setup init stack
/// the entry point of the elf
pub fn entry_point(&self) -> Vaddr {
self.elf_header.pt2.entry_point as Vaddr
}
/// program header table offset
pub fn ph_off(&self) -> u64 {
self.elf_header.pt2.ph_offset
}
/// number of program headers
pub fn ph_count(&self) -> u16 {
self.elf_header.pt2.ph_count
}
/// The size of a program header
pub fn ph_ent(&self) -> u16 {
self.elf_header.pt2.ph_entry_size
}
/// The virtual addr of program headers table address
pub fn ph_addr(&self) -> Result<Vaddr> {
let ph_offset = self.ph_off();
for program_header in &self.program_headers {
if program_header.offset <= ph_offset
&& ph_offset < program_header.offset + program_header.file_size
{
return Ok(
(ph_offset - program_header.offset + program_header.virtual_addr) as Vaddr,
);
}
}
return_errno_with_message!(
Errno::ENOEXEC,
"can not find program header table address in elf"
);
}
/// whether the elf is a shared object
pub fn is_shared_object(&self) -> bool {
self.elf_header.pt2.type_.as_type() == header::Type::SharedObject
}
/// read the ldso path from the elf interpret section
pub fn ldso_path(&self, file_header_buf: &[u8]) -> Result<String> {
for program_header in &self.program_headers {
let type_ = program_header.get_type().map_err(|_| {
Error::with_message(Errno::ENOEXEC, "parse program header type fails")
})?;
if type_ == program::Type::Interp {
let file_size = program_header.file_size as usize;
let file_offset = program_header.offset as usize;
debug_assert!(file_offset + file_size <= file_header_buf.len());
let ldso = CStr::from_bytes_with_nul(
&file_header_buf[file_offset..file_offset + file_size],
)?;
return Ok(ldso.to_string_lossy().to_string());
}
}
return_errno_with_message!(
Errno::ENOEXEC,
"cannot find interpreter section in dyn-link program"
)
}
// An offset to be subtracted from ELF vaddr for PIE
pub fn base_load_address_offset(&self) -> u64 {
let phdr = self.program_headers.iter().nth(0).unwrap();
phdr.virtual_addr - phdr.offset
}
}
/// An owned copy of the ELF header, split into the two parts used by xmas_elf.
pub struct ElfHeader {
    pub pt1: HeaderPt1,
    pub pt2: HeaderPt2_64,
}

impl ElfHeader {
    /// Copies a parsed xmas_elf header into an owned `ElfHeader`.
    ///
    /// # Errors
    ///
    /// Returns `ENOEXEC` when the second header part is not the 64-bit variant.
    fn parse_elf_header(header: Header) -> Result<Self> {
        let pt1 = header.pt1.clone();
        let pt2 = match header.pt2 {
            HeaderPt2::Header64(&HeaderPt2_ {
                type_,
                machine,
                version,
                entry_point,
                ph_offset,
                sh_offset,
                flags,
                header_size,
                ph_entry_size,
                ph_count,
                sh_entry_size,
                sh_count,
                sh_str_index,
            }) => HeaderPt2_64 {
                type_,
                machine,
                version,
                entry_point,
                ph_offset,
                sh_offset,
                flags,
                header_size,
                ph_entry_size,
                ph_count,
                sh_entry_size,
                sh_count,
                sh_str_index,
            },
            _ => return_errno_with_message!(Errno::ENOEXEC, "parse elf header failed"),
        };
        Ok(ElfHeader { pt1, pt2 })
    }
}
/// Owned copy of the second part of a 64-bit ELF header.
///
/// Filled field by field from xmas_elf's `HeaderPt2_` in
/// `ElfHeader::parse_elf_header`.
pub struct HeaderPt2_64 {
/// File type; checked to be an executable or a shared object.
pub type_: Type_,
/// Target machine; checked to be x86_64.
pub machine: Machine_,
pub version: u32,
/// The entry point of the ELF.
pub entry_point: u64,
/// File offset of the program header table.
pub ph_offset: u64,
pub sh_offset: u64,
pub flags: u32,
pub header_size: u16,
/// Size of one program header.
pub ph_entry_size: u16,
/// Number of program headers.
pub ph_count: u16,
pub sh_entry_size: u16,
pub sh_count: u16,
pub sh_str_index: u16,
}
/// Checks that the ELF is one this kernel can load: 64-bit, little endian,
/// x86_64, and either an executable or a shared object.
///
/// The header comes from a user-provided file, so every mismatch is reported
/// as `ENOEXEC` rather than asserted.
fn check_elf_header(elf_header: &ElfHeader) -> Result<()> {
    // 64bit
    if elf_header.pt1.class() != header::Class::SixtyFour {
        return_errno_with_message!(Errno::ENOEXEC, "Not 64-bit executable");
    }
    // little endian
    if elf_header.pt1.data() != header::Data::LittleEndian {
        return_errno_with_message!(Errno::ENOEXEC, "Not little endian executable");
    }
    // The OS ABI field (system V ABI) is deliberately not checked.
    // x86_64 architecture
    if elf_header.pt2.machine.as_machine() != header::Machine::X86_64 {
        return_errno_with_message!(Errno::ENOEXEC, "Not x86_64 executable");
    }
    // Executable file or shared object
    let elf_type = elf_header.pt2.type_.as_type();
    if elf_type != header::Type::Executable && elf_type != header::Type::SharedObject {
        return_errno_with_message!(Errno::ENOEXEC, "Not executable file");
    }
    Ok(())
}

View File

@ -0,0 +1,364 @@
//! This module defines the process initial stack.
//! The process initial stack contains arguments, environment variables and auxiliary vectors.
//! The data layout of the init stack can be seen in Figure 3.9 in https://uclibc.org/docs/psABI-x86_64.pdf
use crate::rights::Rights;
use crate::vm::perms::VmPerms;
use crate::{
prelude::*,
rights::Full,
vm::{vmar::Vmar, vmo::VmoOptions},
};
use align_ext::AlignExt;
use core::mem;
use jinux_frame::vm::{VmIo, VmPerm};
use super::aux_vec::{AuxKey, AuxVec};
use super::elf_file::Elf;
use super::load_elf::LdsoLoadInfo;
/// Base address of the init stack; the default stack top is one page below it.
pub const INIT_STACK_BASE: Vaddr = 0x0000_0000_2000_0000;
/// Default size of the init stack: 16 pages of 4 KiB.
pub const INIT_STACK_SIZE: usize = 0x1000 * 16; // 64KB
/*
* The initial stack of a process looks like below(This figure is from occlum):
*
*
* +---------------------+ <------+ Top of stack
* | | (high address)
* | Null-terminated |
* | strings referenced |
* | by variables below |
* | |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | AT_NULL |
* +---------------------+
* | ... |
* +---------------------+
* | aux_val[0] |
* +---------------------+
* | aux_key[0] | <------+ Auxiliary table
* +---------------------+
* | NULL |
* +---------------------+
* | ... |
* +---------------------+
* | char* envp[0] | <------+ Environment variables
* +---------------------+
* | NULL |
* +---------------------+
* | char* argv[argc-1] |
* +---------------------+
* | ... |
* +---------------------+
* | char* argv[0] |
* +---------------------+
* | long argc | <------+ Program arguments
* +---------------------+
* | |
* | |
* + +
*
*/
/// The initial user stack of a process and the data to be placed on it.
pub struct InitStack {
/// The high address of init stack
init_stack_top: Vaddr,
/// The size of the init stack in bytes
init_stack_size: usize,
/// The current write position; starts at `init_stack_top` and moves down
/// as content is written
pos: usize,
/// Command line args
argv: Vec<CString>,
/// Environmental variables
envp: Vec<CString>,
}
impl InitStack {
/// initialize user stack on base addr
pub fn new(
init_stack_top: Vaddr,
init_stack_size: usize,
argv: Vec<CString>,
envp: Vec<CString>,
) -> Self {
Self {
init_stack_top,
init_stack_size,
pos: init_stack_top,
argv,
envp,
}
}
/// Creates an init stack with the default placement: the top is one page
/// below `INIT_STACK_BASE` and the size is `INIT_STACK_SIZE`.
///
/// This function only works for the first process.
pub fn new_default_config(argv: Vec<CString>, envp: Vec<CString>) -> Self {
    InitStack::new(INIT_STACK_BASE - PAGE_SIZE, INIT_STACK_SIZE, argv, envp)
}
/// The current stack position, used to set up rsp.
///
/// Expected to be 16-byte aligned (checked in debug builds).
pub fn user_stack_top(&self) -> Vaddr {
    // ensure stack top is 16-bytes aligned
    debug_assert!(self.pos % 16 == 0);
    self.pos
}
/// The lowest address of the init stack: its top minus its size.
const fn user_stack_bottom(&self) -> Vaddr {
    let top = self.init_stack_top;
    let size = self.init_stack_size;
    top - size
}
/// Maps the init stack into `root_vmar`, writes its content and then dumps
/// the content for debugging.
pub fn init(
    &mut self,
    root_vmar: &Vmar<Full>,
    elf: &Elf,
    ldso_load_info: &Option<LdsoLoadInfo>,
    aux_vec: &mut AuxVec,
) -> Result<()> {
    self.map_and_zeroed(root_vmar)
        .and_then(|()| self.write_stack_content(root_vmar, elf, ldso_load_info, aux_vec))?;
    self.debug_print_stack_content(root_vmar);
    Ok(())
}
/// Allocates a zero-filled VMO of `init_stack_size` bytes and maps it
/// read/write at the bottom address of the init stack.
///
/// # Errors
///
/// Propagates failures of the allocation, the clearing and the mapping;
/// a failed mapping is reported instead of panicking.
fn map_and_zeroed(&self, root_vmar: &Vmar<Full>) -> Result<()> {
    let vmo_options = VmoOptions::<Rights>::new(self.init_stack_size);
    let vmo = vmo_options.alloc()?;
    vmo.clear(0..vmo.size())?;
    let perms = VmPerms::READ | VmPerms::WRITE;
    let vmar_map_options = root_vmar
        .new_map(vmo, perms)?
        .offset(self.user_stack_bottom());
    vmar_map_options.build()?;
    Ok(())
}
/// Libc ABI requires 16-byte alignment of the stack entrypoint.
///
/// First brings the position to 8-byte alignment by writing one zero word.
/// Then works out how many bytes are still to be written (the auxiliary
/// vector with its terminator, the envp and argv pointer arrays with their
/// NULL terminators, and argc) and inserts one more zero word if the final
/// position would otherwise not be 16-byte aligned.
fn adjust_stack_alignment(
    &mut self,
    root_vmar: &Vmar<Full>,
    envp_pointers: &Vec<u64>,
    argv_pointers: &Vec<u64>,
    aux_vec: &AuxVec,
) -> Result<()> {
    // ensure 8-byte alignment
    self.write_u64(0, root_vmar)?;

    let word = mem::size_of::<u64>();
    // Each auxiliary entry is a (key, value) pair; +1 for the AT_NULL entry.
    let auxvec_size = (aux_vec.table().len() + 1) * (word * 2);
    // Pointer arrays with their NULL terminators, plus one word for argc.
    let word_slots = (envp_pointers.len() + 1) + (argv_pointers.len() + 1) + 1;
    let to_write_size = auxvec_size + word_slots * word;

    let final_pos = self.pos - to_write_size;
    if final_pos % 16 != 0 {
        self.write_u64(0, root_vmar)?;
    }
    Ok(())
}
/// Writes the whole init stack content from the top down: a zero page, the
/// envp and argv strings, the random bytes for `AT_RANDOM`, alignment
/// padding, the auxiliary vector, the envp and argv pointer arrays and
/// finally argc.
///
/// `AT_RANDOM` and, when an ldso is loaded, `AT_BASE` are added to `aux_vec`
/// before the vector is written.
fn write_stack_content(
    &mut self,
    root_vmar: &Vmar<Full>,
    elf: &Elf,
    ldso_load_info: &Option<LdsoLoadInfo>,
    aux_vec: &mut AuxVec,
) -> Result<()> {
    // write a zero page. When a user program tries to read a cstring(like argv) from init stack,
    // it will typically read 4096 bytes and then find the first '\0' in the buffer
    // (we now read 128 bytes, which is set by MAX_FILENAME_LEN).
    // If we don't have this zero page, the read may go into guard page,
    // which will cause unrecoverable page fault(The guard page is not backed up by any vmo).
    // So we add a zero page here, to ensure the read will not go into guard page.
    // FIXME: Some other OSes put the first page of executable file here.
    let zero_page = [0u8; PAGE_SIZE];
    self.write_bytes(&zero_page, root_vmar)?;

    // Strings first: envp, then argv.
    let envp_pointers = self.write_envp_strings(root_vmar)?;
    let argv_pointers = self.write_argv_strings(root_vmar)?;

    // The random bytes that AT_RANDOM points to.
    let random_bytes = generate_random_for_aux_vec();
    let random_bytes_addr = self.write_bytes(&random_bytes, root_vmar)?;
    aux_vec.set(AuxKey::AT_RANDOM, random_bytes_addr)?;
    if let Some(ldso) = ldso_load_info.as_ref() {
        aux_vec.set(AuxKey::AT_BASE, ldso.base_addr() as u64)?;
    }

    self.adjust_stack_alignment(root_vmar, &envp_pointers, &argv_pointers, &*aux_vec)?;
    self.write_aux_vec(root_vmar, aux_vec)?;
    self.write_envp_pointers(root_vmar, envp_pointers)?;
    self.write_argv_pointers(root_vmar, argv_pointers)?;

    // argc goes last, i.e. at the lowest address.
    let argc = self.argc();
    self.write_u64(argc, root_vmar)?;
    Ok(())
}
/// Writes every environment string onto the stack and returns their
/// addresses in the same order as `self.envp`.
fn write_envp_strings(&mut self, root_vmar: &Vmar<Full>) -> Result<Vec<u64>> {
    // Work on a copy so that `self` can be borrowed mutably while writing.
    let strings = self.envp.clone();
    let mut addrs = Vec::with_capacity(strings.len());
    for string in &strings {
        addrs.push(self.write_cstring(string, root_vmar)?);
    }
    Ok(addrs)
}
/// Writes the argument strings onto the stack, last argument first, and
/// returns their addresses in the same order as `self.argv`.
fn write_argv_strings(&mut self, root_vmar: &Vmar<Full>) -> Result<Vec<u64>> {
    // Work on a copy so that `self` can be borrowed mutably while writing.
    let strings = self.argv.clone();
    let mut addrs = Vec::with_capacity(strings.len());
    for string in strings.iter().rev() {
        let addr = self.write_cstring(string, root_vmar)?;
        debug!("argv address = 0x{:x}", addr);
        addrs.push(addr);
    }
    // Written back to front; restore argv order.
    addrs.reverse();
    Ok(addrs)
}
/// Writes the auxiliary vector onto the stack.
///
/// The terminating `AT_NULL` entry is written first (it ends up at the
/// highest address), followed by each entry's value and then its key.
fn write_aux_vec(&mut self, root_vmar: &Vmar<Full>, aux_vec: &mut AuxVec) -> Result<()> {
    // Write NULL auxiliary
    self.write_u64(0, root_vmar)?;
    self.write_u64(AuxKey::AT_NULL as u64, root_vmar)?;
    // Write Auxiliary vectors
    for (&key, &value) in aux_vec.table() {
        self.write_u64(value, root_vmar)?;
        self.write_u64(key as u64, root_vmar)?;
    }
    Ok(())
}
/// Writes the NULL-terminated array of environment string pointers onto the init stack.
///
/// The terminator goes first and the pointers follow in reverse, because the
/// stack cursor moves downwards.
fn write_envp_pointers(
    &mut self,
    root_vmar: &Vmar<Full>,
    envp_pointers: Vec<u64>,
) -> Result<()> {
    // Terminating NULL pointer.
    self.write_u64(0, root_vmar)?;
    for &pointer in envp_pointers.iter().rev() {
        self.write_u64(pointer, root_vmar)?;
    }
    Ok(())
}
/// Writes the NULL-terminated array of argument string pointers onto the init stack.
///
/// The terminator goes first and the pointers follow in reverse, because the
/// stack cursor moves downwards.
fn write_argv_pointers(
    &mut self,
    root_vmar: &Vmar<Full>,
    argv_pointers: Vec<u64>,
) -> Result<()> {
    // Terminating NULL pointer.
    self.write_u64(0, root_vmar)?;
    for &pointer in argv_pointers.iter().rev() {
        self.write_u64(pointer, root_vmar)?;
    }
    Ok(())
}
/// Number of command-line arguments held by this init stack.
pub fn argc(&self) -> u64 {
    let arg_count = self.argv.len();
    arg_count as u64
}
/// Start address of the command-line argument pointer array.
///
/// This is 8 bytes above the user stack top.
pub fn argv(&self) -> u64 {
    let stack_top = self.user_stack_top() as u64;
    stack_top + 8
}
/// Number of environment variables held by this init stack.
pub fn envc(&self) -> u64 {
    let env_count = self.envp.len();
    env_count as u64
}
/// Environmental variables pointers.
///
/// Currently always returns 0 rather than an address on the init stack.
/// NOTE(review): this looks like a placeholder — confirm what callers expect
/// before replacing it with the real address of the envp pointer array.
pub fn envp(&self) -> u64 {
    0
}
/// Returns the top address of the init stack, as stored in the `init_stack_top` field.
/// It should point to a fixed address.
pub const fn init_stack_top(&self) -> Vaddr {
    self.init_stack_top
}
/// Pushes one `u64` onto the init stack and returns the address it was written to.
///
/// The cursor is moved down by 8 bytes and then aligned down to 8 bytes before the write.
fn write_u64(&mut self, val: u64, root_vmar: &Vmar<Full>) -> Result<u64> {
    // The cursor is updated before the write, so it stays moved even if the write fails.
    self.pos = (self.pos - 8).align_down(8);
    root_vmar.write_val(self.pos, &val)?;
    Ok(self.pos as u64)
}
/// Pushes a raw byte slice onto the init stack and returns the address of its first byte.
///
/// No alignment is applied; the cursor simply moves down by `bytes.len()`.
fn write_bytes(&mut self, bytes: &[u8], root_vmar: &Vmar<Full>) -> Result<u64> {
    let start = self.pos - bytes.len();
    // The cursor is updated before the write, so it stays moved even if the write fails.
    self.pos = start;
    root_vmar.write_bytes(start, bytes)?;
    Ok(start as u64)
}
/// Pushes a C string, including its terminating NUL byte, onto the init stack.
///
/// Returns the start address of the string.
fn write_cstring(&mut self, val: &CString, root_vmar: &Vmar<Full>) -> Result<u64> {
    self.write_bytes(val.as_bytes_with_nul(), root_vmar)
}
/// Permission value associated with the init stack: always `VmPerm::RWU`.
// NOTE(review): presumably read + write + user-accessible — confirm against `VmPerm`.
pub const fn perm() -> VmPerm {
    VmPerm::RWU
}
/// Debug helper: logs the `argc` value stored at the user stack top.
///
/// A failed read is logged instead of panicking, so that a debugging aid can
/// never bring the kernel down.
fn debug_print_stack_content(&self, root_vmar: &Vmar<Full>) {
    debug!("print stack content:");
    let stack_top = self.user_stack_top();
    match root_vmar.read_val::<u64>(stack_top) {
        Ok(argc) => debug!("argc = {}", argc),
        Err(err) => debug!(
            "cannot read argc at stack top 0x{:x}: {:?}",
            stack_top, err
        ),
    }
}
}
/// Builds the auxiliary vector entries that can be derived from the ELF file alone.
///
/// Sets `AT_PAGESZ`, `AT_PHDR`, `AT_PHNUM`, `AT_PHENT` and `AT_ENTRY`. For a shared
/// object, the program header address and the entry point are rebased by
/// `elf_map_addr`; otherwise the values from the ELF file are used as they are.
pub fn init_aux_vec(elf: &Elf, elf_map_addr: Vaddr) -> Result<AuxVec> {
    let mut aux_vec = AuxVec::new();
    aux_vec.set(AuxKey::AT_PAGESZ, PAGE_SIZE as _)?;

    // Program header table location and shape.
    let mut ph_addr = elf.ph_addr()?;
    if elf.is_shared_object() {
        ph_addr = ph_addr + elf_map_addr;
    }
    aux_vec.set(AuxKey::AT_PHDR, ph_addr as u64)?;
    aux_vec.set(AuxKey::AT_PHNUM, elf.ph_count() as u64)?;
    aux_vec.set(AuxKey::AT_PHENT, elf.ph_ent() as u64)?;

    // Entry point of the program itself (not of the dynamic loader).
    let mut elf_entry = elf.entry_point();
    if elf.is_shared_object() {
        elf_entry = elf_entry + elf_map_addr - elf.base_load_address_offset() as usize;
    }
    aux_vec.set(AuxKey::AT_ENTRY, elf_entry as u64)?;

    Ok(aux_vec)
}
/// Produces the 16 bytes written to the stack for the `AT_RANDOM` aux entry.
/// FIXME: generate a really random value. For now the bytes are fixed:
/// `0xff, 0xfe, ..., 0xf0`.
fn generate_random_for_aux_vec() -> [u8; 16] {
    let mut rand_val = [0u8; 16];
    for (offset, byte) in rand_val.iter_mut().enumerate() {
        *byte = 0xff - offset as u8;
    }
    rand_val
}

View File

@ -0,0 +1,290 @@
//! This module parses ELF file content to produce the elf_load_info.
//! When a process is created from an ELF file, the elf_load_info is used to construct the VmSpace.
use crate::fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD};
use crate::fs::utils::Dentry;
use crate::process::program_loader::elf::init_stack::{init_aux_vec, InitStack};
use crate::rights::Rights;
use crate::vm::perms::VmPerms;
use crate::vm::vmo::{VmoOptions, VmoRightsOp};
use crate::{
prelude::*,
rights::Full,
vm::{vmar::Vmar, vmo::Vmo},
};
use align_ext::AlignExt;
use jinux_frame::vm::{VmIo, VmPerm};
use xmas_elf::program::{self, ProgramHeader64};
use super::elf_file::Elf;
/// Loads an ELF file into the root vmar. This function will
/// 1. try to load the dynamic loader (ldso) if the ELF is a shared object;
/// 2. create a vmo for every loadable segment and map it into the root vmar;
/// 3. write the initial content (argv, envp, aux vector) to the init stack.
///
/// Returns the entry point to jump to and the user stack top.
pub fn load_elf_to_root_vmar(
    root_vmar: &Vmar<Full>,
    file_header: &[u8],
    elf_file: Arc<Dentry>,
    fs_resolver: &FsResolver,
    argv: Vec<CString>,
    envp: Vec<CString>,
) -> Result<ElfLoadInfo> {
    let elf = Elf::parse_elf(file_header)?;
    // Any failure to load the ldso is treated as "there is no ldso".
    let ldso_load_info =
        load_ldso_for_shared_object(root_vmar, &elf, file_header, fs_resolver).ok();

    let map_addr = map_segment_vmos(&elf, root_vmar, &elf_file)?;
    let mut aux_vec = init_aux_vec(&elf, map_addr)?;
    let mut init_stack = InitStack::new_default_config(argv, envp);
    init_stack.init(root_vmar, &elf, &ldso_load_info, &mut aux_vec)?;

    let entry_point = match ldso_load_info {
        // Normal shared object
        Some(ldso_load_info) => ldso_load_info.entry_point(),
        // ldso itself
        None if elf.is_shared_object() => elf.entry_point() + map_addr,
        // statically linked executable
        None => elf.entry_point(),
    };

    let elf_load_info = ElfLoadInfo::new(entry_point, init_stack.user_stack_top());
    debug!("load elf succeeds.");
    Ok(elf_load_info)
}
/// Locates, parses and maps the dynamic loader (ldso) requested by a shared-object ELF.
///
/// Fails with `EINVAL` when `elf` is not a shared object, and propagates any error
/// from resolving the ldso path, reading its first page, parsing it or mapping it.
fn load_ldso_for_shared_object(
    root_vmar: &Vmar<Full>,
    elf: &Elf,
    file_header: &[u8],
    fs_resolver: &FsResolver,
) -> Result<LdsoLoadInfo> {
    if !elf.is_shared_object() {
        return_errno_with_message!(Errno::EINVAL, "not shared object");
    }

    // Resolve the ldso path recorded in the ELF file.
    let ldso_path = elf.ldso_path(file_header)?;
    let ldso_fs_path = FsPath::new(AT_FDCWD, &ldso_path)?;
    let ldso_file = fs_resolver.lookup(&ldso_fs_path)?;

    // Only the first page of the ldso is read to parse its headers.
    let mut header_page = Box::new([0u8; PAGE_SIZE]);
    ldso_file.vnode().read_at(0, &mut *header_page)?;
    let ldso_elf = Elf::parse_elf(&*header_page)?;

    let base_addr = map_segment_vmos(&ldso_elf, root_vmar, &ldso_file)?;
    let entry_point = ldso_elf.entry_point() + base_addr;
    Ok(LdsoLoadInfo::new(entry_point, base_addr))
}
/// Where the dynamic loader (ldso) ended up after being mapped.
pub struct LdsoLoadInfo {
    /// Address of the ldso entry point.
    entry_point: Vaddr,
    /// Address the ldso was mapped at.
    base_addr: Vaddr,
}

impl LdsoLoadInfo {
    /// Bundles the entry point and the base address of a mapped ldso.
    pub fn new(entry_point: Vaddr, base_addr: Vaddr) -> Self {
        LdsoLoadInfo {
            entry_point,
            base_addr,
        }
    }

    /// Address of the ldso entry point.
    pub fn entry_point(&self) -> Vaddr {
        let LdsoLoadInfo { entry_point, .. } = self;
        *entry_point
    }

    /// Address the ldso was mapped at.
    pub fn base_addr(&self) -> Vaddr {
        let LdsoLoadInfo { base_addr, .. } = self;
        *base_addr
    }
}
/// Result of loading an ELF program: where to start executing and where the stack begins.
pub struct ElfLoadInfo {
    /// Address execution starts at.
    entry_point: Vaddr,
    /// Top of the initialized user stack.
    user_stack_top: Vaddr,
}

impl ElfLoadInfo {
    /// Bundles the entry point and the user stack top of a loaded program.
    pub fn new(entry_point: Vaddr, user_stack_top: Vaddr) -> Self {
        ElfLoadInfo {
            entry_point,
            user_stack_top,
        }
    }

    /// Address execution starts at.
    pub fn entry_point(&self) -> Vaddr {
        let ElfLoadInfo { entry_point, .. } = self;
        *entry_point
    }

    /// Top of the initialized user stack.
    pub fn user_stack_top(&self) -> Vaddr {
        let ElfLoadInfo { user_stack_top, .. } = self;
        *user_stack_top
    }
}
/// Creates a vmo for every loadable segment and maps it into the root vmar.
///
/// Returns the base address the segments were mapped relative to: the address
/// chosen by `base_map_addr` for a shared object, and 0 otherwise.
pub fn map_segment_vmos(elf: &Elf, root_vmar: &Vmar<Full>, elf_file: &Dentry) -> Result<Vaddr> {
    // All segments of a shared object must be mapped into one continuous vm range,
    // so that the relative offset between segments does not change.
    let mut base_addr = 0;
    if elf.is_shared_object() {
        base_addr = base_map_addr(elf, root_vmar)?;
    }

    for program_header in &elf.program_headers {
        let segment_type = program_header
            .get_type()
            .map_err(|_| Error::with_message(Errno::ENOEXEC, "parse program header type fails"))?;
        if segment_type != program::Type::Load {
            continue;
        }
        check_segment_align(program_header)?;
        let segment_vmo = init_segment_vmo(program_header, elf_file)?;
        map_segment_vmo(program_header, segment_vmo, root_vmar, base_addr)?;
    }

    Ok(base_addr)
}
fn base_map_addr(elf: &Elf, root_vmar: &Vmar<Full>) -> Result<Vaddr> {
let elf_size = elf
.program_headers
.iter()
.filter_map(|program_header| {
if let Ok(type_) = program_header.get_type() && type_ == program::Type::Load {
let ph_max_addr = program_header.virtual_addr + program_header.mem_size;
Some(ph_max_addr as usize)
} else {
None
}
})
.max()
.ok_or(Error::with_message(
Errno::ENOEXEC,
"executable file does not has loadable sections",
))?;
let map_size = elf_size.align_up(PAGE_SIZE);
let vmo = VmoOptions::<Rights>::new(0).alloc()?;
let vmar_map_options = root_vmar.new_map(vmo, VmPerms::empty())?.size(map_size);
vmar_map_options.build()
}
/// Maps one segment vmo into the root vmar.
///
/// The mapping is placed at `base_addr` plus the segment's page-aligned virtual
/// address, with the permissions taken from the program header flags. Existing
/// mappings at that location may be overwritten.
fn map_segment_vmo(
    program_header: &ProgramHeader64,
    vmo: Vmo,
    root_vmar: &Vmar<Full>,
    base_addr: Vaddr,
) -> Result<()> {
    let perms = VmPerms::from(parse_segment_perm(program_header.flags));
    let segment_offset = (program_header.virtual_addr as Vaddr).align_down(PAGE_SIZE);
    trace!(
        "map segment vmo: virtual addr = 0x{:x}, size = 0x{:x}, perms = {:?}",
        segment_offset,
        program_header.mem_size,
        perms
    );
    root_vmar
        .new_map(vmo, perms)?
        .can_overwrite(true)
        .offset(base_addr + segment_offset)
        .build()?;
    Ok(())
}
/// create vmo for each segment
fn init_segment_vmo(program_header: &ProgramHeader64, elf_file: &Dentry) -> Result<Vmo> {
trace!(
"mem range = 0x{:x} - 0x{:x}, mem_size = 0x{:x}",
program_header.virtual_addr,
program_header.virtual_addr + program_header.mem_size,
program_header.mem_size
);
trace!(
"file range = 0x{:x} - 0x{:x}, file_size = 0x{:x}",
program_header.offset,
program_header.offset + program_header.file_size,
program_header.file_size
);
let file_offset = program_header.offset as usize;
let virtual_addr = program_header.virtual_addr as usize;
debug_assert!(file_offset % PAGE_SIZE == virtual_addr % PAGE_SIZE);
let page_cache_vmo = {
let vnode = elf_file.vnode();
vnode.page_cache().ok_or(Error::with_message(
Errno::ENOENT,
"executable has no page cache",
))?
};
let segment_vmo = {
let vmo_offset = file_offset.align_down(PAGE_SIZE);
let map_start = virtual_addr.align_down(PAGE_SIZE);
let map_end = (virtual_addr + program_header.mem_size as usize).align_up(PAGE_SIZE);
let vmo_size = map_end - map_start;
debug_assert!(vmo_size >= (program_header.file_size as usize).align_up(PAGE_SIZE));
page_cache_vmo
.new_cow_child(vmo_offset..vmo_offset + vmo_size)?
.alloc()?
};
// Write zero as paddings. There are head padding and tail padding.
// Head padding: if the segment's virtual address is not page-aligned,
// then the bytes in first page from start to virtual address should be padded zeros.
// Tail padding: If the segment's mem_size is larger than file size,
// then the bytes that are not backed up by file content should be zeros.(usually .data/.bss sections).
// FIXME: Head padding may be removed.
// Head padding.
let page_offset = file_offset % PAGE_SIZE;
if page_offset != 0 {
let buffer = vec![0u8; page_offset];
segment_vmo.write_bytes(0, &buffer)?;
}
// Tail padding.
let segment_vmo_size = segment_vmo.size();
let tail_padding_offset = program_header.file_size as usize + page_offset;
if segment_vmo_size > tail_padding_offset {
let buffer = vec![0u8; segment_vmo_size - tail_padding_offset];
segment_vmo.write_bytes(tail_padding_offset, &buffer)?;
}
Ok(segment_vmo.to_dyn())
}
/// Translates ELF segment flags into the matching `VmPerm` bits:
/// read -> `R`, write -> `W`, execute -> `X`.
fn parse_segment_perm(flags: xmas_elf::program::Flags) -> VmPerm {
    let mut granted = VmPerm::empty();
    granted.set(VmPerm::R, flags.is_read());
    granted.set(VmPerm::W, flags.is_write());
    granted.set(VmPerm::X, flags.is_execute());
    granted
}
/// Validates the alignment constraint of one segment.
///
/// An `align` of 0 or 1 means the segment has no alignment requirement. Otherwise
/// `align` must be a power of two, and the file offset and the virtual address
/// must be congruent modulo `align`.
///
/// # Errors
///
/// Returns `ENOEXEC` when either condition is violated. The program header comes
/// from an untrusted file, so violations are always reported as errors and never
/// asserted on.
fn check_segment_align(program_header: &ProgramHeader64) -> Result<()> {
    let align = program_header.align;
    if align <= 1 {
        // no align requirement
        return Ok(());
    }
    if !align.is_power_of_two() {
        return_errno_with_message!(Errno::ENOEXEC, "segment align is invalid.");
    }
    if program_header.offset % align != program_header.virtual_addr % align {
        return_errno_with_message!(Errno::ENOEXEC, "segment align is not satisfied.");
    }
    Ok(())
}

View File

@ -0,0 +1,7 @@
mod aux_vec;
mod elf_file;
mod init_stack;
mod load_elf;
pub use init_stack::INIT_STACK_SIZE;
pub use load_elf::{load_elf_to_root_vmar, ElfLoadInfo};

View File

@ -0,0 +1,64 @@
pub mod elf;
mod shebang;
use crate::fs::fs_resolver::{FsPath, FsResolver, AT_FDCWD};
use crate::prelude::*;
use crate::rights::Full;
use crate::vm::vmar::Vmar;
use self::elf::{load_elf_to_root_vmar, ElfLoadInfo};
use self::shebang::parse_shebang_line;
/// Loads an executable into the root vmar, including loading the program image,
/// preparing the stack, and initializing the argv, envp and aux tables.
///
/// Returns the absolute path of the file that was finally loaded together with
/// its load info.
///
/// About `recursion_limit`: it limits the recursion depth of shebang executables.
/// If the interpreter (the program behind `#!`) of a shebang executable is itself
/// a shebang executable, loading recurses into that interpreter. Each level
/// consumes one unit of the limit, and `EINVAL` is returned once it reaches zero.
/// For most cases a limit of 1 should be enough, because the interpreter is
/// usually an ELF binary (e.g., /bin/bash).
pub fn load_program_to_root_vmar(
    root_vmar: &Vmar<Full>,
    executable_path: String,
    argv: Vec<CString>,
    envp: Vec<CString>,
    fs_resolver: &FsResolver,
    recursion_limit: usize,
) -> Result<(String, ElfLoadInfo)> {
    // Temporary use because fs_resolver cannot deal with procfs now.
    // FIXME: removes this when procfs is ready.
    let executable_path = if executable_path != "/proc/self/exe" {
        executable_path
    } else {
        current!().executable_path().read().clone()
    };

    let fs_path = FsPath::new(AT_FDCWD, &executable_path)?;
    let elf_file = fs_resolver.lookup(&fs_path)?;
    let abs_path = elf_file.abs_path();

    // Read the first page of the file; it is enough to recognize the file type.
    let mut file_header = Box::new([0u8; PAGE_SIZE]);
    elf_file.vnode().read_at(0, &mut *file_header)?;

    match parse_shebang_line(&*file_header)? {
        Some(mut interpreter_argv) => {
            if recursion_limit == 0 {
                return_errno_with_message!(Errno::EINVAL, "the recursieve limit is reached");
            }
            // The interpreter receives its own name followed by the original arguments.
            interpreter_argv.extend_from_slice(&argv);
            let interpreter = interpreter_argv[0].to_str()?.to_string();
            load_program_to_root_vmar(
                root_vmar,
                interpreter,
                interpreter_argv,
                envp,
                fs_resolver,
                recursion_limit - 1,
            )
        }
        None => {
            debug!("load executable, path = {}", executable_path);
            let elf_load_info =
                load_elf_to_root_vmar(root_vmar, &*file_header, elf_file, fs_resolver, argv, envp)?;
            Ok((abs_path, elf_load_info))
        }
    }
}

View File

@ -0,0 +1,31 @@
use crate::prelude::*;
/// Try to parse a buffer as a shebang line.
///
/// If the buffer starts with `#!`, contains a newline, and its first line is a
/// valid shebang sequence, then the function returns `Ok(Some(parts))`,
/// where `parts` is a `Vec` that contains the path of the interpreter.
/// If the buffer starts with `#!` but some error occurs while parsing the line,
/// then `Err(_)` is returned.
/// If the buffer does not start with `#!`, or contains no newline at all,
/// then `Ok(None)` is returned.
///
/// The first line is split on spaces and tabs. Empty fragments are skipped, so
/// blanks after `#!`, repeated blanks and trailing blanks are accepted.
///
/// # Errors
///
/// Returns `EINVAL` unless exactly one interpreter path is given (interpreter
/// arguments are not supported). An error is also returned if a fragment cannot
/// be converted into a `CString`.
pub fn parse_shebang_line(file_header_buffer: &[u8]) -> Result<Option<Vec<CString>>> {
    if !file_header_buffer.starts_with(b"#!") {
        // the file is not a shebang
        return Ok(None);
    }
    // The position is at least 2 here, because the buffer starts with `#!`.
    let first_line_len = match file_header_buffer.iter().position(|&c| c == b'\n') {
        Some(len) => len,
        // no complete first line: the file is not treated as a shebang
        None => return Ok(None),
    };
    // skip #!
    let shebang_header = &file_header_buffer[2..first_line_len];
    let mut shebang_argv = Vec::new();
    for arg in shebang_header
        .split(|&c| c == b' ' || c == b'\t')
        .filter(|arg| !arg.is_empty())
    {
        shebang_argv.push(CString::new(arg)?);
    }
    if shebang_argv.len() != 1 {
        return_errno_with_message!(
            Errno::EINVAL,
            "One and only one intpreter program should be specified"
        );
    }
    Ok(Some(shebang_argv))
}

View File

@ -0,0 +1,119 @@
//! This implementation is from occlum
#![allow(non_camel_case_types)]
use crate::prelude::*;
use super::{process_vm::user_heap::USER_HEAP_SIZE_LIMIT, program_loader::elf::INIT_STACK_SIZE};
/// Table of resource limits with one slot per `ResourceType`.
pub struct ResourceLimits {
    /// Limits indexed by the numeric value of `ResourceType`.
    rlimits: [RLimit64; RLIMIT_COUNT],
}

impl ResourceLimits {
    /// Returns the limit stored for `resource`.
    pub fn get_rlimit(&self, resource: ResourceType) -> &RLimit64 {
        let slot = resource as usize;
        &self.rlimits[slot]
    }

    /// Returns a mutable reference to the limit stored for `resource`.
    pub fn get_rlimit_mut(&mut self, resource: ResourceType) -> &mut RLimit64 {
        let slot = resource as usize;
        &mut self.rlimits[slot]
    }
}
impl Default for ResourceLimits {
    /// Starts every resource from `RLimit64::default()` and then sets the current
    /// value of three of them: stack size (`INIT_STACK_SIZE`), data size
    /// (`USER_HEAP_SIZE_LIMIT`) and number of open files (1024).
    fn default() -> Self {
        let mut rlimits = [RLimit64::default(); RLIMIT_COUNT];
        rlimits[ResourceType::RLIMIT_STACK as usize] = RLimit64::new(INIT_STACK_SIZE as u64);
        rlimits[ResourceType::RLIMIT_DATA as usize] = RLimit64::new(USER_HEAP_SIZE_LIMIT as u64);
        rlimits[ResourceType::RLIMIT_NOFILE as usize] = RLimit64::new(1024);
        Self { rlimits }
    }
}
/// Identifier of a resource that has a limit.
///
/// The numeric value of each variant is used as the index into
/// `ResourceLimits::rlimits`, so the values must stay dense in `0..RLIMIT_COUNT`.
/// The `TryFrom<u32>` impl accepts exactly these values.
// NOTE(review): the names and numbers presumably mirror the RLIMIT_* constants
// of getrlimit(2) on Linux — confirm before adding or renumbering variants.
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
pub enum ResourceType {
    RLIMIT_CPU = 0,
    RLIMIT_FSIZE = 1,
    RLIMIT_DATA = 2,
    RLIMIT_STACK = 3,
    RLIMIT_CORE = 4,
    RLIMIT_RSS = 5,
    RLIMIT_NPROC = 6,
    RLIMIT_NOFILE = 7,
    RLIMIT_MEMLOCK = 8,
    RLIMIT_AS = 9,
    RLIMIT_LOCKS = 10,
    RLIMIT_SIGPENDING = 11,
    RLIMIT_MSGQUEUE = 12,
    RLIMIT_NICE = 13,
    RLIMIT_RTPRIO = 14,
    RLIMIT_RTTIME = 15,
}
impl TryFrom<u32> for ResourceType {
type Error = Error;
fn try_from(value: u32) -> Result<Self> {
match value {
0 => Ok(ResourceType::RLIMIT_CPU),
1 => Ok(ResourceType::RLIMIT_FSIZE),
2 => Ok(ResourceType::RLIMIT_DATA),
3 => Ok(ResourceType::RLIMIT_STACK),
4 => Ok(ResourceType::RLIMIT_CORE),
5 => Ok(ResourceType::RLIMIT_RSS),
6 => Ok(ResourceType::RLIMIT_NPROC),
7 => Ok(ResourceType::RLIMIT_NOFILE),
8 => Ok(ResourceType::RLIMIT_MEMLOCK),
9 => Ok(ResourceType::RLIMIT_AS),
10 => Ok(ResourceType::RLIMIT_LOCKS),
11 => Ok(ResourceType::RLIMIT_SIGPENDING),
12 => Ok(ResourceType::RLIMIT_MSGQUEUE),
13 => Ok(ResourceType::RLIMIT_NICE),
14 => Ok(ResourceType::RLIMIT_RTPRIO),
15 => Ok(ResourceType::RLIMIT_RTTIME),
_ => return_errno_with_message!(Errno::EINVAL, "invalid resource type"),
}
}
}
pub const RLIMIT_COUNT: usize = 16;
/// A resource limit made of a current value and a maximum value.
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct RLimit64 {
    /// Current value of the limit.
    cur: u64,
    /// Maximum value of the limit.
    max: u64,
}

impl RLimit64 {
    /// Creates a limit whose current value is `cur` and whose maximum is `u64::MAX`.
    pub fn new(cur: u64) -> Self {
        RLimit64 { cur, max: u64::MAX }
    }

    /// Current value of the limit.
    pub fn get_cur(&self) -> u64 {
        self.cur
    }

    /// Maximum value of the limit.
    pub fn get_max(&self) -> u64 {
        self.max
    }
}

impl Default for RLimit64 {
    /// Both the current and the maximum value are `u64::MAX`.
    fn default() -> Self {
        RLimit64 {
            cur: u64::MAX,
            max: u64::MAX,
        }
    }
}

View File

@ -0,0 +1,196 @@
#![allow(non_camel_case_types)]
use core::mem;
use jinux_frame::cpu::GeneralRegs;
use jinux_util::{read_union_fields, union_read_ptr::UnionReadPtr};
use crate::{prelude::*, process::Pid};
use super::{sig_num::SigNum, signals::user::Uid};
pub type sigset_t = u64;
// FIXME: this type should be put at suitable place
pub type clock_t = i64;
/// C-layout description of a signal action.
///
/// `SigAction::try_from` converts it into a `SigAction` and `SigAction::to_c`
/// converts back.
// NOTE(review): presumably matches the layout user space passes to rt_sigaction —
// confirm the field order and widths against the target ABI.
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
pub struct sigaction_t {
    /// Handler address, or one of the special values `SIG_DFL` / `SIG_IGN`.
    pub handler_ptr: Vaddr,
    /// Raw `SigActionFlags` bits.
    pub flags: u32,
    /// Address of the restorer (trampoline) code.
    pub restorer_ptr: Vaddr,
    /// Signal mask associated with the action.
    pub mask: sigset_t,
}
/// C-layout signal information record.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub struct siginfo_t {
    /// Signal number.
    pub si_signo: i32,
    /// Always initialized to 0 by `new`.
    pub si_errno: i32,
    /// Signal code.
    pub si_code: i32,
    /// Explicit padding field; initialized to 0 by `new`.
    _padding: i32,
    /// Signal-specific payload. This is a union (see the occlum definition) whose
    /// `bytes` member spans the whole payload; `new` zero-fills it through that member.
    siginfo_fields: siginfo_fields_t,
}

impl siginfo_t {
    /// Creates a record for signal `num` with code `code`; every other byte is zero.
    pub fn new(num: SigNum, code: i32) -> Self {
        siginfo_t {
            si_signo: num.as_u8() as i32,
            si_errno: 0,
            si_code: code,
            _padding: 0,
            siginfo_fields: siginfo_fields_t::zero_fields(),
        }
    }

    /// Stores `si_addr` in the `sigfault.addr` member of the payload union.
    pub fn set_si_addr(&mut self, si_addr: Vaddr) {
        self.siginfo_fields.sigfault.addr = si_addr;
    }

    /// Reads the `sigfault.addr` member of the payload union.
    pub fn si_addr(&self) -> Vaddr {
        read_union_fields!(self.siginfo_fields.sigfault.addr)
    }
}
/// Payload of `siginfo_t`: different views over the same bytes.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_fields_t {
    /// Raw view: 128 bytes minus the four `i32` fields that precede the payload.
    bytes: [u8; 128 - mem::size_of::<i32>() * 4],
    /// View defined by `siginfo_common_t`.
    common: siginfo_common_t,
    /// View defined by `siginfo_sigfault_t`.
    sigfault: siginfo_sigfault_t,
}

impl siginfo_fields_t {
    /// Returns a payload with every byte of the `bytes` view set to zero.
    fn zero_fields() -> Self {
        Self {
            bytes: [0; 128 - mem::size_of::<i32>() * 4],
        }
    }
}
/// `common` view of the siginfo payload.
// NOTE(review): this is declared as a union, so `first` and `second` overlap —
// confirm that this is intended rather than a struct with two consecutive parts.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_t {
    first: siginfo_common_first_t,
    second: siginfo_common_second_t,
}

/// Either a pid/uid pair or timer information.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_first_t {
    piduid: siginfo_piduid_t,
    timer: siginfo_timer_t,
}

/// A process id together with a user id.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_piduid_t {
    pid: Pid,
    uid: Uid,
}

/// Timer id and overrun count.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_timer_t {
    timerid: i32,
    overrun: i32,
}

/// Either a signal value or child-status information.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_common_second_t {
    value: sigval_t,
    sigchild: siginfo_sigchild_t,
}

/// A signal value: an integer or a pointer-sized address.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
pub union sigval_t {
    sigval_int: i32,
    sigval_ptr: Vaddr, //*mut c_void
}

/// Child status information.
// NOTE(review): declared as a union, so `status`, `utime` and `stime` share the
// same storage — confirm this is intended before relying on more than one field.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigchild_t {
    status: i32,
    utime: clock_t,
    stime: clock_t,
}
/// `sigfault` view of the siginfo payload.
/// `siginfo_t::set_si_addr` and `siginfo_t::si_addr` access its `addr` field.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
struct siginfo_sigfault_t {
    addr: Vaddr, //*const c_void
    addr_lsb: i16,
    first: siginfo_sigfault_first_t,
}

/// Either address bounds or a protection key.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_sigfault_first_t {
    addr_bnd: siginfo_addr_bnd_t,
    pkey: u32,
}

/// Address bounds.
// NOTE(review): declared as a union, so `lower` and `upper` share the same
// storage — confirm this is intended before storing both bounds.
#[derive(Clone, Copy, Pod)]
#[repr(C)]
union siginfo_addr_bnd_t {
    lower: Vaddr, // *const c_void
    upper: Vaddr, // *const c_void,
}
/// C-layout user context that the signal delivery code writes onto the user stack
/// before a user signal handler runs.
#[derive(Clone, Copy, Debug, Pod)]
#[repr(C)]
pub struct ucontext_t {
    pub uc_flags: u64,
    /// Address of the previously stored ucontext, or 0 if there is none.
    pub uc_link: Vaddr, // *mut ucontext_t
    pub uc_stack: stack_t,
    /// Saved machine context; signal delivery fills in the general registers.
    pub uc_mcontext: mcontext_t,
    /// Signal mask recorded when the handler is set up.
    pub uc_sigmask: sigset_t,
    /// Reserved space for floating-point state (512 bytes).
    pub fpregs: [u8; 64 * 8], //fxsave structure
}
impl Default for ucontext_t {
fn default() -> Self {
Self {
uc_flags: Default::default(),
uc_link: Default::default(),
uc_stack: Default::default(),
uc_mcontext: Default::default(),
uc_sigmask: Default::default(),
fpregs: [0u8; 64 * 8],
}
}
}
/// `stack_t` is another name for `sigaltstack_t`.
pub type stack_t = sigaltstack_t;

/// C-layout description of a signal stack.
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct sigaltstack_t {
    /// Address of the stack.
    pub ss_sp: Vaddr, // *mut c_void
    /// Stack flags.
    pub ss_flags: i32,
    /// Size of the stack.
    pub ss_size: usize,
}
/// Machine context stored inside `ucontext_t`.
///
/// Only `inner` carries data at the moment; the remaining fields reserve space
/// for members that are not filled in yet (see the TODOs).
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct mcontext_t {
    /// Saved CPU state.
    pub inner: SignalCpuContext,
    // TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2
    _unused0: [u64; 5],
    // TODO: this field should be `fpregs: fpregset_t,`
    _unused1: usize,
    _reserved: [u64; 8],
}
/// CPU state saved while a user signal handler runs.
#[derive(Debug, Clone, Copy, Pod, Default)]
#[repr(C)]
pub struct SignalCpuContext {
    /// General-purpose registers; signal delivery copies them from the user context.
    pub gp_regs: GeneralRegs,
    // NOTE(review): presumably a flag telling whether `fpregs` points to heap memory —
    // confirm against the code that fills it in.
    pub fpregs_on_heap: u64,
    /// Address of the floating-point register state.
    pub fpregs: Vaddr, // *mut FpRegs,
}

View File

@ -0,0 +1,103 @@
// Standard signals occupy the numbers 1..=31.
/// Smallest standard signal number.
pub(super) const MIN_STD_SIG_NUM: u8 = 1;
/// Largest standard signal number.
pub(super) const MAX_STD_SIG_NUM: u8 = 31; // inclusive
// Real-time signals occupy the numbers 32..=64.
/// Smallest real-time signal number.
pub(super) const MIN_RT_SIG_NUM: u8 = 32;
/// Largest real-time signal number.
pub(super) const MAX_RT_SIG_NUM: u8 = 64; // inclusive
// Signal counts; they must agree with the ranges above.
/// Number of standard signals (1..=31).
pub(super) const COUNT_STD_SIGS: usize = 31;
/// Number of real-time signals (32..=64).
pub(super) const COUNT_RT_SIGS: usize = 33;
/// Number of all signals (1..=64).
pub(super) const COUNT_ALL_SIGS: usize = 64;
/// Special handler value: use the default action of the signal.
pub const SIG_DFL: usize = 0;
/// Special handler value: ignore the signal.
pub const SIG_IGN: usize = 1;
use super::sig_num::SigNum;
// Expands every `NAME = number` pair of a comma-separated list (with a trailing
// comma) into `pub const NAME: SigNum = SigNum::from_u8(number);`.
macro_rules! define_std_signums {
    ( $( $name: ident = $num: expr ),+, ) => {
        $(
            pub const $name : SigNum = SigNum::from_u8($num);
        )*
    }
}
// The standard signals (numbers 1..=31), each declared as a `SigNum` constant.
define_std_signums! {
    SIGHUP = 1, // Hangup detected on controlling terminal or death of controlling process
    SIGINT = 2, // Interrupt from keyboard
    SIGQUIT = 3, // Quit from keyboard
    SIGILL = 4, // Illegal Instruction
    SIGTRAP = 5, // Trace/breakpoint trap
    SIGABRT = 6, // Abort signal from abort(3)
    SIGBUS = 7, // Bus error (bad memory access)
    SIGFPE = 8, // Floating-point exception
    SIGKILL = 9, // Kill signal
    SIGUSR1 = 10, // User-defined signal 1
    SIGSEGV = 11, // Invalid memory reference
    SIGUSR2 = 12, // User-defined signal 2
    SIGPIPE = 13, // Broken pipe: write to pipe with no readers; see pipe(7)
    SIGALRM = 14, // Timer signal from alarm(2)
    SIGTERM = 15, // Termination signal
    SIGSTKFLT = 16, // Stack fault on coprocessor (unused)
    SIGCHLD = 17, // Child stopped or terminated
    SIGCONT = 18, // Continue if stopped
    SIGSTOP = 19, // Stop process
    SIGTSTP = 20, // Stop typed at terminal
    SIGTTIN = 21, // Terminal input for background process
    SIGTTOU = 22, // Terminal output for background process
    SIGURG = 23, // Urgent condition on socket (4.2BSD)
    SIGXCPU = 24, // CPU time limit exceeded (4.2BSD); see setrlimit(2)
    SIGXFSZ = 25, // File size limit exceeded (4.2BSD); see setrlimit(2)
    SIGVTALRM = 26, // Virtual alarm clock (4.2BSD)
    SIGPROF = 27, // Profiling timer expired
    SIGWINCH = 28, // Window resize signal (4.3BSD, Sun)
    SIGIO = 29, // I/O now possible (4.2BSD)
    SIGPWR = 30, // Power failure (System V)
    SIGSYS = 31, // Bad system call (SVr4); see also seccomp(2)
}
// Signal code values, grouped by name prefix. They are `i32` like the `code`
// argument of `siginfo_t::new`.
// NOTE(review): the values presumably mirror the Linux definitions — confirm
// against the kernel headers before changing any of them.

// SI_*: codes that are not tied to one particular signal.
pub const SI_ASYNCNL: i32 = -60;
pub const SI_TKILL: i32 = -6;
pub const SI_SIGIO: i32 = -5;
pub const SI_ASYNCIO: i32 = -4;
pub const SI_MESGQ: i32 = -3;
pub const SI_TIMER: i32 = -2;
pub const SI_QUEUE: i32 = -1;
pub const SI_USER: i32 = 0;
pub const SI_KERNEL: i32 = 128;
// FPE_*: codes for SIGFPE.
pub const FPE_INTDIV: i32 = 1;
pub const FPE_INTOVF: i32 = 2;
pub const FPE_FLTDIV: i32 = 3;
pub const FPE_FLTOVF: i32 = 4;
pub const FPE_FLTUND: i32 = 5;
pub const FPE_FLTRES: i32 = 6;
pub const FPE_FLTINV: i32 = 7;
pub const FPE_FLTSUB: i32 = 8;
// ILL_*: codes for SIGILL.
pub const ILL_ILLOPC: i32 = 1;
pub const ILL_ILLOPN: i32 = 2;
pub const ILL_ILLADR: i32 = 3;
pub const ILL_ILLTRP: i32 = 4;
pub const ILL_PRVOPC: i32 = 5;
pub const ILL_PRVREG: i32 = 6;
pub const ILL_COPROC: i32 = 7;
pub const ILL_BADSTK: i32 = 8;
// SEGV_*: codes for SIGSEGV.
pub const SEGV_MAPERR: i32 = 1;
pub const SEGV_ACCERR: i32 = 2;
pub const SEGV_BNDERR: i32 = 3;
pub const SEGV_PKUERR: i32 = 4;
// BUS_*: codes for SIGBUS.
pub const BUS_ADRALN: i32 = 1;
pub const BUS_ADRERR: i32 = 2;
pub const BUS_OBJERR: i32 = 3;
pub const BUS_MCEERR_AR: i32 = 4;
pub const BUS_MCEERR_AO: i32 = 5;
// CLD_*: codes for SIGCHLD.
pub const CLD_EXITED: i32 = 1;
pub const CLD_KILLED: i32 = 2;
pub const CLD_DUMPED: i32 = 3;
pub const CLD_TRAPPED: i32 = 4;
pub const CLD_STOPPED: i32 = 5;
pub const CLD_CONTINUED: i32 = 6;

View File

@ -0,0 +1,209 @@
pub mod c_types;
pub mod constants;
pub mod sig_action;
pub mod sig_disposition;
pub mod sig_mask;
pub mod sig_num;
pub mod sig_queues;
pub mod signals;
use core::mem;
use align_ext::AlignExt;
use jinux_frame::{cpu::UserContext, task::Task};
use self::c_types::siginfo_t;
use self::sig_mask::SigMask;
use self::sig_num::SigNum;
use crate::current_thread;
use crate::process::posix_thread::posix_thread_ext::PosixThreadExt;
use crate::process::signal::c_types::ucontext_t;
use crate::process::signal::sig_action::SigActionFlags;
use crate::util::{write_bytes_to_user, write_val_to_user};
use crate::{
prelude::*,
process::signal::sig_action::{SigAction, SigDefaultAction},
};
/// Handles at most one pending signal for the current thread / process.
///
/// A signal queued for the current thread takes priority over one queued for the
/// current process; signals blocked by the thread's signal mask are not dequeued.
/// Depending on the signal disposition, the signal is ignored, delivered to a
/// user handler (by rewriting `context`), or handled by its default action.
///
/// The signal queues are only locked while a signal is dequeued, so no queue lock
/// is held while the handler frame is set up or while the task exits.
///
/// # Errors
///
/// Propagates errors from setting up the user handler frame.
pub fn handle_pending_signal(context: &mut UserContext) -> Result<()> {
    let current = current!();
    let current_thread = current_thread!();
    let posix_thread = current_thread.as_posix_thread().unwrap();
    let sig_mask = posix_thread.sig_mask().lock().clone();

    // We first deal with signal in current thread, then signal in current process.
    // Each lock guard is a temporary that is released right after the dequeue.
    let signal = {
        let thread_signal = posix_thread.sig_queues().lock().dequeue(&sig_mask);
        thread_signal.or_else(|| current.sig_queues().lock().dequeue(&sig_mask))
    };
    let signal = match signal {
        Some(signal) => signal,
        None => return Ok(()),
    };

    let sig_num = signal.num();
    trace!("sig_num = {:?}, sig_name = {}", sig_num, sig_num.sig_name());
    let sig_action = current.sig_dispositions().lock().get(sig_num);
    trace!("sig action: {:x?}", sig_action);
    match sig_action {
        SigAction::Ign => {
            trace!("Ignore signal {:?}", sig_num);
        }
        SigAction::User {
            handler_addr,
            flags,
            restorer_addr,
            mask,
        } => handle_user_signal(
            sig_num,
            handler_addr,
            flags,
            restorer_addr,
            mask,
            context,
            signal.to_info(),
        )?,
        SigAction::Dfl => {
            let sig_default_action = SigDefaultAction::from_signum(sig_num);
            trace!("sig_default_action: {:?}", sig_default_action);
            match sig_default_action {
                SigDefaultAction::Core | SigDefaultAction::Term => {
                    warn!(
                        "{:?}: terminating on signal {}",
                        current.executable_path().read(),
                        sig_num.sig_name()
                    );
                    // FIXME: How to set correct status if process is terminated
                    current.exit_group(1);
                    // We should exit current here, since we cannot restore a valid status from trap now.
                    Task::current().exit();
                }
                SigDefaultAction::Ign => {}
                SigDefaultAction::Stop => {
                    let mut status = current_thread.status().lock();
                    if status.is_running() {
                        status.set_stopped();
                    }
                    drop(status);
                }
                SigDefaultAction::Cont => {
                    let mut status = current_thread.status().lock();
                    if status.is_stopped() {
                        status.set_running();
                    }
                    drop(status);
                }
            }
        }
    }
    Ok(())
}
/// Prepares the user context so that the thread resumes in a user signal handler.
///
/// The function
/// 1. blocks `mask` (plus the signal itself unless `SA_NODEFER` is set) in the
///    thread's signal mask,
/// 2. pushes a `siginfo_t` and a `ucontext_t` (holding the current general
///    registers) onto the user stack,
/// 3. pushes the return address of the handler: `restorer_addr` if `SA_RESTORER`
///    is set, otherwise the address of a small trampoline that is also written
///    to the stack and invokes rt_sigreturn,
/// 4. points `rip` at the handler and sets up its arguments in `rdi`/`rsi`/`rdx`.
///
/// # Panics
///
/// Panics if `flags` contains a flag reported by `contains_unsupported_flag`.
///
/// # Errors
///
/// Propagates errors from writing to user memory.
pub fn handle_user_signal(
    sig_num: SigNum,
    handler_addr: Vaddr,
    flags: SigActionFlags,
    restorer_addr: Vaddr,
    mut mask: SigMask,
    context: &mut UserContext,
    sig_info: siginfo_t,
) -> Result<()> {
    debug!("sig_num = {:?}, signame = {}", sig_num, sig_num.sig_name());
    debug!("handler_addr = 0x{:x}", handler_addr);
    debug!("flags = {:?}", flags);
    debug!("restorer_addr = 0x{:x}", restorer_addr);
    // FIXME: How to respect flags?
    if flags.contains_unsupported_flag() {
        panic!("Unsupported Signal flags");
    }
    if !flags.contains(SigActionFlags::SA_NODEFER) {
        // add current signal to mask
        let current_mask = SigMask::from(sig_num);
        mask.block(current_mask.as_u64());
    }
    let current_thread = current_thread!();
    let posix_thread = current_thread.as_posix_thread().unwrap();
    // block signals in sigmask when running signal handler
    posix_thread.sig_mask().lock().block(mask.as_u64());
    // Set up signal stack in user stack,
    // to avoid corrupting user stack, we minus 128 first.
    // NOTE(review): 128 bytes is presumably the x86-64 red zone — confirm.
    let mut user_rsp = context.rsp() as u64;
    user_rsp = user_rsp - 128;
    // 1. write siginfo_t
    user_rsp = user_rsp - mem::size_of::<siginfo_t>() as u64;
    write_val_to_user(user_rsp as _, &sig_info)?;
    let siginfo_addr = user_rsp;
    // 2. write ucontext_t (placed at a 16-byte aligned address).
    user_rsp = alloc_aligned_in_user_stack(user_rsp, mem::size_of::<ucontext_t>(), 16)?;
    let mut ucontext = ucontext_t::default();
    // The recorded mask is the set of signals blocked above for this handler.
    ucontext.uc_sigmask = mask.as_u64();
    ucontext.uc_mcontext.inner.gp_regs = *context.general_regs();
    // Link to the previously stored ucontext (if any), so that contexts form a chain.
    let mut sig_context = posix_thread.sig_context().lock();
    if let Some(sig_context_addr) = *sig_context {
        ucontext.uc_link = sig_context_addr;
    } else {
        ucontext.uc_link = 0;
    }
    // TODO: store fp regs in ucontext
    write_val_to_user(user_rsp as _, &ucontext)?;
    let ucontext_addr = user_rsp;
    // Store the ucontext addr in sig context of current thread.
    *sig_context = Some(ucontext_addr as Vaddr);
    // 3. Set the address of the trampoline code.
    if flags.contains(SigActionFlags::SA_RESTORER) {
        // If contains SA_RESTORER flag, trampoline code is provided by libc in restorer_addr.
        // We just store restorer_addr on user stack, so that returning from the handler
        // jumps to the trampoline code.
        user_rsp = write_u64_to_user_stack(user_rsp, restorer_addr as u64)?;
        trace!("After set restorer addr: user_rsp = 0x{:x}", user_rsp);
    } else {
        // Otherwise we create a trampoline.
        // FIXME: This may cause problems if we read old_context from rsp.
        // NOTE(review): on this path the final rsp is 16-byte aligned (ucontext - 8 - 8),
        // whereas the SA_RESTORER path leaves it at 16n + 8 — confirm which alignment
        // the handler expects on entry.
        const TRAMPOLINE: &[u8] = &[
            0xb8, 0x0f, 0x00, 0x00, 0x00, // mov eax, 15(syscall number of rt_sigreturn)
            0x0f, 0x05, // syscall (call rt_sigreturn)
            0x90, // nop (for alignment)
        ];
        user_rsp = user_rsp - TRAMPOLINE.len() as u64;
        let trampoline_rip = user_rsp;
        write_bytes_to_user(user_rsp as Vaddr, TRAMPOLINE)?;
        user_rsp = write_u64_to_user_stack(user_rsp, trampoline_rip)?;
    }
    // 4. Set correct register values
    context.set_rip(handler_addr as _);
    context.set_rsp(user_rsp as usize);
    // parameters of signal handler
    context.set_rdi(sig_num.as_u8() as usize); // signal number
    if flags.contains(SigActionFlags::SA_SIGINFO) {
        context.set_rsi(siginfo_addr as usize); // siginfo_t* siginfo
        context.set_rdx(ucontext_addr as usize); // void* ctx
    } else {
        context.set_rsi(0);
        context.set_rdx(0);
    }
    Ok(())
}
/// Pushes one `u64` onto the user stack.
///
/// The value is written 8 bytes below `rsp`, and the new (lowered) stack pointer
/// is returned.
fn write_u64_to_user_stack(rsp: u64, value: u64) -> Result<u64> {
    let lowered_rsp = rsp - 8;
    write_val_to_user(lowered_rsp as Vaddr, &value)?;
    Ok(lowered_rsp)
}
/// Reserves `size` bytes below `rsp` on the user stack.
///
/// Returns the start address of the reserved area, aligned down to `align`.
/// Nothing is written to memory. Fails with `EINVAL` if `align` is not a power
/// of two.
fn alloc_aligned_in_user_stack(rsp: u64, size: usize, align: usize) -> Result<u64> {
    if !align.is_power_of_two() {
        return_errno_with_message!(Errno::EINVAL, "align must be power of two");
    }
    let unaligned_start = rsp - size as u64;
    Ok(unaligned_start.align_down(align as u64))
}

View File

@ -0,0 +1,156 @@
use super::{c_types::sigaction_t, constants::*, sig_mask::SigMask, sig_num::SigNum};
use crate::prelude::*;
use bitflags::bitflags;
/// What to do when a signal is delivered.
///
/// Converted from and to the C representation `sigaction_t` via `TryFrom` and `to_c`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SigAction {
    Dfl, // Default action
    Ign, // Ignore this signal
    User {
        // User-given handler
        // Address of the handler function.
        handler_addr: usize,
        // Flags given for this action.
        flags: SigActionFlags,
        // Address of the restorer (trampoline) code.
        restorer_addr: usize,
        // Signal mask associated with this action.
        mask: SigMask,
    },
}
impl Default for SigAction {
fn default() -> Self {
SigAction::Dfl
}
}
impl TryFrom<sigaction_t> for SigAction {
type Error = Error;
fn try_from(input: sigaction_t) -> Result<Self> {
let action = match input.handler_ptr {
SIG_DFL => SigAction::Dfl,
SIG_IGN => SigAction::Ign,
_ => {
let flags = SigActionFlags::from_bits_truncate(input.flags);
let mask = SigMask::from(input.mask);
SigAction::User {
handler_addr: input.handler_ptr,
flags,
restorer_addr: input.restorer_ptr,
mask,
}
}
};
Ok(action)
}
}
impl SigAction {
    /// Converts this action into its C representation.
    ///
    /// `Dfl` and `Ign` yield `SIG_DFL` / `SIG_IGN` as the handler with all
    /// other fields zeroed; `User` copies its fields over.
    pub fn to_c(&self) -> sigaction_t {
        let (handler_ptr, flags, restorer_ptr, mask) = match *self {
            SigAction::Dfl => (SIG_DFL, 0, 0, 0),
            SigAction::Ign => (SIG_IGN, 0, 0, 0),
            SigAction::User {
                handler_addr,
                flags,
                restorer_addr,
                mask,
            } => (handler_addr, flags.to_u32(), restorer_addr, mask.as_u64()),
        };
        sigaction_t {
            handler_ptr,
            flags,
            restorer_ptr,
            mask,
        }
    }
}
bitflags! {
    /// Flags attached to a user-installed signal action.
    ///
    /// `contains_unsupported_flag` reports `SA_NOCLDSTOP`, `SA_NOCLDWAIT`,
    /// `SA_ONSTACK` and `SA_RESETHAND` as unsupported, and the `TryFrom<u32>`
    /// conversion warns that `SA_RESTART` is not supported.
    // NOTE(review): names and values appear to mirror the Linux x86-64 `SA_*` constants — confirm.
    pub struct SigActionFlags: u32 {
        const SA_NOCLDSTOP  = 1;
        const SA_NOCLDWAIT  = 2;
        /// When set, the handler is also given `siginfo_t*` and context pointers.
        const SA_SIGINFO    = 4;
        const SA_ONSTACK    = 0x08000000;
        const SA_RESTART    = 0x10000000;
        const SA_NODEFER    = 0x40000000;
        const SA_RESETHAND  = 0x80000000;
        const SA_RESTORER   = 0x04000000;
    }
}
impl TryFrom<u32> for SigActionFlags {
type Error = Error;
fn try_from(bits: u32) -> Result<Self> {
let flags = SigActionFlags::from_bits(bits)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid sig action flag"))?;
if flags.contains(SigActionFlags::SA_RESTART) {
warn!("SA_RESTART is not supported");
}
Ok(flags)
}
}
impl SigActionFlags {
    /// Returns the raw bit representation of the flags.
    pub fn to_u32(&self) -> u32 {
        self.bits()
    }

    /// Returns `true` if any flag that is not supported is set.
    pub fn contains_unsupported_flag(&self) -> bool {
        let unsupported = SigActionFlags::SA_NOCLDSTOP
            | SigActionFlags::SA_NOCLDWAIT
            | SigActionFlags::SA_ONSTACK
            | SigActionFlags::SA_RESETHAND;
        self.intersects(unsupported)
    }
}
/// The default action to signals
///
/// `from_signum` maps a signal number to one of these variants.
#[derive(Debug, Copy, Clone)]
pub enum SigDefaultAction {
    /// Default action is to terminate the process.
    Term,
    /// Default action is to ignore the signal.
    Ign,
    /// Default action is to terminate the process and dump core (see core(5)).
    Core,
    /// Default action is to stop the process.
    Stop,
    /// Default action is to continue the process if it is currently stopped.
    Cont,
}
impl SigDefaultAction {
pub fn from_signum(num: SigNum) -> SigDefaultAction {
match num {
SIGABRT | // = SIGIOT
SIGBUS |
SIGFPE |
SIGILL |
SIGQUIT |
SIGSEGV |
SIGSYS | // = SIGUNUSED
SIGTRAP |
SIGXCPU |
SIGXFSZ
=> SigDefaultAction::Core,
SIGCHLD |
SIGURG |
SIGWINCH
=> SigDefaultAction::Ign,
SIGCONT
=> SigDefaultAction::Cont,
SIGSTOP |
SIGTSTP |
SIGTTIN |
SIGTTOU
=> SigDefaultAction::Stop,
_
=> SigDefaultAction::Term,
}
}
}

View File

@ -0,0 +1,49 @@
use super::{constants::*, sig_action::SigAction, sig_num::SigNum};
/// Table holding the `SigAction` registered for every signal number.
#[derive(Copy, Clone)]
pub struct SigDispositions {
    // SigNum -> SigAction
    // Indexed by `signal number - MIN_STD_SIG_NUM` (see `num_to_idx`).
    map: [SigAction; COUNT_ALL_SIGS],
}
impl SigDispositions {
    /// Creates a table in which every signal has the default action.
    pub fn new() -> Self {
        let map = [SigAction::default(); COUNT_ALL_SIGS];
        Self { map }
    }

    /// Returns the action registered for `num`.
    pub fn get(&self, num: SigNum) -> SigAction {
        self.map[Self::num_to_idx(num)]
    }

    /// Registers `sa` as the action for `num`.
    pub fn set(&mut self, num: SigNum, sa: SigAction) {
        self.map[Self::num_to_idx(num)] = sa;
    }

    /// Resets the action for `num` to the default one.
    pub fn set_default(&mut self, num: SigNum) {
        self.map[Self::num_to_idx(num)] = SigAction::Dfl;
    }

    /// man 7 signal:
    /// When execve, the handled signals are reset to the default; the dispositions of
    /// ignored signals are left unchanged.
    /// This function should be used when execve.
    pub fn inherit(&mut self) {
        for action in self.map.iter_mut() {
            if matches!(*action, SigAction::User { .. }) {
                *action = SigAction::Dfl;
            }
        }
    }

    /// Maps a signal number to its slot in `map`.
    fn num_to_idx(num: SigNum) -> usize {
        let offset = num.as_u8() - MIN_STD_SIG_NUM;
        offset as usize
    }
}

View File

@ -0,0 +1,77 @@
use super::{constants::MIN_STD_SIG_NUM, sig_num::SigNum};
/// A set of signals stored as a 64-bit bitmap.
///
/// The signal numbered `MIN_STD_SIG_NUM` occupies bit 0, the next one bit 1,
/// and so on.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub struct SigMask {
    bits: u64,
}

impl From<u64> for SigMask {
    /// Wraps a raw bitmap.
    fn from(bits: u64) -> Self {
        Self { bits }
    }
}

impl From<SigNum> for SigMask {
    /// Builds a mask containing only `sig_num`.
    fn from(sig_num: SigNum) -> Self {
        Self {
            bits: Self::bit_of(sig_num),
        }
    }
}

impl SigMask {
    /// Creates a mask containing no signal.
    pub fn new_empty() -> Self {
        Self { bits: 0 }
    }

    /// Creates a mask with all 64 bits set.
    pub fn new_full() -> Self {
        Self { bits: u64::MAX }
    }

    /// Returns the raw bitmap.
    pub const fn as_u64(&self) -> u64 {
        self.bits
    }

    /// Returns `true` if no bit is set.
    pub const fn empty(&self) -> bool {
        self.bits == 0
    }

    /// Returns `true` if all 64 bits are set.
    pub const fn full(&self) -> bool {
        self.bits == u64::MAX
    }

    /// Adds every signal of the raw bitmap `block_sets` to the mask.
    pub fn block(&mut self, block_sets: u64) {
        self.bits = self.bits | block_sets;
    }

    /// Removes every signal of the raw bitmap `unblock_sets` from the mask.
    pub fn unblock(&mut self, unblock_sets: u64) {
        self.bits = self.bits & !unblock_sets;
    }

    /// Replaces the whole mask with the raw bitmap `new_set`.
    pub fn set(&mut self, new_set: u64) {
        self.bits = new_set;
    }

    /// Returns how many bits are set in the mask.
    pub fn count(&self) -> usize {
        self.bits.count_ones() as usize
    }

    /// Returns `true` if `signum` is in the mask.
    pub fn contains(&self, signum: SigNum) -> bool {
        self.bits & Self::bit_of(signum) != 0
    }

    /// Maps a signal number to its bit position.
    fn num_to_idx(num: SigNum) -> usize {
        (num.as_u8() - MIN_STD_SIG_NUM) as usize
    }

    /// Returns the bitmap with only the bit of `signum` set.
    fn bit_of(signum: SigNum) -> u64 {
        1_u64 << Self::num_to_idx(signum)
    }

    /// Removes `signum` from the mask.
    pub fn remove_signal(&mut self, signum: SigNum) {
        self.bits &= !Self::bit_of(signum);
    }

    /// Adds `signum` to the mask.
    pub fn add_signal(&mut self, signum: SigNum) {
        self.bits |= Self::bit_of(signum);
    }
}

View File

@ -0,0 +1,77 @@
use super::constants::*;
use crate::prelude::*;
/// A validated signal number.
///
/// Both constructors (`try_from` and `from_u8`) only accept values in the
/// range `MIN_STD_SIG_NUM..=MAX_RT_SIG_NUM`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SigNum {
    // Raw signal number, as returned by `as_u8`.
    sig_num: u8,
}
impl TryFrom<u8> for SigNum {
    type Error = Error;

    /// Validates a raw signal number.
    ///
    /// Fails with `EINVAL` if `sig_num` lies outside
    /// `MIN_STD_SIG_NUM..=MAX_RT_SIG_NUM`.
    fn try_from(sig_num: u8) -> Result<Self> {
        if (MIN_STD_SIG_NUM..=MAX_RT_SIG_NUM).contains(&sig_num) {
            Ok(SigNum { sig_num })
        } else {
            return_errno_with_message!(Errno::EINVAL, "invalid signal number")
        }
    }
}
impl SigNum {
    /// Creates a signal number without returning an error.
    ///
    /// The caller must ensure `sig_num` is valid; otherwise this function
    /// panics. Use `try_from` to get an error instead of a panic.
    pub const fn from_u8(sig_num: u8) -> Self {
        if sig_num >= MIN_STD_SIG_NUM && sig_num <= MAX_RT_SIG_NUM {
            SigNum { sig_num }
        } else {
            panic!("invalid signal number")
        }
    }

    /// Returns the raw signal number.
    pub const fn as_u8(&self) -> u8 {
        self.sig_num
    }

    /// Returns `true` if the number does not exceed `MAX_STD_SIG_NUM`.
    pub fn is_std(&self) -> bool {
        MAX_STD_SIG_NUM >= self.sig_num
    }

    /// Returns `true` if the number is at least `MIN_RT_SIG_NUM`.
    pub fn is_real_time(&self) -> bool {
        MIN_RT_SIG_NUM <= self.sig_num
    }

    /// Returns the name of the signal.
    ///
    /// Every signal without an explicit entry is reported as
    /// `"Realtime Signal"`.
    pub const fn sig_name(&self) -> &'static str {
        let signal = *self;
        match signal {
            SIGHUP => "SIGHUP",
            SIGINT => "SIGINT",
            SIGQUIT => "SIGQUIT",
            SIGILL => "SIGILL",
            SIGTRAP => "SIGTRAP",
            SIGABRT => "SIGABRT",
            SIGBUS => "SIGBUS",
            SIGFPE => "SIGFPE",
            SIGKILL => "SIGKILL",
            SIGUSR1 => "SIGUSR1",
            SIGSEGV => "SIGSEGV",
            SIGUSR2 => "SIGUSR2",
            SIGPIPE => "SIGPIPE",
            SIGALRM => "SIGALRM",
            SIGTERM => "SIGTERM",
            SIGSTKFLT => "SIGSTKFLT",
            SIGCHLD => "SIGCHLD",
            SIGCONT => "SIGCONT",
            SIGSTOP => "SIGSTOP",
            SIGTSTP => "SIGTSTP",
            SIGTTIN => "SIGTTIN",
            SIGTTOU => "SIGTTOU",
            SIGURG => "SIGURG",
            SIGXCPU => "SIGXCPU",
            SIGXFSZ => "SIGXFSZ",
            SIGVTALRM => "SIGVTALRM",
            SIGPROF => "SIGPROF",
            SIGWINCH => "SIGWINCH",
            SIGIO => "SIGIO",
            SIGPWR => "SIGPWR",
            SIGSYS => "SIGSYS",
            _ => "Realtime Signal",
        }
    }
}

View File

@ -0,0 +1,143 @@
use super::constants::*;
use crate::prelude::*;
use super::sig_mask::SigMask;
use super::sig_num::SigNum;
use super::signals::Signal;
/// Pending signals, kept separately for standard and real-time signals.
pub struct SigQueues {
    // Total number of signals currently stored in both groups of queues.
    count: usize,
    // One slot per standard signal: at most one pending instance each.
    std_queues: Vec<Option<Box<dyn Signal>>>,
    // One FIFO queue per real-time signal.
    rt_queues: Vec<VecDeque<Box<dyn Signal>>>,
}
impl SigQueues {
pub fn new() -> Self {
let count = 0;
let std_queues = (0..COUNT_STD_SIGS).map(|_| None).collect();
let rt_queues = (0..COUNT_RT_SIGS).map(|_| Default::default()).collect();
// let notifier = Notifier::new();
SigQueues {
count,
std_queues,
rt_queues,
}
}
pub fn empty(&self) -> bool {
self.count == 0
}
pub fn enqueue(&mut self, signal: Box<dyn Signal>) {
let signum = signal.num();
if signum.is_std() {
// Standard signals
//
// From signal(7):
//
// Standard signals do not queue. If multiple instances of a standard
// signal are generated while that signal is blocked, then only one
// instance of the signal is marked as pending (and the signal will be
// delivered just once when it is unblocked). In the case where a
// standard signal is already pending, the siginfo_t structure (see
// sigaction(2)) associated with that signal is not overwritten on
// arrival of subsequent instances of the same signal. Thus, the
// process will receive the information associated with the first
// instance of the signal.
let queue = self.get_std_queue_mut(signum);
if queue.is_some() {
// If there is already a signal pending, just ignore all subsequent signals
return;
}
*queue = Some(signal);
self.count += 1;
} else {
// Real-time signals
let queue = self.get_rt_queue_mut(signum);
queue.push_back(signal);
self.count += 1;
}
// self.notifier.broadcast(&signum);
}
pub fn dequeue(&mut self, blocked: &SigMask) -> Option<Box<dyn Signal>> {
// Fast path for the common case of no pending signals
if self.empty() {
return None;
}
// Deliver standard signals.
//
// According to signal(7):
// If both standard and real-time signals are pending for a process,
// POSIX leaves it unspecified which is delivered first. Linux, like
// many other implementations, gives priority to standard signals in
// this case.
// POSIX leaves unspecified which to deliver first if there are multiple
// pending standard signals. So we are free to define our own. The
// principle is to give more urgent signals higher priority (like SIGKILL).
const ORDERED_STD_SIGS: [SigNum; COUNT_STD_SIGS] = [
SIGKILL, SIGTERM, SIGSTOP, SIGCONT, SIGSEGV, SIGILL, SIGHUP, SIGINT, SIGQUIT, SIGTRAP,
SIGABRT, SIGBUS, SIGFPE, SIGUSR1, SIGUSR2, SIGPIPE, SIGALRM, SIGSTKFLT, SIGCHLD,
SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
SIGIO, SIGPWR, SIGSYS,
];
for &signum in &ORDERED_STD_SIGS {
if blocked.contains(signum) {
continue;
}
let queue = self.get_std_queue_mut(signum);
let signal = queue.take();
if signal.is_some() {
self.count -= 1;
return signal;
}
}
// If no standard signals, then deliver real-time signals.
//
// According to signal (7):
// Real-time signals are delivered in a guaranteed order. Multiple
// real-time signals of the same type are delivered in the order
// they were sent. If different real-time signals are sent to a
// process, they are delivered starting with the lowest-numbered
// signal. (I.e., low-numbered signals have highest priority.)
for signum in MIN_RT_SIG_NUM..=MAX_RT_SIG_NUM {
let signum = SigNum::try_from(signum).unwrap();
if blocked.contains(signum) {
continue;
}
let queue = self.get_rt_queue_mut(signum);
let signal = queue.pop_front();
if signal.is_some() {
self.count -= 1;
return signal;
}
}
// There must be pending but blocked signals
None
}
fn get_std_queue_mut(&mut self, signum: SigNum) -> &mut Option<Box<dyn Signal>> {
debug_assert!(signum.is_std());
let idx = (signum.as_u8() - MIN_STD_SIG_NUM) as usize;
&mut self.std_queues[idx]
}
fn get_rt_queue_mut(&mut self, signum: SigNum) -> &mut VecDeque<Box<dyn Signal>> {
debug_assert!(signum.is_real_time());
let idx = (signum.as_u8() - MIN_RT_SIG_NUM) as usize;
&mut self.rt_queues[idx]
}
}
impl Default for SigQueues {
fn default() -> Self {
Self::new()
}
}

View File

@ -0,0 +1,59 @@
use jinux_frame::cpu::{CpuException, TrapInformation};
use jinux_frame::cpu::{
ALIGNMENT_CHECK, BOUND_RANGE_EXCEEDED, DIVIDE_BY_ZERO, GENERAL_PROTECTION_FAULT,
INVALID_OPCODE, PAGE_FAULT, SIMD_FLOATING_POINT_EXCEPTION, X87_FLOATING_POINT_EXCEPTION,
};
use crate::prelude::*;
use crate::process::signal::c_types::siginfo_t;
use crate::process::signal::constants::*;
use crate::process::signal::sig_num::SigNum;
use super::Signal;
/// A signal built from a CPU exception (see `FaultSignal::new`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct FaultSignal {
    // Signal number chosen for the exception.
    num: SigNum,
    // Signal code describing the cause (e.g. `FPE_INTDIV`, `SEGV_MAPERR`).
    code: i32,
    // Set only for page faults, where it holds the `cr2` value of the trap.
    addr: Option<u64>,
}
impl FaultSignal {
    /// Translates trap information of a CPU exception into a signal.
    ///
    /// # Panics
    ///
    /// Panics if the trap id is not a CPU exception, or if the exception has
    /// no signal mapped to it.
    pub fn new(trap_info: &TrapInformation) -> FaultSignal {
        debug!("Trap id: {}", trap_info.id);
        let exception = CpuException::to_cpu_exception(trap_info.id as u16).unwrap();
        match *exception {
            DIVIDE_BY_ZERO => FaultSignal {
                num: SIGFPE,
                code: FPE_INTDIV,
                addr: None,
            },
            X87_FLOATING_POINT_EXCEPTION | SIMD_FLOATING_POINT_EXCEPTION => FaultSignal {
                num: SIGFPE,
                code: FPE_FLTDIV,
                addr: None,
            },
            BOUND_RANGE_EXCEEDED => FaultSignal {
                num: SIGSEGV,
                code: SEGV_BNDERR,
                addr: None,
            },
            ALIGNMENT_CHECK => FaultSignal {
                num: SIGBUS,
                code: BUS_ADRALN,
                addr: None,
            },
            INVALID_OPCODE => FaultSignal {
                num: SIGILL,
                code: ILL_ILLOPC,
                addr: None,
            },
            GENERAL_PROTECTION_FAULT => FaultSignal {
                num: SIGBUS,
                code: BUS_ADRERR,
                addr: None,
            },
            PAGE_FAULT => {
                // Bit 0 of the error code selects between the two codes.
                const PF_ERR_FLAG_PRESENT: usize = 1usize << 0;
                let present_bit_set = (trap_info.err & PF_ERR_FLAG_PRESENT) != 0;
                FaultSignal {
                    num: SIGSEGV,
                    code: if present_bit_set {
                        SEGV_ACCERR
                    } else {
                        SEGV_MAPERR
                    },
                    addr: Some(trap_info.cr2 as u64),
                }
            }
            _ => panic!("Exception cannnot be a signal"),
        }
    }
}
impl Signal for FaultSignal {
    /// Returns the signal number chosen for the exception.
    fn num(&self) -> SigNum {
        self.num
    }

    /// Builds a `siginfo_t` from the signal number and code.
    fn to_info(&self) -> siginfo_t {
        let info = siginfo_t::new(self.num, self.code);
        // The fault address is not passed on yet:
        // info.set_si_addr(self.addr.unwrap_or_default() as *const c_void);
        info
    }
}

View File

@ -0,0 +1,25 @@
use crate::process::signal::{c_types::siginfo_t, constants::SI_KERNEL, sig_num::SigNum};
use super::Signal;
/// A signal whose `siginfo_t` carries the `SI_KERNEL` code.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct KernelSignal {
    // Number of the signal.
    num: SigNum,
}
impl KernelSignal {
pub const fn new(num: SigNum) -> Self {
Self { num }
}
}
impl Signal for KernelSignal {
    /// Returns the number of the signal.
    fn num(&self) -> SigNum {
        self.num
    }

    /// Builds a `siginfo_t` with the `SI_KERNEL` code.
    fn to_info(&self) -> siginfo_t {
        siginfo_t::new(self.num, SI_KERNEL)
    }
}

View File

@ -0,0 +1,14 @@
pub mod fault;
pub mod kernel;
pub mod user;
use core::fmt::Debug;
use super::{c_types::siginfo_t, sig_num::SigNum};
/// Common interface of all signal kinds.
///
/// Implementors live in the `fault`, `kernel` and `user` submodules. Every
/// implementor must be `Send`, `Sync` and `Debug`.
pub trait Signal: Send + Sync + Debug {
    /// Returns the number of the signal.
    fn num(&self) -> SigNum;
    /// Returns the siginfo_t that gives more details about a signal.
    fn to_info(&self) -> siginfo_t;
}

View File

@ -0,0 +1,73 @@
use crate::process::{
signal::{
c_types::siginfo_t,
constants::{SI_QUEUE, SI_TKILL, SI_USER},
sig_num::SigNum,
},
Pid,
};
use super::Signal;
/// Type of the user id stored in a `UserSignal`.
pub type Uid = usize;
/// A signal that carries a pid, a uid and the kind of request that produced it.
// NOTE(review): pid/uid presumably identify the sender — confirm against callers.
#[derive(Debug, Clone, Copy)]
pub struct UserSignal {
    // Number of the signal.
    num: SigNum,
    // Process id recorded with the signal.
    pid: Pid,
    // User id recorded with the signal.
    uid: Uid,
    // Selects the code reported in `siginfo_t`.
    kind: UserSignalKind,
}
/// The kind of a `UserSignal`; it selects the code stored in `siginfo_t`.
#[derive(Debug, Copy, Clone)]
pub enum UserSignalKind {
    /// Reported with the `SI_USER` code.
    Kill,
    /// Reported with the `SI_TKILL` code.
    Tkill,
    /// Reported with the `SI_QUEUE` code.
    Sigqueue,
}
impl UserSignal {
pub fn new(num: SigNum, kind: UserSignalKind, pid: Pid, uid: Uid) -> Self {
Self {
num,
kind,
pid,
uid,
}
}
pub fn pid(&self) -> Pid {
self.pid
}
pub fn uid(&self) -> Uid {
self.uid
}
pub fn kind(&self) -> UserSignalKind {
self.kind
}
}
impl Signal for UserSignal {
    /// Returns the number of the signal.
    fn num(&self) -> SigNum {
        self.num
    }

    /// Builds a `siginfo_t` whose code is derived from the signal kind.
    fn to_info(&self) -> siginfo_t {
        let code = match self.kind {
            UserSignalKind::Sigqueue => SI_QUEUE,
            UserSignalKind::Tkill => SI_TKILL,
            UserSignalKind::Kill => SI_USER,
        };
        // The pid, uid and queued value are not filled in yet:
        // info.set_si_pid(self.pid);
        // info.set_si_uid(self.uid);
        // if let UserSignalKind::Sigqueue(val) = self.kind {
        //     info.set_si_value(val);
        // }
        siginfo_t::new(self.num, code)
    }
}

View File

@ -0,0 +1,19 @@
//! The process status
/// The state a process can be in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessStatus {
    /// Can be scheduled to run
    Runnable,
    /// Exit while not reaped by parent
    Zombie,
}

impl ProcessStatus {
    /// Marks the process as a zombie.
    pub fn set_zombie(&mut self) {
        *self = Self::Zombie;
    }

    /// Returns `true` if the process is a zombie.
    pub fn is_zombie(&self) -> bool {
        matches!(self, Self::Zombie)
    }
}

View File

@ -0,0 +1,75 @@
use core::sync::atomic::Ordering;
use crate::prelude::*;
use super::{process_filter::ProcessFilter, ExitCode, Pid};
// The definition of WaitOptions is from Occlum
bitflags! {
    /// Options accepted when waiting for a child process.
    ///
    /// `WaitOptions::supported` accepts only `WNOHANG`; every other flag is
    /// reported as unsupported.
    pub struct WaitOptions: u32 {
        /// Return immediately instead of waiting when no child has exited.
        const WNOHANG = 0x1;
        // Note: the flags below are not reported as supported yet.
        const WSTOPPED = 0x2; // Same as WUNTRACED
        const WEXITED = 0x4;
        const WCONTINUED = 0x8;
        /// `wait_child_exit` leaves the zombie child unreaped when this is set.
        const WNOWAIT = 0x01000000;
    }
}
impl WaitOptions {
    /// Returns `true` if none of the unsupported flags is set.
    ///
    /// Every defined flag except `WNOHANG` counts as unsupported.
    pub fn supported(&self) -> bool {
        let mut unsupported_flags = WaitOptions::all();
        unsupported_flags.remove(WaitOptions::WNOHANG);
        !self.intersects(unsupported_flags)
    }
}
/// Waits until a child selected by `child_filter` has exited.
///
/// Returns the pid and exit code of the first zombie child found. The child is
/// reaped unless `WNOWAIT` is set. With `WNOHANG`, `(0, 0)` is returned
/// immediately when no matching child is a zombie. Fails with `NoChild` when no
/// child matches the filter.
pub fn wait_child_exit(
    child_filter: ProcessFilter,
    wait_options: WaitOptions,
) -> Result<(Pid, ExitCode)> {
    let current = current!();
    let waited = current.waiting_children().wait_until(|| {
        let children = current.children().lock();
        let candidates: Vec<_> = children
            .iter()
            .filter_map(|(_, child)| {
                let selected = match child_filter {
                    ProcessFilter::Any => true,
                    ProcessFilter::WithPid(pid) => child.pid() == pid,
                    ProcessFilter::WithPgid(pgid) => child.pgid() == pgid,
                };
                if selected {
                    Some(child.clone())
                } else {
                    None
                }
            })
            .collect();
        // The lock must be released here, since reaping a child acquires it again.
        drop(children);

        if candidates.is_empty() {
            return Some(Err(jinux_frame::Error::NoChild));
        }

        let first_zombie = candidates
            .iter()
            .find(|child| child.status().lock().is_zombie());
        match first_zombie {
            // A zombie child exists: report it right away.
            Some(zombie) => {
                let zombie_pid = zombie.pid();
                let observed_code = zombie.exit_code().load(Ordering::SeqCst);
                let exit_code = if wait_options.contains(WaitOptions::WNOWAIT) {
                    // Leave the child unreaped.
                    observed_code
                } else {
                    current.reap_zombie_child(zombie_pid)
                };
                Some(Ok((zombie_pid, exit_code)))
            }
            None if wait_options.contains(WaitOptions::WNOHANG) => Some(Ok((0, 0))),
            // Keep waiting.
            None => None,
        }
    })?;
    Ok(waited)
}

View File

@ -0,0 +1,51 @@
use bitflags::bitflags;
use typeflags::typeflags;
bitflags! {
    /// Value-based access rights.
    ///
    /// These access rights are provided to cover a wide range of use cases.
    /// The access rights' semantics and how they would restrict the behaviors
    /// of a capability are decided by the capability's designer.
    /// Here, we give some sensible semantics for each access right.
    ///
    /// Bits 4 through 6 are not assigned to any right; `SIGNAL` uses bit 7.
    pub struct Rights: u32 {
        /// Allows duplicating a capability.
        const DUP    = 1 << 0;
        /// Allows reading data from a data source (files, VM objects, etc.) or
        /// creating readable memory mappings.
        const READ   = 1 << 1;
        /// Allows writing data to a data sink (files, VM objects, etc.) or
        /// creating writable memory mappings.
        const WRITE  = 1 << 2;
        /// Allows creating executable memory mappings.
        const EXEC   = 1 << 3;
        /// Allows sending notifications or signals.
        const SIGNAL = 1 << 7;
    }
}
typeflags! {
    /// Type-based access rights.
    ///
    /// Similar to value-based access rights (`Rights`), but represented in
    /// types. Each type takes its value from the `Rights` constant of the
    /// same meaning, so both representations share one bit layout.
    pub trait TRights: u32 {
        /// Allows duplicating a capability.
        pub struct Dup = Rights::DUP.bits;
        /// Allows reading data from a data source (files, VM objects, etc.) or
        /// creating readable memory mappings.
        pub struct Read = Rights::READ.bits;
        /// Allows writing data to a data sink (files, VM objects, etc.) or
        /// creating writable memory mappings.
        pub struct Write = Rights::WRITE.bits;
        /// Allows creating executable memory mappings.
        pub struct Exec = Rights::EXEC.bits;
        /// Allows sending notifications or signals.
        pub struct Signal = Rights::SIGNAL.bits;
    }
}
/// The full set of access rights.
pub type Full = TRights![Dup, Read, Write, Exec, Signal];
/// The set containing only the `Read` right.
pub type ReadOp = TRights![Read];
/// The set containing only the `Write` right.
pub type WriteOp = TRights![Write];

View File

@ -0,0 +1,10 @@
use super::{constants::*, SyscallReturn};
use crate::{log_syscall_entry, prelude::*, syscall::SYS_ACCESS, util::read_cstring_from_user};
/// Handles the `access` system call.
///
/// The file name is read from user space and logged, then success is reported
/// unconditionally.
pub fn sys_access(filename_ptr: Vaddr, file_mode: u64) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_ACCESS);
    let path = read_cstring_from_user(filename_ptr, MAX_FILENAME_LEN)?;
    debug!("filename: {:?}, file_mode = {}", path, file_mode);
    // TODO: access currently performs no check and always reports success.
    let success = SyscallReturn::Return(0);
    Ok(success)
}

View File

@ -0,0 +1,53 @@
use jinux_frame::cpu::UserContext;
use crate::syscall::SYS_ARCH_PRCTL;
use crate::{log_syscall_entry, prelude::*};
use super::SyscallReturn;
/// Operation codes understood by `arch_prctl`.
///
/// Only the FS operations are carried out by `do_arch_prctl`; both GS
/// operations are rejected with `EINVAL`.
#[allow(non_camel_case_types)]
#[derive(Debug)]
pub enum ArchPrctlCode {
    /// Set the GS base (rejected by `do_arch_prctl`).
    ARCH_SET_GS = 0x1001,
    /// Set the FS base to the given address.
    ARCH_SET_FS = 0x1002,
    /// Get the current FS base.
    ARCH_GET_FS = 0x1003,
    /// Get the GS base (rejected by `do_arch_prctl`).
    ARCH_GET_GS = 0x1004,
}
impl TryFrom<u64> for ArchPrctlCode {
    type Error = Error;

    /// Parses a raw operation code, failing with `EINVAL` for unknown values.
    fn try_from(value: u64) -> Result<Self> {
        let code = match value {
            0x1001 => ArchPrctlCode::ARCH_SET_GS,
            0x1002 => ArchPrctlCode::ARCH_SET_FS,
            0x1003 => ArchPrctlCode::ARCH_GET_FS,
            0x1004 => ArchPrctlCode::ARCH_GET_GS,
            _ => return_errno_with_message!(Errno::EINVAL, "Unknown code for arch_prctl"),
        };
        Ok(code)
    }
}
/// Handles the `arch_prctl` system call.
///
/// `code` selects the operation (see `ArchPrctlCode`) and `addr` is its
/// argument. The result of `do_arch_prctl` becomes the return value.
///
/// # Errors
///
/// Fails with `EINVAL` when `code` is unknown, and forwards any error of
/// `do_arch_prctl` (for example for the GS operations) to the caller instead
/// of panicking.
pub fn sys_arch_prctl(code: u64, addr: u64, context: &mut UserContext) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_ARCH_PRCTL);
    let arch_prctl_code = ArchPrctlCode::try_from(code)?;
    debug!(
        "arch_prctl_code: {:?}, addr = 0x{:x}",
        arch_prctl_code, addr
    );
    // The operation is selected by user space, so a failure must become an
    // errno rather than a kernel panic.
    let res = do_arch_prctl(arch_prctl_code, addr, context)?;
    Ok(SyscallReturn::Return(res as _))
}
/// Carries out an `arch_prctl` operation on `context`.
///
/// `ARCH_SET_FS` stores `addr` as the FS base and returns 0; `ARCH_GET_FS`
/// returns the current FS base. Both GS operations fail with `EINVAL`.
pub fn do_arch_prctl(code: ArchPrctlCode, addr: u64, context: &mut UserContext) -> Result<u64> {
    match code {
        ArchPrctlCode::ARCH_GET_FS => {
            let fsbase = context.fsbase();
            Ok(fsbase as u64)
        }
        ArchPrctlCode::ARCH_SET_FS => {
            context.set_fsbase(addr as usize);
            Ok(0)
        }
        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
            return_errno_with_message!(Errno::EINVAL, "GS cannot be accessed from the user space")
        }
    }
}

View File

@ -0,0 +1,21 @@
use crate::log_syscall_entry;
use crate::prelude::*;
use crate::syscall::SyscallReturn;
use crate::syscall::SYS_BRK;
/// Expands the user heap to a new heap end and returns the resulting heap end.
///
/// A `heap_end` of zero is passed to the heap as `None`.
pub fn sys_brk(heap_end: u64) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_BRK);
    let requested_end = match heap_end {
        0 => None,
        addr => Some(addr as usize),
    };
    debug!("new heap end = {:x?}", heap_end);
    let current = current!();
    let resulting_end = current.user_heap().brk(requested_end)?;
    Ok(SyscallReturn::Return(resulting_end as _))
}

View File

@ -0,0 +1,50 @@
use crate::fs::{file_table::FileDescripter, fs_resolver::FsPath, utils::InodeType};
use crate::log_syscall_entry;
use crate::prelude::*;
use crate::syscall::constants::MAX_FILENAME_LEN;
use crate::util::read_cstring_from_user;
use super::SyscallReturn;
use super::{SYS_CHDIR, SYS_FCHDIR};
/// Handles the `chdir` system call.
///
/// Fails with `ENOENT` for an empty path and with `ENOTDIR` when the path does
/// not name a directory.
pub fn sys_chdir(pathname_addr: Vaddr) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_CHDIR);
    let pathname = read_cstring_from_user(pathname_addr, MAX_FILENAME_LEN)?;
    debug!("pathname = {:?}", pathname);

    let current = current!();
    let mut fs = current.fs().write();

    let path = pathname.to_string_lossy();
    if path.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::try_from(path.as_ref())?;
    let dentry = fs.lookup(&fs_path)?;

    if dentry.inode_type() != InodeType::Dir {
        return_errno_with_message!(Errno::ENOTDIR, "must be directory");
    }
    fs.set_cwd(dentry);
    Ok(SyscallReturn::Return(0))
}
pub fn sys_fchdir(fd: FileDescripter) -> Result<SyscallReturn> {
log_syscall_entry!(SYS_FCHDIR);
debug!("fd = {}", fd);
let current = current!();
let dentry = {
let file_table = current.file_table().lock();
let file = file_table.get_file(fd)?;
let inode_handle = file
.as_inode_handle()
.ok_or(Error::with_message(Errno::EBADE, "not inode"))?;
inode_handle.dentry().clone()
};
if dentry.inode_type() != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "must be directory");
}
current.fs().write().set_cwd(dentry);
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,44 @@
use core::time::Duration;
use super::SyscallReturn;
use super::SYS_CLOCK_NANOSLEEP;
use crate::{
log_syscall_entry,
prelude::*,
thread::Thread,
time::{clockid_t, timespec_t, ClockID, TIMER_ABSTIME},
util::{read_val_from_user, write_val_to_user},
};
pub fn sys_clock_nanosleep(
clockid: clockid_t,
flags: i32,
request_timespec_addr: Vaddr,
remain_timespec_addr: Vaddr,
) -> Result<SyscallReturn> {
log_syscall_entry!(SYS_CLOCK_NANOSLEEP);
let clock_id = ClockID::try_from(clockid)?;
let abs_time = if flags == 0 {
false
} else if flags == TIMER_ABSTIME {
true
} else {
unreachable!()
};
let request_timespec = read_val_from_user::<timespec_t>(request_timespec_addr)?;
debug!(
"clockid = {:?}, abs_time = {}, request_timespec = {:?}, remain timespec addr = 0x{:x}",
clock_id, abs_time, request_timespec, remain_timespec_addr
);
// FIXME: do real sleep. Here we simply yield the execution of current thread since we does not have timeout support now.
// If the sleep is interrupted by a signal, this syscall should return error.
Thread::yield_now();
if remain_timespec_addr != 0 {
let remain_duration = Duration::new(0, 0);
let remain_timespec = timespec_t::from(remain_duration);
write_val_to_user(remain_timespec_addr, &remain_timespec)?;
}
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,25 @@
use jinux_frame::cpu::UserContext;
use crate::log_syscall_entry;
use crate::process::clone::{clone_child, CloneArgs, CloneFlags};
use crate::{prelude::*, syscall::SYS_CLONE};
use super::SyscallReturn;
/// Handles the `clone` system call and returns the id of the new child.
///
/// The order of arguments for clone differs in different architecture.
/// This order we use here is the order for x86_64. See https://man7.org/linux/man-pages/man2/clone.2.html.
///
/// # Errors
///
/// Any error of `clone_child` is forwarded to the caller instead of
/// panicking.
pub fn sys_clone(
    clone_flags: u64,
    new_sp: u64,
    parent_tidptr: Vaddr,
    child_tidptr: Vaddr,
    tls: u64,
    parent_context: UserContext,
) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_CLONE);
    let clone_flags = CloneFlags::from(clone_flags);
    debug!("flags = {:?}, child_stack_ptr = 0x{:x}, parent_tid_ptr = 0x{:x}, child tid ptr = 0x{:x}, tls = 0x{:x}", clone_flags, new_sp, parent_tidptr, child_tidptr, tls);
    let clone_args = CloneArgs::new(new_sp, parent_tidptr, child_tidptr, tls, clone_flags);
    // All arguments are chosen by user space; a failed clone is reported to
    // the caller rather than bringing the kernel down.
    let child_pid = clone_child(parent_context, clone_args)?;
    Ok(SyscallReturn::Return(child_pid as _))
}

View File

@ -0,0 +1,15 @@
use super::SyscallReturn;
use super::SYS_CLOSE;
use crate::log_syscall_entry;
use crate::{fs::file_table::FileDescripter, prelude::*};
/// Handles the `close` system call.
///
/// Fails if `fd` is not in the file table of the current process.
pub fn sys_close(fd: FileDescripter) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_CLOSE);
    debug!("fd = {}", fd);
    let current = current!();
    let mut file_table = current.file_table().lock();
    // Check that the descriptor is open before removing it.
    file_table.get_file(fd)?;
    let closed_file = file_table.close_file(fd).unwrap();
    closed_file.clean_for_close()?;
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,8 @@
//! constants used in syscall
/// Longest allowed file name; passed as the length limit when a path is read
/// from user space.
pub const MAX_FILENAME_LEN: usize = 128;
/// Maximum number of `argv` entries read by `execve`.
pub const MAX_ARGV_NUMBER: usize = 128;
/// Maximum number of `envp` entries read by `execve`.
pub const MAX_ENVP_NUMBER: usize = 128;
/// Length limit for a single `argv` string.
pub const MAX_ARG_LEN: usize = 128;
/// Length limit for a single `envp` string.
pub const MAX_ENV_LEN: usize = 128;

View File

@ -0,0 +1,33 @@
use crate::fs::file_table::FileDescripter;
use crate::log_syscall_entry;
use crate::prelude::*;
use super::SyscallReturn;
use super::{SYS_DUP, SYS_DUP2};
/// Handles the `dup` system call and returns the new descriptor.
///
/// Fails if `old_fd` is not in the file table of the current process.
pub fn sys_dup(old_fd: FileDescripter) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_DUP);
    debug!("old_fd = {}", old_fd);
    let current = current!();
    let mut file_table = current.file_table().lock();
    let duplicated = file_table.get_file(old_fd)?.clone();
    let new_fd = file_table.insert(duplicated);
    Ok(SyscallReturn::Return(new_fd as _))
}
/// Handles the `dup2` system call and returns `new_fd`.
///
/// `old_fd` must be open. When both descriptors are equal nothing else
/// happens; otherwise the file is installed at `new_fd`.
pub fn sys_dup2(old_fd: FileDescripter, new_fd: FileDescripter) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_DUP2);
    debug!("old_fd = {}, new_fd = {}", old_fd, new_fd);
    let current = current!();
    let mut file_table = current.file_table().lock();
    let duplicated = file_table.get_file(old_fd)?.clone();
    if old_fd != new_fd {
        let replaced = file_table.insert_at(new_fd, duplicated);
        // If the file descriptor `new_fd` was previously open, close it silently.
        if let Some(previous_file) = replaced {
            let _ = previous_file.clean_for_close();
        }
    }
    Ok(SyscallReturn::Return(new_fd as _))
}

View File

@ -0,0 +1,89 @@
use jinux_frame::cpu::UserContext;
use super::{constants::*, SyscallReturn};
use crate::log_syscall_entry;
use crate::process::posix_thread::name::ThreadName;
use crate::process::posix_thread::posix_thread_ext::PosixThreadExt;
use crate::process::program_loader::load_program_to_root_vmar;
use crate::util::{read_cstring_from_user, read_val_from_user};
use crate::{prelude::*, syscall::SYS_EXECVE};
pub fn sys_execve(
filename_ptr: Vaddr,
argv_ptr_ptr: Vaddr,
envp_ptr_ptr: Vaddr,
context: &mut UserContext,
) -> Result<SyscallReturn> {
log_syscall_entry!(SYS_EXECVE);
let executable_path = read_cstring_from_user(filename_ptr, MAX_FILENAME_LEN)?;
let executable_path = executable_path.into_string().unwrap();
let argv = read_cstring_vec(argv_ptr_ptr, MAX_ARGV_NUMBER, MAX_ARG_LEN)?;
let envp = read_cstring_vec(envp_ptr_ptr, MAX_ENVP_NUMBER, MAX_ENV_LEN)?;
debug!(
"filename: {:?}, argv = {:?}, envp = {:?}",
executable_path, argv, envp
);
// FIXME: should we set thread name in execve?
let current_thread = current_thread!();
let posix_thread = current_thread.as_posix_thread().unwrap();
let mut thread_name = posix_thread.thread_name().lock();
let new_thread_name = ThreadName::new_from_executable_path(&executable_path)?;
*thread_name = Some(new_thread_name);
// clear ctid
// FIXME: should we clear ctid when execve?
*posix_thread.clear_child_tid().lock() = 0;
let current = current!();
// destroy root vmars
let root_vmar = current.root_vmar();
root_vmar.clear()?;
current.user_vm().set_default()?;
// load elf content to new vm space
let fs_resolver = &*current.fs().read();
debug!("load program to root vmar");
let (new_executable_path, elf_load_info) =
load_program_to_root_vmar(root_vmar, executable_path, argv, envp, fs_resolver, 1)?;
debug!("load elf in execve succeeds");
// set executable path
*current.executable_path().write() = new_executable_path;
// set signal disposition to default
current.sig_dispositions().lock().inherit();
// set cpu context to default
let default_content = UserContext::default();
*context.general_regs_mut() = *default_content.general_regs();
context.set_fsbase(default_content.fsbase());
*context.fp_regs_mut() = *default_content.fp_regs();
// set new entry point
context.set_rip(elf_load_info.entry_point() as _);
debug!("entry_point: 0x{:x}", elf_load_info.entry_point());
// set new user stack top
context.set_rsp(elf_load_info.user_stack_top() as _);
debug!("user stack top: 0x{:x}", elf_load_info.user_stack_top());
Ok(SyscallReturn::NoReturn)
}
fn read_cstring_vec(
array_ptr: Vaddr,
max_string_number: usize,
max_string_len: usize,
) -> Result<Vec<CString>> {
let mut res = Vec::new();
let mut read_addr = array_ptr;
let mut find_null = false;
for _ in 0..max_string_number {
let cstring_ptr = read_val_from_user::<usize>(read_addr)?;
read_addr += 8;
// read a null pointer
if cstring_ptr == 0 {
find_null = true;
break;
}
let cstring = read_cstring_from_user(cstring_ptr, max_string_len)?;
res.push(cstring);
}
if !find_null {
return_errno_with_message!(Errno::E2BIG, "Cannot find null pointer in vector");
}
Ok(res)
}

View File

@ -0,0 +1,19 @@
use crate::process::posix_thread::posix_thread_ext::PosixThreadExt;
use crate::{log_syscall_entry, prelude::*};
use crate::syscall::{SyscallReturn, SYS_EXIT};
/// Handles the `exit` system call for the current thread.
pub fn sys_exit(exit_code: i32) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_EXIT);
    debug!("exid code = {}", exit_code);

    let current_thread = current_thread!();
    let tid = current_thread.tid();
    let current = current!();
    let pid = current.pid();
    debug!("tid = {}, pid = {}", tid, pid);

    let posix_thread = current_thread.as_posix_thread().unwrap();
    // Exit the thread itself first, then its posix thread part.
    current_thread.exit();
    posix_thread.exit(tid, exit_code)?;

    let result = SyscallReturn::Return(0);
    Ok(result)
}

View File

@ -0,0 +1,11 @@
use crate::{log_syscall_entry, prelude::*};
use crate::syscall::{SyscallReturn, SYS_EXIT_GROUP};
/// Exits all threads in the current process.
pub fn sys_exit_group(exit_code: u64) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_EXIT_GROUP);
    // Exit every thread of the current process.
    current!().exit_group(exit_code as _);
    let result = SyscallReturn::Return(0);
    Ok(result)
}

View File

@ -0,0 +1,28 @@
use super::{SyscallReturn, SYS_FCNTL};
use crate::fs::utils::FcntlCmd;
use crate::log_syscall_entry;
use crate::{fs::file_table::FileDescripter, prelude::*};
pub fn sys_fcntl(fd: FileDescripter, cmd: i32, arg: u64) -> Result<SyscallReturn> {
log_syscall_entry!(SYS_FCNTL);
let fcntl_cmd = FcntlCmd::try_from(cmd)?;
debug!("fd = {}, cmd = {:?}, arg = {}", fd, fcntl_cmd, arg);
match fcntl_cmd {
FcntlCmd::F_DUPFD_CLOEXEC => {
// FIXME: deal with the cloexec flag
let current = current!();
let mut file_table = current.file_table().lock();
let new_fd = arg as FileDescripter;
file_table.dup(fd, Some(new_fd))?;
return Ok(SyscallReturn::Return(new_fd as _));
}
FcntlCmd::F_SETFD => {
if arg != 1 {
panic!("Unknown setfd argument");
}
// TODO: Set cloexec
return Ok(SyscallReturn::Return(0));
}
_ => todo!(),
}
}

View File

@ -0,0 +1,19 @@
use crate::{
log_syscall_entry,
prelude::*,
process::clone::{clone_child, CloneArgs},
};
use jinux_frame::cpu::UserContext;
use crate::syscall::SYS_FORK;
use super::SyscallReturn;
/// Handles the `fork` system call and returns the id of the new child.
///
/// The child is created by `clone_child` with default clone arguments.
///
/// # Errors
///
/// Any error of `clone_child` is forwarded to the caller instead of
/// panicking.
pub fn sys_fork(parent_context: UserContext) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_FORK);
    // FIXME: set correct args for fork
    let clone_args = CloneArgs::default();
    let child_pid = clone_child(parent_context, clone_args)?;
    Ok(SyscallReturn::Return(child_pid as _))
}

Some files were not shown because too many files have changed in this diff Show More