Remove the shim kernel crate

This commit is contained in:
Zhang Junyang
2024-08-19 19:15:22 +08:00
committed by Tate, Hongliang Tian
parent d76c7a5b1e
commit dafd16075f
416 changed files with 231 additions and 273 deletions

View File

@ -0,0 +1,85 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::{FdFlags, FileDesc},
utils::{CreationFlags, StatusFlags},
},
prelude::*,
util::net::{get_socket_from_fd, write_socket_addr_to_user},
};
/// Handles the `accept` system call.
///
/// Accepts a connection on the socket referred to by `sockfd` without any
/// extra flags and returns the file descriptor of the connected socket.
/// `sockaddr_ptr` and `addrlen_ptr` are forwarded to [`do_accept`] unchanged.
pub fn sys_accept(
    sockfd: FileDesc,
    sockaddr_ptr: Vaddr,
    addrlen_ptr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("sockfd = {sockfd}, sockaddr_ptr = 0x{sockaddr_ptr:x}, addrlen_ptr = 0x{addrlen_ptr:x}");

    // Plain `accept` shares its implementation with `accept4`, using an empty flag set.
    do_accept(sockfd, sockaddr_ptr, addrlen_ptr, Flags::empty(), ctx)
        .map(|accepted_fd| SyscallReturn::Return(accepted_fd as _))
}
/// Handles the `accept4` system call.
///
/// Behaves like [`sys_accept`], but additionally interprets `flags` as a set
/// of [`Flags`] that configure the accepted socket and its file descriptor.
pub fn sys_accept4(
    sockfd: FileDesc,
    sockaddr_ptr: Vaddr,
    addrlen_ptr: Vaddr,
    flags: u32,
    ctx: &Context,
) -> Result<SyscallReturn> {
    trace!("raw flags = 0x{:x}", flags);

    // Bits that do not correspond to a known flag are dropped silently.
    // NOTE(review): Linux rejects unknown `accept4` flag bits with EINVAL; confirm
    // whether the lenient truncation here is intentional.
    let accept_flags = Flags::from_bits_truncate(flags);
    debug!(
        "sockfd = {}, sockaddr_ptr = 0x{:x}, addrlen_ptr = 0x{:x}, flags = {:?}",
        sockfd, sockaddr_ptr, addrlen_ptr, accept_flags
    );

    do_accept(sockfd, sockaddr_ptr, addrlen_ptr, accept_flags, ctx)
        .map(|accepted_fd| SyscallReturn::Return(accepted_fd as _))
}
/// Shared implementation of `accept` and `accept4`.
///
/// Accepts a connection on `sockfd`, applies the requested `flags` to the
/// connected socket, optionally reports the peer address to user space, and
/// finally installs the connected socket in the process's file table.
///
/// Returns the file descriptor assigned to the connected socket.
fn do_accept(
    sockfd: FileDesc,
    sockaddr_ptr: Vaddr,
    addrlen_ptr: Vaddr,
    flags: Flags,
    ctx: &Context,
) -> Result<FileDesc> {
    // The listening socket handle is only needed for the `accept` call itself.
    let (connected_socket, socket_addr) = get_socket_from_fd(sockfd)?.accept()?;

    if flags.contains(Flags::SOCK_NONBLOCK) {
        connected_socket.set_status_flags(StatusFlags::O_NONBLOCK)?;
    }

    let want_cloexec = flags.contains(Flags::SOCK_CLOEXEC);
    let fd_flags = if want_cloexec {
        FdFlags::CLOEXEC
    } else {
        FdFlags::empty()
    };

    // A null `sockaddr_ptr` means the caller does not want the peer address.
    if sockaddr_ptr != 0 {
        write_socket_addr_to_user(&socket_addr, sockaddr_ptr, addrlen_ptr)?;
    }

    // The file table lock is held only for the duration of the insertion.
    let new_fd = ctx
        .process
        .file_table()
        .lock()
        .insert(connected_socket, fd_flags);
    Ok(new_fd)
}
// Flags understood by `accept4`. `do_accept` inspects them to configure the
// accepted socket and the file descriptor it is installed under.
bitflags! {
struct Flags: u32 {
// When set, `do_accept` sets `StatusFlags::O_NONBLOCK` on the accepted socket.
const SOCK_NONBLOCK = NONBLOCK;
// When set, `do_accept` inserts the accepted socket with `FdFlags::CLOEXEC`.
const SOCK_CLOEXEC = CLOEXEC;
}
}
// Bit value of `Flags::SOCK_NONBLOCK`, taken from `StatusFlags::O_NONBLOCK`.
const NONBLOCK: u32 = StatusFlags::O_NONBLOCK.bits();
// Bit value of `Flags::SOCK_CLOEXEC`, taken from `CreationFlags::O_CLOEXEC`.
const CLOEXEC: u32 = CreationFlags::O_CLOEXEC.bits();

View File

@ -0,0 +1,100 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::PATH_MAX,
},
prelude::*,
};
/// Handles the `faccessat` system call.
///
/// Logs the request and delegates to [`do_faccessat`] with an empty flag set.
pub fn sys_faccessat(
    dirfd: FileDesc,
    path_ptr: Vaddr,
    mode: u16,
    ctx: &Context,
) -> Result<SyscallReturn> {
    // This entry point has no `flags` parameter, so no flags are forwarded.
    const NO_FLAGS: i32 = 0;

    debug!(
        "faccessat: dirfd = {}, path_ptr = {:#x}, mode = {:o}",
        dirfd, path_ptr, mode
    );

    do_faccessat(dirfd, path_ptr, mode, NO_FLAGS, ctx)
}
/// Handles the `access` system call.
///
/// Equivalent to `faccessat` with `AT_FDCWD` as the directory file descriptor
/// and an empty flag set.
pub fn sys_access(path_ptr: Vaddr, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
    // `access` has no `flags` parameter, so no flags are forwarded.
    const NO_FLAGS: i32 = 0;

    debug!("access: path_ptr = {:#x}, mode = {:o}", path_ptr, mode);

    do_faccessat(AT_FDCWD, path_ptr, mode, NO_FLAGS, ctx)
}
// Flag bits accepted by `do_faccessat`; any other bit makes it fail with `EINVAL`.
bitflags! {
struct FaccessatFlags: i32 {
// Accepted as a valid bit, but not consulted by `do_faccessat`.
const AT_EACCESS = 0x200;
// Makes `do_faccessat` resolve the path with `lookup_no_follow` instead of `lookup`.
const AT_SYMLINK_NOFOLLOW = 0x100;
// Accepted as a valid bit, but not consulted by `do_faccessat`.
const AT_EMPTY_PATH = 0x1000;
}
}
// Permission bits that may be requested through the `mode` argument of
// `do_faccessat`; any other bit makes it fail with `EINVAL`.
bitflags! {
struct AccessMode: u16 {
// Request a read-permission check.
const R_OK = 0x4;
// Request a write-permission check.
const W_OK = 0x2;
// Request an execute-permission check.
const X_OK = 0x1;
// `F_OK` is zero, so it is represented by the empty set (`AccessMode::empty()`)
// rather than by a dedicated constant.
// We could ignore F_OK in bitflags.
// const F_OK = 0x0;
}
}
pub fn do_faccessat(
dirfd: FileDesc,
path_ptr: Vaddr,
mode: u16,
flags: i32,
ctx: &Context,
) -> Result<SyscallReturn> {
let mode = AccessMode::from_bits(mode)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "Invalid mode"))?;
let flags = FaccessatFlags::from_bits(flags)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "Invalid flags"))?;
let path = ctx.get_user_space().read_cstring(path_ptr, PATH_MAX)?;
debug!(
"dirfd = {}, path = {:?}, mode = {:o}, flags = {:?}",
dirfd, path, mode, flags
);
let dentry = {
let path = path.to_string_lossy();
let fs_path = FsPath::new(dirfd, path.as_ref())?;
let fs = ctx.process.fs().read();
if flags.contains(FaccessatFlags::AT_SYMLINK_NOFOLLOW) {
fs.lookup_no_follow(&fs_path)?
} else {
fs.lookup(&fs_path)?
}
};
// AccessMode::empty() means F_OK and no more permission check needed.
if mode.is_empty() {
return Ok(SyscallReturn::Return(0));
}
let inode_mode = dentry.mode()?;
// FIXME: The current implementation is dummy
if mode.contains(AccessMode::R_OK) && !inode_mode.is_readable() {
return_errno_with_message!(Errno::EACCES, "Read permission denied");
}
if mode.contains(AccessMode::W_OK) && !inode_mode.is_writable() {
return_errno_with_message!(Errno::EACCES, "Write permission denied");
}
if mode.contains(AccessMode::X_OK) && !inode_mode.is_executable() {
return_errno_with_message!(Errno::EACCES, "Execute permission denied");
}
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,28 @@
// SPDX-License-Identifier: MPL-2.0
use core::time::Duration;
use super::SyscallReturn;
use crate::{prelude::*, time::timer::Timeout};
/// Handles the `alarm` system call.
///
/// A `seconds` value of zero cancels the process's alarm timer; any other
/// value re-arms it to expire after that many seconds. In both cases the
/// return value is the time that was left on the previous alarm, rounded up
/// to whole seconds.
pub fn sys_alarm(seconds: u32, ctx: &Context) -> Result<SyscallReturn> {
    debug!("seconds = {}", seconds);

    let alarm_timer = ctx.process.timer_manager().alarm_timer();

    // Round a partially elapsed second up so that a pending alarm never reports 0.
    let remaining = alarm_timer.remain();
    let remaining_secs = remaining.as_secs() + u64::from(remaining.subsec_nanos() > 0);

    if seconds == 0 {
        // Clear previous timer
        alarm_timer.cancel();
    } else {
        let timeout = Duration::from_secs(u64::from(seconds));
        alarm_timer.set_timeout(Timeout::After(timeout));
    }

    Ok(SyscallReturn::Return(remaining_secs as _))
}

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: MPL-2.0
//! Implements the `syscall_dispatch` function and the system call number constants such as `SYS_READ`.
#[cfg(target_arch = "x86_64")]
pub mod x86;
#[cfg(target_arch = "x86_64")]
pub use self::x86::*;

View File

@ -0,0 +1,315 @@
// SPDX-License-Identifier: MPL-2.0
use crate::syscall::{
accept::{sys_accept, sys_accept4},
access::{sys_access, sys_faccessat},
alarm::sys_alarm,
arch_prctl::sys_arch_prctl,
bind::sys_bind,
brk::sys_brk,
capget::sys_capget,
capset::sys_capset,
chdir::{sys_chdir, sys_fchdir},
chmod::{sys_chmod, sys_fchmod, sys_fchmodat},
chown::{sys_chown, sys_fchown, sys_fchownat, sys_lchown},
chroot::sys_chroot,
clock_gettime::sys_clock_gettime,
clone::{sys_clone, sys_clone3},
close::sys_close,
connect::sys_connect,
dup::{sys_dup, sys_dup2, sys_dup3},
epoll::{sys_epoll_create, sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_wait},
eventfd::{sys_eventfd, sys_eventfd2},
execve::{sys_execve, sys_execveat},
exit::sys_exit,
exit_group::sys_exit_group,
fallocate::sys_fallocate,
fcntl::sys_fcntl,
flock::sys_flock,
fork::sys_fork,
fsync::{sys_fdatasync, sys_fsync},
futex::sys_futex,
getcwd::sys_getcwd,
getdents64::{sys_getdents, sys_getdents64},
getegid::sys_getegid,
geteuid::sys_geteuid,
getgid::sys_getgid,
getgroups::sys_getgroups,
getpeername::sys_getpeername,
getpgrp::sys_getpgrp,
getpid::sys_getpid,
getppid::sys_getppid,
getrandom::sys_getrandom,
getresgid::sys_getresgid,
getresuid::sys_getresuid,
getrusage::sys_getrusage,
getsid::sys_getsid,
getsockname::sys_getsockname,
getsockopt::sys_getsockopt,
gettid::sys_gettid,
gettimeofday::sys_gettimeofday,
getuid::sys_getuid,
impl_syscall_nums_and_dispatch_fn,
ioctl::sys_ioctl,
kill::sys_kill,
link::{sys_link, sys_linkat},
listen::sys_listen,
lseek::sys_lseek,
madvise::sys_madvise,
mkdir::{sys_mkdir, sys_mkdirat},
mknod::{sys_mknod, sys_mknodat},
mmap::sys_mmap,
mount::sys_mount,
mprotect::sys_mprotect,
msync::sys_msync,
munmap::sys_munmap,
nanosleep::{sys_clock_nanosleep, sys_nanosleep},
open::{sys_creat, sys_open, sys_openat},
pause::sys_pause,
pipe::{sys_pipe, sys_pipe2},
poll::sys_poll,
prctl::sys_prctl,
pread64::sys_pread64,
preadv::{sys_preadv, sys_preadv2, sys_readv},
prlimit64::sys_prlimit64,
pselect6::sys_pselect6,
pwrite64::sys_pwrite64,
pwritev::{sys_pwritev, sys_pwritev2, sys_writev},
read::sys_read,
readlink::{sys_readlink, sys_readlinkat},
recvfrom::sys_recvfrom,
recvmsg::sys_recvmsg,
rename::{sys_rename, sys_renameat},
rmdir::sys_rmdir,
rt_sigaction::sys_rt_sigaction,
rt_sigpending::sys_rt_sigpending,
rt_sigprocmask::sys_rt_sigprocmask,
rt_sigreturn::sys_rt_sigreturn,
rt_sigsuspend::sys_rt_sigsuspend,
sched_getaffinity::sys_sched_getaffinity,
sched_yield::sys_sched_yield,
select::sys_select,
semctl::sys_semctl,
semget::sys_semget,
semop::{sys_semop, sys_semtimedop},
sendfile::sys_sendfile,
sendmsg::sys_sendmsg,
sendto::sys_sendto,
set_get_priority::{sys_get_priority, sys_set_priority},
set_robust_list::sys_set_robust_list,
set_tid_address::sys_set_tid_address,
setfsgid::sys_setfsgid,
setfsuid::sys_setfsuid,
setgid::sys_setgid,
setgroups::sys_setgroups,
setitimer::{sys_getitimer, sys_setitimer},
setpgid::sys_setpgid,
setregid::sys_setregid,
setresgid::sys_setresgid,
setresuid::sys_setresuid,
setreuid::sys_setreuid,
setsid::sys_setsid,
setsockopt::sys_setsockopt,
setuid::sys_setuid,
shutdown::sys_shutdown,
sigaltstack::sys_sigaltstack,
socket::sys_socket,
socketpair::sys_socketpair,
stat::{sys_fstat, sys_fstatat, sys_lstat, sys_stat},
statfs::{sys_fstatfs, sys_statfs},
symlink::{sys_symlink, sys_symlinkat},
sync::sys_sync,
tgkill::sys_tgkill,
time::sys_time,
timer_create::{sys_timer_create, sys_timer_delete},
timer_settime::{sys_timer_gettime, sys_timer_settime},
truncate::{sys_ftruncate, sys_truncate},
umask::sys_umask,
umount::sys_umount,
uname::sys_uname,
unlink::{sys_unlink, sys_unlinkat},
utimens::{sys_futimesat, sys_utime, sys_utimensat, sys_utimes},
wait4::sys_wait4,
waitid::sys_waitid,
write::sys_write,
};
// System call table.
//
// Each entry has the form `NAME = number => handler(args[..n]);`. It ties the
// system call number constant `NAME` to `number` and names the handler that
// serves that system call. A few entries additionally hand `&user_ctx` or
// `&mut user_ctx` to the handler.
//
// NOTE(review): `args[..n]` presumably selects how many raw system call
// arguments are forwarded to the handler; confirm against the definition of
// `impl_syscall_nums_and_dispatch_fn`.
// NOTE(review): entries are listed in ascending number order, except that
// `SYS_MSYNC` (26) appears before `SYS_SCHED_YIELD` (24).
impl_syscall_nums_and_dispatch_fn! {
SYS_READ = 0 => sys_read(args[..3]);
SYS_WRITE = 1 => sys_write(args[..3]);
SYS_OPEN = 2 => sys_open(args[..3]);
SYS_CLOSE = 3 => sys_close(args[..1]);
SYS_STAT = 4 => sys_stat(args[..2]);
SYS_FSTAT = 5 => sys_fstat(args[..2]);
SYS_LSTAT = 6 => sys_lstat(args[..2]);
SYS_POLL = 7 => sys_poll(args[..3]);
SYS_LSEEK = 8 => sys_lseek(args[..3]);
SYS_MMAP = 9 => sys_mmap(args[..6]);
SYS_MPROTECT = 10 => sys_mprotect(args[..3]);
SYS_MUNMAP = 11 => sys_munmap(args[..2]);
SYS_BRK = 12 => sys_brk(args[..1]);
SYS_RT_SIGACTION = 13 => sys_rt_sigaction(args[..4]);
SYS_RT_SIGPROCMASK = 14 => sys_rt_sigprocmask(args[..4]);
SYS_RT_SIGRETURN = 15 => sys_rt_sigreturn(args[..0], &mut user_ctx);
SYS_IOCTL = 16 => sys_ioctl(args[..3]);
SYS_PREAD64 = 17 => sys_pread64(args[..4]);
SYS_PWRITE64 = 18 => sys_pwrite64(args[..4]);
SYS_READV = 19 => sys_readv(args[..3]);
SYS_WRITEV = 20 => sys_writev(args[..3]);
SYS_ACCESS = 21 => sys_access(args[..2]);
SYS_PIPE = 22 => sys_pipe(args[..1]);
SYS_SELECT = 23 => sys_select(args[..5]);
SYS_MSYNC = 26 => sys_msync(args[..3]);
SYS_SCHED_YIELD = 24 => sys_sched_yield(args[..0]);
SYS_MADVISE = 28 => sys_madvise(args[..3]);
SYS_DUP = 32 => sys_dup(args[..1]);
SYS_DUP2 = 33 => sys_dup2(args[..2]);
SYS_PAUSE = 34 => sys_pause(args[..0]);
SYS_NANOSLEEP = 35 => sys_nanosleep(args[..2]);
SYS_GETITIMER = 36 => sys_getitimer(args[..2]);
SYS_ALARM = 37 => sys_alarm(args[..1]);
SYS_SETITIMER = 38 => sys_setitimer(args[..3]);
SYS_GETPID = 39 => sys_getpid(args[..0]);
SYS_SENDFILE = 40 => sys_sendfile(args[..4]);
SYS_SOCKET = 41 => sys_socket(args[..3]);
SYS_CONNECT = 42 => sys_connect(args[..3]);
SYS_ACCEPT = 43 => sys_accept(args[..3]);
SYS_SENDTO = 44 => sys_sendto(args[..6]);
SYS_RECVFROM = 45 => sys_recvfrom(args[..6]);
SYS_SENDMSG = 46 => sys_sendmsg(args[..3]);
SYS_RECVMSG = 47 => sys_recvmsg(args[..3]);
SYS_SHUTDOWN = 48 => sys_shutdown(args[..2]);
SYS_BIND = 49 => sys_bind(args[..3]);
SYS_LISTEN = 50 => sys_listen(args[..2]);
SYS_GETSOCKNAME = 51 => sys_getsockname(args[..3]);
SYS_GETPEERNAME = 52 => sys_getpeername(args[..3]);
SYS_SOCKETPAIR = 53 => sys_socketpair(args[..4]);
SYS_SETSOCKOPT = 54 => sys_setsockopt(args[..5]);
SYS_GETSOCKOPT = 55 => sys_getsockopt(args[..5]);
SYS_CLONE = 56 => sys_clone(args[..5], &user_ctx);
SYS_FORK = 57 => sys_fork(args[..0], &user_ctx);
SYS_EXECVE = 59 => sys_execve(args[..3], &mut user_ctx);
SYS_EXIT = 60 => sys_exit(args[..1]);
SYS_WAIT4 = 61 => sys_wait4(args[..4]);
SYS_KILL = 62 => sys_kill(args[..2]);
SYS_UNAME = 63 => sys_uname(args[..1]);
SYS_SEMGET = 64 => sys_semget(args[..3]);
SYS_SEMOP = 65 => sys_semop(args[..3]);
SYS_SEMCTL = 66 => sys_semctl(args[..4]);
SYS_FCNTL = 72 => sys_fcntl(args[..3]);
SYS_FLOCK = 73 => sys_flock(args[..2]);
SYS_FSYNC = 74 => sys_fsync(args[..1]);
SYS_FDATASYNC = 75 => sys_fdatasync(args[..1]);
SYS_TRUNCATE = 76 => sys_truncate(args[..2]);
SYS_FTRUNCATE = 77 => sys_ftruncate(args[..2]);
SYS_GETDENTS = 78 => sys_getdents(args[..3]);
SYS_GETCWD = 79 => sys_getcwd(args[..2]);
SYS_CHDIR = 80 => sys_chdir(args[..1]);
SYS_FCHDIR = 81 => sys_fchdir(args[..1]);
SYS_RENAME = 82 => sys_rename(args[..2]);
SYS_MKDIR = 83 => sys_mkdir(args[..2]);
SYS_RMDIR = 84 => sys_rmdir(args[..1]);
SYS_CREAT = 85 => sys_creat(args[..2]);
SYS_LINK = 86 => sys_link(args[..2]);
SYS_UNLINK = 87 => sys_unlink(args[..1]);
SYS_SYMLINK = 88 => sys_symlink(args[..2]);
SYS_READLINK = 89 => sys_readlink(args[..3]);
SYS_CHMOD = 90 => sys_chmod(args[..2]);
SYS_FCHMOD = 91 => sys_fchmod(args[..2]);
SYS_CHOWN = 92 => sys_chown(args[..3]);
SYS_FCHOWN = 93 => sys_fchown(args[..3]);
SYS_LCHOWN = 94 => sys_lchown(args[..3]);
SYS_UMASK = 95 => sys_umask(args[..1]);
SYS_GETTIMEOFDAY = 96 => sys_gettimeofday(args[..1]);
SYS_GETRUSAGE = 98 => sys_getrusage(args[..2]);
SYS_GETUID = 102 => sys_getuid(args[..0]);
SYS_GETGID = 104 => sys_getgid(args[..0]);
SYS_SETUID = 105 => sys_setuid(args[..1]);
SYS_SETGID = 106 => sys_setgid(args[..1]);
SYS_GETEUID = 107 => sys_geteuid(args[..0]);
SYS_GETEGID = 108 => sys_getegid(args[..0]);
SYS_SETPGID = 109 => sys_setpgid(args[..2]);
SYS_GETPPID = 110 => sys_getppid(args[..0]);
SYS_GETPGRP = 111 => sys_getpgrp(args[..0]);
SYS_SETSID = 112 => sys_setsid(args[..0]);
SYS_SETREUID = 113 => sys_setreuid(args[..2]);
SYS_SETREGID = 114 => sys_setregid(args[..2]);
SYS_GETGROUPS = 115 => sys_getgroups(args[..2]);
SYS_SETGROUPS = 116 => sys_setgroups(args[..2]);
SYS_SETRESUID = 117 => sys_setresuid(args[..3]);
SYS_GETRESUID = 118 => sys_getresuid(args[..3]);
SYS_SETRESGID = 119 => sys_setresgid(args[..3]);
SYS_GETRESGID = 120 => sys_getresgid(args[..3]);
SYS_SETFSUID = 122 => sys_setfsuid(args[..1]);
SYS_SETFSGID = 123 => sys_setfsgid(args[..1]);
SYS_GETSID = 124 => sys_getsid(args[..1]);
SYS_CAPGET = 125 => sys_capget(args[..2]);
SYS_CAPSET = 126 => sys_capset(args[..2]);
SYS_RT_SIGPENDING = 127 => sys_rt_sigpending(args[..2]);
SYS_RT_SIGSUSPEND = 130 => sys_rt_sigsuspend(args[..2]);
SYS_SIGALTSTACK = 131 => sys_sigaltstack(args[..2]);
SYS_UTIME = 132 => sys_utime(args[..2]);
SYS_MKNOD = 133 => sys_mknod(args[..3]);
SYS_STATFS = 137 => sys_statfs(args[..2]);
SYS_FSTATFS = 138 => sys_fstatfs(args[..2]);
SYS_GET_PRIORITY = 140 => sys_get_priority(args[..2]);
SYS_SET_PRIORITY = 141 => sys_set_priority(args[..3]);
SYS_PRCTL = 157 => sys_prctl(args[..5]);
SYS_ARCH_PRCTL = 158 => sys_arch_prctl(args[..2], &mut user_ctx);
SYS_CHROOT = 161 => sys_chroot(args[..1]);
SYS_SYNC = 162 => sys_sync(args[..0]);
SYS_MOUNT = 165 => sys_mount(args[..5]);
SYS_UMOUNT2 = 166 => sys_umount(args[..2]);
SYS_GETTID = 186 => sys_gettid(args[..0]);
SYS_TIME = 201 => sys_time(args[..1]);
SYS_FUTEX = 202 => sys_futex(args[..6]);
SYS_SCHED_GETAFFINITY = 204 => sys_sched_getaffinity(args[..3]);
SYS_EPOLL_CREATE = 213 => sys_epoll_create(args[..1]);
SYS_GETDENTS64 = 217 => sys_getdents64(args[..3]);
SYS_SET_TID_ADDRESS = 218 => sys_set_tid_address(args[..1]);
SYS_SEMTIMEDOP = 220 => sys_semtimedop(args[..4]);
SYS_TIMER_CREATE = 222 => sys_timer_create(args[..3]);
SYS_TIMER_SETTIME = 223 => sys_timer_settime(args[..4]);
SYS_TIMER_GETTIME = 224 => sys_timer_gettime(args[..2]);
SYS_TIMER_DELETE = 226 => sys_timer_delete(args[..1]);
SYS_CLOCK_GETTIME = 228 => sys_clock_gettime(args[..2]);
SYS_CLOCK_NANOSLEEP = 230 => sys_clock_nanosleep(args[..4]);
SYS_EXIT_GROUP = 231 => sys_exit_group(args[..1]);
SYS_EPOLL_WAIT = 232 => sys_epoll_wait(args[..4]);
SYS_EPOLL_CTL = 233 => sys_epoll_ctl(args[..4]);
SYS_TGKILL = 234 => sys_tgkill(args[..3]);
SYS_UTIMES = 235 => sys_utimes(args[..2]);
SYS_WAITID = 247 => sys_waitid(args[..5]);
SYS_OPENAT = 257 => sys_openat(args[..4]);
SYS_MKDIRAT = 258 => sys_mkdirat(args[..3]);
SYS_MKNODAT = 259 => sys_mknodat(args[..4]);
SYS_FCHOWNAT = 260 => sys_fchownat(args[..5]);
SYS_FUTIMESAT = 261 => sys_futimesat(args[..3]);
SYS_FSTATAT = 262 => sys_fstatat(args[..4]);
SYS_UNLINKAT = 263 => sys_unlinkat(args[..3]);
SYS_RENAMEAT = 264 => sys_renameat(args[..4]);
SYS_LINKAT = 265 => sys_linkat(args[..5]);
SYS_SYMLINKAT = 266 => sys_symlinkat(args[..3]);
SYS_READLINKAT = 267 => sys_readlinkat(args[..4]);
SYS_FCHMODAT = 268 => sys_fchmodat(args[..3]);
SYS_FACCESSAT = 269 => sys_faccessat(args[..3]);
SYS_PSELECT6 = 270 => sys_pselect6(args[..6]);
SYS_SET_ROBUST_LIST = 273 => sys_set_robust_list(args[..2]);
SYS_UTIMENSAT = 280 => sys_utimensat(args[..4]);
SYS_EPOLL_PWAIT = 281 => sys_epoll_pwait(args[..6]);
SYS_EVENTFD = 284 => sys_eventfd(args[..1]);
SYS_FALLOCATE = 285 => sys_fallocate(args[..4]);
SYS_ACCEPT4 = 288 => sys_accept4(args[..4]);
SYS_EVENTFD2 = 290 => sys_eventfd2(args[..2]);
SYS_EPOLL_CREATE1 = 291 => sys_epoll_create1(args[..1]);
SYS_DUP3 = 292 => sys_dup3(args[..3]);
SYS_PIPE2 = 293 => sys_pipe2(args[..2]);
SYS_PREADV = 295 => sys_preadv(args[..4]);
SYS_PWRITEV = 296 => sys_pwritev(args[..4]);
SYS_PRLIMIT64 = 302 => sys_prlimit64(args[..4]);
SYS_GETRANDOM = 318 => sys_getrandom(args[..3]);
SYS_EXECVEAT = 322 => sys_execveat(args[..5], &mut user_ctx);
SYS_PREADV2 = 327 => sys_preadv2(args[..5]);
SYS_PWRITEV2 = 328 => sys_pwritev2(args[..5]);
SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx);
}

View File

@ -0,0 +1,44 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::cpu::UserContext;
use super::SyscallReturn;
use crate::{cpu::LinuxAbi, prelude::*};
/// Operation codes of the `arch_prctl` system call.
///
/// `sys_arch_prctl` converts the raw `code` argument into this enum with
/// `try_from`, and `do_arch_prctl` carries out the selected operation.
#[allow(non_camel_case_types)]
#[repr(u64)]
#[derive(Debug, TryFromInt)]
pub enum ArchPrctlCode {
// Rejected by `do_arch_prctl` with `EINVAL`.
ARCH_SET_GS = 0x1001,
// Stores `addr` as the TLS pointer of the user context.
ARCH_SET_FS = 0x1002,
// Returns the TLS pointer of the user context.
ARCH_GET_FS = 0x1003,
// Rejected by `do_arch_prctl` with `EINVAL`.
ARCH_GET_GS = 0x1004,
}
/// Handles the `arch_prctl` system call.
///
/// `code` selects the operation (see [`ArchPrctlCode`]) and `addr` is its
/// argument. On success, the value produced by [`do_arch_prctl`] is returned
/// to user space.
///
/// # Errors
///
/// Returns an error if `code` is not a known [`ArchPrctlCode`], or if
/// [`do_arch_prctl`] rejects the operation (for example, the GS operations
/// fail with `EINVAL`).
pub fn sys_arch_prctl(
    code: u64,
    addr: u64,
    _ctx: &Context,
    user_ctx: &mut UserContext,
) -> Result<SyscallReturn> {
    let arch_prctl_code = ArchPrctlCode::try_from(code)?;
    debug!(
        "arch_prctl_code: {:?}, addr = 0x{:x}",
        arch_prctl_code, addr
    );

    // Propagate the error instead of unwrapping: `code` comes from user space,
    // and an unsupported operation must fail the system call, not panic the kernel.
    let res = do_arch_prctl(arch_prctl_code, addr, user_ctx)?;
    Ok(SyscallReturn::Return(res as _))
}
/// Performs the `arch_prctl` operation selected by `code` on the user context.
///
/// - `ARCH_SET_FS` stores `addr` as the TLS pointer and yields `0`.
/// - `ARCH_GET_FS` yields the current TLS pointer.
/// - `ARCH_SET_GS` and `ARCH_GET_GS` fail with `EINVAL`.
pub fn do_arch_prctl(code: ArchPrctlCode, addr: u64, ctx: &mut UserContext) -> Result<u64> {
    let value = match code {
        ArchPrctlCode::ARCH_GET_FS => ctx.tls_pointer() as u64,
        ArchPrctlCode::ARCH_SET_FS => {
            ctx.set_tls_pointer(addr as usize);
            0
        }
        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
            return_errno_with_message!(Errno::EINVAL, "GS cannot be accessed from the user space")
        }
    };
    Ok(value)
}

View File

@ -0,0 +1,22 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::net::{get_socket_from_fd, read_socket_addr_from_user},
};
/// Handles the `bind` system call.
///
/// Reads a socket address of `addrlen` bytes from user space at
/// `sockaddr_ptr` and binds the socket referred to by `sockfd` to it.
pub fn sys_bind(
    sockfd: FileDesc,
    sockaddr_ptr: Vaddr,
    addrlen: u32,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    // The address is read (and validated) before the descriptor is looked up.
    let addr_len = addrlen as usize;
    let socket_addr = read_socket_addr_from_user(sockaddr_ptr, addr_len)?;
    debug!("sockfd = {sockfd}, socket_addr = {socket_addr:?}");

    get_socket_from_fd(sockfd)?.bind(socket_addr)?;

    Ok(SyscallReturn::Return(0))
}

17
kernel/src/syscall/brk.rs Normal file
View File

@ -0,0 +1,17 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{prelude::*, syscall::SyscallReturn};
/// Handles the `brk` system call.
///
/// A `heap_end` of zero is passed to the heap as `None`; any other value is
/// passed as the requested new heap end. The heap end reported by the user
/// heap is returned to user space.
pub fn sys_brk(heap_end: u64, ctx: &Context) -> Result<SyscallReturn> {
    let requested_end = match heap_end {
        0 => None,
        end => Some(end as usize),
    };
    debug!("new heap end = {:x?}", heap_end);

    let current_end = ctx.process.heap().brk(requested_end)?;
    Ok(SyscallReturn::Return(current_end as _))
}

View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::credentials::c_types::{
cap_user_data_t, cap_user_header_t, LINUX_CAPABILITY_VERSION_3,
},
};
/// Handles the `capget` system call.
///
/// Reads a `cap_user_header_t` from `cap_user_header_addr`, validates it, and
/// writes the calling thread's capability sets to `cap_user_data_addr` as a
/// `cap_user_data_t`.
///
/// # Errors
///
/// - `EINVAL` if the header's version is not `LINUX_CAPABILITY_VERSION_3`.
/// - `EINVAL` if the header's pid is neither 0 nor the current process's pid.
/// - Any error produced while accessing user memory.
pub fn sys_capget(
    cap_user_header_addr: Vaddr,
    cap_user_data_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();

    let header = user_space.read_val::<cap_user_header_t>(cap_user_header_addr)?;
    if header.version != LINUX_CAPABILITY_VERSION_3 {
        return_errno_with_message!(Errno::EINVAL, "not supported (capability version is not 3)");
    }

    // Only the calling process may be queried: the header must carry either pid 0
    // or the caller's own pid, which are treated as equivalent.
    // See https://linux.die.net/man/2/capget (Section. With VFS capability support) for details.
    let target_pid = header.pid;
    let targets_caller = target_pid == 0 || target_pid == ctx.process.pid();
    if !targets_caller {
        return_errno_with_message!(Errno::EINVAL, "invalid pid");
    }

    let credentials = ctx.posix_thread.credentials();
    let inheritable = credentials.inheritable_capset();
    let permitted = credentials.permitted_capset();
    let effective = credentials.effective_capset();

    // The legacy layout exposes capabilities as 32-bit fields, so only the lower
    // 32 bits of each set are reported; the upper capabilities are dropped
    // silently, which is considered fail-safe behavior.
    let user_data = cap_user_data_t {
        effective: effective.as_u32(),
        permitted: permitted.as_u32(),
        inheritable: inheritable.as_u32(),
    };
    user_space.write_val(cap_user_data_addr, &user_data)?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::credentials::{
c_types::{cap_user_data_t, cap_user_header_t, LINUX_CAPABILITY_VERSION_3},
capabilities::CapSet,
},
};
/// Combines two 32-bit halves into a 64-bit capability value.
///
/// `low` supplies bits 0..32 and `high` supplies bits 32..64. Bits above
/// `CapSet::most_significant_bit()` are cleared from the result.
fn make_kernel_cap(low: u32, high: u32) -> u64 {
    let combined = u64::from(low) | (u64::from(high) << 32);
    let valid_bits = (1u64 << (CapSet::most_significant_bit() + 1)) - 1;
    combined & valid_bits
}
/// Handles the `capset` system call.
///
/// Reads a `cap_user_header_t` from `cap_user_header_addr`, validates it, and
/// replaces the calling thread's inheritable, permitted and effective
/// capability sets with the values read from `cap_user_data_addr`.
///
/// # Errors
///
/// - `EINVAL` if the header's version is not `LINUX_CAPABILITY_VERSION_3`.
/// - `EINVAL` if the header's pid is neither 0 nor the current process's pid.
/// - Any error produced while accessing user memory.
pub fn sys_capset(
    cap_user_header_addr: Vaddr,
    cap_user_data_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();

    let header = user_space.read_val::<cap_user_header_t>(cap_user_header_addr)?;
    if header.version != LINUX_CAPABILITY_VERSION_3 {
        return_errno_with_message!(Errno::EINVAL, "not supported (capability version is not 3)");
    }

    // The ability to set capabilities of any other process has been deprecated.
    // See: https://elixir.bootlin.com/linux/v6.9.3/source/kernel/capability.c#L209 for more details.
    let target_pid = header.pid;
    let targets_caller = target_pid == 0 || target_pid == ctx.process.pid();
    if !targets_caller {
        return_errno_with_message!(Errno::EINVAL, "invalid pid");
    }

    let user_data = user_space.read_val::<cap_user_data_t>(cap_user_data_addr)?;

    // Each user-supplied set is 32 bits wide; widen it to the 64-bit kernel form
    // with the upper half left empty.
    let widen = |bits: u32| CapSet::from_bits_truncate(make_kernel_cap(bits, 0));
    let inheritable = widen(user_data.inheritable);
    let permitted = widen(user_data.permitted);
    let effective = widen(user_data.effective);

    let credentials = ctx.posix_thread.credentials_mut();
    credentials.set_inheritable_capset(inheritable);
    credentials.set_permitted_capset(permitted);
    credentials.set_effective_capset(effective);

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,48 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{file_table::FileDesc, fs_resolver::FsPath, inode_handle::InodeHandle, utils::InodeType},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `chdir` system call.
///
/// Reads a path from user space at `path_ptr`, resolves it, and makes the
/// resulting directory the current working directory of the process.
///
/// # Errors
///
/// - `ENOENT` if the path is empty.
/// - `ENOTDIR` if the path does not resolve to a directory.
/// - Any error produced while reading or resolving the path.
pub fn sys_chdir(path_ptr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let path = user_space.read_cstring(path_ptr, MAX_FILENAME_LEN)?;
    debug!("path = {:?}", path);

    // The write lock is taken before the lookup and held until the cwd is replaced.
    let mut fs = ctx.process.fs().write();

    let path_str = path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::try_from(path_str.as_ref())?;
    let dentry = fs.lookup(&fs_path)?;

    let is_dir = dentry.type_() == InodeType::Dir;
    if !is_dir {
        return_errno_with_message!(Errno::ENOTDIR, "must be directory");
    }

    fs.set_cwd(dentry);
    Ok(SyscallReturn::Return(0))
}
pub fn sys_fchdir(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
debug!("fd = {}", fd);
let dentry = {
let file_table = ctx.process.file_table().lock();
let file = file_table.get_file(fd)?;
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
inode_handle.dentry().clone()
};
if dentry.type_() != InodeType::Dir {
return_errno_with_message!(Errno::ENOTDIR, "must be directory");
}
ctx.process.fs().write().set_cwd(dentry);
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::{InodeMode, PATH_MAX},
},
prelude::*,
};
/// Handles the `fchmod` system call.
///
/// Sets the mode of the file referred to by `fd`. Bits of `mode` that are
/// not known to `InodeMode` are dropped.
pub fn sys_fchmod(fd: FileDesc, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
    debug!("fd = {}, mode = 0o{:o}", fd, mode);

    let new_mode = InodeMode::from_bits_truncate(mode);

    // The file table stays locked while the mode is updated.
    let file_table = ctx.process.file_table().lock();
    file_table.get_file(fd)?.set_mode(new_mode)?;

    Ok(SyscallReturn::Return(0))
}
pub fn sys_chmod(path_ptr: Vaddr, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
self::sys_fchmodat(AT_FDCWD, path_ptr, mode, ctx)
}
/// Handles the `fchmodat` system call.
///
/// Reads a path from user space at `path_ptr`, resolves it relative to
/// `dirfd`, and sets the mode of the resulting file. Bits of `mode` that are
/// not known to `InodeMode` are dropped.
///
/// Glibc handles the `flags` argument, so it is not part of this signature.
///
/// # Errors
///
/// - `ENOENT` if the path is empty.
/// - Any error produced while reading or resolving the path, or while
///   setting the mode.
pub fn sys_fchmodat(
    dirfd: FileDesc,
    path_ptr: Vaddr,
    mode: u16,
    /* flags: u32, */
    ctx: &Context,
) -> Result<SyscallReturn> {
    let path = ctx.get_user_space().read_cstring(path_ptr, PATH_MAX)?;
    debug!("dirfd = {}, path = {:?}, mode = 0o{:o}", dirfd, path, mode,);

    let path_str = path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;

    // The filesystem read lock is held only for the lookup itself.
    let dentry = ctx.process.fs().read().lookup(&fs_path)?;

    let new_mode = InodeMode::from_bits_truncate(mode);
    dentry.set_mode(new_mode)?;

    Ok(SyscallReturn::Return(0))
}

115
kernel/src/syscall/chown.rs Normal file
View File

@ -0,0 +1,115 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::PATH_MAX,
},
prelude::*,
process::{Gid, Uid},
};
/// Handles the `fchown` system call.
///
/// Changes the owner and/or group of the file referred to by `fd`. An ID of
/// `-1` leaves the corresponding attribute unchanged; see [`to_optional_id`].
///
/// # Errors
///
/// - `EINVAL` if `uid` or `gid` is negative and not `-1`.
/// - Any error produced while looking up `fd` or updating the file.
pub fn sys_fchown(fd: FileDesc, uid: i32, gid: i32, ctx: &Context) -> Result<SyscallReturn> {
    debug!("fd = {}, uid = {}, gid = {}", fd, uid, gid);

    let new_owner = to_optional_id(uid, Uid::new)?;
    let new_group = to_optional_id(gid, Gid::new)?;

    // Nothing to change: succeed without looking at `fd` at all.
    if let (None, None) = (&new_owner, &new_group) {
        return Ok(SyscallReturn::Return(0));
    }

    let file_table = ctx.process.file_table().lock();
    let file = file_table.get_file(fd)?;

    if let Some(owner) = new_owner {
        file.set_owner(owner)?;
    }
    if let Some(group) = new_group {
        file.set_group(group)?;
    }

    Ok(SyscallReturn::Return(0))
}
pub fn sys_chown(path_ptr: Vaddr, uid: i32, gid: i32, ctx: &Context) -> Result<SyscallReturn> {
self::sys_fchownat(AT_FDCWD, path_ptr, uid, gid, 0, ctx)
}
pub fn sys_lchown(path_ptr: Vaddr, uid: i32, gid: i32, ctx: &Context) -> Result<SyscallReturn> {
self::sys_fchownat(
AT_FDCWD,
path_ptr,
uid,
gid,
ChownFlags::AT_SYMLINK_NOFOLLOW.bits(),
ctx,
)
}
pub fn sys_fchownat(
dirfd: FileDesc,
path_ptr: Vaddr,
uid: i32,
gid: i32,
flags: u32,
ctx: &Context,
) -> Result<SyscallReturn> {
let path = ctx.get_user_space().read_cstring(path_ptr, PATH_MAX)?;
let flags = ChownFlags::from_bits(flags)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid flags"))?;
debug!(
"dirfd = {}, path = {:?}, uid = {}, gid = {}, flags = {:?}",
dirfd, path, uid, gid, flags
);
if path.is_empty() {
if !flags.contains(ChownFlags::AT_EMPTY_PATH) {
return_errno_with_message!(Errno::ENOENT, "path is empty");
}
return self::sys_fchown(dirfd, uid, gid, ctx);
}
let uid = to_optional_id(uid, Uid::new)?;
let gid = to_optional_id(gid, Gid::new)?;
if uid.is_none() && gid.is_none() {
return Ok(SyscallReturn::Return(0));
}
let dentry = {
let path = path.to_string_lossy();
let fs_path = FsPath::new(dirfd, path.as_ref())?;
let fs = ctx.process.fs().read();
if flags.contains(ChownFlags::AT_SYMLINK_NOFOLLOW) {
fs.lookup_no_follow(&fs_path)?
} else {
fs.lookup(&fs_path)?
}
};
if let Some(uid) = uid {
dentry.set_owner(uid)?;
}
if let Some(gid) = gid {
dentry.set_group(gid)?;
}
Ok(SyscallReturn::Return(0))
}
fn to_optional_id<T>(id: i32, f: impl Fn(u32) -> T) -> Result<Option<T>> {
let id = if id >= 0 {
Some(f(id as u32))
} else if id == -1 {
// If the owner or group is specified as -1, then that ID is not changed.
None
} else {
return_errno!(Errno::EINVAL);
};
Ok(id)
}
// Flag bits accepted by `sys_fchownat`; any other bit makes it fail with `EINVAL`.
bitflags! {
struct ChownFlags: u32 {
// Makes `sys_fchownat` resolve the path with `lookup_no_follow` instead of `lookup`.
const AT_SYMLINK_NOFOLLOW = 1 << 8;
// Allows an empty path, in which case `sys_fchownat` operates on `dirfd` itself.
const AT_EMPTY_PATH = 1 << 12;
}
}

View File

@ -0,0 +1,30 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{fs_resolver::FsPath, utils::InodeType},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `chroot` system call.
///
/// Reads a path from user space at `path_ptr`, resolves it, and makes the
/// resulting directory the root directory of the process.
///
/// # Errors
///
/// - `ENOENT` if the path is empty.
/// - `ENOTDIR` if the path does not resolve to a directory.
/// - Any error produced while reading or resolving the path.
pub fn sys_chroot(path_ptr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let path = user_space.read_cstring(path_ptr, MAX_FILENAME_LEN)?;
    debug!("path = {:?}", path);

    // The write lock is taken before the lookup and held until the root is replaced.
    let mut fs = ctx.process.fs().write();

    let path_str = path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::try_from(path_str.as_ref())?;
    let dentry = fs.lookup(&fs_path)?;

    let is_dir = dentry.type_() == InodeType::Dir;
    if !is_dir {
        return_errno_with_message!(Errno::ENOTDIR, "must be directory");
    }

    fs.set_root(dentry);
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,153 @@
// SPDX-License-Identifier: MPL-2.0
use core::time::Duration;
use int_to_c_enum::TryFromInt;
use super::SyscallReturn;
use crate::{
prelude::*,
process::{posix_thread::PosixThreadExt, process_table},
thread::thread_table,
time::{
clockid_t,
clocks::{
BootTimeClock, MonotonicClock, MonotonicCoarseClock, MonotonicRawClock, RealTimeClock,
RealTimeCoarseClock,
},
timespec_t, Clock,
},
};
/// Handles the `clock_gettime` system call.
///
/// Reads the clock identified by `clockid` and writes its current time to
/// user space at `timespec_addr` as a `timespec_t`.
pub fn sys_clock_gettime(
    clockid: clockid_t,
    timespec_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("clockid = {:?}", clockid);

    let now = read_clock(clockid, ctx)?;
    let timespec = timespec_t::from(now);

    let user_space = ctx.get_user_space();
    user_space.write_val(timespec_addr, &timespec)?;

    Ok(SyscallReturn::Return(0))
}
// The hard-coded clock IDs.
//
// These are the non-negative clock IDs; `read_clock` converts a raw
// `clockid_t` into this enum with `try_from` and reads the matching clock.
#[derive(Debug, Copy, Clone, TryFromInt, PartialEq)]
#[repr(i32)]
#[allow(non_camel_case_types)]
pub enum ClockId {
// Read from `RealTimeClock`.
CLOCK_REALTIME = 0,
// Read from `MonotonicClock`.
CLOCK_MONOTONIC = 1,
// Read from the profiling clock of the current process.
CLOCK_PROCESS_CPUTIME_ID = 2,
// Read from the profiling clock of the current POSIX thread.
CLOCK_THREAD_CPUTIME_ID = 3,
// Read from `MonotonicRawClock`.
CLOCK_MONOTONIC_RAW = 4,
// Read from `RealTimeCoarseClock`.
CLOCK_REALTIME_COARSE = 5,
// Read from `MonotonicCoarseClock`.
CLOCK_MONOTONIC_COARSE = 6,
// Read from `BootTimeClock`.
CLOCK_BOOTTIME = 7,
}
/// The information decoded from a dynamic clock ID.
///
/// Dynamic clocks are clocks that operate on certain
/// character devices, processes or threads. Their IDs are
/// generated by encoding the file descriptor, PID or TID.
/// Here we follow the rules in Linux:
///
/// The dynamic clock ID is a 32-bit integer.
/// - The most significant 29 bits hold either a PID or a file descriptor.
/// - Bit 2 indicates whether a CPU clock refers to a thread or a process.
/// - Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
/// - A clock ID is invalid if bits 2, 1, and 0 are all set.
///
/// Ref: https://github.com/torvalds/linux/blob/master/include/linux/posix-timers_types.h
pub enum DynamicClockIdInfo {
// A CPU clock of a process: the decoded PID and the clock type.
Pid(u32, DynamicClockType),
// A CPU clock of a thread: the decoded TID and the clock type.
Tid(u32, DynamicClockType),
// A clock backed by a file descriptor: the decoded file descriptor.
#[allow(dead_code)]
Fd(u32),
}
impl TryFrom<clockid_t> for DynamicClockIdInfo {
    type Error = crate::Error;

    /// Decodes a dynamic clock ID.
    ///
    /// The low two bits select the clock type, bit 2 distinguishes thread
    /// clocks from process clocks, and the remaining bits hold the bitwise
    /// complement of the PID, TID or file descriptor.
    ///
    /// Fails with `EINVAL` if bits 2, 1 and 0 are all set, and with the
    /// conversion error of `DynamicClockType::try_from` if the type bits are
    /// not recognized.
    fn try_from(value: clockid_t) -> core::prelude::v1::Result<Self, Self::Error> {
        const CPU_CLOCK_TYPE_MASK: i32 = 0b11;
        const ID_TYPE_MASK: i32 = 0b100;
        const INVALID_MASK: i32 = CPU_CLOCK_TYPE_MASK | ID_TYPE_MASK;

        if (value & INVALID_MASK) == INVALID_MASK {
            return_errno_with_message!(Errno::EINVAL, "invalid clock ID");
        }

        // The ID is stored complemented in the bits above the three tag bits.
        let raw_id = (!(value >> 3)) as u32;
        let clock_type = DynamicClockType::try_from(value & CPU_CLOCK_TYPE_MASK)?;
        let refers_to_thread = (value & ID_TYPE_MASK) != 0;

        let info = match (clock_type, refers_to_thread) {
            (DynamicClockType::FD, _) => DynamicClockIdInfo::Fd(raw_id),
            (cpu_clock_type, true) => DynamicClockIdInfo::Tid(raw_id, cpu_clock_type),
            (cpu_clock_type, false) => DynamicClockIdInfo::Pid(raw_id, cpu_clock_type),
        };
        Ok(info)
    }
}
// The clock type encoded in the two least significant bits of a dynamic clock ID.
#[derive(Debug, Copy, Clone, TryFromInt, PartialEq)]
#[repr(i32)]
pub enum DynamicClockType {
// `read_clock` reads the profiling clock of the target process or thread.
Profiling = 0,
// `read_clock` reads the user clock of the target's profiling clock.
Virtual = 1,
// Not yet supported by `read_clock`.
Scheduling = 2,
// The ID refers to a file descriptor; decoded as `DynamicClockIdInfo::Fd`.
FD = 3,
}
/// Reads the time of a clock specified by the input clock ID.
///
/// If the clock ID does not support, this function will return `Err`.
pub fn read_clock(clockid: clockid_t, ctx: &Context) -> Result<Duration> {
if clockid >= 0 {
let clock_id = ClockId::try_from(clockid)?;
match clock_id {
ClockId::CLOCK_REALTIME => Ok(RealTimeClock::get().read_time()),
ClockId::CLOCK_MONOTONIC => Ok(MonotonicClock::get().read_time()),
ClockId::CLOCK_MONOTONIC_RAW => Ok(MonotonicRawClock::get().read_time()),
ClockId::CLOCK_REALTIME_COARSE => Ok(RealTimeCoarseClock::get().read_time()),
ClockId::CLOCK_MONOTONIC_COARSE => Ok(MonotonicCoarseClock::get().read_time()),
ClockId::CLOCK_BOOTTIME => Ok(BootTimeClock::get().read_time()),
ClockId::CLOCK_PROCESS_CPUTIME_ID => Ok(ctx.process.prof_clock().read_time()),
ClockId::CLOCK_THREAD_CPUTIME_ID => Ok(ctx.posix_thread.prof_clock().read_time()),
}
} else {
let dynamic_clockid_info = DynamicClockIdInfo::try_from(clockid)?;
match dynamic_clockid_info {
DynamicClockIdInfo::Pid(pid, clock_type) => {
let process = process_table::get_process(pid)
.ok_or_else(|| crate::Error::with_message(Errno::EINVAL, "invalid clock ID"))?;
match clock_type {
DynamicClockType::Profiling => Ok(process.prof_clock().read_time()),
DynamicClockType::Virtual => Ok(process.prof_clock().user_clock().read_time()),
// TODO: support scheduling clock and fd clock.
_ => unimplemented!(),
}
}
DynamicClockIdInfo::Tid(tid, clock_type) => {
let thread = thread_table::get_thread(tid)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid clock ID"))?;
let posix_thread = thread.as_posix_thread().unwrap();
match clock_type {
DynamicClockType::Profiling => Ok(posix_thread.prof_clock().read_time()),
DynamicClockType::Virtual => {
Ok(posix_thread.prof_clock().user_clock().read_time())
}
_ => unimplemented!(),
}
}
DynamicClockIdInfo::Fd(_) => unimplemented!(),
}
}
}

115
kernel/src/syscall/clone.rs Normal file
View File

@ -0,0 +1,115 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::cpu::UserContext;
use super::SyscallReturn;
use crate::{
prelude::*,
process::{clone_child, signal::constants::SIGCHLD, CloneArgs, CloneFlags},
};
// The order of arguments for clone differs in different architecture.
// This order we use here is the order for x86_64. See https://man7.org/linux/man-pages/man2/clone.2.html.
/// Handles the `clone` system call.
///
/// Builds a `CloneArgs` from the raw register arguments (the stack size is passed
/// as `0`) and creates the child with `clone_child`.
///
/// Returns the ID of the new child on success. A failure of `clone_child` is
/// propagated to the caller as an error instead of panicking the kernel.
pub fn sys_clone(
    clone_flags: u64,
    new_sp: u64,
    parent_tidptr: Vaddr,
    child_tidptr: Vaddr,
    tls: u64,
    ctx: &Context,
    parent_context: &UserContext,
) -> Result<SyscallReturn> {
    let clone_flags = CloneFlags::from(clone_flags);
    debug!("flags = {:?}, child_stack_ptr = 0x{:x}, parent_tid_ptr = 0x{:x}, child tid ptr = 0x{:x}, tls = 0x{:x}", clone_flags, new_sp, parent_tidptr, child_tidptr, tls);
    let clone_args = CloneArgs::new(new_sp, 0, parent_tidptr, child_tidptr, tls, clone_flags);
    let child_pid = clone_child(ctx, parent_context, clone_args)?;
    Ok(SyscallReturn::Return(child_pid as _))
}
/// Handles the `clone3` system call.
///
/// `size` must equal the size of `Clone3Args`; otherwise `EINVAL` is returned.
/// The argument block is read from user space at `clong_args_addr`, converted
/// into a `CloneArgs`, and handed to `clone_child`. Returns the child's ID.
pub fn sys_clone3(
    clong_args_addr: Vaddr,
    size: usize,
    ctx: &Context,
    parent_context: &UserContext,
) -> Result<SyscallReturn> {
    trace!(
        "clone args addr = 0x{:x}, size = 0x{:x}",
        clong_args_addr,
        size
    );

    let expected_size = core::mem::size_of::<Clone3Args>();
    if size != expected_size {
        return_errno_with_message!(Errno::EINVAL, "invalid size");
    }

    let raw_args: Clone3Args = ctx.get_user_space().read_val(clong_args_addr)?;
    trace!("clone3 args = {:x?}", raw_args);
    let clone_args = CloneArgs::from(raw_args);
    debug!("clone args = {:x?}", clone_args);

    let child_pid = clone_child(ctx, parent_context, clone_args)?;
    trace!("child pid = {}", child_pid);

    Ok(SyscallReturn::Return(child_pid as _))
}
/// The argument block that `sys_clone3` reads from user space.
///
/// `sys_clone3` rejects any `size` argument that differs from the size of this
/// struct, so the field list defines the only accepted layout.
#[repr(C)]
#[derive(Debug, Clone, Copy, Pod)]
struct Clone3Args {
/// Flags bit mask
flags: u64,
/// Where to store PID file descriptor
pidfd: u64,
/// Where to store child TID in child's memory
child_tid: u64,
/// Where to store child TID in parent's memory
parent_tid: u64,
/// Signal to deliver to parent on child termination
exit_signal: u64,
/// Pointer to lowest byte of stack
stack: u64,
/// Size of stack
stack_size: u64,
/// Location of new TLS
tls: u64,
/// Pointer to a pid_t array
set_tid: u64,
/// Number of elements in set_tid
set_tid_size: u64,
/// File descriptor for target cgroup of child
cgroup: u64,
}
impl From<Clone3Args> for CloneArgs {
    /// Converts the user-supplied `clone3` argument block into `CloneArgs`.
    ///
    /// The low byte of the resulting flags is taken from `exit_signal` and the
    /// remaining bits from `flags`. Fields that are not supported yet (`pidfd`,
    /// `set_tid`, `set_tid_size`, `cgroup`, and exit signals other than `SIGCHLD`)
    /// only trigger a warning.
    fn from(value: Clone3Args) -> Self {
        const FLAGS_MASK: u64 = 0xff;
        let clone_flags =
            CloneFlags::from(value.exit_signal & FLAGS_MASK | value.flags & !FLAGS_MASK);

        // TODO: deal with pidfd, exit_signal, set_tid, set_tid_size, cgroup
        //
        // Only warn when an exit signal is requested and it is not `SIGCHLD`.
        // The comparison is done in `u64` so that high bits are not silently dropped.
        if value.exit_signal != 0 && value.exit_signal != u64::from(SIGCHLD.as_u8()) {
            warn!("exit signal is not supported");
        }

        if value.pidfd != 0 {
            warn!("pidfd is not supported");
        }

        if value.set_tid != 0 || value.set_tid_size != 0 {
            warn!("set_tid is not supported");
        }

        if value.cgroup != 0 {
            warn!("cgroup is not supported");
        }

        CloneArgs::new(
            value.stack,
            value.stack_size as _,
            value.parent_tid as _,
            value.child_tid as _,
            value.tls,
            clone_flags,
        )
    }
}

View File

@ -0,0 +1,30 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*};
/// Handles the `close` system call.
///
/// Fails if `fd` is not present in the file table of the calling process;
/// otherwise removes it from the table and returns zero.
pub fn sys_close(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    debug!("fd = {}", fd);

    let closed_file = {
        let mut table = ctx.process.file_table().lock();
        // Look the descriptor up first so that a bad `fd` is reported as an error.
        // The table stays locked, so the removal below is expected to succeed.
        let _ = table.get_file(fd)?;
        table.close_file(fd).unwrap()
    };

    // Any cleanup work has to happen in the `Drop` impl of the file.
    //
    // Races between closing a file descriptor and using it are not a concern here:
    // the man pages explicitly allow them. See the "Multithreaded processes and
    // close()" section in <https://man7.org/linux/man-pages/man2/close.2.html>.
    drop(closed_file);

    // Linux reports errors from close() for diagnostic and remedial purposes, but
    // only for a small subset of file systems such as NFS. No such file system is
    // supported at the moment, so returning zero is fine.
    //
    // For details, see the discussion at <https://github.com/asterinas/asterinas/issues/506>
    // and the "Dealing with error returns from close()" section at
    // <https://man7.org/linux/man-pages/man2/close.2.html>.
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,22 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::net::{get_socket_from_fd, read_socket_addr_from_user},
};
/// Handles the `connect` system call.
///
/// Reads the socket address from user space, resolves `sockfd` to a socket,
/// and connects that socket to the address. Returns zero on success.
pub fn sys_connect(
    sockfd: FileDesc,
    sockaddr_ptr: Vaddr,
    addr_len: u32,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    let socket_addr = read_socket_addr_from_user(sockaddr_ptr, addr_len as _)?;
    debug!("fd = {sockfd}, socket_addr = {socket_addr:?}");

    get_socket_from_fd(sockfd)?.connect(socket_addr)?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
//! constants used in syscall
/// The longest filename that syscalls accept.
///
/// NOTE(review): presumably counted in bytes — confirm against the
/// `read_cstring` callers that pass this value as the maximum length.
pub const MAX_FILENAME_LEN: usize = 4096;

74
kernel/src/syscall/dup.rs Normal file
View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::{FdFlags, FileDesc},
prelude::*,
process::ResourceType,
};
/// Handles the `dup` system call.
///
/// Duplicates `old_fd` with no descriptor flags, passing `0` as the requested
/// descriptor number, and returns the new descriptor.
pub fn sys_dup(old_fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    debug!("old_fd = {}", old_fd);

    let duplicated = {
        let mut table = ctx.process.file_table().lock();
        table.dup(old_fd, 0, FdFlags::empty())?
    };

    Ok(SyscallReturn::Return(duplicated as _))
}
/// Handles the `dup2` system call.
///
/// When both descriptors are equal, the call only checks that `old_fd` is open
/// and returns `new_fd`. Otherwise it behaves like `dup3` without flags.
pub fn sys_dup2(old_fd: FileDesc, new_fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    debug!("old_fd = {}, new_fd = {}", old_fd, new_fd);

    if old_fd != new_fd {
        return do_dup3(old_fd, new_fd, FdFlags::empty(), ctx);
    }

    // Same descriptor on both sides: nothing to duplicate, just validate it.
    let table = ctx.process.file_table().lock();
    let _ = table.get_file(old_fd)?;
    Ok(SyscallReturn::Return(new_fd as _))
}
/// Handles the `dup3` system call.
///
/// `flags` must be either `0` or `0x80000` (the value the error message calls
/// `O_CLOEXEC`); any other value yields `EINVAL`.
pub fn sys_dup3(
    old_fd: FileDesc,
    new_fd: FileDesc,
    flags: u32,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("old_fd = {}, new_fd = {}", old_fd, new_fd);

    const RAW_O_CLOEXEC: u32 = 0x80000;

    let fd_flags = if flags == 0 {
        FdFlags::empty()
    } else if flags == RAW_O_CLOEXEC {
        FdFlags::CLOEXEC
    } else {
        return_errno_with_message!(Errno::EINVAL, "flags must be O_CLOEXEC or 0");
    };

    do_dup3(old_fd, new_fd, fd_flags, ctx)
}
/// Duplicates `old_fd` onto `new_fd` with the given descriptor flags.
///
/// Returns `EINVAL` if both descriptors are equal and `EBADF` if `new_fd` is not
/// below the current `RLIMIT_NOFILE` limit. Whatever `new_fd` referred to before
/// is closed first; the result of that close is deliberately ignored.
fn do_dup3(
    old_fd: FileDesc,
    new_fd: FileDesc,
    flags: FdFlags,
    ctx: &Context,
) -> Result<SyscallReturn> {
    if old_fd == new_fd {
        return_errno!(Errno::EINVAL);
    }

    let process = ctx.process;

    let nofile_limit = process
        .resource_limits()
        .lock()
        .get_rlimit(ResourceType::RLIMIT_NOFILE)
        .get_cur() as FileDesc;
    if new_fd >= nofile_limit {
        return_errno!(Errno::EBADF);
    }

    let mut table = process.file_table().lock();
    // `new_fd` may or may not be open; either way it is about to be replaced.
    let _ = table.close_file(new_fd);
    let duplicated = table.dup(old_fd, new_fd, flags)?;

    Ok(SyscallReturn::Return(duplicated as _))
}

243
kernel/src/syscall/epoll.rs Normal file
View File

@ -0,0 +1,243 @@
// SPDX-License-Identifier: MPL-2.0
use core::{sync::atomic::Ordering, time::Duration};
use super::SyscallReturn;
use crate::{
events::IoEvents,
fs::{
epoll::{EpollCtl, EpollEvent, EpollFile, EpollFlags},
file_table::{FdFlags, FileDesc},
utils::CreationFlags,
},
prelude::*,
process::signal::sig_mask::SigMask,
};
/// Handles the `epoll_create` system call.
///
/// `size` is only checked for being positive; the actual work is done by
/// `sys_epoll_create1` with no flags.
pub fn sys_epoll_create(size: i32, ctx: &Context) -> Result<SyscallReturn> {
    if size > 0 {
        sys_epoll_create1(0, ctx)
    } else {
        return_errno_with_message!(Errno::EINVAL, "size is not positive");
    }
}
/// Handles the `epoll_create1` system call.
///
/// Accepts either no flags or exactly `O_CLOEXEC`; everything else is rejected
/// with `EINVAL`. A new `EpollFile` is inserted into the file table of the
/// calling process and its descriptor is returned.
pub fn sys_epoll_create1(flags: u32, ctx: &Context) -> Result<SyscallReturn> {
    debug!("flags = 0x{:x}", flags);

    let creation_flags = CreationFlags::from_bits(flags)
        .ok_or_else(|| Error::with_message(Errno::EINVAL, "invalid flags"))?;

    let fd_flags = if creation_flags.is_empty() {
        FdFlags::empty()
    } else if creation_flags == CreationFlags::O_CLOEXEC {
        FdFlags::CLOEXEC
    } else {
        // Of all the creation flags, only O_CLOEXEC is meaningful here.
        return_errno_with_message!(Errno::EINVAL, "invalid flags");
    };

    let epoll_file: Arc<EpollFile> = EpollFile::new();
    let new_fd = {
        let mut table = ctx.process.file_table().lock();
        table.insert(epoll_file, fd_flags)
    };

    Ok(SyscallReturn::Return(new_fd as _))
}
pub fn sys_epoll_ctl(
epfd: FileDesc,
op: i32,
fd: FileDesc,
event_addr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"epfd = {}, op = {}, fd = {}, event_addr = 0x{:x}",
epfd, op, fd, event_addr
);
const EPOLL_CTL_ADD: i32 = 1;
const EPOLL_CTL_DEL: i32 = 2;
const EPOLL_CTL_MOD: i32 = 3;
let cmd = match op {
EPOLL_CTL_ADD => {
let c_epoll_event = ctx.get_user_space().read_val::<c_epoll_event>(event_addr)?;
let event = EpollEvent::from(&c_epoll_event);
let flags = EpollFlags::from_bits_truncate(c_epoll_event.events);
EpollCtl::Add(fd, event, flags)
}
EPOLL_CTL_DEL => EpollCtl::Del(fd),
EPOLL_CTL_MOD => {
let c_epoll_event = ctx.get_user_space().read_val::<c_epoll_event>(event_addr)?;
let event = EpollEvent::from(&c_epoll_event);
let flags = EpollFlags::from_bits_truncate(c_epoll_event.events);
EpollCtl::Mod(fd, event, flags)
}
_ => return_errno_with_message!(Errno::EINVAL, "invalid op"),
};
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(epfd)?.clone()
};
let epoll_file = file
.downcast_ref::<EpollFile>()
.ok_or(Error::with_message(Errno::EINVAL, "not epoll file"))?;
epoll_file.control(&cmd)?;
Ok(SyscallReturn::Return(0 as _))
}
fn do_epoll_wait(
epfd: FileDesc,
max_events: i32,
timeout: i32,
ctx: &Context,
) -> Result<Vec<EpollEvent>> {
let max_events = {
if max_events <= 0 {
return_errno_with_message!(Errno::EINVAL, "max_events is not positive");
}
max_events as usize
};
let timeout = if timeout >= 0 {
Some(Duration::from_millis(timeout as _))
} else {
None
};
let file_table = ctx.process.file_table().lock();
let epoll_file = file_table
.get_file(epfd)?
.downcast_ref::<EpollFile>()
.ok_or(Error::with_message(Errno::EINVAL, "not epoll file"))?;
let result = epoll_file.wait(max_events, timeout.as_ref());
// As mentioned in the manual, the return value should be zero if no file descriptor becomes ready
// during the requested `timeout` milliseconds. So we ignore `Err(ETIME)` and return an empty vector.
//
// Manual: <https://www.man7.org/linux/man-pages/man2/epoll_wait.2.html>
if result
.as_ref()
.is_err_and(|err| err.error() == Errno::ETIME)
{
return Ok(Vec::new());
}
result
}
pub fn sys_epoll_wait(
epfd: FileDesc,
events_addr: Vaddr,
max_events: i32,
timeout: i32,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"epfd = {}, events_addr = 0x{:x}, max_events = {}, timeout = {:?}",
epfd, events_addr, max_events, timeout
);
let epoll_events = do_epoll_wait(epfd, max_events, timeout, ctx)?;
// Write back
let mut write_addr = events_addr;
let user_space = ctx.get_user_space();
for epoll_event in epoll_events.iter() {
let c_epoll_event = c_epoll_event::from(epoll_event);
user_space.write_val(write_addr, &c_epoll_event)?;
write_addr += core::mem::size_of::<c_epoll_event>();
}
Ok(SyscallReturn::Return(epoll_events.len() as _))
}
/// Installs the signal mask stored at `set_ptr` for the calling thread.
///
/// A null `set_ptr` leaves the mask untouched. In both cases the mask that was
/// in effect before the call is returned so that it can be restored later.
fn set_signal_mask(set_ptr: Vaddr, ctx: &Context) -> Result<SigMask> {
    let requested_mask: Option<SigMask> = match set_ptr {
        0 => None,
        addr => Some(ctx.get_user_space().read_val::<u64>(addr)?.into()),
    };

    let sig_mask = ctx.posix_thread.sig_mask();
    let previous_mask = sig_mask.load(Ordering::Relaxed);

    if let Some(mask) = requested_mask {
        sig_mask.store(mask, Ordering::Relaxed);
    }

    Ok(previous_mask)
}
/// Stores `sig_mask_val` as the signal mask of the calling thread.
fn restore_signal_mask(sig_mask_val: SigMask, ctx: &Context) {
    let sig_mask = ctx.posix_thread.sig_mask();
    sig_mask.store(sig_mask_val, Ordering::Relaxed);
}
/// Handles the `epoll_pwait` system call.
///
/// Works like `sys_epoll_wait`, except that the signal mask at `sigmask` (if not
/// null) is installed for the duration of the wait. The previous mask is put back
/// afterwards, whether the wait succeeded or failed. A `sigset_size` other than 8
/// is only logged as an error.
pub fn sys_epoll_pwait(
    epfd: FileDesc,
    events_addr: Vaddr,
    max_events: i32,
    timeout: i32,
    sigmask: Vaddr,
    sigset_size: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "epfd = {}, events_addr = 0x{:x}, max_events = {}, timeout = {:?}, sigmask = 0x{:x}, sigset_size = {}",
        epfd, events_addr, max_events, timeout, sigmask, sigset_size
    );

    if sigset_size != 8 {
        error!("sigset size is not equal to 8");
    }

    let saved_mask = set_signal_mask(sigmask, ctx)?;
    let wait_result = do_epoll_wait(epfd, max_events, timeout, ctx);
    // The original mask must come back on the success path and the error path alike,
    // so restore it before looking at the outcome.
    restore_signal_mask(saved_mask, ctx);
    let ready_events = wait_result?;

    // Copy the events back to the user buffer, one record after another.
    let mut cursor = events_addr;
    let user_space = ctx.get_user_space();
    for ready_event in &ready_events {
        let raw_event = c_epoll_event::from(ready_event);
        user_space.write_val(cursor, &raw_event)?;
        cursor += core::mem::size_of::<c_epoll_event>();
    }

    Ok(SyscallReturn::Return(ready_events.len() as _))
}
/// The event record that the epoll syscalls exchange with user space.
///
/// `sys_epoll_ctl` reads one such record from user memory, and the wait
/// syscalls write an array of them back. The layout is `repr(C, packed)`, so
/// `data` directly follows `events` without padding.
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C, packed)]
struct c_epoll_event {
// Raw event bits; converted with `IoEvents`/`EpollFlags::from_bits_truncate`.
events: u32,
// Opaque value carried as the `user_data` of an `EpollEvent`.
data: u64,
}
impl From<&EpollEvent> for c_epoll_event {
fn from(ep_event: &EpollEvent) -> Self {
Self {
events: ep_event.events.bits(),
data: ep_event.user_data,
}
}
}
impl From<&c_epoll_event> for EpollEvent {
fn from(c_event: &c_epoll_event) -> Self {
Self::new(IoEvents::from_bits_truncate(c_event.events), c_event.data)
}
}

View File

@ -0,0 +1,281 @@
// SPDX-License-Identifier: MPL-2.0
//! `eventfd()` creates an "eventfd object" (we name it as `EventFile`)
//! which serves as a mechanism for event wait/notify.
//!
//! `EventFile` holds a u64 integer counter.
//! Writing to `EventFile` increments the counter by the written value.
//! Reading from `EventFile` returns the current counter value and resets it
//! (It is also possible to only read 1,
//! depending on whether the `EFD_SEMAPHORE` flag is set).
//! The read/write operations may be blocked based on file flags.
//!
//! For more detailed information about this syscall,
//! refer to the man 2 eventfd documentation.
//!
use super::SyscallReturn;
use crate::{
events::{IoEvents, Observer},
fs::{
file_handle::FileLike,
file_table::{FdFlags, FileDesc},
utils::{CreationFlags, InodeMode, InodeType, Metadata, StatusFlags},
},
prelude::*,
process::{
signal::{Pauser, Pollable, Pollee, Poller},
Gid, Uid,
},
time::clocks::RealTimeClock,
};
/// Handles the `eventfd` system call: creates an event file without any flags.
///
/// Returns the descriptor of the new event file.
pub fn sys_eventfd(init_val: u64, ctx: &Context) -> Result<SyscallReturn> {
    debug!("init_val = 0x{:x}", init_val);

    let new_fd = do_sys_eventfd2(init_val, Flags::empty(), ctx);

    Ok(SyscallReturn::Return(new_fd as _))
}
pub fn sys_eventfd2(init_val: u64, flags: u32, ctx: &Context) -> Result<SyscallReturn> {
trace!("raw flags = {}", flags);
let flags = Flags::from_bits(flags)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown flags"))?;
debug!("init_val = 0x{:x}, flags = {:?}", init_val, flags);
let fd = do_sys_eventfd2(init_val, flags, ctx);
Ok(SyscallReturn::Return(fd as _))
}
/// Creates an `EventFile` and installs it in the file table of the calling process.
///
/// The new descriptor is close-on-exec if and only if `EFD_CLOEXEC` is set.
fn do_sys_eventfd2(init_val: u64, flags: Flags, ctx: &Context) -> FileDesc {
    let fd_flags = if flags.contains(Flags::EFD_CLOEXEC) {
        FdFlags::CLOEXEC
    } else {
        FdFlags::empty()
    };
    let event_file = Arc::new(EventFile::new(init_val, flags));

    let mut table = ctx.process.file_table().lock();
    let new_fd = table.insert(event_file, fd_flags);
    new_fd
}
bitflags! {
/// The flags accepted by `eventfd2`, which also make up the state of an `EventFile`.
struct Flags: u32 {
/// A read returns the value 1 and decrements the counter by one,
/// instead of returning the whole counter and resetting it to zero.
const EFD_SEMAPHORE = 1;
/// The new file descriptor is created with `FdFlags::CLOEXEC`.
const EFD_CLOEXEC = CreationFlags::O_CLOEXEC.bits();
/// Reads and writes fail with `EAGAIN` instead of blocking.
const EFD_NONBLOCK = StatusFlags::O_NONBLOCK.bits();
}
}
/// The file object behind an eventfd descriptor.
struct EventFile {
// The current counter value; never exceeds `MAX_COUNTER_VALUE` after an add.
counter: Mutex<u64>,
// Tracks the readable (`IN`) and writable (`OUT`) state for pollers.
pollee: Pollee,
// The eventfd flags; `EFD_NONBLOCK` can be toggled through `set_status_flags`.
flags: Mutex<Flags>,
// Used by blocking writers to wait until the counter can take their value.
write_pauser: Arc<Pauser>,
}
impl EventFile {
    /// The largest value the counter may hold.
    const MAX_COUNTER_VALUE: u64 = u64::MAX - 1;

    /// Creates an event file whose counter starts at `init_val`.
    ///
    /// The pollee starts out with the `OUT` event set.
    fn new(init_val: u64, flags: Flags) -> Self {
        Self {
            counter: Mutex::new(init_val),
            pollee: Pollee::new(IoEvents::OUT),
            flags: Mutex::new(flags),
            write_pauser: Pauser::new(),
        }
    }

    /// Tells whether `EFD_NONBLOCK` is currently set.
    fn is_nonblocking(&self) -> bool {
        self.flags.lock().contains(Flags::EFD_NONBLOCK)
    }

    /// Brings the pollee events in line with the counter value.
    ///
    /// The file is readable while the counter is non-zero. It is writable while a
    /// value of at least "1" can be written without blocking, i.e. while the
    /// counter is below `MAX_COUNTER_VALUE`. Whenever the file is writable, all
    /// paused writers are resumed.
    fn update_io_state(&self, counter: &MutexGuard<u64>) {
        let current = **counter;
        let is_readable = current != 0;
        let is_writable = current < Self::MAX_COUNTER_VALUE;

        match (is_writable, is_readable) {
            (true, true) => {
                self.pollee.add_events(IoEvents::IN | IoEvents::OUT);
                self.write_pauser.resume_all();
            }
            (true, false) => {
                self.pollee.add_events(IoEvents::OUT);
                self.pollee.del_events(IoEvents::IN);
                self.write_pauser.resume_all();
            }
            (false, true) => {
                self.pollee.add_events(IoEvents::IN);
                self.pollee.del_events(IoEvents::OUT);
            }
            (false, false) => {
                self.pollee.del_events(IoEvents::IN | IoEvents::OUT);
                // TODO: deal with overflow logic
            }
        }
    }

    /// Adds `val` to the counter.
    ///
    /// If the sum overflows or exceeds `MAX_COUNTER_VALUE`, the counter is left
    /// unmodified and `Err(EINVAL)` is returned.
    fn add_counter_val(&self, val: u64) -> Result<()> {
        let mut counter = self.counter.lock();

        let sum = (*counter)
            .checked_add(val)
            .ok_or_else(|| Error::with_message(Errno::EINVAL, "arithmetic overflow"))?;
        if sum > Self::MAX_COUNTER_VALUE {
            return_errno_with_message!(Errno::EINVAL, "new value exceeds MAX_COUNTER_VALUE");
        }

        *counter = sum;
        self.update_io_state(&counter);
        Ok(())
    }
}
/// Polling an `EventFile` is delegated entirely to its `pollee`.
impl Pollable for EventFile {
fn poll(&self, mask: IoEvents, poller: Option<&mut Poller>) -> IoEvents {
self.pollee.poll(mask, poller)
}
}
impl FileLike for EventFile {
    /// Reads the counter into `writer` as a `u64`.
    ///
    /// With `EFD_SEMAPHORE`, the value 1 is written and the counter is decremented
    /// by one; otherwise the whole counter is written and reset to zero. While the
    /// counter is zero, the call blocks, or fails with `EAGAIN` in non-blocking
    /// mode. Fails with `EINVAL` if `writer` has room for fewer than 8 bytes.
    fn read(&self, writer: &mut VmWriter) -> Result<usize> {
        let read_len = core::mem::size_of::<u64>();
        if writer.avail() < read_len {
            return_errno_with_message!(Errno::EINVAL, "buf len is less than the size of u64");
        }

        loop {
            let mut counter = self.counter.lock();

            // Wait until the counter becomes non-zero
            if *counter == 0 {
                if self.is_nonblocking() {
                    return_errno_with_message!(Errno::EAGAIN, "try reading event file again");
                }

                self.update_io_state(&counter);
                // Release the counter lock before sleeping so that writers can proceed.
                drop(counter);

                let mut poller = Poller::new();
                if self.pollee.poll(IoEvents::IN, Some(&mut poller)).is_empty() {
                    poller.wait()?;
                }
                continue;
            }

            // Copy value from counter, and set the new counter value
            if self.flags.lock().contains(Flags::EFD_SEMAPHORE) {
                writer.write_fallible(&mut 1u64.as_bytes().into())?;
                *counter -= 1;
            } else {
                writer.write_fallible(&mut (*counter).as_bytes().into())?;
                *counter = 0;
            }

            self.update_io_state(&counter);
            break;
        }

        Ok(read_len)
    }

    /// Adds the `u64` read from `reader` to the counter.
    ///
    /// Fails with `EINVAL` if `reader` holds fewer than 8 bytes or if the supplied
    /// value is `u64::MAX`. If the addition would push the counter beyond
    /// `MAX_COUNTER_VALUE`, the call blocks until the value fits, or fails with
    /// `EAGAIN` in non-blocking mode.
    fn write(&self, reader: &mut VmReader) -> Result<usize> {
        let write_len = core::mem::size_of::<u64>();
        if reader.remain() < write_len {
            return_errno_with_message!(Errno::EINVAL, "buf len is less than the size of u64");
        }

        let supplied_value = reader.read_val::<u64>()?;

        // `u64::MAX` can never be added to the counter, not even when the counter is
        // zero. Waiting for room would therefore block forever, so reject the value
        // up front, as eventfd(2) specifies.
        if supplied_value == u64::MAX {
            return_errno_with_message!(Errno::EINVAL, "the supplied value is u64::MAX");
        }

        // Try to add counter val at first
        if self.add_counter_val(supplied_value).is_ok() {
            return Ok(write_len);
        }

        if self.is_nonblocking() {
            return_errno_with_message!(Errno::EAGAIN, "try writing to event file again");
        }

        // Wait until counter can be added val to
        self.write_pauser
            .pause_until(|| self.add_counter_val(supplied_value).ok())?;

        Ok(write_len)
    }

    /// Reports `O_NONBLOCK` if the file is in non-blocking mode, and nothing else.
    fn status_flags(&self) -> StatusFlags {
        if self.is_nonblocking() {
            StatusFlags::O_NONBLOCK
        } else {
            StatusFlags::empty()
        }
    }

    /// Sets or clears non-blocking mode according to `new_flags`.
    ///
    /// All other status flags are currently ignored.
    fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> {
        let mut flags = self.flags.lock();

        if new_flags.contains(StatusFlags::O_NONBLOCK) {
            *flags |= Flags::EFD_NONBLOCK;
        } else {
            *flags &= !Flags::EFD_NONBLOCK;
        }

        // TODO: deal with other flags

        Ok(())
    }

    /// Registers `observer` with the pollee for the events in `mask`.
    fn register_observer(
        &self,
        observer: Weak<dyn crate::events::Observer<IoEvents>>,
        mask: IoEvents,
    ) -> Result<()> {
        self.pollee.register_observer(observer, mask);
        Ok(())
    }

    /// Unregisters `observer` from the pollee and returns what the pollee returns.
    fn unregister_observer(
        &self,
        observer: &Weak<dyn Observer<IoEvents>>,
    ) -> Option<Weak<dyn Observer<IoEvents>>> {
        self.pollee.unregister_observer(observer)
    }

    /// Returns synthetic metadata for the event file.
    ///
    /// All three timestamps are the current real time; the remaining fields are
    /// fixed values.
    fn metadata(&self) -> Metadata {
        let now = RealTimeClock::get().read_time();
        Metadata {
            dev: 0,
            ino: 0,
            size: 0,
            blk_size: 0,
            blocks: 0,
            atime: now,
            mtime: now,
            ctime: now,
            type_: InodeType::NamedPipe,
            mode: InodeMode::from_bits_truncate(0o200),
            nlinks: 1,
            uid: Uid::new_root(),
            gid: Gid::new_root(),
            rdev: 0,
        }
    }
}

View File

@ -0,0 +1,227 @@
// SPDX-License-Identifier: MPL-2.0
use aster_rights::WriteOp;
use ostd::{cpu::UserContext, user::UserContextApi};
use super::{constants::*, SyscallReturn};
use crate::{
cpu::LinuxAbi,
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
path::Dentry,
utils::InodeType,
},
prelude::*,
process::{
check_executable_file, load_program_to_vm, posix_thread::ThreadName, Credentials, Process,
MAX_ARGV_NUMBER, MAX_ARG_LEN, MAX_ENVP_NUMBER, MAX_ENV_LEN,
},
};
/// Handles the `execve` system call.
///
/// The executable is looked up relative to the current working directory
/// (`AT_FDCWD`) with no lookup flags. On success the call does not return to the
/// old program, which is signalled with `SyscallReturn::NoReturn`.
pub fn sys_execve(
    filename_ptr: Vaddr,
    argv_ptr_ptr: Vaddr,
    envp_ptr_ptr: Vaddr,
    ctx: &Context,
    user_context: &mut UserContext,
) -> Result<SyscallReturn> {
    let executable_path = read_filename(filename_ptr, ctx)?;
    let elf_file = lookup_executable_file(AT_FDCWD, executable_path, OpenFlags::empty(), ctx)?;

    do_execve(elf_file, argv_ptr_ptr, envp_ptr_ptr, ctx, user_context)
        .map(|()| SyscallReturn::NoReturn)
}
/// Handles the `execveat` system call.
///
/// The executable is looked up relative to `dfd`. Bits in `flags` that are not
/// defined in `OpenFlags` are silently dropped. On success the call does not
/// return to the old program, which is signalled with `SyscallReturn::NoReturn`.
pub fn sys_execveat(
    dfd: FileDesc,
    filename_ptr: Vaddr,
    argv_ptr_ptr: Vaddr,
    envp_ptr_ptr: Vaddr,
    flags: u32,
    ctx: &Context,
    user_context: &mut UserContext,
) -> Result<SyscallReturn> {
    let lookup_flags = OpenFlags::from_bits_truncate(flags);
    let filename = read_filename(filename_ptr, ctx)?;
    let elf_file = lookup_executable_file(dfd, filename, lookup_flags, ctx)?;

    do_execve(elf_file, argv_ptr_ptr, envp_ptr_ptr, ctx, user_context)
        .map(|()| SyscallReturn::NoReturn)
}
/// Resolves the file to execute and checks that it is executable.
///
/// - With `AT_EMPTY_PATH` and an empty `filename`, the file is the one `dfd`
///   refers to.
/// - With `AT_SYMLINK_NOFOLLOW`, the path is looked up without following the
///   final symlink, and `ELOOP` is returned if the result is a symlink.
/// - Otherwise the path is looked up normally.
fn lookup_executable_file(
    dfd: FileDesc,
    filename: String,
    flags: OpenFlags,
    ctx: &Context,
) -> Result<Arc<Dentry>> {
    let fs_resolver = ctx.process.fs().read();

    let use_fd_itself = flags.contains(OpenFlags::AT_EMPTY_PATH) && filename.is_empty();
    let dentry = if use_fd_itself {
        fs_resolver.lookup_from_fd(dfd)?
    } else {
        let fs_path = FsPath::new(dfd, &filename)?;
        if flags.contains(OpenFlags::AT_SYMLINK_NOFOLLOW) {
            let candidate = fs_resolver.lookup_no_follow(&fs_path)?;
            if candidate.type_() == InodeType::SymLink {
                return_errno_with_message!(Errno::ELOOP, "the executable file is a symlink");
            }
            candidate
        } else {
            fs_resolver.lookup(&fs_path)?
        }
    };

    check_executable_file(&dentry)?;
    Ok(dentry)
}
/// Replaces the program of the calling process with `elf_file`.
///
/// In order, this function: reads `argv` and `envp` from user space, renames the
/// calling thread after the executable, clears the thread's clear-child-tid
/// address, closes the close-on-exec files, loads the program into the process
/// VM, drops the robust list, applies the set-uid/set-gid bits of the file to
/// the credentials, records the new executable path, updates the signal
/// dispositions, and finally resets `user_context` so that execution resumes at
/// the entry point of the new program on its fresh user stack.
///
/// Any failure is returned as an error; the steps completed before the failure
/// are not rolled back by this function.
fn do_execve(
elf_file: Arc<Dentry>,
argv_ptr_ptr: Vaddr,
envp_ptr_ptr: Vaddr,
ctx: &Context,
user_context: &mut UserContext,
) -> Result<()> {
// Only the process and the POSIX thread are needed from the context.
let Context {
process,
posix_thread,
thread: _,
task: _,
} = ctx;
let executable_path = elf_file.abs_path();
// Both vectors are copied out of user memory before the address space is replaced.
let argv = read_cstring_vec(argv_ptr_ptr, MAX_ARGV_NUMBER, MAX_ARG_LEN, ctx)?;
let envp = read_cstring_vec(envp_ptr_ptr, MAX_ENVP_NUMBER, MAX_ENV_LEN, ctx)?;
debug!(
"filename: {:?}, argv = {:?}, envp = {:?}",
executable_path, argv, envp
);
// FIXME: should we set thread name in execve?
*posix_thread.thread_name().lock() =
Some(ThreadName::new_from_executable_path(&executable_path)?);
// clear ctid
// FIXME: should we clear ctid when execve?
*posix_thread.clear_child_tid().lock() = 0;
// Ensure that the file descriptors with the close-on-exec flag are closed.
let closed_files = process.file_table().lock().close_files_on_exec();
// The files are dropped only after the file table lock guard of the previous
// statement has been released.
drop(closed_files);
debug!("load program to root vmar");
let (new_executable_path, elf_load_info) = {
let fs_resolver = &*process.fs().read();
let process_vm = process.vm();
// NOTE(review): the meaning of the trailing `1` is not visible here —
// presumably a recursion/interpreter depth counter; confirm against
// `load_program_to_vm`.
load_program_to_vm(process_vm, elf_file.clone(), argv, envp, fs_resolver, 1)?
};
// After the program has been successfully loaded, the virtual memory of the current process
// is initialized. Hence, it is necessary to clear the previously recorded robust list.
*posix_thread.robust_list().lock() = None;
debug!("load elf in execve succeeds");
let credentials = ctx.posix_thread.credentials_mut();
set_uid_from_elf(process, &credentials, &elf_file)?;
set_gid_from_elf(process, &credentials, &elf_file)?;
// set executable path
process.set_executable_path(new_executable_path);
// set signal disposition to default
process.sig_dispositions().lock().inherit();
// set cpu context to default
let default_content = UserContext::default();
*user_context.general_regs_mut() = *default_content.general_regs();
user_context.set_tls_pointer(default_content.tls_pointer());
*user_context.fp_regs_mut() = *default_content.fp_regs();
// set new entry point
user_context.set_instruction_pointer(elf_load_info.entry_point() as _);
debug!("entry_point: 0x{:x}", elf_load_info.entry_point());
// set new user stack top
user_context.set_stack_pointer(elf_load_info.user_stack_top() as _);
debug!("user stack top: 0x{:x}", elf_load_info.user_stack_top());
Ok(())
}
bitflags::bitflags! {
/// The lookup flags understood by `execveat`.
///
/// Despite the type name, these are `AT_*` flags; `sys_execveat` silently
/// drops every other bit via `from_bits_truncate`.
struct OpenFlags: u32 {
/// If the filename is empty, execute the file that `dfd` itself refers to.
const AT_EMPTY_PATH = 0x1000;
/// Do not follow a symlink at the end of the path; executing a symlink
/// then fails with `ELOOP`.
const AT_SYMLINK_NOFOLLOW = 0x100;
}
}
fn read_filename(filename_ptr: Vaddr, ctx: &Context) -> Result<String> {
let filename = ctx
.get_user_space()
.read_cstring(filename_ptr, MAX_FILENAME_LEN)?;
Ok(filename.into_string().unwrap())
}
/// Reads a null-terminated array of C string pointers from user space.
///
/// Each non-null pointer is followed to read a string of at most `max_string_len`.
/// A null `array_ptr` yields an empty vector. If no terminating null pointer is
/// found within the first `max_string_number` entries, `E2BIG` is returned.
fn read_cstring_vec(
    array_ptr: Vaddr,
    max_string_number: usize,
    max_string_len: usize,
    ctx: &Context,
) -> Result<Vec<CString>> {
    let mut strings = Vec::new();

    // On Linux, argv pointer and envp pointer can be specified as NULL.
    if array_ptr == 0 {
        return Ok(strings);
    }

    let user_space = ctx.get_user_space();
    let mut next_slot = array_ptr;
    let mut terminated = false;

    for _ in 0..max_string_number {
        let string_ptr = user_space.read_val::<usize>(next_slot)?;
        next_slot += 8;

        // A null pointer marks the end of the array.
        if string_ptr == 0 {
            terminated = true;
            break;
        }

        strings.push(user_space.read_cstring(string_ptr, max_string_len)?);
    }

    if terminated {
        Ok(strings)
    } else {
        return_errno_with_message!(Errno::E2BIG, "Cannot find null pointer in vector");
    }
}
/// Applies the `set_uid` bit of the ELF file to the credentials.
///
/// If the bit is set, the effective UID becomes the owner of the file and the
/// parent death signal of the process is cleared. The saved UID is reset in
/// either case.
fn set_uid_from_elf(
    current: &Process,
    credentials: &Credentials<WriteOp>,
    elf_file: &Arc<Dentry>,
) -> Result<()> {
    let has_set_uid = elf_file.mode()?.has_set_uid();
    if has_set_uid {
        credentials.set_euid(elf_file.owner()?);
        current.clear_parent_death_signal();
    }

    // The suid is reset regardless of whether the `set_uid` bit is present.
    credentials.reset_suid();
    Ok(())
}
/// Applies the `set_gid` bit of the ELF file to the credentials.
///
/// If the bit is set, the effective GID becomes the group of the file and the
/// parent death signal of the process is cleared. The saved GID is reset in
/// either case.
fn set_gid_from_elf(
    current: &Process,
    credentials: &Credentials<WriteOp>,
    elf_file: &Arc<Dentry>,
) -> Result<()> {
    let has_set_gid = elf_file.mode()?.has_set_gid();
    if has_set_gid {
        credentials.set_egid(elf_file.group()?);
        current.clear_parent_death_signal();
    }

    // The sgid is reset regardless of whether the `set_gid` bit is present.
    credentials.reset_sgid();
    Ok(())
}

View File

@ -0,0 +1,17 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{
prelude::*,
process::{posix_thread::do_exit, TermStatus},
syscall::SyscallReturn,
};
/// Handles the `exit` system call: terminates the current thread with
/// `TermStatus::Exited(exit_code)`.
pub fn sys_exit(exit_code: i32, _ctx: &Context) -> Result<SyscallReturn> {
    debug!("exid code = {}", exit_code);

    do_exit(current_thread!(), TermStatus::Exited(exit_code as _))?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,15 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{
prelude::*,
process::{do_exit_group, TermStatus},
syscall::SyscallReturn,
};
/// Handles the `exit_group` system call: exits all threads of the current
/// process with `TermStatus::Exited(exit_code)`.
pub fn sys_exit_group(exit_code: u64, _ctx: &Context) -> Result<SyscallReturn> {
    do_exit_group(TermStatus::Exited(exit_code as _));
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,160 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{file_table::FileDesc, utils::FallocMode},
prelude::*,
process::ResourceType,
};
pub fn sys_fallocate(
fd: FileDesc,
mode: u64,
offset: i64,
len: i64,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"fd = {}, mode = {}, offset = {}, len = {}",
fd, mode, offset, len
);
check_offset_and_len(offset, len, ctx)?;
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(fd)?.clone()
};
let falloc_mode = FallocMode::try_from(
RawFallocMode::from_bits(mode as _)
.ok_or_else(|| Error::with_message(Errno::EOPNOTSUPP, "invalid fallocate mode"))?,
)?;
file.fallocate(falloc_mode, offset as usize, len as usize)?;
Ok(SyscallReturn::Return(0))
}
fn check_offset_and_len(offset: i64, len: i64, ctx: &Context) -> Result<()> {
if offset < 0 || len <= 0 {
return_errno_with_message!(
Errno::EINVAL,
"offset is less than 0, or len is less than or equal to 0"
);
}
if offset.checked_add(len).is_none() {
return_errno_with_message!(Errno::EINVAL, "offset+len has overflowed");
}
let max_file_size = {
let resource_limits = ctx.process.resource_limits().lock();
resource_limits
.get_rlimit(ResourceType::RLIMIT_FSIZE)
.get_cur() as usize
};
if (offset + len) as usize > max_file_size {
return_errno_with_message!(Errno::EFBIG, "offset+len exceeds the maximum file size");
}
Ok(())
}
bitflags! {
/// Operation mode flags for fallocate.
///
/// These flags determine the operation to be performed on the given byte range.
///
/// `sys_fallocate` parses the raw `mode` argument with `from_bits`, so any bit
/// that is not listed here makes the syscall fail with `EOPNOTSUPP`. The
/// permitted combinations are checked when converting into `FallocMode`.
struct RawFallocMode: u32 {
/// File size will not be changed when extending the file.
const FALLOC_FL_KEEP_SIZE = 0x01;
/// De-allocates a range (creates a hole).
///
/// Must be OR-ed with `FALLOC_FL_KEEP_SIZE`.
const FALLOC_FL_PUNCH_HOLE = 0x02;
/// Removes a range of a file without leaving a hole.
///
/// The offset and length must be multiples of the filesystem block size.
const FALLOC_FL_COLLAPSE_RANGE = 0x08;
/// Converts a range of a file to zeros.
///
/// Preallocates blocks within the range, converting to unwritten extents.
const FALLOC_FL_ZERO_RANGE = 0x10;
/// Inserts space within the file size without overwriting any existing data.
///
/// The offset and length must be multiples of the filesystem block size.
const FALLOC_FL_INSERT_RANGE = 0x20;
/// Unshares shared blocks within the file size without overwriting any existing data.
///
/// Guarantees that subsequent writes will not fail due to lack of space.
const FALLOC_FL_UNSHARE_RANGE = 0x40;
}
}
impl TryFrom<RawFallocMode> for FallocMode {
type Error = crate::error::Error;
fn try_from(raw_mode: RawFallocMode) -> Result<Self> {
// Check for invalid combinations of flags
if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE)
&& raw_mode.contains(RawFallocMode::FALLOC_FL_ZERO_RANGE)
{
return_errno_with_message!(
Errno::EOPNOTSUPP,
"PUNCH_HOLE and ZERO_RANGE cannot be used together"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE)
&& !raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE)
{
return_errno_with_message!(
Errno::EOPNOTSUPP,
"PUNCH_HOLE must be combined with KEEP_SIZE"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_COLLAPSE_RANGE)
&& !(raw_mode - RawFallocMode::FALLOC_FL_COLLAPSE_RANGE).is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"COLLAPSE_RANGE must be used exclusively without any other flags"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_INSERT_RANGE)
&& !(raw_mode - RawFallocMode::FALLOC_FL_INSERT_RANGE).is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"INSERT_RANGE must be used exclusively without any other flags"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_UNSHARE_RANGE)
&& !(raw_mode
- (RawFallocMode::FALLOC_FL_UNSHARE_RANGE | RawFallocMode::FALLOC_FL_KEEP_SIZE))
.is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"UNSHARE_RANGE can only be combined with KEEP_SIZE."
);
}
// Transform valid flags into the fallocate mode
let mode = if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE) {
FallocMode::PunchHoleKeepSize
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_ZERO_RANGE) {
if raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE) {
FallocMode::ZeroRangeKeepSize
} else {
FallocMode::ZeroRange
}
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_COLLAPSE_RANGE) {
FallocMode::CollapseRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_INSERT_RANGE) {
FallocMode::InsertRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_UNSHARE_RANGE) {
FallocMode::AllocateUnshareRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE) {
FallocMode::AllocateKeepSize
} else {
FallocMode::Allocate
};
Ok(mode)
}
}

258
kernel/src/syscall/fcntl.rs Normal file
View File

@ -0,0 +1,258 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_handle::FileLike,
file_table::{FdFlags, FileDesc},
inode_handle::InodeHandle,
utils::{
FileRange, RangeLockItem, RangeLockItemBuilder, RangeLockType, StatusFlags, OFFSET_MAX,
},
},
prelude::*,
process::Pid,
};
/// Entry point of the `fcntl` system call.
///
/// Decodes `cmd` into a [`FcntlCmd`] (failing if the value is unknown) and
/// forwards `fd`/`arg` to the handler of that command.
pub fn sys_fcntl(fd: FileDesc, cmd: i32, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
    let command = FcntlCmd::try_from(cmd)?;
    debug!("fd = {}, cmd = {:?}, arg = {}", fd, command, arg);

    match command {
        FcntlCmd::F_DUPFD | FcntlCmd::F_DUPFD_CLOEXEC => {
            let fd_flags = if matches!(command, FcntlCmd::F_DUPFD_CLOEXEC) {
                FdFlags::CLOEXEC
            } else {
                FdFlags::empty()
            };
            handle_dupfd(fd, arg, fd_flags, ctx)
        }
        FcntlCmd::F_GETFD => handle_getfd(fd, ctx),
        FcntlCmd::F_SETFD => handle_setfd(fd, arg, ctx),
        FcntlCmd::F_GETFL => handle_getfl(fd, ctx),
        FcntlCmd::F_SETFL => handle_setfl(fd, arg, ctx),
        FcntlCmd::F_GETLK => handle_getlk(fd, arg, ctx),
        FcntlCmd::F_SETLK | FcntlCmd::F_SETLKW => {
            // `F_SETLK` is the non-blocking variant, `F_SETLKW` the blocking one.
            let is_nonblocking = matches!(command, FcntlCmd::F_SETLK);
            handle_setlk(fd, arg, is_nonblocking, ctx)
        }
        FcntlCmd::F_GETOWN => handle_getown(fd, ctx),
        FcntlCmd::F_SETOWN => handle_setown(fd, arg, ctx),
    }
}
/// Handles `F_DUPFD` / `F_DUPFD_CLOEXEC`.
///
/// Duplicates `fd` through the process file table, passing `arg` (narrowed
/// to `FileDesc`) and `flags` to `dup`, and returns the new descriptor.
fn handle_dupfd(fd: FileDesc, arg: u64, flags: FdFlags, ctx: &Context) -> Result<SyscallReturn> {
    let requested_fd = arg as FileDesc;
    let duplicated = {
        let mut table = ctx.process.file_table().lock();
        table.dup(fd, requested_fd, flags)?
    };
    Ok(SyscallReturn::Return(duplicated as _))
}
/// Handles `F_GETFD`: returns the raw bits of the descriptor flags of `fd`.
fn handle_getfd(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    let table = ctx.process.file_table().lock();
    let raw_flags = table.get_entry(fd)?.flags().bits();
    Ok(SyscallReturn::Return(raw_flags as _))
}
fn handle_setfd(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
let flags = if arg > u8::MAX.into() {
return_errno_with_message!(Errno::EINVAL, "invalid fd flags");
} else {
FdFlags::from_bits(arg as u8).ok_or(Error::with_message(Errno::EINVAL, "invalid flags"))?
};
let file_table = ctx.process.file_table().lock();
let entry = file_table.get_entry(fd)?;
entry.set_flags(flags);
Ok(SyscallReturn::Return(0))
}
/// Handles `F_GETFL`: returns the file status flags OR-ed with the access mode.
fn handle_getfl(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    // Clone the file handle so the file table lock is released early.
    let file = {
        let table = ctx.process.file_table().lock();
        table.get_file(fd)?.clone()
    };

    let status_bits = file.status_flags().bits();
    let mode_bits = file.access_mode() as u32;
    Ok(SyscallReturn::Return((status_bits | mode_bits) as _))
}
/// Handles `F_SETFL`: updates the modifiable file status flags of `fd`.
///
/// Only `O_APPEND`, `O_ASYNC`, `O_DIRECT`, `O_NOATIME` and `O_NONBLOCK` are
/// taken from `arg`; every other status flag of the file keeps its value.
fn handle_setfl(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
    let file = {
        let table = ctx.process.file_table().lock();
        table.get_file(fd)?.clone()
    };

    let modifiable = StatusFlags::O_APPEND
        | StatusFlags::O_ASYNC
        | StatusFlags::O_DIRECT
        | StatusFlags::O_NOATIME
        | StatusFlags::O_NONBLOCK;

    // Keep the non-modifiable bits and take the modifiable ones from `arg`.
    let preserved = file.status_flags() - modifiable;
    let requested = StatusFlags::from_bits_truncate(arg as _) & modifiable;
    file.set_status_flags(preserved | requested)?;
    Ok(SyscallReturn::Return(0))
}
fn handle_getlk(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(fd)?.clone()
};
let lock_mut_ptr = arg as Vaddr;
let mut lock_mut_c = ctx.get_user_space().read_val::<c_flock>(lock_mut_ptr)?;
let lock_type = RangeLockType::try_from(lock_mut_c.l_type)?;
if lock_type == RangeLockType::Unlock {
return_errno_with_message!(Errno::EINVAL, "invalid flock type for getlk");
}
let mut lock = RangeLockItemBuilder::new()
.type_(lock_type)
.range(from_c_flock_and_file(&lock_mut_c, file.clone())?)
.build()?;
let inode_file = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
lock = inode_file.test_range_lock(lock)?;
lock_mut_c.copy_from_range_lock(&lock);
ctx.get_user_space().write_val(lock_mut_ptr, &lock_mut_c)?;
Ok(SyscallReturn::Return(0))
}
fn handle_setlk(
fd: FileDesc,
arg: u64,
is_nonblocking: bool,
ctx: &Context,
) -> Result<SyscallReturn> {
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(fd)?.clone()
};
let lock_mut_ptr = arg as Vaddr;
let lock_mut_c = ctx.get_user_space().read_val::<c_flock>(lock_mut_ptr)?;
let lock_type = RangeLockType::try_from(lock_mut_c.l_type)?;
let lock = RangeLockItemBuilder::new()
.type_(lock_type)
.range(from_c_flock_and_file(&lock_mut_c, file.clone())?)
.build()?;
let inode_file = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
inode_file.set_range_lock(&lock, is_nonblocking)?;
Ok(SyscallReturn::Return(0))
}
/// Handles `F_GETOWN`: returns the owner recorded on the file table entry,
/// or 0 if the entry has no owner.
fn handle_getown(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
    let table = ctx.process.file_table().lock();
    let owner_pid = table.get_entry(fd)?.owner().unwrap_or(0);
    Ok(SyscallReturn::Return(owner_pid as _))
}
/// Handles `F_SETOWN`: records the owner given by `arg` on the file table entry.
///
/// `arg` is interpreted as an `i32`. A process ID is given as a positive
/// value and a process group ID as a negative value; only the magnitude is
/// stored. Fails with `EINVAL` if the magnitude does not fit in an `i32`.
fn handle_setown(fd: FileDesc, arg: u64, ctx: &Context) -> Result<SyscallReturn> {
    let table = ctx.process.file_table().lock();
    let entry = table.get_entry(fd)?;

    let magnitude = (arg as i32).unsigned_abs();
    // Only `i32::MIN` has a magnitude larger than `i32::MAX`.
    if magnitude > i32::MAX as u32 {
        return_errno_with_message!(Errno::EINVAL, "process (group) id overflowed");
    }

    let owner = Pid::try_from(magnitude)
        .map_err(|_| Error::with_message(Errno::EINVAL, "invalid process (group) id"))?;
    entry.set_owner(owner)?;
    Ok(SyscallReturn::Return(0))
}
/// Commands accepted by `fcntl`, decoded from the raw `cmd` argument.
///
/// The discriminants are the command numbers of the Linux generic ABI
/// (`include/uapi/asm-generic/fcntl.h`), which is what user-space libc passes
/// on x86-64: `F_GETLK`/`F_SETLK`/`F_SETLKW` are 5/6/7 and
/// `F_SETOWN`/`F_GETOWN` are 8/9.
#[repr(i32)]
#[derive(Debug, Clone, Copy, TryFromInt)]
#[allow(non_camel_case_types)]
enum FcntlCmd {
    F_DUPFD = 0,
    F_GETFD = 1,
    F_SETFD = 2,
    F_GETFL = 3,
    F_SETFL = 4,
    F_GETLK = 5,
    F_SETLK = 6,
    F_SETLKW = 7,
    F_SETOWN = 8,
    F_GETOWN = 9,
    // F_LINUX_SPECIFIC_BASE (1024) + 6
    F_DUPFD_CLOEXEC = 1030,
}
/// File offset type used by the C-compatible structures in this file (a signed 64-bit integer).
#[allow(non_camel_case_types)]
pub type off_t = i64;
/// Reference point that `c_flock::l_start` is relative to.
///
/// Decoded from the raw `l_whence` field of a `c_flock`.
#[allow(non_camel_case_types)]
#[derive(Debug, Copy, Clone, TryFromInt)]
#[repr(u16)]
pub enum RangeLockWhence {
// `l_start` is used as the start offset directly.
SEEK_SET = 0,
// `l_start` is added to the current offset of the file handle.
SEEK_CUR = 1,
// `l_start` is added to the size reported by the file's metadata.
SEEK_END = 2,
}
/// C struct for a file range lock in Libc.
///
/// Read from and written back to user space by the `F_GETLK`, `F_SETLK` and
/// `F_SETLKW` handlers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Pod)]
pub struct c_flock {
/// Type of lock: F_RDLCK, F_WRLCK, or F_UNLCK
pub l_type: u16,
/// Where `l_start` is relative to (see `RangeLockWhence`)
pub l_whence: u16,
/// Offset where the lock begins
pub l_start: off_t,
/// Size of the locked area, 0 means until EOF
pub l_len: off_t,
/// Process holding the lock
pub l_pid: Pid,
}
impl c_flock {
    /// Copies the description of `lock` into this C structure.
    ///
    /// `l_type` is always overwritten. If the lock type is `Unlock`, every
    /// other field is left untouched; otherwise the range is reported
    /// relative to `SEEK_SET`, a lock ending at `OFFSET_MAX` gets a length of
    /// 0, and `l_pid` is set to the lock owner.
    pub fn copy_from_range_lock(&mut self, lock: &RangeLockItem) {
        self.l_type = lock.type_() as u16;
        if lock.type_() == RangeLockType::Unlock {
            return;
        }

        let reaches_offset_max = lock.end() == OFFSET_MAX;
        self.l_whence = RangeLockWhence::SEEK_SET as u16;
        self.l_start = lock.start() as off_t;
        self.l_len = if reaches_offset_max {
            0
        } else {
            lock.range().len() as off_t
        };
        self.l_pid = lock.owner();
    }
}
/// Create the file range through C flock and opened file reference
fn from_c_flock_and_file(lock: &c_flock, file: Arc<dyn FileLike>) -> Result<FileRange> {
let start = {
let whence = RangeLockWhence::try_from(lock.l_whence)?;
match whence {
RangeLockWhence::SEEK_SET => lock.l_start,
RangeLockWhence::SEEK_CUR => (file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?
.offset() as off_t)
.checked_add(lock.l_start)
.ok_or(Error::with_message(Errno::EOVERFLOW, "start overflow"))?,
RangeLockWhence::SEEK_END => (file.metadata().size as off_t)
.checked_add(lock.l_start)
.ok_or(Error::with_message(Errno::EOVERFLOW, "start overflow"))?,
}
};
let (start, end) = match lock.l_len {
len if len > 0 => {
let end = start
.checked_add(len)
.ok_or(Error::with_message(Errno::EOVERFLOW, "end overflow"))?;
(start as usize, end as usize)
}
0 => (start as usize, OFFSET_MAX),
len if len < 0 => {
let end = start;
let new_start = start + len;
if new_start < 0 {
return Err(Error::with_message(Errno::EINVAL, "invalid len"));
}
(new_start as usize, end as usize)
}
_ => unreachable!(),
};
FileRange::new(start, end)
}

View File

@ -0,0 +1,82 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
inode_handle::InodeHandle,
utils::{FlockItem, FlockType},
},
prelude::*,
};
pub fn sys_flock(fd: FileDesc, ops: i32, ctx: &Context) -> Result<SyscallReturn> {
debug!("flock: fd: {}, ops: {:?}", fd, ops);
let file = {
let current = ctx.process;
let file_table = current.file_table().lock();
file_table.get_file(fd)?.clone()
};
let inode_file = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
let ops: FlockOps = FlockOps::from_i32(ops)?;
if ops.contains(FlockOps::LOCK_UN) {
inode_file.unlock_flock();
} else {
let is_nonblocking = ops.contains(FlockOps::LOCK_NB);
let flock = {
let type_ = FlockType::from(ops);
FlockItem::new(&file, type_)
};
inode_file.set_flock(flock, is_nonblocking)?;
}
Ok(SyscallReturn::Return(0))
}
impl From<FlockOps> for FlockType {
    /// Maps the lock bits of `ops` to a lock type; `LOCK_EX` takes precedence
    /// over `LOCK_SH`.
    ///
    /// # Panics
    ///
    /// Panics if neither `LOCK_EX` nor `LOCK_SH` is set.
    fn from(ops: FlockOps) -> Self {
        let exclusive = ops.contains(FlockOps::LOCK_EX);
        let shared = ops.contains(FlockOps::LOCK_SH);
        match (exclusive, shared) {
            (true, _) => Self::ExclusiveLock,
            (false, true) => Self::SharedLock,
            (false, false) => panic!("invalid flockops"),
        }
    }
}
// Operation bits accepted by `sys_flock`.
// `FlockOps::from_i32` only accepts values where exactly one of
// `LOCK_SH`, `LOCK_EX` and `LOCK_UN` is set.
bitflags! {
struct FlockOps: i32 {
/// Shared lock
const LOCK_SH = 1;
/// Exclusive lock
const LOCK_EX = 2;
// Or'd with one of the above to prevent blocking
const LOCK_NB = 4;
// Remove lock
const LOCK_UN = 8;
}
}
impl FlockOps {
    /// Parses raw `flock` operation bits.
    ///
    /// Fails with `EINVAL` if `bits` contains unknown bits, or unless exactly
    /// one of `LOCK_SH`, `LOCK_EX` and `LOCK_UN` is set. `LOCK_NB` may
    /// accompany any of them.
    fn from_i32(bits: i32) -> Result<Self> {
        let Some(ops) = Self::from_bits(bits) else {
            return_errno_with_message!(Errno::EINVAL, "invalid operation")
        };

        // Isolate the action bits; exactly one of them must be present.
        let action = ops & (Self::LOCK_SH | Self::LOCK_EX | Self::LOCK_UN);
        let is_single_action =
            action == Self::LOCK_SH || action == Self::LOCK_EX || action == Self::LOCK_UN;
        if !is_single_action {
            return_errno_with_message!(Errno::EINVAL, "invalid operation");
        }
        Ok(ops)
    }
}

View File

@ -0,0 +1,15 @@
// SPDX-License-Identifier: MPL-2.0
use ostd::cpu::UserContext;
use super::SyscallReturn;
use crate::{
prelude::*,
process::{clone_child, CloneArgs},
};
/// Entry point of the `fork` system call.
///
/// Clones the calling process with the fork-specific `CloneArgs` and returns
/// the PID of the child. A failure to create the child is reported to the
/// caller as an error instead of panicking the kernel.
pub fn sys_fork(ctx: &Context, parent_context: &UserContext) -> Result<SyscallReturn> {
    let clone_args = CloneArgs::for_fork();
    let child_pid = clone_child(ctx, parent_context, clone_args)?;
    Ok(SyscallReturn::Return(child_pid as _))
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{file_table::FileDesc, inode_handle::InodeHandle},
prelude::*,
};
pub fn sys_fsync(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
debug!("fd = {}", fd);
let dentry = {
let file_table = ctx.process.file_table().lock();
let file = file_table.get_file(fd)?;
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EINVAL, "not inode"))?;
inode_handle.dentry().clone()
};
dentry.sync_all()?;
Ok(SyscallReturn::Return(0))
}
pub fn sys_fdatasync(fd: FileDesc, ctx: &Context) -> Result<SyscallReturn> {
debug!("fd = {}", fd);
let dentry = {
let file_table = ctx.process.file_table().lock();
let file = file_table.get_file(fd)?;
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EINVAL, "not inode"))?;
inode_handle.dentry().clone()
};
dentry.sync_data()?;
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,76 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{
prelude::*,
process::posix_thread::futex::{
futex_op_and_flags_from_u32, futex_requeue, futex_wait, futex_wait_bitset, futex_wake,
futex_wake_bitset, FutexOp, FutexTimeout,
},
syscall::SyscallReturn,
};
pub fn sys_futex(
futex_addr: Vaddr,
futex_op: i32,
futex_val: u32,
utime_addr: u64,
futex_new_addr: u64,
bitset: u64,
ctx: &Context,
) -> Result<SyscallReturn> {
// FIXME: we current ignore futex flags
let (futex_op, futex_flags) = futex_op_and_flags_from_u32(futex_op as _)?;
debug!(
"futex_op = {:?}, futex_flags = {:?}, futex_addr = 0x{:x}",
futex_op, futex_flags, futex_addr
);
let get_futex_val = |val: i32| -> Result<usize> {
if val < 0 {
return_errno_with_message!(Errno::EINVAL, "the futex val must not be negative");
}
Ok(val as usize)
};
let get_futex_timeout = |timeout_addr| -> Result<Option<FutexTimeout>> {
if timeout_addr == 0 {
return Ok(None);
}
// TODO: parse a timeout
todo!()
};
let res = match futex_op {
FutexOp::FUTEX_WAIT => {
let timeout = get_futex_timeout(utime_addr)?;
futex_wait(futex_addr as _, futex_val as _, &timeout).map(|_| 0)
}
FutexOp::FUTEX_WAIT_BITSET => {
let timeout = get_futex_timeout(utime_addr)?;
futex_wait_bitset(futex_addr as _, futex_val as _, &timeout, bitset as _).map(|_| 0)
}
FutexOp::FUTEX_WAKE => {
let max_count = get_futex_val(futex_val as i32)?;
futex_wake(futex_addr as _, max_count).map(|count| count as isize)
}
FutexOp::FUTEX_WAKE_BITSET => {
let max_count = get_futex_val(futex_val as i32)?;
futex_wake_bitset(futex_addr as _, max_count, bitset as _).map(|count| count as isize)
}
FutexOp::FUTEX_REQUEUE => {
let max_nwakes = get_futex_val(futex_val as i32)?;
let max_nrequeues = get_futex_val(utime_addr as i32)?;
futex_requeue(
futex_addr as _,
max_nwakes,
max_nrequeues,
futex_new_addr as _,
)
.map(|nwakes| nwakes as _)
}
_ => panic!("Unsupported futex operations"),
}?;
debug!("futex returns, tid= {} ", ctx.thread.tid());
Ok(SyscallReturn::Return(res as _))
}

View File

@ -0,0 +1,14 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
pub fn sys_getcwd(buf: Vaddr, len: usize, ctx: &Context) -> Result<SyscallReturn> {
// TODO: getcwd only return a fake result now
let fake_cwd = CString::new("/")?;
let bytes = fake_cwd.as_bytes_with_nul();
let write_len = len.min(bytes.len());
ctx.get_user_space()
.write_bytes(buf, &mut VmReader::from(&bytes[..write_len]))?;
Ok(SyscallReturn::Return(write_len as _))
}

View File

@ -0,0 +1,274 @@
// SPDX-License-Identifier: MPL-2.0
use core::marker::PhantomData;
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
inode_handle::InodeHandle,
utils::{DirentVisitor, InodeType},
},
prelude::*,
};
pub fn sys_getdents(
fd: FileDesc,
buf_addr: Vaddr,
buf_len: usize,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"fd = {}, buf_addr = 0x{:x}, buf_len = 0x{:x}",
fd, buf_addr, buf_len
);
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(fd)?.clone()
};
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
if inode_handle.dentry().type_() != InodeType::Dir {
return_errno!(Errno::ENOTDIR);
}
let mut buffer = vec![0u8; buf_len];
let mut reader = DirentBufferReader::<Dirent>::new(&mut buffer); // Use the non-64-bit reader
let _ = inode_handle.readdir(&mut reader)?;
let read_len = reader.read_len();
ctx.get_user_space()
.write_bytes(buf_addr, &mut VmReader::from(&buffer[..read_len]))?;
Ok(SyscallReturn::Return(read_len as _))
}
pub fn sys_getdents64(
fd: FileDesc,
buf_addr: Vaddr,
buf_len: usize,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"fd = {}, buf_addr = 0x{:x}, buf_len = 0x{:x}",
fd, buf_addr, buf_len
);
let file = {
let file_table = ctx.process.file_table().lock();
file_table.get_file(fd)?.clone()
};
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EBADF, "not inode"))?;
if inode_handle.dentry().type_() != InodeType::Dir {
return_errno!(Errno::ENOTDIR);
}
let mut buffer = vec![0u8; buf_len];
let mut reader = DirentBufferReader::<Dirent64>::new(&mut buffer);
let _ = inode_handle.readdir(&mut reader)?;
let read_len = reader.read_len();
ctx.get_user_space()
.write_bytes(buf_addr, &mut VmReader::from(&buffer[..read_len]))?;
Ok(SyscallReturn::Return(read_len as _))
}
/// The DirentSerializer can decide how to serialize the data.
///
/// Implemented by `Dirent` (legacy `getdents` layout) and `Dirent64`
/// (`getdents64` layout).
trait DirentSerializer {
/// Create a DirentSerializer.
fn new(ino: u64, offset: u64, type_: InodeType, name: CString) -> Self;
/// Get the length of a directory entry.
///
/// `DirentBufferReader` advances its write position by this amount after
/// each serialized entry.
fn len(&self) -> usize;
/// Try to serialize a directory entry into buffer.
///
/// The entry is written at the start of `buf`.
fn serialize(&self, buf: &mut [u8]) -> Result<()>;
}
/// The Buffered DirentReader to visit the dir entry.
/// The DirentSerializer T decides how to serialize the data.
struct DirentBufferReader<'a, T: DirentSerializer> {
// Destination buffer that the serialized entries are appended to.
buffer: &'a mut [u8],
// Number of bytes of `buffer` filled so far.
read_len: usize,
// Ties the reader to the serializer type `T` without storing a value of it.
phantom: PhantomData<T>,
}
impl<'a, T: DirentSerializer> DirentBufferReader<'a, T> {
    /// Creates a reader that appends serialized entries to `buffer`,
    /// starting at its beginning.
    pub fn new(buffer: &'a mut [u8]) -> Self {
        Self {
            read_len: 0,
            phantom: PhantomData,
            buffer,
        }
    }

    /// Returns the number of bytes of the buffer filled so far.
    pub fn read_len(&self) -> usize {
        self.read_len
    }
}
impl<'a, T: DirentSerializer> DirentVisitor for DirentBufferReader<'a, T> {
    /// Serializes one directory entry into the unused tail of the buffer and
    /// advances the fill position by the length of the entry.
    ///
    /// Fails with `EINVAL` if the buffer is already full, or if the
    /// serializer reports that the entry does not fit.
    fn visit(&mut self, name: &str, ino: u64, type_: InodeType, offset: usize) -> Result<()> {
        let entry = T::new(ino, offset as u64, type_, CString::new(name)?);

        let filled = self.read_len;
        if filled >= self.buffer.len() {
            return_errno_with_message!(Errno::EINVAL, "buffer is too small");
        }

        let (_, unused) = self.buffer.split_at_mut(filled);
        entry.serialize(unused)?;
        self.read_len = filled + entry.len();
        Ok(())
    }
}
/// Directory entry in the layout produced by `sys_getdents`:
/// the fixed-size header followed by the NUL-terminated name.
#[derive(Debug)]
struct Dirent {
// Fixed-size header fields.
inner: DirentInner,
// Entry name; serialized with its NUL terminator.
name: CString,
}
/// Fixed-size header of a `Dirent`.
///
/// The struct is packed, so fields must be copied out before being borrowed.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
struct DirentInner {
// Inode number of the entry.
d_ino: u64,
// Offset value passed by the directory visitor.
d_off: u64,
// Total length of the serialized record, rounded up to a multiple of 8.
d_reclen: u16,
}
impl DirentSerializer for Dirent {
    /// Builds a legacy directory entry; the inode type is not recorded.
    fn new(ino: u64, offset: u64, _type_: InodeType, name: CString) -> Self {
        let name_len = name.as_c_str().to_bytes_with_nul().len();
        // NOTE(review): the record length is computed from the size of
        // `Dirent64Inner`, not `DirentInner`, which leaves one spare byte in
        // the record — presumably room for a trailing `d_type`; confirm.
        let record_len = align_up(core::mem::size_of::<Dirent64Inner>() + name_len, 8);

        Self {
            inner: DirentInner {
                d_ino: ino,
                d_off: offset,
                d_reclen: record_len as u16,
            },
            name,
        }
    }

    /// Returns the record length (`d_reclen`).
    fn len(&self) -> usize {
        self.inner.d_reclen as usize
    }

    /// Writes `d_ino`, `d_off`, `d_reclen` and the NUL-terminated name
    /// back-to-back at the start of `buf`.
    ///
    /// Fails with `EINVAL` if `buf` is shorter than the record length.
    fn serialize(&self, buf: &mut [u8]) -> Result<()> {
        // Ensure buffer is large enough for the directory entry
        if buf.len() < self.len() {
            return_errno_with_message!(Errno::EINVAL, "buffer is too small");
        }

        // Copy the packed fields out before taking their byte views.
        let d_ino = self.inner.d_ino;
        let d_off = self.inner.d_off;
        let d_reclen = self.inner.d_reclen;
        let fields: [&[u8]; 4] = [
            d_ino.as_bytes(),
            d_off.as_bytes(),
            d_reclen.as_bytes(),
            self.name.as_c_str().to_bytes_with_nul(),
        ];

        let mut cursor = 0;
        for field in fields {
            let next = cursor + field.len();
            buf[cursor..next].copy_from_slice(field);
            cursor = next;
        }
        Ok(())
    }
}
/// Directory entry in the layout produced by `sys_getdents64`:
/// the fixed-size header followed by the NUL-terminated name.
#[derive(Debug)]
struct Dirent64 {
// Fixed-size header fields.
inner: Dirent64Inner,
// Entry name; serialized with its NUL terminator.
name: CString,
}
/// Fixed-size header of a `Dirent64`.
///
/// The struct is packed, so fields must be copied out before being borrowed.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
struct Dirent64Inner {
// Inode number of the entry.
d_ino: u64,
// Offset value passed by the directory visitor.
d_off: u64,
// Total length of the serialized record, rounded up to a multiple of 8.
d_reclen: u16,
// Entry type as a `DirentType` value.
d_type: u8,
}
impl DirentSerializer for Dirent64 {
    /// Builds a 64-bit directory entry, recording the inode type as a
    /// `DirentType` value.
    fn new(ino: u64, offset: u64, type_: InodeType, name: CString) -> Self {
        let name_len = name.as_c_str().to_bytes_with_nul().len();
        // Header plus NUL-terminated name, rounded up to a multiple of 8.
        let record_len = align_up(core::mem::size_of::<Dirent64Inner>() + name_len, 8);

        Self {
            inner: Dirent64Inner {
                d_ino: ino,
                d_off: offset,
                d_reclen: record_len as u16,
                d_type: DirentType::from(type_) as u8,
            },
            name,
        }
    }

    /// Returns the record length (`d_reclen`).
    fn len(&self) -> usize {
        self.inner.d_reclen as usize
    }

    /// Writes `d_ino`, `d_off`, `d_reclen`, `d_type` and the NUL-terminated
    /// name back-to-back at the start of `buf`.
    ///
    /// Fails with `EINVAL` if `buf` is shorter than the record length.
    fn serialize(&self, buf: &mut [u8]) -> Result<()> {
        if buf.len() < self.len() {
            return_errno_with_message!(Errno::EINVAL, "buffer is too small");
        }

        // Copy the packed fields out before taking their byte views.
        let d_ino = self.inner.d_ino;
        let d_off = self.inner.d_off;
        let d_reclen = self.inner.d_reclen;
        let d_type = self.inner.d_type;
        let fields: [&[u8]; 5] = [
            d_ino.as_bytes(),
            d_off.as_bytes(),
            d_reclen.as_bytes(),
            d_type.as_bytes(),
            self.name.as_c_str().to_bytes_with_nul(),
        ];

        let mut cursor = 0;
        for field in fields {
            let next = cursor + field.len();
            buf[cursor..next].copy_from_slice(field);
            cursor = next;
        }
        Ok(())
    }
}
/// Entry type stored in the `d_type` field of a `Dirent64`.
///
/// `DT_UNKNOWN` and `DT_WHT` are never produced by the `InodeType` conversion
/// in this file, hence the `dead_code` allowances.
#[allow(non_camel_case_types)]
#[repr(u8)]
#[derive(Debug, Clone, Copy)]
enum DirentType {
#[allow(dead_code)]
DT_UNKNOWN = 0,
DT_FIFO = 1,
DT_CHR = 2,
DT_DIR = 4,
DT_BLK = 6,
DT_REG = 8,
DT_LNK = 10,
DT_SOCK = 12,
#[allow(dead_code)]
DT_WHT = 14,
}
impl From<InodeType> for DirentType {
    /// Maps an inode type to the matching directory entry type.
    fn from(type_: InodeType) -> Self {
        match type_ {
            InodeType::NamedPipe => Self::DT_FIFO,
            InodeType::CharDevice => Self::DT_CHR,
            InodeType::Dir => Self::DT_DIR,
            InodeType::BlockDevice => Self::DT_BLK,
            InodeType::File => Self::DT_REG,
            InodeType::SymLink => Self::DT_LNK,
            InodeType::Socket => Self::DT_SOCK,
        }
    }
}
/// Rounds `size` up to the next multiple of `align`.
///
/// The bit trick only yields a multiple of `align` when `align` is a power
/// of two.
fn align_up(size: usize, align: usize) -> usize {
    let bumped = size + align - 1;
    let low_bits = align - 1;
    bumped & !low_bits
}

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getegid` system call: returns the effective group ID
/// of the calling thread.
pub fn sys_getegid(ctx: &Context) -> Result<SyscallReturn> {
    let credentials = ctx.posix_thread.credentials();
    let raw_egid = credentials.egid().as_u32();
    Ok(SyscallReturn::Return(raw_egid as _))
}

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `geteuid` system call: returns the effective user ID
/// of the calling thread.
pub fn sys_geteuid(ctx: &Context) -> Result<SyscallReturn> {
    let credentials = ctx.posix_thread.credentials();
    let raw_euid = credentials.euid().as_u32();
    Ok(SyscallReturn::Return(raw_euid as _))
}

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getgid` system call: returns the real group ID of the
/// calling thread.
pub fn sys_getgid(ctx: &Context) -> Result<SyscallReturn> {
    let credentials = ctx.posix_thread.credentials();
    let raw_gid = credentials.rgid().as_u32();
    Ok(SyscallReturn::Return(raw_gid as _))
}

View File

@ -0,0 +1,34 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
pub fn sys_getgroups(size: i32, group_list_addr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
debug!("size = {}, group_list_addr = 0x{:x}", size, group_list_addr);
if size < 0 {
return_errno_with_message!(Errno::EINVAL, "size cannot be negative");
}
let credentials = ctx.posix_thread.credentials();
let groups = credentials.groups();
if size == 0 {
return Ok(SyscallReturn::Return(groups.len() as _));
}
if groups.len() > size as usize {
return_errno_with_message!(
Errno::EINVAL,
"size is less than the number of supplementary group IDs"
);
}
let user_space = ctx.get_user_space();
for (idx, gid) in groups.iter().enumerate() {
let addr = group_list_addr + idx * core::mem::size_of_val(gid);
user_space.write_val(addr, gid)?;
}
Ok(SyscallReturn::Return(groups.len() as _))
}

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::net::{get_socket_from_fd, write_socket_addr_to_user},
};
/// Entry point of the `getpeername` system call.
///
/// Looks up the socket behind `sockfd` and writes its peer address to the
/// user buffer at `addr`, using `addrlen_ptr` for the address length.
pub fn sys_getpeername(
    sockfd: FileDesc,
    addr: Vaddr,
    addrlen_ptr: Vaddr,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("sockfd = {sockfd}, addr = 0x{addr:x}, addrlen_ptr = 0x{addrlen_ptr:x}");

    let peer_addr = get_socket_from_fd(sockfd)?.peer_addr()?;

    // FIXME: truncate the written length if addrlen is not big enough
    write_socket_addr_to_user(&peer_addr, addr, addrlen_ptr)?;
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,8 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getpgrp` system call: returns the process group ID of
/// the calling process.
pub fn sys_getpgrp(ctx: &Context) -> Result<SyscallReturn> {
    let pgid = ctx.process.pgid();
    Ok(SyscallReturn::Return(pgid as _))
}

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getpid` system call: returns the ID of the calling
/// process.
pub fn sys_getpid(ctx: &Context) -> Result<SyscallReturn> {
    let current_pid = ctx.process.pid();
    debug!("[sys_getpid]: pid = {}", current_pid);
    Ok(SyscallReturn::Return(current_pid as _))
}

View File

@ -0,0 +1,12 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getppid` system call: returns the ID of the parent
/// process, or 0 if the calling process has no parent.
pub fn sys_getppid(ctx: &Context) -> Result<SyscallReturn> {
    if let Some(parent) = ctx.process.parent() {
        return Ok(SyscallReturn::Return(parent.pid() as _));
    }
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,33 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{device, prelude::*};
/// Entry point of the `getrandom` system call.
///
/// Fills a kernel buffer of `count` bytes from `device::Random` if
/// `GRND_RANDOM` is set and from `device::Urandom` otherwise, copies the
/// whole buffer to the user address `buf`, and returns the length reported
/// by the device. Unknown flag bits are silently dropped.
pub fn sys_getrandom(buf: Vaddr, count: usize, flags: u32, ctx: &Context) -> Result<SyscallReturn> {
    let flags = GetRandomFlags::from_bits_truncate(flags);
    debug!(
        "buf = 0x{:x}, count = 0x{:x}, flags = {:?}",
        buf, count, flags
    );

    // TODO: support nonblock flag.
    // Currently our getrandom implementation relies on x86-specific `rdrand` instruction, so it will never block.
    let mut random_bytes = vec![0u8; count];
    let use_random_device = flags.contains(GetRandomFlags::GRND_RANDOM);
    let read_len = match use_random_device {
        true => device::Random::getrandom(&mut random_bytes)?,
        false => device::Urandom::getrandom(&mut random_bytes)?,
    };

    let user_space = ctx.get_user_space();
    user_space.write_bytes(buf, &mut VmReader::from(random_bytes.as_slice()))?;
    Ok(SyscallReturn::Return(read_len as isize))
}
// Flag bits accepted by `sys_getrandom`.
// Only `GRND_RANDOM` is inspected there; it selects `device::Random`
// instead of `device::Urandom` as the source.
bitflags::bitflags! {
#[derive(Pod)]
#[repr(C)]
pub struct GetRandomFlags: u32 {
const GRND_NONBLOCK = 0x0001;
const GRND_RANDOM = 0x0002;
const GRND_INSECURE = 0x0004;
}
}

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getresgid` system call.
///
/// Writes the real, effective and saved-set group IDs of the calling thread
/// to `rgid_ptr`, `egid_ptr` and `sgid_ptr`, in that order. A failed write
/// stops the sequence and returns the error.
pub fn sys_getresgid(
    rgid_ptr: Vaddr,
    egid_ptr: Vaddr,
    sgid_ptr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("rgid_ptr = 0x{rgid_ptr:x}, egid_ptr = 0x{egid_ptr:x}, sgid_ptr = 0x{sgid_ptr:x}");

    let credentials = ctx.posix_thread.credentials();
    let user_space = ctx.get_user_space();

    user_space.write_val(rgid_ptr, &credentials.rgid())?;
    user_space.write_val(egid_ptr, &credentials.egid())?;
    user_space.write_val(sgid_ptr, &credentials.sgid())?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getresuid` system call.
///
/// Writes the real, effective and saved-set user IDs of the calling thread
/// to `ruid_ptr`, `euid_ptr` and `suid_ptr`, in that order. A failed write
/// stops the sequence and returns the error.
pub fn sys_getresuid(
    ruid_ptr: Vaddr,
    euid_ptr: Vaddr,
    suid_ptr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("ruid_ptr = 0x{ruid_ptr:x}, euid_ptr = 0x{euid_ptr:x}, suid_ptr = 0x{suid_ptr:x}");

    let credentials = ctx.posix_thread.credentials();
    let user_space = ctx.get_user_space();

    user_space.write_val(ruid_ptr, &credentials.ruid())?;
    user_space.write_val(euid_ptr, &credentials.euid())?;
    user_space.write_val(suid_ptr, &credentials.suid())?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,92 @@
// SPDX-License-Identifier: MPL-2.0
use int_to_c_enum::TryFromInt;
use super::SyscallReturn;
use crate::{prelude::*, time::timeval_t};
/// Whose resource usage `sys_getrusage` should report, decoded from the raw
/// `target` argument.
#[derive(Debug, Copy, Clone, TryFromInt, PartialEq)]
#[repr(i32)]
enum RusageTarget {
// The calling process.
ForSelf = 0,
// Not supported by `sys_getrusage` yet.
Children = -1,
// Not supported by `sys_getrusage` yet.
Both = -2,
// The calling thread.
Thread = 1,
}
pub fn sys_getrusage(target: i32, rusage_addr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
let rusage_target = RusageTarget::try_from(target)?;
debug!(
"target = {:?}, rusage_addr = {}",
rusage_target, rusage_addr,
);
if rusage_addr != 0 {
let rusage = match rusage_target {
RusageTarget::ForSelf => {
let process = ctx.process;
rusage_t {
ru_utime: process.prof_clock().user_clock().read_time().into(),
ru_stime: process.prof_clock().kernel_clock().read_time().into(),
..Default::default()
}
}
RusageTarget::Thread => {
let posix_thread = ctx.posix_thread;
rusage_t {
ru_utime: posix_thread.prof_clock().user_clock().read_time().into(),
ru_stime: posix_thread.prof_clock().kernel_clock().read_time().into(),
..Default::default()
}
}
// To support `Children` and `Both` we need to implement the functionality to
// accumulate the resources of a child process back to the parent process
// upon the child's termination.
_ => {
return_errno_with_message!(Errno::EINVAL, "the target type is not supported")
}
};
ctx.get_user_space().write_val(rusage_addr, &rusage)?;
}
Ok(SyscallReturn::Return(0))
}
/// C-compatible resource usage record written to user space by `sys_getrusage`.
///
/// `sys_getrusage` only fills `ru_utime` and `ru_stime`; every other field
/// keeps its default value.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Pod)]
pub struct rusage_t {
/// user time used
pub ru_utime: timeval_t,
/// system time used
pub ru_stime: timeval_t,
/// maximum resident set size
pub ru_maxrss: u64,
/// integral shared memory size
pub ru_ixrss: u64,
/// integral unshared data size
pub ru_idrss: u64,
/// integral unshared stack size
pub ru_isrss: u64,
/// page reclaims
pub ru_minflt: u64,
/// page faults
pub ru_majflt: u64,
/// swaps
pub ru_nswap: u64,
/// block input operations
pub ru_inblock: u64,
/// block output operations
pub ru_oublock: u64,
/// messages sent
pub ru_msgsnd: u64,
/// messages received
pub ru_msgrcv: u64,
/// signals received
pub ru_nsignals: u64,
/// voluntary context switches
pub ru_nvcsw: u64,
/// involuntary context switches
pub ru_nivcsw: u64,
}

View File

@ -0,0 +1,31 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::{process_table, Pid},
};
/// Entry point of the `getsid` system call.
///
/// Returns the session ID of the calling process. A non-zero `pid` must name
/// an existing process in the same session as the caller.
///
/// Fails with `ESRCH` if no process has the given `pid`, and with `EPERM` if
/// that process belongs to a different session.
pub fn sys_getsid(pid: Pid, ctx: &Context) -> Result<SyscallReturn> {
    debug!("pid = {}", pid);

    let current_session = ctx.process.session().unwrap();
    let sid = current_session.sid();
    if pid == 0 {
        return Ok(SyscallReturn::Return(sid as _));
    }

    let target = match process_table::get_process(pid) {
        Some(process) => process,
        None => return_errno_with_message!(Errno::ESRCH, "the process does not exist"),
    };

    let same_session = Arc::ptr_eq(&current_session, &target.session().unwrap());
    if !same_session {
        return_errno_with_message!(
            Errno::EPERM,
            "the process and current process does not belong to the same session"
        );
    }

    // Both processes share the session, so the caller's session ID is the answer.
    Ok(SyscallReturn::Return(sid as _))
}

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::net::{get_socket_from_fd, write_socket_addr_to_user},
};
/// Entry point of the `getsockname` system call.
///
/// Looks up the socket behind `sockfd` and writes its own address to the
/// user buffer at `addr`, using `addrlen_ptr` for the address length.
pub fn sys_getsockname(
    sockfd: FileDesc,
    addr: Vaddr,
    addrlen_ptr: Vaddr,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    debug!("sockfd = {sockfd}, addr = 0x{addr:x}, addrlen_ptr = 0x{addrlen_ptr:x}");

    let socket_addr = get_socket_from_fd(sockfd)?.addr()?;

    // FIXME: truncate the written length if addrlen is not big enough
    write_socket_addr_to_user(&socket_addr, addr, addrlen_ptr)?;
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::net::{get_socket_from_fd, new_raw_socket_option, CSocketOptionLevel},
};
/// Entry point of the `getsockopt` system call.
///
/// Reads the option length from `optlen_addr`, queries the option from the
/// socket behind `sockfd`, writes the value to `optval`, and stores the
/// number of bytes written back to `optlen_addr`.
///
/// Fails if `level` is not a known option level, and with `EINVAL` if
/// `optval` or `optlen_addr` is null.
pub fn sys_getsockopt(
    sockfd: FileDesc,
    level: i32,
    optname: i32,
    optval: Vaddr,
    optlen_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let level = CSocketOptionLevel::try_from(level)?;
    let has_null_pointer = optval == 0 || optlen_addr == 0;
    if has_null_pointer {
        return_errno_with_message!(Errno::EINVAL, "optval or optlen_addr is null pointer");
    }

    let user_space = ctx.get_user_space();
    let optlen: u32 = user_space.read_val(optlen_addr)?;
    debug!("level = {level:?}, sockfd = {sockfd}, optname = {optname:?}, optlen = {optlen}");

    let socket = get_socket_from_fd(sockfd)?;
    let mut raw_option = new_raw_socket_option(level, optname)?;
    debug!("raw option: {:?}", raw_option);
    socket.get_option(raw_option.as_sock_option_mut())?;

    let written = raw_option.write_to_user(optval, optlen)? as u32;
    user_space.write_val(optlen_addr, &written)?;
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `gettid` system call: returns the ID of the calling
/// thread.
pub fn sys_gettid(ctx: &Context) -> Result<SyscallReturn> {
    Ok(SyscallReturn::Return(ctx.thread.tid() as _))
}

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
time::{timeval_t, SystemTime},
};
/// Entry point of the `gettimeofday` system call.
///
/// Writes the time elapsed since the Unix epoch to `timeval_addr` as a
/// `timeval_t`. Nothing is written if `timeval_addr` is null.
///
/// The use of the timezone structure is obsolete.
/// Glibc sets the timezone_addr argument to NULL, so just ignore it.
pub fn sys_gettimeofday(
    timeval_addr: Vaddr,
    /* timezone_addr: Vaddr, */ ctx: &Context,
) -> Result<SyscallReturn> {
    if timeval_addr != 0 {
        let since_epoch = SystemTime::now().duration_since(&SystemTime::UNIX_EPOCH)?;
        let time_val = timeval_t::from(since_epoch);
        ctx.get_user_space().write_val(timeval_addr, &time_val)?;
    }
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Entry point of the `getuid` system call: returns the real user ID of the
/// calling thread.
pub fn sys_getuid(ctx: &Context) -> Result<SyscallReturn> {
    let credentials = ctx.posix_thread.credentials();
    let raw_uid = credentials.ruid().as_u32();
    Ok(SyscallReturn::Return(raw_uid as _))
}

View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
utils::{IoctlCmd, StatusFlags},
},
prelude::*,
};
pub fn sys_ioctl(fd: FileDesc, cmd: u32, arg: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
let ioctl_cmd = IoctlCmd::try_from(cmd)?;
debug!(
"fd = {}, ioctl_cmd = {:?}, arg = 0x{:x}",
fd, ioctl_cmd, arg
);
let file_table = ctx.process.file_table().lock();
let file = file_table.get_file(fd)?;
let res = match ioctl_cmd {
IoctlCmd::FIONBIO => {
let is_nonblocking = ctx.get_user_space().read_val::<i32>(arg)? != 0;
let mut flags = file.status_flags();
flags.set(StatusFlags::O_NONBLOCK, is_nonblocking);
file.set_status_flags(flags)?;
0
}
IoctlCmd::FIOASYNC => {
let is_async = ctx.get_user_space().read_val::<i32>(arg)? != 0;
let mut flags = file.status_flags();
// Set `O_ASYNC` flags will send `SIGIO` signal to a process when
// I/O is possible, user should call `fcntl(fd, F_SETOWN, pid)`
// first to let the kernel know just whom to notify.
flags.set(StatusFlags::O_ASYNC, is_async);
file.set_status_flags(flags)?;
0
}
_ => file.ioctl(ioctl_cmd, arg)?,
};
Ok(SyscallReturn::Return(res as _))
}

View File

@ -0,0 +1,44 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::{
kill, kill_all, kill_group,
signal::{
sig_num::SigNum,
signals::user::{UserSignal, UserSignalKind},
},
ProcessFilter,
},
};
pub fn sys_kill(process_filter: u64, sig_num: u64, ctx: &Context) -> Result<SyscallReturn> {
let process_filter = ProcessFilter::from_id(process_filter as _);
let sig_num = if sig_num == 0 {
None
} else {
Some(SigNum::try_from(sig_num as u8)?)
};
debug!(
"process_filter = {:?}, sig_num = {:?}",
process_filter, sig_num
);
do_sys_kill(process_filter, sig_num, ctx)?;
Ok(SyscallReturn::Return(0))
}
/// Sends an optional signal to the processes selected by `filter`.
///
/// When `sig_num` is `Some`, a `UserSignal` of kind `Kill` is built carrying
/// the sender's PID and real UID; when it is `None`, `None` is forwarded to
/// the kill routine.
pub fn do_sys_kill(filter: ProcessFilter, sig_num: Option<SigNum>, ctx: &Context) -> Result<()> {
    let signal = sig_num.map(|num| {
        let sender_pid = ctx.process.pid();
        let sender_uid = ctx.posix_thread.credentials().ruid();
        UserSignal::new(num, UserSignalKind::Kill, sender_pid, sender_uid)
    });

    match filter {
        ProcessFilter::WithPid(pid) => kill(pid, signal)?,
        ProcessFilter::WithPgid(pgid) => kill_group(pgid, signal)?,
        ProcessFilter::Any => kill_all(signal)?,
    }

    Ok(())
}

View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
pub fn sys_linkat(
old_dirfd: FileDesc,
old_path_addr: Vaddr,
new_dirfd: FileDesc,
new_path_addr: Vaddr,
flags: u32,
ctx: &Context,
) -> Result<SyscallReturn> {
let user_space = ctx.get_user_space();
let old_path = user_space.read_cstring(old_path_addr, MAX_FILENAME_LEN)?;
let new_path = user_space.read_cstring(new_path_addr, MAX_FILENAME_LEN)?;
let flags =
LinkFlags::from_bits(flags).ok_or(Error::with_message(Errno::EINVAL, "invalid flags"))?;
debug!(
"old_dirfd = {}, old_path = {:?}, new_dirfd = {}, new_path = {:?}, flags = {:?}",
old_dirfd, old_path, new_dirfd, new_path, flags
);
let (old_dentry, new_dir_dentry, new_name) = {
let old_path = old_path.to_string_lossy();
if old_path.ends_with('/') {
return_errno_with_message!(Errno::EPERM, "oldpath is dir");
}
if old_path.is_empty() && !flags.contains(LinkFlags::AT_EMPTY_PATH) {
return_errno_with_message!(Errno::ENOENT, "oldpath is empty");
}
let new_path = new_path.to_string_lossy();
if new_path.is_empty() {
return_errno_with_message!(Errno::ENOENT, "newpath is empty");
}
let old_fs_path = FsPath::new(old_dirfd, old_path.as_ref())?;
let new_fs_path = FsPath::new(new_dirfd, new_path.as_ref())?;
let fs = ctx.process.fs().read();
let old_dentry = if flags.contains(LinkFlags::AT_SYMLINK_FOLLOW) {
fs.lookup(&old_fs_path)?
} else {
fs.lookup_no_follow(&old_fs_path)?
};
let (new_dir_dentry, new_name) = fs.lookup_dir_and_new_basename(&new_fs_path, false)?;
(old_dentry, new_dir_dentry, new_name)
};
new_dir_dentry.link(&old_dentry, &new_name)?;
Ok(SyscallReturn::Return(0))
}
pub fn sys_link(
old_path_addr: Vaddr,
new_path_addr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
self::sys_linkat(AT_FDCWD, old_path_addr, AT_FDCWD, new_path_addr, 0, ctx)
}
// Flags accepted by `sys_linkat`. Any other bit makes `LinkFlags::from_bits`
// fail, which `sys_linkat` reports as `EINVAL`.
bitflags::bitflags! {
pub struct LinkFlags: u32 {
// Permits an empty old path (`sys_linkat` otherwise rejects it with `ENOENT`).
const AT_EMPTY_PATH = 0x1000;
// Makes `sys_linkat` resolve the old path with `lookup` instead of
// `lookup_no_follow`.
const AT_SYMLINK_FOLLOW = 0x400;
}
}

View File

@ -0,0 +1,13 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*, util::net::get_socket_from_fd};
/// Handles the `listen` syscall: looks up the socket behind `sockfd` and asks
/// it to start listening with the given `backlog` (converted with `as usize`).
pub fn sys_listen(sockfd: FileDesc, backlog: i32, _ctx: &Context) -> Result<SyscallReturn> {
    debug!("sockfd = {sockfd}, backlog = {backlog}");

    let listener = get_socket_from_fd(sockfd)?;
    let backlog = backlog as usize;
    listener.listen(backlog)?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{file_table::FileDesc, utils::SeekFrom},
prelude::*,
};
/// Handles the `lseek` syscall and returns the new file offset.
///
/// `whence` selects the reference point: 0 (start of file), 1 (current
/// position) or 2 (end of file).
///
/// # Errors
///
/// Returns `EINVAL` for an unknown `whence`, or for a negative `offset` when
/// seeking from the start. Errors from the file lookup and the seek itself
/// are propagated.
pub fn sys_lseek(fd: FileDesc, offset: isize, whence: u32, ctx: &Context) -> Result<SyscallReturn> {
    const SEEK_SET: u32 = 0;
    const SEEK_CUR: u32 = 1;
    const SEEK_END: u32 = 2;

    debug!("fd = {}, offset = {}, whence = {}", fd, offset, whence);

    let seek_from = match whence {
        SEEK_SET if offset < 0 => return_errno!(Errno::EINVAL),
        SEEK_SET => SeekFrom::Start(offset as usize),
        SEEK_CUR => SeekFrom::Current(offset),
        SEEK_END => SeekFrom::End(offset),
        _ => return_errno!(Errno::EINVAL),
    };

    let file_table = ctx.process.file_table().lock();
    let new_offset = file_table.get_file(fd)?.seek(seek_from)?;
    Ok(SyscallReturn::Return(new_offset as _))
}

View File

@ -0,0 +1,93 @@
// SPDX-License-Identifier: MPL-2.0
use align_ext::AlignExt;
use super::SyscallReturn;
use crate::prelude::*;
/// Handles the `madvise` syscall.
///
/// `start` must be page aligned; `len` is rounded up to a multiple of
/// `PAGE_SIZE`. A zero `len` is a successful no-op.
///
/// Supported behaviors:
/// - `MADV_NORMAL`, `MADV_SEQUENTIAL`, `MADV_WILLNEED`: the range is read once
///   from user space.
/// - `MADV_DONTNEED`: accepted but currently does nothing.
/// - `MADV_FREE`: handled by `madv_free`.
///
/// # Errors
///
/// Returns `EINVAL` if `start` is not page aligned or if the behavior is
/// recognized but not implemented. Errors from parsing `behavior`, reading
/// user memory and `madv_free` are propagated.
pub fn sys_madvise(
    start: Vaddr,
    len: usize,
    behavior: i32,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let behavior = MadviseBehavior::try_from(behavior)?;
    debug!(
        "start = 0x{:x}, len = 0x{:x}, behavior = {:?}",
        start, len, behavior
    );
    if start % PAGE_SIZE != 0 {
        return_errno_with_message!(Errno::EINVAL, "the start address should be page aligned");
    }
    if len == 0 {
        return Ok(SyscallReturn::Return(0));
    }
    let len = len.align_up(PAGE_SIZE);
    match behavior {
        MadviseBehavior::MADV_NORMAL
        | MadviseBehavior::MADV_SEQUENTIAL
        | MadviseBehavior::MADV_WILLNEED => {
            // perform a read at first
            let mut buffer = vec![0u8; len];
            ctx.get_user_space()
                .read_bytes(start, &mut VmWriter::from(buffer.as_mut_slice()))?;
        }
        MadviseBehavior::MADV_DONTNEED => {
            warn!("MADV_DONTNEED isn't implemented, do nothing for now.");
        }
        MadviseBehavior::MADV_FREE => madv_free(start, len, ctx)?,
        _ => {
            // The advice value comes from user space, so an unimplemented
            // behavior must be reported as an error rather than panicking
            // the kernel.
            warn!("madvise behavior {:?} isn't implemented", behavior);
            return_errno_with_message!(Errno::EINVAL, "the madvise behavior is not supported");
        }
    }
    Ok(SyscallReturn::Return(0))
}
/// Implements `MADV_FREE` by asking the process's root VMAR to destroy the
/// range `start..start + len`.
///
/// The result of the destroy operation is intentionally discarded, so this
/// function always returns `Ok(())`.
fn madv_free(start: Vaddr, len: usize, ctx: &Context) -> Result<()> {
    let _ = ctx.process.root_vmar().destroy(start..start + len);
    Ok(())
}
#[repr(i32)]
#[derive(Debug, Clone, Copy, TryFromInt)]
#[allow(non_camel_case_types)]
/// The advice values accepted by `madvise`.
///
/// The definitions are the same as in Linux.
pub enum MadviseBehavior {
MADV_NORMAL = 0, /* no further special treatment */
MADV_RANDOM = 1, /* expect random page references */
MADV_SEQUENTIAL = 2, /* expect sequential page references */
MADV_WILLNEED = 3, /* will need these pages */
MADV_DONTNEED = 4, /* don't need these pages */
/* common parameters: try to keep these consistent across architectures */
MADV_FREE = 8, /* free pages only if memory pressure */
MADV_REMOVE = 9, /* remove these pages & resources */
MADV_DONTFORK = 10, /* don't inherit across fork */
MADV_DOFORK = 11, /* do inherit across fork */
MADV_HWPOISON = 100, /* poison a page for testing */
MADV_SOFT_OFFLINE = 101, /* soft offline page for testing */
MADV_MERGEABLE = 12, /* KSM may merge identical pages */
MADV_UNMERGEABLE = 13, /* KSM may not merge identical pages */
MADV_HUGEPAGE = 14, /* Worth backing with hugepages */
MADV_NOHUGEPAGE = 15, /* Not worth backing with hugepages */
MADV_DONTDUMP = 16, /* Explicitly exclude from the core dump,
overrides the coredump filter bits */
MADV_DODUMP = 17, /* Clear the MADV_DONTDUMP flag */
MADV_WIPEONFORK = 18, /* Zero memory on fork, child only */
MADV_KEEPONFORK = 19, /* Undo MADV_WIPEONFORK */
MADV_COLD = 20, /* deactivate these pages */
MADV_PAGEOUT = 21, /* reclaim these pages */
MADV_POPULATE_READ = 22, /* populate (prefault) page tables readable */
MADV_POPULATE_WRITE = 23, /* populate (prefault) page tables writable */
MADV_DONTNEED_LOCKED = 24, /* like DONTNEED, but drop locked pages too */
}

View File

@ -0,0 +1,48 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::{InodeMode, InodeType},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `mkdirat` syscall: creates a directory at the path read from
/// `path_addr`, resolved relative to `dirfd`.
///
/// The requested `mode` is masked with the process umask, and trailing `/`
/// characters are trimmed from the new directory's name.
///
/// # Errors
///
/// Returns `ENOENT` if the path is empty; errors from reading the path,
/// resolving the parent directory and creating the child are propagated.
pub fn sys_mkdirat(
    dirfd: FileDesc,
    path_addr: Vaddr,
    mode: u16,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let raw_path = ctx
        .get_user_space()
        .read_cstring(path_addr, MAX_FILENAME_LEN)?;
    debug!("dirfd = {}, path = {:?}, mode = {}", dirfd, raw_path, mode);

    let process = ctx.process;

    let path_str = raw_path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;
    let (parent_dentry, dir_name) = process
        .fs()
        .read()
        .lookup_dir_and_new_basename(&fs_path, true)?;

    let umask = process.umask().read().get();
    let inode_mode = InodeMode::from_bits_truncate(mode & !umask);

    let _ = parent_dentry.new_fs_child(
        dir_name.trim_end_matches('/'),
        InodeType::Dir,
        inode_mode,
    )?;
    Ok(SyscallReturn::Return(0))
}
pub fn sys_mkdir(path_addr: Vaddr, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
self::sys_mkdirat(AT_FDCWD, path_addr, mode, ctx)
}

View File

@ -0,0 +1,70 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
device::get_device,
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::{InodeMode, InodeType, MknodType},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `mknodat` syscall: creates a filesystem node at the path read
/// from `path_addr`, resolved relative to `dirfd`.
///
/// The node type is derived from `mode` via `InodeType::from_raw_mode`, and
/// the permission bits are `mode` masked with the process umask.
///
/// - Regular files are created with `new_fs_child`.
/// - Character and block devices are created with `mknod`, using the device
///   returned by `get_device(dev)`.
/// - Named pipes are created with `mknod` and `MknodType::NamedPipeNode`.
///
/// # Errors
///
/// Returns `ENOENT` for an empty path, `EINVAL` for sockets and `EPERM` for
/// any other node type; errors from the called helpers are propagated.
pub fn sys_mknodat(
    dirfd: FileDesc,
    path_addr: Vaddr,
    mode: u16,
    dev: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let raw_path = ctx
        .get_user_space()
        .read_cstring(path_addr, MAX_FILENAME_LEN)?;
    let process = ctx.process;

    let umask = process.umask().read().get();
    let inode_mode = InodeMode::from_bits_truncate(mode & !umask);
    let inode_type = InodeType::from_raw_mode(mode)?;
    debug!(
        "dirfd = {}, path = {:?}, inode_mode = {:?}, inode_type = {:?}, dev = {}",
        dirfd, raw_path, inode_mode, inode_type, dev
    );

    let path_str = raw_path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;
    let (parent_dentry, node_name) = process
        .fs()
        .read()
        .lookup_dir_and_new_basename(&fs_path, false)?;

    match inode_type {
        InodeType::File => {
            let _ = parent_dentry.new_fs_child(&node_name, InodeType::File, inode_mode)?;
        }
        InodeType::CharDevice | InodeType::BlockDevice => {
            let device_inode = get_device(dev)?;
            let _ = parent_dentry.mknod(&node_name, inode_mode, device_inode.into())?;
        }
        InodeType::NamedPipe => {
            let _ = parent_dentry.mknod(&node_name, inode_mode, MknodType::NamedPipeNode)?;
        }
        InodeType::Socket => {
            return_errno_with_message!(Errno::EINVAL, "unsupported file types")
        }
        _ => return_errno_with_message!(Errno::EPERM, "unimplemented file types"),
    }

    Ok(SyscallReturn::Return(0))
}
pub fn sys_mknod(path_addr: Vaddr, mode: u16, dev: usize, ctx: &Context) -> Result<SyscallReturn> {
self::sys_mknodat(AT_FDCWD, path_addr, mode, dev, ctx)
}

207
kernel/src/syscall/mmap.rs Normal file
View File

@ -0,0 +1,207 @@
// SPDX-License-Identifier: MPL-2.0
//! This mod defines mmap flags and the handler to syscall mmap
use align_ext::AlignExt;
use aster_rights::Rights;
use super::SyscallReturn;
use crate::{
fs::{file_handle::FileLike, file_table::FileDesc, inode_handle::InodeHandle},
prelude::*,
vm::{
perms::VmPerms,
vmo::{VmoOptions, VmoRightsOp},
},
};
pub fn sys_mmap(
addr: u64,
len: u64,
perms: u64,
flags: u64,
fd: u64,
offset: u64,
ctx: &Context,
) -> Result<SyscallReturn> {
let perms = VmPerms::from_posix_prot_bits(perms as u32).unwrap();
let option = MMapOptions::try_from(flags as u32)?;
let res = do_sys_mmap(
addr as usize,
len as usize,
perms,
option,
fd as _,
offset as usize,
ctx,
)?;
Ok(SyscallReturn::Return(res as _))
}
/// Builds and installs a new mapping in the process's root VMAR and returns
/// the address produced by the mapping builder.
///
/// `len` is rounded up to a multiple of `PAGE_SIZE`; `offset` must already be
/// page aligned.
///
/// - With `MAP_FIXED`, the mapping is placed at `addr` and may overwrite
///   existing mappings.
/// - With `MAP_ANONYMOUS`, `offset` must be zero; a shared anonymous mapping
///   gets a freshly allocated VMO of `len` bytes.
/// - Otherwise the mapping is backed by the page cache of the inode behind
///   `fd`, starting at `offset`.
///
/// # Errors
///
/// - `EINVAL` if the mapping type is `MMapType::File`, if `offset` is not
///   page aligned, if `offset` is non-zero for an anonymous mapping, or if
///   `fd` does not refer to an `InodeHandle`.
/// - `EACCES` if the requested permissions are not allowed by the file's
///   access mode.
/// - `EBADF` if the inode has no page cache.
fn do_sys_mmap(
addr: Vaddr,
len: usize,
vm_perms: VmPerms,
option: MMapOptions,
fd: FileDesc,
offset: usize,
ctx: &Context,
) -> Result<Vaddr> {
debug!(
"addr = 0x{:x}, len = 0x{:x}, perms = {:?}, option = {:?}, fd = {}, offset = 0x{:x}",
addr, len, vm_perms, option, fd, offset
);
check_option(&option)?;
let len = len.align_up(PAGE_SIZE);
if offset % PAGE_SIZE != 0 {
return_errno_with_message!(Errno::EINVAL, "mmap only support page-aligned offset");
}
let root_vmar = ctx.process.root_vmar();
let vm_map_options = {
let mut options = root_vmar.new_map(len, vm_perms)?;
let flags = option.flags;
if flags.contains(MMapFlags::MAP_FIXED) {
// A fixed mapping is placed exactly at `addr` and is allowed to replace
// whatever is already mapped there.
options = options.offset(addr).can_overwrite(true);
} else if flags.contains(MMapFlags::MAP_32BIT) {
// TODO: support MAP_32BIT. MAP_32BIT requires the map range to be below 2GB
warn!("MAP_32BIT is not supported");
}
if option.typ() == MMapType::Shared {
options = options.is_shared(true);
}
if option.flags.contains(MMapFlags::MAP_ANONYMOUS) {
if offset != 0 {
return_errno_with_message!(
Errno::EINVAL,
"offset must be zero for anonymous mapping"
);
}
// Anonymous shared mapping should share the same memory pages.
if option.typ() == MMapType::Shared {
let shared_vmo = {
let vmo_options: VmoOptions<Rights> = VmoOptions::new(len);
vmo_options.alloc()?
};
options = options.vmo(shared_vmo);
}
} else {
// File-backed mapping: the file table lock is only held while the backing
// VMO is looked up.
let vmo = {
let file_table = ctx.process.file_table().lock();
let file = file_table.get_file(fd)?;
let inode_handle = file
.downcast_ref::<InodeHandle>()
.ok_or(Error::with_message(Errno::EINVAL, "no inode"))?;
let access_mode = inode_handle.access_mode();
// Reading through the mapping requires a readable file.
if vm_perms.contains(VmPerms::READ) && !access_mode.is_readable() {
return_errno!(Errno::EACCES);
}
// Only shared mappings are checked against the file's writability here.
if option.typ() == MMapType::Shared
&& vm_perms.contains(VmPerms::WRITE)
&& !access_mode.is_writable()
{
return_errno!(Errno::EACCES);
}
let inode = inode_handle.dentry().inode();
inode
.page_cache()
.ok_or(Error::with_message(
Errno::EBADF,
"File does not have page cache",
))?
.to_dyn()
};
options = options.vmo(vmo).vmo_offset(offset);
}
options
};
let map_addr = vm_map_options.build()?;
Ok(map_addr)
}
/// Validates the mapping type of `option`.
///
/// `MMapType::File` (the zero value) is rejected with `EINVAL`; every other
/// mapping type is accepted.
fn check_option(option: &MMapOptions) -> Result<()> {
    match option.typ() {
        MMapType::File => return_errno_with_message!(Errno::EINVAL, "Invalid mmap type"),
        MMapType::Shared | MMapType::Private | MMapType::SharedValidate => Ok(()),
    }
}
// Definition of MMap flags, conforming to the Linux mmap interface:
// https://man7.org/linux/man-pages/man2/mmap.2.html
//
// The low 4 bits of the flag value represent the type of the memory map,
// while the other bits are used as memory map flags.

// The mask that extracts the map type from the raw flag value.
const MAP_TYPE: u32 = 0xf;
/// The mapping type, taken from the bits of the raw mmap flag value selected
/// by `MAP_TYPE`.
#[derive(Copy, Clone, PartialEq, Debug, TryFromInt)]
#[repr(u8)]
pub enum MMapType {
File = 0x0, // Invalid
Shared = 0x1,
Private = 0x2,
SharedValidate = 0x3,
}
// The mmap flag bits other than the mapping type. `MMapOptions::try_from`
// rejects raw values that contain bits not listed here.
bitflags! {
pub struct MMapFlags : u32 {
const MAP_FIXED = 0x10;
const MAP_ANONYMOUS = 0x20;
const MAP_32BIT = 0x40;
const MAP_GROWSDOWN = 0x100;
const MAP_DENYWRITE = 0x800;
const MAP_EXECUTABLE = 0x1000;
const MAP_LOCKED = 0x2000;
const MAP_NORESERVE = 0x4000;
const MAP_POPULATE = 0x8000;
const MAP_NONBLOCK = 0x10000;
const MAP_STACK = 0x20000;
const MAP_HUGETLB = 0x40000;
const MAP_SYNC = 0x80000;
const MAP_FIXED_NOREPLACE = 0x100000;
}
}
/// The decoded form of the raw mmap `flags` argument: the mapping type plus
/// the remaining flag bits.
#[derive(Debug)]
pub struct MMapOptions {
// The mapping type decoded from the bits selected by `MAP_TYPE`.
typ: MMapType,
// The flag bits outside `MAP_TYPE`.
flags: MMapFlags,
}
impl TryFrom<u32> for MMapOptions {
type Error = Error;
fn try_from(value: u32) -> Result<Self> {
let typ_raw = (value & MAP_TYPE) as u8;
let typ = MMapType::try_from(typ_raw)?;
let flags_raw = value & !MAP_TYPE;
let Some(flags) = MMapFlags::from_bits(flags_raw) else {
return Err(Error::with_message(Errno::EINVAL, "unknown mmap flags"));
};
Ok(MMapOptions { typ, flags })
}
}
impl MMapOptions {
/// Returns the mapping type.
pub fn typ(&self) -> MMapType {
self.typ
}
/// Returns the mapping flags (the bits outside the mapping type).
pub fn flags(&self) -> MMapFlags {
self.flags
}
}

358
kernel/src/syscall/mod.rs Normal file
View File

@ -0,0 +1,358 @@
// SPDX-License-Identifier: MPL-2.0
//! Read the CPU context content, then dispatch the syscall to the corresponding handler.
//! Each sub-module contains the functions that handle the real syscall logic.
pub use clock_gettime::ClockId;
use ostd::cpu::UserContext;
use crate::{context::Context, cpu::LinuxAbi, prelude::*};
mod accept;
mod access;
mod alarm;
mod arch;
mod arch_prctl;
mod bind;
mod brk;
mod capget;
mod capset;
mod chdir;
mod chmod;
mod chown;
mod chroot;
mod clock_gettime;
mod clone;
mod close;
mod connect;
mod constants;
mod dup;
mod epoll;
mod eventfd;
mod execve;
mod exit;
mod exit_group;
mod fallocate;
mod fcntl;
mod flock;
mod fork;
mod fsync;
mod futex;
mod getcwd;
mod getdents64;
mod getegid;
mod geteuid;
mod getgid;
mod getgroups;
mod getpeername;
mod getpgrp;
mod getpid;
mod getppid;
mod getrandom;
mod getresgid;
mod getresuid;
mod getrusage;
mod getsid;
mod getsockname;
mod getsockopt;
mod gettid;
mod gettimeofday;
mod getuid;
mod ioctl;
mod kill;
mod link;
mod listen;
mod lseek;
mod madvise;
mod mkdir;
mod mknod;
mod mmap;
mod mount;
mod mprotect;
mod msync;
mod munmap;
mod nanosleep;
mod open;
mod pause;
mod pipe;
mod poll;
mod prctl;
mod pread64;
mod preadv;
mod prlimit64;
mod pselect6;
mod pwrite64;
mod pwritev;
mod read;
mod readlink;
mod recvfrom;
mod recvmsg;
mod rename;
mod rmdir;
mod rt_sigaction;
mod rt_sigpending;
mod rt_sigprocmask;
mod rt_sigreturn;
mod rt_sigsuspend;
mod sched_getaffinity;
mod sched_yield;
mod select;
mod semctl;
mod semget;
mod semop;
mod sendfile;
mod sendmsg;
mod sendto;
mod set_get_priority;
mod set_robust_list;
mod set_tid_address;
mod setfsgid;
mod setfsuid;
mod setgid;
mod setgroups;
mod setitimer;
mod setpgid;
mod setregid;
mod setresgid;
mod setresuid;
mod setreuid;
mod setsid;
mod setsockopt;
mod setuid;
mod shutdown;
mod sigaltstack;
mod socket;
mod socketpair;
mod stat;
mod statfs;
mod symlink;
mod sync;
mod tgkill;
mod time;
mod timer_create;
mod timer_settime;
mod truncate;
mod umask;
mod umount;
mod uname;
mod unlink;
mod utimens;
mod wait4;
mod waitid;
mod write;
/// Expands to a call of a syscall handler function.
///
/// - The first param is the number of syscall arguments (0 to 6) to forward.
/// - The second param is the function name of the syscall handler.
/// - The third param is the identifier of the raw argument array; its first
///   N entries are each cast with `as _` to the handler's parameter types.
/// - The fourth param is the context expression, passed after the arguments.
/// - The fifth param is optional: the user CPU context expression, passed
///   last to handlers that require it.
macro_rules! syscall_handler {
(0, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name($ctx)
};
(0, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name($ctx, $user_ctx)
};
(1, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name($args[0] as _, $ctx)
};
(1, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name($args[0] as _, $ctx, $user_ctx)
};
(2, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name($args[0] as _, $args[1] as _, $ctx)
};
(2, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name($args[0] as _, $args[1] as _, $ctx, $user_ctx)
};
(3, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name($args[0] as _, $args[1] as _, $args[2] as _, $ctx)
};
(3, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name($args[0] as _, $args[1] as _, $args[2] as _, $ctx, $user_ctx)
};
(4, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$ctx,
)
};
(4, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$ctx,
$user_ctx,
)
};
(5, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$args[4] as _,
$ctx,
)
};
(5, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$args[4] as _,
$ctx,
$user_ctx,
)
};
(6, $fn_name: ident, $args: ident, $ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$args[4] as _,
$args[5] as _,
$ctx,
)
};
(6, $fn_name: ident, $args: ident, $ctx: expr, $user_ctx: expr) => {
$fn_name(
$args[0] as _,
$args[1] as _,
$args[2] as _,
$args[3] as _,
$args[4] as _,
$args[5] as _,
$ctx,
$user_ctx,
)
};
}
/// Translates one handler specification of the dispatch table into a
/// `syscall_handler!` invocation.
///
/// Three forms of handler specification are accepted:
/// - `handler(args[..N])`: the handler takes N arguments and the context.
/// - `handler(args[..N], &user_ctx)`: the handler additionally takes a shared
///   reference to the user CPU context.
/// - `handler(args[..N], &mut user_ctx)`: the handler additionally takes the
///   mutable reference to the user CPU context.
macro_rules! dispatch_fn_inner {
( $args: ident, $ctx: ident, $user_ctx: ident, $handler: ident ( args[ .. $cnt: tt ] ) ) => {
$crate::syscall::syscall_handler!($cnt, $handler, $args, $ctx)
};
( $args: ident, $ctx: ident, $user_ctx: ident, $handler: ident ( args[ .. $cnt: tt ] , &user_ctx ) ) => {
$crate::syscall::syscall_handler!($cnt, $handler, $args, $ctx, &$user_ctx)
};
( $args: ident, $ctx: ident, $user_ctx: ident, $handler: ident ( args[ .. $cnt: tt ] , &mut user_ctx ) ) => {
// `$user_ctx` is already of type `&mut ostd::cpu::UserContext`,
// so no need to take `&mut` again
$crate::syscall::syscall_handler!($cnt, $handler, $args, $ctx, $user_ctx)
};
}
/// Generates, from a table of `NAME = number => handler(args...)` entries,
/// one `pub const NAME: u64` per syscall number and the `syscall_dispatch`
/// function that routes a syscall number to its handler.
///
/// Syscall numbers that are not listed in the table are logged with a warning
/// and answered with `ENOSYS`.
macro_rules! impl_syscall_nums_and_dispatch_fn {
// $args, $user_ctx, and $dispatcher_name are needed since Rust macro is hygienic
// NOTE(review): `$user_ctx` and `$dispatcher_name` are not parameters of the
// arm below, so the comment above looks stale; confirm and update.
( $( $name: ident = $num: literal => $handler: ident $args: tt );* $(;)? ) => {
// First, define the syscall numbers
$(
pub const $name: u64 = $num;
)*
// Then, define the dispatcher function
pub fn syscall_dispatch(
syscall_number: u64,
args: [u64; 6],
ctx: &crate::context::Context,
user_ctx: &mut ostd::cpu::UserContext,
) -> $crate::prelude::Result<$crate::syscall::SyscallReturn> {
match syscall_number {
$(
$num => {
$crate::log_syscall_entry!($name);
$crate::syscall::dispatch_fn_inner!(args, ctx, user_ctx, $handler $args)
}
)*
_ => {
log::warn!("Unimplemented syscall number: {}", syscall_number);
$crate::return_errno_with_message!($crate::error::Errno::ENOSYS, "Syscall was unimplemented");
}
}
}
}
}
// Export macros to sub-modules
use dispatch_fn_inner;
use impl_syscall_nums_and_dispatch_fn;
use syscall_handler;
/// The syscall number and the six raw syscall arguments captured from a
/// user CPU context.
pub struct SyscallArgument {
// The syscall number, as reported by `UserContext::syscall_num`.
syscall_number: u64,
// The raw syscall arguments, as reported by `UserContext::syscall_args`.
args: [u64; 6],
}
/// The outcome of a successfully handled syscall.
#[derive(Debug, Clone, Copy)]
pub enum SyscallReturn {
/// The syscall returns an `isize`; `handle_syscall` stores this value as
/// the syscall return value of the user context (rax).
Return(isize),
/// The syscall return value of the user context (rax) is left untouched.
NoReturn,
}
impl SyscallArgument {
    /// Captures the syscall number and the syscall arguments from `user_ctx`,
    /// widening each of them to `u64`.
    fn new_from_context(user_ctx: &UserContext) -> Self {
        Self {
            syscall_number: user_ctx.syscall_num() as u64,
            args: user_ctx.syscall_args().map(|arg| arg as u64),
        }
    }
}
pub fn handle_syscall(ctx: &Context, user_ctx: &mut UserContext) {
let syscall_frame = SyscallArgument::new_from_context(user_ctx);
let syscall_return = arch::syscall_dispatch(
syscall_frame.syscall_number,
syscall_frame.args,
ctx,
user_ctx,
);
match syscall_return {
Ok(return_value) => {
if let SyscallReturn::Return(return_value) = return_value {
user_ctx.set_syscall_ret(return_value as usize);
}
}
Err(err) => {
debug!("syscall return error: {:?}", err);
let errno = err.error() as i32;
user_ctx.set_syscall_ret((-errno) as usize)
}
}
}
/// Logs the entry into a syscall at `Info` level.
///
/// The argument is the name of a syscall number constant. The log line
/// contains the current PID, the current TID, the value of the constant and
/// its name. Nothing is evaluated when the `Info` level is disabled.
#[macro_export]
macro_rules! log_syscall_entry {
($syscall_name: tt) => {
if log::log_enabled!(log::Level::Info) {
let syscall_name_str = stringify!($syscall_name);
let pid = $crate::current!().pid();
let tid = $crate::current_thread!().tid();
log::info!(
"[pid={}][tid={}][id={}][{}]",
pid,
tid,
$syscall_name,
syscall_name_str
);
}
};
}

202
kernel/src/syscall/mount.rs Normal file
View File

@ -0,0 +1,202 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
exfat::{ExfatFS, ExfatMountOptions},
ext2::Ext2,
fs_resolver::{FsPath, AT_FDCWD},
path::Dentry,
utils::{FileSystem, InodeType},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `mount` syscall.
///
/// The mount target `dirname` is resolved first; the operation to perform is
/// then selected from `flags`, in this order of precedence:
/// `MS_REMOUNT | MS_BIND` (reconfigure), `MS_REMOUNT` (remount), `MS_BIND`
/// (bind mount), any of `MS_SHARED` / `MS_PRIVATE` / `MS_SLAVE` /
/// `MS_UNBINDABLE` (change type), `MS_MOVE` (move mount), and otherwise a
/// new mount.
///
/// The `data` argument is interpreted by the different filesystems.
/// Typically it is a string of comma-separated options understood by
/// this filesystem. The current implementation only considers the case
/// where it is `NULL`, because it should be interpreted by the specific
/// filesystems.
pub fn sys_mount(
    devname_addr: Vaddr,
    dirname_addr: Vaddr,
    fstype_addr: Vaddr,
    flags: u64,
    data: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let devname = user_space.read_cstring(devname_addr, MAX_FILENAME_LEN)?;
    let dirname = user_space.read_cstring(dirname_addr, MAX_FILENAME_LEN)?;
    let mount_flags = MountFlags::from_bits_truncate(flags as u32);
    debug!(
        "devname = {:?}, dirname = {:?}, fstype = 0x{:x}, flags = {:?}, data = 0x{:x}",
        devname, dirname, fstype_addr, mount_flags, data,
    );

    let dirname_str = dirname.to_string_lossy();
    if dirname_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "dirname is empty");
    }
    let target_path = FsPath::new(AT_FDCWD, dirname_str.as_ref())?;
    let dst_dentry = ctx.process.fs().read().lookup(&target_path)?;

    let is_remount = mount_flags.contains(MountFlags::MS_REMOUNT);
    let is_bind = mount_flags.contains(MountFlags::MS_BIND);
    let propagation_flags = MountFlags::MS_SHARED
        | MountFlags::MS_PRIVATE
        | MountFlags::MS_SLAVE
        | MountFlags::MS_UNBINDABLE;

    if is_remount && is_bind {
        do_reconfigure_mnt()?;
    } else if is_remount {
        do_remount()?;
    } else if is_bind {
        let recursive = mount_flags.contains(MountFlags::MS_REC);
        do_bind_mount(devname, dst_dentry, recursive, ctx)?;
    } else if mount_flags.intersects(propagation_flags) {
        do_change_type()?;
    } else if mount_flags.contains(MountFlags::MS_MOVE) {
        do_move_mount_old(devname, dst_dentry, ctx)?;
    } else {
        do_new_mount(devname, fstype_addr, dst_dentry, ctx)?;
    }

    Ok(SyscallReturn::Return(0))
}
fn do_reconfigure_mnt() -> Result<()> {
return_errno_with_message!(Errno::EINVAL, "do_reconfigure_mnt is not supported");
}
fn do_remount() -> Result<()> {
return_errno_with_message!(Errno::EINVAL, "do_remount is not supported");
}
/// Binds the directory named by `src_name` to the `dst_dentry` location.
///
/// If `recursive` is true, the mount is bound recursively, as done by the
/// user command `mount --rbind src dst`.
///
/// # Errors
///
/// Returns `ENOENT` if `src_name` is empty and `ENOTDIR` if it does not name
/// a directory; lookup and bind errors are propagated.
fn do_bind_mount(
    src_name: CString,
    dst_dentry: Arc<Dentry>,
    recursive: bool,
    ctx: &Context,
) -> Result<()> {
    let src_path = src_name.to_string_lossy();
    if src_path.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "src_name is empty");
    }
    let src_fs_path = FsPath::new(AT_FDCWD, src_path.as_ref())?;
    let src_dentry = ctx.process.fs().read().lookup(&src_fs_path)?;

    if src_dentry.type_() != InodeType::Dir {
        return_errno_with_message!(Errno::ENOTDIR, "src_name must be directory");
    }

    src_dentry.bind_mount_to(&dst_dentry, recursive)?;
    Ok(())
}
fn do_change_type() -> Result<()> {
return_errno_with_message!(Errno::EINVAL, "do_change_type is not supported");
}
/// Moves the mount rooted at `src_name` to the `dst_dentry` location.
///
/// # Errors
///
/// Returns `ENOENT` if `src_name` is empty, and `EINVAL` if `src_name` is not
/// the root of a mount or if its mount node has no parent. Lookup and graft
/// errors are propagated.
fn do_move_mount_old(src_name: CString, dst_dentry: Arc<Dentry>, ctx: &Context) -> Result<()> {
    let src_path = src_name.to_string_lossy();
    if src_path.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "src_name is empty");
    }
    let src_fs_path = FsPath::new(AT_FDCWD, src_path.as_ref())?;
    let src_dentry = ctx.process.fs().read().lookup(&src_fs_path)?;

    // Only the root of a mount that has a parent mount can be moved.
    if !src_dentry.is_root_of_mount() || src_dentry.mount_node().parent().is_none() {
        return_errno_with_message!(Errno::EINVAL, "src_name can not be moved");
    }

    src_dentry.mount_node().graft_mount_node_tree(&dst_dentry)?;
    Ok(())
}
/// Mounts a new filesystem on `target_dentry`.
///
/// The filesystem type name is read from user space at `fs_type`, and the
/// filesystem instance is obtained from `get_fs` using that name and
/// `devname`.
///
/// # Errors
///
/// Returns `ENOTDIR` if the mount point is not a directory and `EINVAL` if
/// the filesystem type name is empty; other errors are propagated.
fn do_new_mount(
    devname: CString,
    fs_type: Vaddr,
    target_dentry: Arc<Dentry>,
    ctx: &Context,
) -> Result<()> {
    if target_dentry.type_() != InodeType::Dir {
        return_errno_with_message!(Errno::ENOTDIR, "mountpoint must be directory");
    }

    let fs_type_name = ctx
        .get_user_space()
        .read_cstring(fs_type, MAX_FILENAME_LEN)?;
    if fs_type_name.is_empty() {
        return_errno_with_message!(Errno::EINVAL, "fs_type is empty");
    }

    let new_fs = get_fs(fs_type_name, devname)?;
    target_dentry.mount(new_fs)?;
    Ok(())
}
/// Get the filesystem by fs_type and devname.
fn get_fs(fs_type: CString, devname: CString) -> Result<Arc<dyn FileSystem>> {
let devname = devname.to_str().unwrap();
let device = match aster_block::get_device(devname) {
Some(device) => device,
None => return_errno_with_message!(Errno::ENOENT, "Device does not exist"),
};
let fs_type = fs_type.to_str().unwrap();
match fs_type {
"ext2" => {
let ext2_fs = Ext2::open(device)?;
Ok(ext2_fs)
}
"exfat" => {
let exfat_fs = ExfatFS::open(device, ExfatMountOptions::default())?;
Ok(exfat_fs)
}
_ => return_errno_with_message!(Errno::EINVAL, "Invalid fs type"),
}
}
// Flags of the `mount` syscall. `sys_mount` parses the raw value with
// `from_bits_truncate`, so bits that are not listed here are silently dropped.
bitflags! {
struct MountFlags: u32 {
const MS_RDONLY = 1 << 0; // Mount read-only.
const MS_NOSUID = 1 << 1; // Ignore suid and sgid bits.
const MS_NODEV = 1 << 2; // Disallow access to device special files.
const MS_NOEXEC = 1 << 3; // Disallow program execution.
const MS_SYNCHRONOUS = 1 << 4; // Writes are synced at once.
const MS_REMOUNT = 1 << 5; // Alter flags of a mounted FS.
const MS_MANDLOCK = 1 << 6; // Allow mandatory locks on an FS.
const MS_DIRSYNC = 1 << 7; // Directory modifications are synchronous.
const MS_NOSYMFOLLOW = 1 << 8; // Do not follow symlinks.
const MS_NOATIME = 1 << 10; // Do not update access times.
const MS_NODIRATIME = 1 << 11; // Do not update directory access times.
const MS_BIND = 1 << 12; // Bind directory at different place.
const MS_MOVE = 1 << 13; // Move mount from old to new.
const MS_REC = 1 << 14; // Create recursive mount.
const MS_SILENT = 1 << 15; // Suppress certain messages in kernel log.
const MS_POSIXACL = 1 << 16; // VFS does not apply the umask.
const MS_UNBINDABLE = 1 << 17; // Change to unbindable.
const MS_PRIVATE = 1 << 18; // Change to private.
const MS_SLAVE = 1 << 19; // Change to slave.
const MS_SHARED = 1 << 20; // Change to shared.
const MS_RELATIME = 1 << 21; // Update atime relative to mtime/ctime.
const MS_KERNMOUNT = 1 << 22; // This is a kern_mount call.
}
}

View File

@ -0,0 +1,20 @@
// SPDX-License-Identifier: MPL-2.0
use align_ext::AlignExt;
use super::SyscallReturn;
use crate::{prelude::*, vm::perms::VmPerms};
pub fn sys_mprotect(addr: Vaddr, len: usize, perms: u64, ctx: &Context) -> Result<SyscallReturn> {
let vm_perms = VmPerms::from_bits_truncate(perms as u32);
debug!(
"addr = 0x{:x}, len = 0x{:x}, perms = {:?}",
addr, len, vm_perms
);
let root_vmar = ctx.process.root_vmar();
debug_assert!(addr % PAGE_SIZE == 0);
let len = len.align_up(PAGE_SIZE);
let range = addr..(addr + len);
root_vmar.protect(vm_perms, range)?;
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Handles the `msync` syscall.
///
/// This is currently a stub: all arguments are ignored and the call always
/// succeeds with a return value of 0.
pub fn sys_msync(_start: Vaddr, _size: usize, _flag: i32, _ctx: &Context) -> Result<SyscallReturn> {
// TODO: implement real `msync`.
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,15 @@
// SPDX-License-Identifier: MPL-2.0
use align_ext::AlignExt;
use super::SyscallReturn;
use crate::prelude::*;
pub fn sys_munmap(addr: Vaddr, len: usize, ctx: &Context) -> Result<SyscallReturn> {
debug!("addr = 0x{:x}, len = {}", addr, len);
let root_vmar = ctx.process.root_vmar();
let len = len.align_up(PAGE_SIZE);
debug!("unmap range = 0x{:x} - 0x{:x}", addr, addr + len);
root_vmar.destroy(addr..addr + len)?;
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: MPL-2.0
use core::time::Duration;
use super::{clock_gettime::read_clock, ClockId, SyscallReturn};
use crate::{
prelude::*,
process::signal::Pauser,
time::{clockid_t, timespec_t, TIMER_ABSTIME},
};
/// Handles the `nanosleep` syscall as a relative sleep measured against
/// `ClockId::CLOCK_MONOTONIC`.
pub fn sys_nanosleep(
    request_timespec_addr: Vaddr,
    remain_timespec_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let is_abs_time = false;
    do_clock_nanosleep(
        ClockId::CLOCK_MONOTONIC as clockid_t,
        is_abs_time,
        request_timespec_addr,
        remain_timespec_addr,
        ctx,
    )
}
pub fn sys_clock_nanosleep(
clockid: clockid_t,
flags: i32,
request_timespec_addr: Vaddr,
remain_timespec_addr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
let is_abs_time = if flags == 0 {
false
} else if flags == TIMER_ABSTIME {
true
} else {
unreachable!()
};
do_clock_nanosleep(
clockid,
is_abs_time,
request_timespec_addr,
remain_timespec_addr,
ctx,
)
}
/// Sleeps until the requested time, measured on the clock `clockid`.
///
/// The request is read from `request_timespec_addr`. When `is_abs_time` is
/// set it is an absolute deadline (a deadline already in the past returns
/// immediately); otherwise it is a relative interval.
///
/// If the sleep is interrupted before the deadline, `EINTR` is returned. In
/// that case, for relative sleeps with a non-null `remain_timespec_addr`, the
/// remaining time is written back to user space first.
fn do_clock_nanosleep(
    clockid: clockid_t,
    is_abs_time: bool,
    request_timespec_addr: Vaddr,
    remain_timespec_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let requested_timespec = ctx
        .get_user_space()
        .read_val::<timespec_t>(request_timespec_addr)?;
    let request_time = Duration::try_from(requested_timespec)?;
    debug!(
        "clockid = {:?}, is_abs_time = {}, request_time = {:?}, remain_timespec_addr = 0x{:x}",
        clockid, is_abs_time, request_time, remain_timespec_addr
    );

    let start_time = read_clock(clockid, ctx)?;
    let timeout = if is_abs_time {
        // `checked_sub` yields `None` exactly when the deadline is already
        // in the past.
        match request_time.checked_sub(start_time) {
            Some(until_deadline) => until_deadline,
            None => return Ok(SyscallReturn::Return(0)),
        }
    } else {
        request_time
    };

    // FIXME: sleeping thread can only be interrupted by signals that will call signal handler or terminate
    // current process. i.e., the signals that should be ignored will not interrupt sleeping thread.
    let pauser = Pauser::new();
    let wait_result = pauser.pause_until_or_timeout(|| None, &timeout);

    match wait_result {
        Ok(()) => unreachable!(),
        Err(err) => match err.error() {
            Errno::ETIME => Ok(SyscallReturn::Return(0)),
            Errno::EINTR => {
                let end_time = read_clock(clockid, ctx)?;
                let deadline = start_time + timeout;
                if end_time >= deadline {
                    return Ok(SyscallReturn::Return(0));
                }

                if remain_timespec_addr != 0 && !is_abs_time {
                    let remaining_timespec = timespec_t::from(deadline - end_time);
                    ctx.get_user_space()
                        .write_val(remain_timespec_addr, &remaining_timespec)?;
                }

                return_errno_with_message!(Errno::EINTR, "sleep was interrupted");
            }
            _ => unreachable!(),
        },
    }
}

View File

@ -0,0 +1,58 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::{FdFlags, FileDesc},
fs_resolver::{FsPath, AT_FDCWD},
utils::{AccessMode, CreationFlags},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `openat` system call.
///
/// Reads the path string from user memory, opens it relative to `dirfd` with
/// `mode` masked by the process umask, installs the resulting handle in the
/// process file table and returns the new file descriptor. The descriptor is
/// marked close-on-exec when `flags` contains `O_CLOEXEC`.
pub fn sys_openat(
    dirfd: FileDesc,
    path_addr: Vaddr,
    flags: u32,
    mode: u16,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let path = user_space.read_cstring(path_addr, MAX_FILENAME_LEN)?;
    debug!(
        "dirfd = {}, path = {:?}, flags = {}, mode = {}",
        dirfd, path, flags, mode
    );

    let process = ctx.process;

    // Resolve the path and open the inode with the umask applied to `mode`.
    let path_str = path.to_string_lossy();
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;
    let effective_mode = mode & !process.umask().read().get();
    let file_handle = Arc::new(process.fs().read().open(&fs_path, flags, effective_mode)?);

    let mut file_table = process.file_table().lock();
    let close_on_exec =
        CreationFlags::from_bits_truncate(flags).contains(CreationFlags::O_CLOEXEC);
    let fd_flags = match close_on_exec {
        true => FdFlags::CLOEXEC,
        false => FdFlags::empty(),
    };
    let fd = file_table.insert(file_handle, fd_flags);

    Ok(SyscallReturn::Return(fd as _))
}
/// Handles the `open` system call by forwarding to `sys_openat` with
/// `AT_FDCWD` as the directory file descriptor.
pub fn sys_open(path_addr: Vaddr, flags: u32, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
self::sys_openat(AT_FDCWD, path_addr, flags, mode, ctx)
}
/// Handles the `creat` system call.
///
/// Forwards to `sys_openat` with `AT_FDCWD` and the fixed flag combination
/// `O_WRONLY | O_CREAT | O_TRUNC`.
pub fn sys_creat(path_addr: Vaddr, mode: u16, ctx: &Context) -> Result<SyscallReturn> {
    let creation_flags = CreationFlags::O_CREAT | CreationFlags::O_TRUNC;
    let flags = AccessMode::O_WRONLY as u32 | creation_flags.bits();
    self::sys_openat(AT_FDCWD, path_addr, flags, mode, ctx)
}

View File

@ -0,0 +1,14 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{prelude::*, process::signal::Pauser};
/// Handles the `pause` system call.
///
/// Blocks the caller on a `Pauser` whose condition closure always yields
/// `None`, so the wait is expected to end only by returning an error
/// (propagated with `?`). Reaching the statement after the wait is treated as
/// an internal error.
pub fn sys_pause(_ctx: &Context) -> Result<SyscallReturn> {
// FIXME: like sleep, paused thread can only be interrupted by signals that will call signal
// handler or terminate current process
let pauser = Pauser::new();
pauser.pause_until(|| None)?;
unreachable!("[Internal Error] pause should always return EINTR");
}

View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::{FdFlags, FileDesc},
pipe,
utils::CreationFlags,
},
prelude::*,
};
/// Handles the `pipe2` system call.
///
/// Creates a pipe, installs both ends in the process file table and writes
/// the pair of descriptors to user memory at `fds`. Both descriptors are
/// marked close-on-exec when `flags` contains `O_CLOEXEC`. If the write to
/// user memory fails, both descriptors are closed again and the error is
/// returned.
pub fn sys_pipe2(fds: Vaddr, flags: u32, ctx: &Context) -> Result<SyscallReturn> {
    debug!("flags: {:?}", flags);

    let (pipe_reader, pipe_writer) = pipe::new_pair()?;

    let close_on_exec =
        CreationFlags::from_bits_truncate(flags).contains(CreationFlags::O_CLOEXEC);
    let fd_flags = match close_on_exec {
        true => FdFlags::CLOEXEC,
        false => FdFlags::empty(),
    };

    let mut file_table = ctx.process.file_table().lock();
    let reader_fd = file_table.insert(pipe_reader, fd_flags);
    let writer_fd = file_table.insert(pipe_writer, fd_flags);
    let pipe_fds = PipeFds {
        reader_fd,
        writer_fd,
    };
    debug!("pipe_fds: {:?}", pipe_fds);

    match ctx.get_user_space().write_val(fds, &pipe_fds) {
        Ok(_) => Ok(SyscallReturn::Return(0)),
        Err(err) => {
            // Roll back: the caller never learned about these descriptors.
            file_table.close_file(pipe_fds.reader_fd).unwrap();
            file_table.close_file(pipe_fds.writer_fd).unwrap();
            Err(err)
        }
    }
}
/// Handles the `pipe` system call by forwarding to `sys_pipe2` with no flags.
pub fn sys_pipe(fds: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
self::sys_pipe2(fds, 0, ctx)
}
/// The pair of file descriptors that `sys_pipe2` writes back to user memory.
///
/// `#[repr(C)]` keeps the fields in declaration order: the read end first,
/// then the write end.
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
struct PipeFds {
// Descriptor of the read end of the pipe.
reader_fd: FileDesc,
// Descriptor of the write end of the pipe.
writer_fd: FileDesc,
}

170
kernel/src/syscall/poll.rs Normal file
View File

@ -0,0 +1,170 @@
// SPDX-License-Identifier: MPL-2.0
use core::{cell::Cell, time::Duration};
use super::SyscallReturn;
use crate::{events::IoEvents, fs::file_table::FileDesc, prelude::*, process::signal::Poller};
/// Handles the `poll` system call.
///
/// Reads `nfds` consecutive `c_pollfd` entries starting at `fds`, waits via
/// `do_poll` until at least one of them is ready or the timeout expires, and
/// writes the resulting `revents` of every entry back to user memory.
///
/// A negative `timeout` means "wait indefinitely"; otherwise it is a number
/// of milliseconds. The return value is the number of entries with a
/// non-empty `revents`.
///
/// Only the `revents` field is written back. The `fd` and `events` fields
/// belong to the caller and are left untouched, so a negative `fd` (used to
/// make an entry ignored) keeps its original value.
pub fn sys_poll(fds: Vaddr, nfds: u64, timeout: i32, ctx: &Context) -> Result<SyscallReturn> {
    // `nfds` is user-controlled: never reserve more than this many entries up
    // front. The vector still grows on demand while entries are read.
    const MAX_PREALLOCATED_POLL_FDS: u64 = 1024;
    // `c_pollfd` is `#[repr(C)] { fd: i32, events: i16, revents: i16 }`, so
    // `revents` follows the `i32` and the first `i16`.
    const REVENTS_OFFSET: usize = core::mem::size_of::<i32>() + core::mem::size_of::<i16>();

    let user_space = ctx.get_user_space();
    let entry_size = core::mem::size_of::<c_pollfd>();

    let poll_fds = {
        let mut read_addr = fds;
        let mut poll_fds = Vec::with_capacity(nfds.min(MAX_PREALLOCATED_POLL_FDS) as usize);
        for _ in 0..nfds {
            let c_poll_fd = user_space.read_val::<c_pollfd>(read_addr)?;
            let poll_fd = PollFd::from(c_poll_fd);
            // Always clear the revents fields first
            poll_fd.revents().set(IoEvents::empty());
            poll_fds.push(poll_fd);
            // FIXME: do we need to respect align of c_pollfd here?
            read_addr += entry_size;
        }
        poll_fds
    };

    let timeout = if timeout >= 0 {
        Some(Duration::from_millis(timeout as _))
    } else {
        None
    };
    debug!(
        "poll_fds = {:?}, nfds = {}, timeout = {:?}",
        poll_fds, nfds, timeout
    );

    let num_revents = do_poll(&poll_fds, timeout, ctx)?;

    // Write back the returned events only.
    let mut write_addr = fds;
    for poll_fd in &poll_fds {
        let revents = poll_fd.revents().get().bits() as i16;
        user_space.write_val(write_addr + REVENTS_OFFSET, &revents)?;
        // FIXME: do we need to respect align of c_pollfd here?
        write_addr += entry_size;
    }

    Ok(SyscallReturn::Return(num_revents as _))
}
pub fn do_poll(poll_fds: &[PollFd], timeout: Option<Duration>, ctx: &Context) -> Result<usize> {
// The main loop of polling
let mut poller = Poller::new();
loop {
let mut num_revents = 0;
let file_table = ctx.process.file_table().lock();
for poll_fd in poll_fds {
// Skip poll_fd if it is not given a fd
let fd = match poll_fd.fd() {
Some(fd) => fd,
None => continue,
};
// Poll the file
let file = file_table.get_file(fd)?;
let need_poller = if num_revents == 0 {
Some(&mut poller)
} else {
None
};
let revents = file.poll(poll_fd.events(), need_poller);
if !revents.is_empty() {
poll_fd.revents().set(revents);
num_revents += 1;
}
}
drop(file_table);
if num_revents > 0 {
return Ok(num_revents);
}
// Return immediately if specifying a timeout of zero
if timeout.is_some() && timeout.as_ref().unwrap().is_zero() {
return Ok(0);
}
if let Some(timeout) = timeout.as_ref() {
match poller.wait_timeout(timeout) {
Ok(_) => {}
Err(e) if e.error() == Errno::ETIME => {
// The return value is zero if the timeout expires
// before any file descriptors became ready
return Ok(0);
}
Err(e) => return Err(e),
};
} else {
poller.wait()?;
}
}
}
/// The user-space representation of one `poll` entry, read from and written
/// to user memory by `sys_poll`.
///
/// Layout reference:
// https://github.com/torvalds/linux/blob/master/include/uapi/asm-generic/poll.h
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
struct c_pollfd {
// File descriptor to poll; negative values are converted to "no fd".
fd: i32,
// Requested events, as raw bits.
events: i16,
// Returned events, as raw bits.
revents: i16,
}
/// The kernel-side form of one `poll` entry.
///
/// `revents` lives in a `Cell` so that it can be updated through a shared
/// reference (as `do_poll` does while iterating over a `&[PollFd]`).
#[derive(Debug, Clone)]
pub struct PollFd {
// `None` when the entry carries no file descriptor and must be skipped.
fd: Option<FileDesc>,
// Events the caller is interested in.
events: IoEvents,
// Events reported for this entry.
revents: Cell<IoEvents>,
}
impl PollFd {
    /// Creates an entry for `fd` interested in `events`, with no returned
    /// events yet.
    pub fn new(fd: Option<FileDesc>, events: IoEvents) -> Self {
        Self {
            fd,
            events,
            revents: Cell::new(IoEvents::empty()),
        }
    }

    /// Returns the file descriptor of this entry, if it has one.
    pub fn fd(&self) -> Option<FileDesc> {
        self.fd
    }

    /// Returns the events the caller asked for.
    pub fn events(&self) -> IoEvents {
        self.events
    }

    /// Returns the cell holding the events reported for this entry.
    pub fn revents(&self) -> &Cell<IoEvents> {
        &self.revents
    }
}
impl From<c_pollfd> for PollFd {
    /// Converts a raw user-space entry into its kernel-side form.
    ///
    /// A negative `fd` becomes `None`; unknown bits in `events` and `revents`
    /// are dropped.
    fn from(raw: c_pollfd) -> Self {
        let fd = (raw.fd >= 0).then_some(raw.fd as FileDesc);
        Self {
            fd,
            events: IoEvents::from_bits_truncate(raw.events as _),
            revents: Cell::new(IoEvents::from_bits_truncate(raw.revents as _)),
        }
    }
}
impl From<PollFd> for c_pollfd {
    /// Converts a kernel-side entry back into the raw user-space layout.
    ///
    /// An entry without a file descriptor is encoded as `fd == -1`.
    fn from(raw: PollFd) -> Self {
        Self {
            fd: raw.fd().unwrap_or(-1),
            events: raw.events().bits() as i16,
            revents: raw.revents().get().bits() as i16,
        }
    }
}

121
kernel/src/syscall/prctl.rs Normal file
View File

@ -0,0 +1,121 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::{posix_thread::MAX_THREAD_NAME_LEN, signal::sig_num::SigNum},
};
pub fn sys_prctl(
option: i32,
arg2: u64,
arg3: u64,
arg4: u64,
arg5: u64,
ctx: &Context,
) -> Result<SyscallReturn> {
let prctl_cmd = PrctlCmd::from_args(option, arg2, arg3, arg4, arg5)?;
debug!("prctl cmd = {:x?}", prctl_cmd);
match prctl_cmd {
PrctlCmd::PR_SET_PDEATHSIG(signum) => {
ctx.process.set_parent_death_signal(signum);
}
PrctlCmd::PR_GET_PDEATHSIG(write_to_addr) => {
let write_val = {
match ctx.process.parent_death_signal() {
None => 0i32,
Some(signum) => signum.as_u8() as i32,
}
};
ctx.get_user_space().write_val(write_to_addr, &write_val)?;
}
PrctlCmd::PR_GET_DUMPABLE => {
// TODO: when coredump is supported, return the actual value
return Ok(SyscallReturn::Return(Dumpable::Disable as _));
}
PrctlCmd::PR_SET_DUMPABLE(dumpable) => {
if dumpable != Dumpable::Disable && dumpable != Dumpable::User {
return_errno!(Errno::EINVAL)
}
// TODO: implement coredump
}
PrctlCmd::PR_GET_NAME(write_to_addr) => {
let thread_name = ctx.posix_thread.thread_name().lock();
if let Some(thread_name) = &*thread_name {
if let Some(thread_name) = thread_name.name()? {
ctx.get_user_space().write_bytes(
write_to_addr,
&mut VmReader::from(thread_name.to_bytes_with_nul()),
)?;
}
}
}
PrctlCmd::PR_SET_NAME(read_addr) => {
let mut thread_name = ctx.posix_thread.thread_name().lock();
if let Some(thread_name) = &mut *thread_name {
let new_thread_name = ctx
.get_user_space()
.read_cstring(read_addr, MAX_THREAD_NAME_LEN)?;
thread_name.set_name(&new_thread_name)?;
}
}
_ => todo!(),
}
Ok(SyscallReturn::Return(0))
}
// Raw `option` values of the `prctl` system call that `PrctlCmd::from_args`
// recognizes.
const PR_SET_PDEATHSIG: i32 = 1;
const PR_GET_PDEATHSIG: i32 = 2;
const PR_GET_DUMPABLE: i32 = 3;
const PR_SET_DUMPABLE: i32 = 4;
const PR_SET_NAME: i32 = 15;
const PR_GET_NAME: i32 = 16;
const PR_SET_TIMERSLACK: i32 = 29;
const PR_GET_TIMERSLACK: i32 = 30;
/// A decoded `prctl` command together with its already-typed argument.
///
/// Variant names mirror the raw `PR_*` option names, hence the
/// `non_camel_case_types` allowance.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy)]
pub enum PrctlCmd {
// Set the parent-death signal to the given signal.
PR_SET_PDEATHSIG(SigNum),
// Write the parent-death signal number to the given user address.
PR_GET_PDEATHSIG(Vaddr),
// Read the new thread name from the given user address.
PR_SET_NAME(Vaddr),
// Write the thread name to the given user address.
PR_GET_NAME(Vaddr),
// Not constructed anywhere in this file yet, hence `dead_code`.
#[allow(dead_code)]
PR_SET_TIMERSLACK(u64),
// Not constructed anywhere in this file yet, hence `dead_code`.
#[allow(dead_code)]
PR_GET_TIMERSLACK,
// Set the dumpable attribute.
PR_SET_DUMPABLE(Dumpable),
// Query the dumpable attribute.
PR_GET_DUMPABLE,
}
/// Values of the "dumpable" attribute used by `PR_GET_DUMPABLE` and
/// `PR_SET_DUMPABLE`.
///
/// `TryFromInt` provides the fallible conversion from the raw `u64` argument.
#[repr(u64)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, TryFromInt)]
pub enum Dumpable {
Disable = 0, /* No setuid dumping */
User = 1, /* Dump as user of process */
Root = 2, /* Dump as root */
}
impl PrctlCmd {
fn from_args(option: i32, arg2: u64, _arg3: u64, _arg4: u64, _arg5: u64) -> Result<PrctlCmd> {
match option {
PR_SET_PDEATHSIG => {
let signum = SigNum::try_from(arg2 as u8)?;
Ok(PrctlCmd::PR_SET_PDEATHSIG(signum))
}
PR_GET_PDEATHSIG => Ok(PrctlCmd::PR_GET_PDEATHSIG(arg2 as _)),
PR_GET_DUMPABLE => Ok(PrctlCmd::PR_GET_DUMPABLE),
PR_SET_DUMPABLE => Ok(PrctlCmd::PR_SET_DUMPABLE(Dumpable::try_from(arg2)?)),
PR_SET_NAME => Ok(PrctlCmd::PR_SET_NAME(arg2 as _)),
PR_GET_NAME => Ok(PrctlCmd::PR_GET_NAME(arg2 as _)),
PR_GET_TIMERSLACK => todo!(),
PR_SET_TIMERSLACK => todo!(),
_ => {
debug!("prctl cmd number: {}", option);
return_errno_with_message!(Errno::EINVAL, "unsupported prctl command");
}
}
}
}

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*};
/// Handles the `pread64` system call.
///
/// Reads up to `user_buf_len` bytes from the file behind `fd`, starting at
/// `offset`, into the user buffer at `user_buf_ptr`. Returns the number of
/// bytes read. A zero-length request succeeds with 0 once `fd` has been
/// looked up successfully.
///
/// # Errors
///
/// Returns `EINVAL` if `offset` is negative or `offset + user_buf_len`
/// overflows. File-table lookup, user-memory and read errors are propagated.
pub fn sys_pread64(
    fd: FileDesc,
    user_buf_ptr: Vaddr,
    user_buf_len: usize,
    offset: i64,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "fd = {}, buf = 0x{:x}, user_buf_len = 0x{:x}, offset = 0x{:x}",
        fd, user_buf_ptr, user_buf_len, offset
    );

    if offset < 0 {
        return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
    }

    // The file table lock is released as soon as the handle has been cloned.
    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    // TODO: Check (f.file->f_mode & FMODE_PREAD); We don't have f_mode in our FileLike trait
    if user_buf_len == 0 {
        return Ok(SyscallReturn::Return(0));
    }
    let end_offset = offset.checked_add(user_buf_len as i64);
    if end_offset.is_none() {
        return_errno_with_message!(Errno::EINVAL, "offset + user_buf_len overflow");
    }

    let vm_space = ctx.process.root_vmar().vm_space();
    let mut writer = vm_space.writer(user_buf_ptr, user_buf_len)?;
    let read_len = file.read_at(offset as usize, &mut writer)?;

    Ok(SyscallReturn::Return(read_len as _))
}

View File

@ -0,0 +1,182 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
prelude::*,
util::{copy_iovs_from_user, IoVec},
};
/// Handles the `readv` system call: a scatter read at the file's current
/// position. Returns the total number of bytes read.
pub fn sys_readv(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    do_sys_readv(fd, io_vec_ptr, io_vec_count, ctx)
        .map(|read_len| SyscallReturn::Return(read_len as _))
}
/// Handles the `preadv` system call: a scatter read starting at `offset`,
/// with no extra flags. Returns the total number of bytes read.
pub fn sys_preadv(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    offset: i64,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let no_flags = RWFFlag::empty();
    do_sys_preadv(fd, io_vec_ptr, io_vec_count, offset, no_flags, ctx)
        .map(|read_len| SyscallReturn::Return(read_len as _))
}
pub fn sys_preadv2(
fd: FileDesc,
io_vec_ptr: Vaddr,
io_vec_count: usize,
offset: i64,
flags: u32,
ctx: &Context,
) -> Result<SyscallReturn> {
let flags = match RWFFlag::from_bits(flags) {
Some(flags) => flags,
None => return_errno_with_message!(Errno::EINVAL, "invalid flags"),
};
let res = if offset == -1 {
do_sys_readv(fd, io_vec_ptr, io_vec_count, ctx)?
} else {
do_sys_preadv(fd, io_vec_ptr, io_vec_count, offset, flags, ctx)?
};
Ok(SyscallReturn::Return(res as _))
}
/// Scatter read from `fd` starting at `offset`, filling the user buffers
/// described by the `io_vec_count` entries at `io_vec_ptr` in order.
///
/// Empty buffers are skipped. Reading stops at the first short read. Returns
/// the total number of bytes read. `_flags` is currently ignored.
///
/// # Errors
///
/// Returns `EINVAL` if `offset` is negative, or if the running total plus the
/// current offset would overflow or exceed `isize::MAX`. Lookup, user-memory
/// and read errors are propagated.
fn do_sys_preadv(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    offset: i64,
    _flags: RWFFlag,
    ctx: &Context,
) -> Result<usize> {
    debug!(
        "preadv: fd = {}, io_vec_ptr = 0x{:x}, io_vec_counter = 0x{:x}, offset = 0x{:x}",
        fd, io_vec_ptr, io_vec_count, offset
    );

    if offset < 0 {
        return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
    }

    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    if io_vec_count == 0 {
        return Ok(0);
    }

    // Sanity check on the size of the vector array itself plus the offset.
    let array_span = io_vec_count
        .checked_mul(core::mem::size_of::<IoVec>())
        .and_then(|bytes| bytes.checked_add(offset as usize));
    if array_span.is_none() {
        return_errno_with_message!(Errno::EINVAL, "offset + io_vec_count overflow");
    }

    let mut total_len: usize = 0;
    let mut cur_offset = offset as usize;

    let io_vecs = copy_iovs_from_user(io_vec_ptr, io_vec_count)?;
    for io_vec in io_vecs.as_ref() {
        if io_vec.is_empty() {
            continue;
        }

        // The end position after this buffer must be representable and must
        // not exceed `isize::MAX`.
        let end_in_range = total_len
            .checked_add(io_vec.len())
            .and_then(|sum| sum.checked_add(cur_offset))
            .is_some_and(|end| end <= isize::MAX as usize);
        if !end_in_range {
            return_errno_with_message!(Errno::EINVAL, "Total length overflow");
        }

        let mut buffer = vec![0u8; io_vec.len()];

        // TODO: According to the man page
        // at <https://man7.org/linux/man-pages/man2/readv.2.html>,
        // readv must be atomic,
        // but the current implementation does not ensure atomicity.
        // A suitable fix would be to add a `readv` method for the `FileLike` trait,
        // allowing each subsystem to implement atomicity.
        let read_len = file.read_bytes_at(cur_offset, &mut buffer)?;
        io_vec.write_exact_to_user(&buffer)?;
        total_len += read_len;
        cur_offset += read_len;

        // A short (or empty) read means there is nothing more to fetch.
        let short_read = read_len == 0 || read_len < buffer.len();
        if short_read {
            break;
        }
    }

    Ok(total_len)
}
/// Scatter read from `fd` at the file's current position, filling the user
/// buffers described by the `io_vec_count` entries at `io_vec_ptr` in order.
///
/// Empty buffers are skipped. Reading stops at the first short read. Returns
/// the total number of bytes read.
fn do_sys_readv(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    ctx: &Context,
) -> Result<usize> {
    debug!(
        "fd = {}, io_vec_ptr = 0x{:x}, io_vec_counter = 0x{:x}",
        fd, io_vec_ptr, io_vec_count
    );

    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    if io_vec_count == 0 {
        return Ok(0);
    }

    let mut total_len = 0;

    let io_vecs = copy_iovs_from_user(io_vec_ptr, io_vec_count)?;
    for io_vec in io_vecs.as_ref() {
        if io_vec.is_empty() {
            continue;
        }

        let mut buffer = vec![0u8; io_vec.len()];

        // TODO: According to the man page
        // at <https://man7.org/linux/man-pages/man2/readv.2.html>,
        // readv must be atomic,
        // but the current implementation does not ensure atomicity.
        // A suitable fix would be to add a `readv` method for the `FileLike` trait,
        // allowing each subsystem to implement atomicity.
        let read_len = file.read_bytes(&mut buffer)?;
        io_vec.write_exact_to_user(&buffer)?;
        total_len += read_len;

        // A short (or empty) read means there is nothing more to fetch.
        let short_read = read_len == 0 || read_len < buffer.len();
        if short_read {
            break;
        }
    }

    Ok(total_len)
}
bitflags! {
    /// Per-call flags accepted by `preadv2`.
    ///
    /// The values must match the `RWF_*` constants of the Linux UAPI
    /// (`include/uapi/linux/fs.h`), where `RWF_HIPRI` is bit 0 and
    /// `RWF_DSYNC` is bit 1.
    struct RWFFlag: u32 {
        const RWF_HIPRI = 0x00000001;
        const RWF_DSYNC = 0x00000002;
        const RWF_SYNC = 0x00000004;
        const RWF_NOWAIT = 0x00000008;
    }
}

View File

@ -0,0 +1,31 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::{Pid, ResourceType},
};
/// Handles the `prlimit64` system call.
///
/// For the given `resource`, first writes the current limit to
/// `old_rlim_addr` (if non-null), then replaces it with the value read from
/// `new_rlim_addr` (if non-null). Returns 0 on success.
///
/// Note that `pid` is only logged: the limits of the calling process are
/// always the ones accessed.
pub fn sys_prlimit64(
    pid: Pid,
    resource: u32,
    new_rlim_addr: Vaddr,
    old_rlim_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let resource = ResourceType::try_from(resource)?;
    debug!(
        "pid = {}, resource = {:?}, new_rlim_addr = 0x{:x}, old_rlim_addr = 0x{:x}",
        pid, resource, new_rlim_addr, old_rlim_addr
    );

    let mut resource_limits = ctx.process.resource_limits().lock();

    // Report the old value before it is possibly overwritten below.
    let wants_old = old_rlim_addr != 0;
    if wants_old {
        ctx.get_user_space()
            .write_val(old_rlim_addr, resource_limits.get_rlimit(resource))?;
    }

    let has_new = new_rlim_addr != 0;
    if has_new {
        let new_rlimit = ctx.get_user_space().read_val(new_rlim_addr)?;
        *resource_limits.get_rlimit_mut(resource) = new_rlimit;
    }

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,72 @@
// SPDX-License-Identifier: MPL-2.0
use core::{sync::atomic::Ordering, time::Duration};
use super::{select::do_sys_select, SyscallReturn};
use crate::{
fs::file_table::FileDesc, prelude::*, process::signal::sig_mask::SigMask, time::timespec_t,
};
/// Handles the `pselect6` system call.
///
/// Behaves like `select`, except that the timeout is a `timespec_t` and the
/// thread's signal mask is temporarily replaced for the duration of the wait
/// when `sigmask_addr` is non-null. The original mask is restored before
/// returning, whatever the outcome of the wait.
///
/// # Errors
///
/// Returns `EINVAL` if the signal mask argument fails validation. User-memory
/// errors, timespec conversion errors and errors from `do_sys_select` are
/// propagated.
pub fn sys_pselect6(
    nfds: FileDesc,
    readfds_addr: Vaddr,
    writefds_addr: Vaddr,
    exceptfds_addr: Vaddr,
    timespec_addr: Vaddr,
    sigmask_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();

    // Read and validate every user-supplied argument *before* touching the
    // thread's signal mask. Otherwise an early error return below would leave
    // the temporary mask installed for good.
    let new_sigmask = if sigmask_addr != 0 {
        let sigmask_with_size: SigMaskWithSize = user_space.read_val(sigmask_addr)?;
        if !sigmask_with_size.is_valid() {
            return_errno_with_message!(Errno::EINVAL, "sigmask size is invalid")
        }
        Some(sigmask_with_size.sigmask)
    } else {
        None
    };

    let timeout = if timespec_addr != 0 {
        let time_spec: timespec_t = user_space.read_val(timespec_addr)?;
        Some(Duration::try_from(time_spec)?)
    } else {
        None
    };

    // From here on there is no early return until the mask is restored.
    let old_sigmask = new_sigmask.map(|sigmask| {
        ctx.posix_thread
            .sig_mask()
            .swap(sigmask, Ordering::Relaxed)
    });

    let res = do_sys_select(
        nfds,
        readfds_addr,
        writefds_addr,
        exceptfds_addr,
        timeout,
        ctx,
    );

    if let Some(old_mask) = old_sigmask {
        ctx.posix_thread
            .sig_mask()
            .store(old_mask, Ordering::Relaxed);
    }

    res
}
/// The structure `sys_pselect6` reads from user memory at `sigmask_addr`:
/// a signal mask followed by a size.
///
// NOTE(review): pselect6(2) documents its final argument as a pointer to
// `{ const kernel_sigset_t *ss; size_t ss_len; }`, i.e. the first word is a
// pointer to the mask rather than the mask itself. Confirm that reading the
// first word directly as a `SigMask` is intended.
#[repr(C)]
#[derive(Debug, Clone, Copy, Pod)]
struct SigMaskWithSize {
// The signal mask to install while waiting.
sigmask: SigMask,
// The size accompanying the mask; checked by `is_valid`.
sigmasksize: usize,
}
impl SigMaskWithSize {
/// Returns whether the argument is acceptable: either the mask is empty, or
/// the accompanying size equals `size_of::<SigMask>()`.
const fn is_valid(&self) -> bool {
self.sigmask.is_empty() || self.sigmasksize == size_of::<SigMask>()
}
}

View File

@ -0,0 +1,39 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*};
/// Handles the `pwrite64` system call.
///
/// Writes up to `user_buf_len` bytes from the user buffer at `user_buf_ptr`
/// to the file behind `fd`, starting at `offset`. Returns the number of bytes
/// written. A zero-length request succeeds with 0 once `fd` has been looked
/// up successfully.
///
/// # Errors
///
/// Returns `EINVAL` if `offset` is negative or `offset + user_buf_len`
/// overflows. File-table lookup, user-memory and write errors are propagated.
pub fn sys_pwrite64(
    fd: FileDesc,
    user_buf_ptr: Vaddr,
    user_buf_len: usize,
    offset: i64,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "fd = {}, user_buf_ptr = 0x{:x}, user_buf_len = 0x{:x}, offset = 0x{:x}",
        fd, user_buf_ptr, user_buf_len, offset
    );

    if offset < 0 {
        return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
    }

    // The file table lock is released as soon as the handle has been cloned.
    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    // TODO: Check (f.file->f_mode & FMODE_PWRITE); We don't have f_mode in our FileLike trait
    if user_buf_len == 0 {
        return Ok(SyscallReturn::Return(0));
    }
    let end_offset = offset.checked_add(user_buf_len as i64);
    if end_offset.is_none() {
        return_errno_with_message!(Errno::EINVAL, "offset + user_buf_len overflow");
    }

    let vm_space = ctx.process.root_vmar().vm_space();
    let mut reader = vm_space.reader(user_buf_ptr, user_buf_len)?;
    let write_len = file.write_at(offset as _, &mut reader)?;

    Ok(SyscallReturn::Return(write_len as _))
}

View File

@ -0,0 +1,160 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*, util::copy_iovs_from_user};
/// Handles the `writev` system call: a gather write at the file's current
/// position. Returns the total number of bytes written.
pub fn sys_writev(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    do_sys_writev(fd, io_vec_ptr, io_vec_count, ctx)
        .map(|write_len| SyscallReturn::Return(write_len as _))
}
/// Handles the `pwritev` system call: a gather write starting at `offset`,
/// with no extra flags. Returns the total number of bytes written.
pub fn sys_pwritev(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    offset: i64,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let no_flags = RWFFlag::empty();
    do_sys_pwritev(fd, io_vec_ptr, io_vec_count, offset, no_flags, ctx)
        .map(|write_len| SyscallReturn::Return(write_len as _))
}
pub fn sys_pwritev2(
fd: FileDesc,
io_vec_ptr: Vaddr,
io_vec_count: usize,
offset: i64,
flags: u32,
ctx: &Context,
) -> Result<SyscallReturn> {
let flags = match RWFFlag::from_bits(flags) {
Some(flags) => flags,
None => return_errno_with_message!(Errno::EINVAL, "invalid flags"),
};
let res = if offset == -1 {
do_sys_writev(fd, io_vec_ptr, io_vec_count, ctx)?
} else {
do_sys_pwritev(fd, io_vec_ptr, io_vec_count, offset, flags, ctx)?
};
Ok(SyscallReturn::Return(res as _))
}
/// Gather write to `fd` starting at `offset`, taking the data from the user
/// buffers described by the `io_vec_count` entries at `io_vec_ptr` in order.
///
/// Empty buffers are skipped. Returns the total number of bytes written.
/// `_flags` is currently ignored.
///
/// # Errors
///
/// Returns `EINVAL` if `offset` is negative, or if the running total plus the
/// current offset would overflow or exceed `isize::MAX`. Lookup, user-memory
/// and write errors are propagated.
fn do_sys_pwritev(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    offset: i64,
    _flags: RWFFlag,
    ctx: &Context,
) -> Result<usize> {
    // TODO: Implement flags support
    debug!(
        "fd = {}, io_vec_ptr = 0x{:x}, io_vec_counter = 0x{:x}, offset = 0x{:x}",
        fd, io_vec_ptr, io_vec_count, offset
    );

    if offset < 0 {
        return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
    }

    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    // TODO: Check (f.file->f_mode & FMODE_PWRITE); We don't have f_mode in our FileLike trait
    if io_vec_count == 0 {
        return Ok(0);
    }

    let mut total_len: usize = 0;
    let mut cur_offset = offset as usize;

    let io_vecs = copy_iovs_from_user(io_vec_ptr, io_vec_count)?;
    for io_vec in io_vecs.as_ref() {
        if io_vec.is_empty() {
            continue;
        }

        // The end position after this buffer must be representable and must
        // not exceed `isize::MAX`.
        let end_in_range = total_len
            .checked_add(io_vec.len())
            .and_then(|sum| sum.checked_add(cur_offset))
            .is_some_and(|end| end <= isize::MAX as usize);
        if !end_in_range {
            return_errno_with_message!(Errno::EINVAL, "Total length overflow");
        }

        // Stage the user data in a kernel buffer.
        let mut buffer = vec![0u8; io_vec.len()];
        io_vec.read_exact_from_user(&mut buffer)?;

        // TODO: According to the man page
        // at <https://man7.org/linux/man-pages/man2/readv.2.html>,
        // writev must be atomic,
        // but the current implementation does not ensure atomicity.
        // A suitable fix would be to add a `writev` method for the `FileLike` trait,
        // allowing each subsystem to implement atomicity.
        let write_len = file.write_bytes_at(cur_offset, &buffer)?;
        total_len += write_len;
        cur_offset += write_len;
    }

    Ok(total_len)
}
/// Gather write to `fd` at the file's current position, taking the data from
/// the user buffers described by the `io_vec_count` entries at `io_vec_ptr`
/// in order.
///
/// Empty buffers are skipped. Returns the total number of bytes written.
fn do_sys_writev(
    fd: FileDesc,
    io_vec_ptr: Vaddr,
    io_vec_count: usize,
    ctx: &Context,
) -> Result<usize> {
    debug!(
        "fd = {}, io_vec_ptr = 0x{:x}, io_vec_counter = 0x{:x}",
        fd, io_vec_ptr, io_vec_count
    );

    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    let mut total_len = 0;

    let io_vecs = copy_iovs_from_user(io_vec_ptr, io_vec_count)?;
    for io_vec in io_vecs.as_ref() {
        if io_vec.is_empty() {
            continue;
        }

        // Stage the user data in a kernel buffer.
        let mut buffer = vec![0u8; io_vec.len()];
        io_vec.read_exact_from_user(&mut buffer)?;

        // TODO: According to the man page
        // at <https://man7.org/linux/man-pages/man2/readv.2.html>,
        // writev must be atomic,
        // but the current implementation does not ensure atomicity.
        // A suitable fix would be to add a `writev` method for the `FileLike` trait,
        // allowing each subsystem to implement atomicity.
        let write_len = file.write_bytes(&buffer)?;
        total_len += write_len;
    }

    Ok(total_len)
}
bitflags! {
    /// Per-call flags accepted by `pwritev2`.
    ///
    /// The values must match the `RWF_*` constants of the Linux UAPI
    /// (`include/uapi/linux/fs.h`), where `RWF_HIPRI` is bit 0 and
    /// `RWF_DSYNC` is bit 1.
    struct RWFFlag: u32 {
        const RWF_HIPRI = 0x00000001;
        const RWF_DSYNC = 0x00000002;
        const RWF_SYNC = 0x00000004;
        const RWF_NOWAIT = 0x00000008;
    }
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*};
/// Handles the `read` system call.
///
/// Reads up to `buf_len` bytes from the file behind `fd` into the user buffer
/// at `user_buf_addr` and returns the number of bytes read.
pub fn sys_read(
    fd: FileDesc,
    user_buf_addr: Vaddr,
    buf_len: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "fd = {}, user_buf_ptr = 0x{:x}, buf_len = 0x{:x}",
        fd, user_buf_addr, buf_len
    );

    let file = ctx.process.file_table().lock().get_file(fd)?.clone();

    // According to <https://man7.org/linux/man-pages/man2/read.2.html>, if
    // the user specified an empty buffer, we should detect errors by checking
    // the file descriptor. If no errors are detected, return 0 successfully.
    let read_len = if buf_len == 0 {
        file.read_bytes(&mut [])?
    } else {
        let vm_space = ctx.process.root_vmar().vm_space();
        let mut writer = vm_space.writer(user_buf_addr, buf_len)?;
        file.read(&mut writer)?
    };

    Ok(SyscallReturn::Return(read_len as _))
}

View File

@ -0,0 +1,49 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `readlinkat` system call.
///
/// Looks up `path` relative to `dirfd` without following a final symbolic
/// link, reads the link target and copies at most `usr_buf_len` bytes of it
/// to `usr_buf_addr`. Returns the number of bytes copied.
///
/// # Errors
///
/// Returns `ENOENT` if the path is empty. Lookup, link-reading and
/// user-memory errors are propagated.
pub fn sys_readlinkat(
    dirfd: FileDesc,
    path_addr: Vaddr,
    usr_buf_addr: Vaddr,
    usr_buf_len: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let path = user_space.read_cstring(path_addr, MAX_FILENAME_LEN)?;
    debug!(
        "dirfd = {}, path = {:?}, usr_buf_addr = 0x{:x}, usr_buf_len = 0x{:x}",
        dirfd, path, usr_buf_addr, usr_buf_len
    );

    let path_str = path.to_string_lossy();
    if path_str.is_empty() {
        return_errno_with_message!(Errno::ENOENT, "path is empty");
    }
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;
    let dentry = ctx.process.fs().read().lookup_no_follow(&fs_path)?;

    // The target is silently truncated to the size of the user buffer.
    let linkpath = dentry.inode().read_link()?;
    let target = linkpath.as_bytes();
    let write_len = core::cmp::min(target.len(), usr_buf_len);
    let mut reader = VmReader::from(&target[..write_len]);
    user_space.write_bytes(usr_buf_addr, &mut reader)?;

    Ok(SyscallReturn::Return(write_len as _))
}
/// Handles the `readlink` system call by forwarding to `sys_readlinkat` with
/// `AT_FDCWD` as the directory file descriptor.
pub fn sys_readlink(
path_addr: Vaddr,
usr_buf_addr: Vaddr,
usr_buf_len: usize,
ctx: &Context,
) -> Result<SyscallReturn> {
self::sys_readlinkat(AT_FDCWD, path_addr, usr_buf_addr, usr_buf_len, ctx)
}

View File

@ -0,0 +1,38 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
net::socket::SendRecvFlags,
prelude::*,
util::{
net::{get_socket_from_fd, write_socket_addr_to_user},
IoVec,
},
};
/// Handles the `recvfrom` system call.
///
/// Receives a message from the socket behind `sockfd` into the user buffer
/// `buf`/`len` and returns the number of bytes received. If the message
/// carries a source address and `src_addr` is non-null, the address is
/// written to user memory via `src_addr`/`addrlen_ptr`.
pub fn sys_recvfrom(
    sockfd: FileDesc,
    buf: Vaddr,
    len: usize,
    flags: i32,
    src_addr: Vaddr,
    addrlen_ptr: Vaddr,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    let flags = SendRecvFlags::from_bits_truncate(flags);
    debug!("sockfd = {sockfd}, buf = 0x{buf:x}, len = {len}, flags = {flags:?}, src_addr = 0x{src_addr:x}, addrlen_ptr = 0x{addrlen_ptr:x}");

    let socket = get_socket_from_fd(sockfd)?;

    // The single user buffer is presented as a one-element I/O vector.
    let single_buffer = [IoVec::new(buf, len)];
    let (recv_size, message_header) = socket.recvmsg(&single_buffer, flags)?;

    if let Some(socket_addr) = message_header.addr() {
        if src_addr != 0 {
            write_socket_addr_to_user(socket_addr, src_addr, addrlen_ptr)?;
        }
    }

    Ok(SyscallReturn::Return(recv_size as _))
}

View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
net::socket::SendRecvFlags,
prelude::*,
util::net::{get_socket_from_fd, CUserMsgHdr},
};
/// Handles the `recvmsg` system call.
///
/// Reads the message header from user memory, receives into the I/O vectors
/// it describes and returns the total number of bytes received. A source
/// address carried by the message is written back through the header.
/// Control messages are not supported: a non-null `msg_control` only
/// triggers a warning.
pub fn sys_recvmsg(
    sockfd: FileDesc,
    user_msghdr_ptr: Vaddr,
    flags: i32,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let c_user_msghdr: CUserMsgHdr = user_space.read_val(user_msghdr_ptr)?;
    let flags = SendRecvFlags::from_bits_truncate(flags);
    debug!(
        "sockfd = {}, user_msghdr = {:x?}, flags = {:?}",
        sockfd, c_user_msghdr, flags
    );

    // The socket handle and the copied vectors only live for the receive.
    let (total_bytes, message_header) = {
        let socket = get_socket_from_fd(sockfd)?;
        let io_vecs = c_user_msghdr.copy_iovs_from_user()?;
        socket.recvmsg(&io_vecs, flags)?
    };

    match message_header.addr() {
        Some(addr) => c_user_msghdr.write_socket_addr_to_user(addr)?,
        None => {}
    }

    let wants_control = c_user_msghdr.msg_control != 0;
    if wants_control {
        warn!("receiving control message is not supported");
    }

    Ok(SyscallReturn::Return(total_bytes as _))
}

View File

@ -0,0 +1,78 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
utils::InodeType,
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `renameat` system call.
///
/// Resolves the old and new paths relative to `old_dirfd` and `new_dirfd`,
/// then renames the old entry to the new name. Renaming a path to itself is a
/// successful no-op.
///
/// # Errors
///
/// - `ENOENT` if either path is empty.
/// - `ENOTDIR` if the new path ends with `/` but the old entry is not a
///   directory.
/// - `EINVAL` if the new path lies inside the old path, i.e. an entry would
///   be moved into its own subtree.
///
/// Lookup and rename errors are propagated.
pub fn sys_renameat(
    old_dirfd: FileDesc,
    old_path_addr: Vaddr,
    new_dirfd: FileDesc,
    new_path_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let old_path = user_space.read_cstring(old_path_addr, MAX_FILENAME_LEN)?;
    let new_path = user_space.read_cstring(new_path_addr, MAX_FILENAME_LEN)?;
    debug!(
        "old_dirfd = {}, old_path = {:?}, new_dirfd = {}, new_path = {:?}",
        old_dirfd, old_path, new_dirfd, new_path
    );

    let fs = ctx.process.fs().read();

    let (old_dir_dentry, old_name) = {
        let old_path = old_path.to_string_lossy();
        if old_path.is_empty() {
            return_errno_with_message!(Errno::ENOENT, "oldpath is empty");
        }
        let old_fs_path = FsPath::new(old_dirfd, old_path.as_ref())?;
        fs.lookup_dir_and_base_name(&old_fs_path)?
    };
    let old_dentry = old_dir_dentry.lookup(&old_name)?;

    let (new_dir_dentry, new_name) = {
        let new_path = new_path.to_string_lossy();
        if new_path.is_empty() {
            return_errno_with_message!(Errno::ENOENT, "newpath is empty");
        }
        if new_path.ends_with('/') && old_dentry.type_() != InodeType::Dir {
            return_errno_with_message!(Errno::ENOTDIR, "oldpath is not dir");
        }
        let new_fs_path = FsPath::new(new_dirfd, new_path.as_ref().trim_end_matches('/'))?;
        fs.lookup_dir_and_base_name(&new_fs_path)?
    };

    // Check abs_path
    let old_abs_path = old_dentry.abs_path();
    let new_abs_path = new_dir_dentry.abs_path() + "/" + &new_name;
    if new_abs_path == old_abs_path {
        return Ok(SyscallReturn::Return(0));
    }
    // The new path is inside the old one only if the old path is followed by
    // a path separator. A plain string-prefix test would also reject sibling
    // names such as renaming "foo" to "foobar".
    let new_is_inside_old = new_abs_path
        .strip_prefix(old_abs_path.as_str())
        .is_some_and(|rest| rest.starts_with('/') || old_abs_path.ends_with('/'));
    if new_is_inside_old {
        return_errno_with_message!(
            Errno::EINVAL,
            "newpath contains a path prefix of the oldpath"
        );
    }

    old_dir_dentry.rename(&old_name, &new_dir_dentry, &new_name)?;

    Ok(SyscallReturn::Return(0))
}
/// Handles the `rename` system call by forwarding to `sys_renameat` with
/// `AT_FDCWD` for both the old and the new directory file descriptor.
pub fn sys_rename(
old_path_addr: Vaddr,
new_path_addr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
self::sys_renameat(AT_FDCWD, old_path_addr, AT_FDCWD, new_path_addr, ctx)
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{
file_table::FileDesc,
fs_resolver::{FsPath, AT_FDCWD},
},
prelude::*,
syscall::constants::MAX_FILENAME_LEN,
};
/// Handles the `rmdir` system call by forwarding to `sys_rmdirat` with
/// `AT_FDCWD` as the directory file descriptor.
pub fn sys_rmdir(path_addr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
self::sys_rmdirat(AT_FDCWD, path_addr, ctx)
}
/// Removes the directory named by the path at `path_addr`, resolved relative
/// to `dirfd`.
///
/// Trailing slashes of the final path component are ignored. Returns 0 on
/// success.
///
/// # Errors
///
/// Returns `EBUSY` when the path is exactly `/`. Lookup and removal errors
/// are propagated.
pub(super) fn sys_rmdirat(
    dirfd: FileDesc,
    path_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let user_space = ctx.get_user_space();
    let path = user_space.read_cstring(path_addr, MAX_FILENAME_LEN)?;
    debug!("dirfd = {}, path_addr = {:?}", dirfd, path);

    let path_str = path.to_string_lossy();
    if path_str == "/" {
        return_errno_with_message!(Errno::EBUSY, "is root directory");
    }

    // Split the path into the parent directory and the entry to remove.
    let fs_path = FsPath::new(dirfd, path_str.as_ref())?;
    let (dir_dentry, name) = ctx.process.fs().read().lookup_dir_and_base_name(&fs_path)?;

    let entry_name = name.trim_end_matches('/');
    dir_dentry.rmdir(entry_name)?;

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,41 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::signal::{c_types::sigaction_t, sig_action::SigAction, sig_num::SigNum},
};
pub fn sys_rt_sigaction(
sig_num: u8,
sig_action_addr: Vaddr,
old_sig_action_addr: Vaddr,
sigset_size: u64,
ctx: &Context,
) -> Result<SyscallReturn> {
let sig_num = SigNum::try_from(sig_num)?;
debug!(
"signal = {}, sig_action_addr = 0x{:x}, old_sig_action_addr = 0x{:x}, sigset_size = {}",
sig_num.sig_name(),
sig_action_addr,
old_sig_action_addr,
sigset_size
);
let mut sig_dispositions = ctx.process.sig_dispositions().lock();
let old_action = sig_dispositions.get(sig_num);
let old_action_c = old_action.as_c_type();
if old_sig_action_addr != 0 {
ctx.get_user_space()
.write_val(old_sig_action_addr, &old_action_c)?;
}
if sig_action_addr != 0 {
let sig_action_c = ctx
.get_user_space()
.read_val::<sigaction_t>(sig_action_addr)?;
let sig_action = SigAction::try_from(sig_action_c).unwrap();
trace!("sig action = {:?}", sig_action);
sig_dispositions.set(sig_num, sig_action);
}
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,34 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use super::SyscallReturn;
use crate::prelude::*;
/// Handles the `rt_sigpending` system call.
///
/// Validates `sigset_size` and then delegates to [`do_rt_sigpending`], which
/// writes the result to `u_set_ptr`.
///
/// # Errors
///
/// Returns `EINVAL` if `sigset_size` is not 8, and propagates any error from
/// [`do_rt_sigpending`].
pub fn sys_rt_sigpending(
    u_set_ptr: Vaddr,
    sigset_size: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "u_set_ptr = 0x{:x}, sigset_size = {}",
        u_set_ptr, sigset_size
    );

    // Only an 8-byte signal set is accepted.
    const EXPECTED_SIGSET_SIZE: usize = 8;
    if sigset_size != EXPECTED_SIGSET_SIZE {
        return_errno_with_message!(Errno::EINVAL, "sigset size is not equal to 8");
    }

    do_rt_sigpending(u_set_ptr, ctx)?;
    Ok(SyscallReturn::Return(0))
}
/// Writes to `set_ptr` the intersection of the calling thread's signal mask
/// and its pending signals, encoded as a `u64`.
///
/// # Errors
///
/// Propagates any error from writing to user memory.
fn do_rt_sigpending(set_ptr: Vaddr, ctx: &Context) -> Result<()> {
    let blocked = ctx.posix_thread.sig_mask().load(Ordering::Relaxed);
    let pending = ctx.posix_thread.sig_pending();
    let blocked_and_pending = u64::from(blocked & pending);

    ctx.get_user_space()
        .write_val(set_ptr, &blocked_and_pending)?;
    Ok(())
}

View File

@ -0,0 +1,74 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use super::SyscallReturn;
use crate::{
prelude::*,
process::signal::{
constants::{SIGKILL, SIGSTOP},
sig_mask::SigMask,
},
};
pub fn sys_rt_sigprocmask(
how: u32,
set_ptr: Vaddr,
oldset_ptr: Vaddr,
sigset_size: usize,
ctx: &Context,
) -> Result<SyscallReturn> {
let mask_op = MaskOp::try_from(how)?;
debug!(
"mask op = {:?}, set_ptr = 0x{:x}, oldset_ptr = 0x{:x}, sigset_size = {}",
mask_op, set_ptr, oldset_ptr, sigset_size
);
if sigset_size != 8 {
error!("sigset size is not equal to 8");
}
do_rt_sigprocmask(mask_op, set_ptr, oldset_ptr, ctx)?;
Ok(SyscallReturn::Return(0))
}
/// Reads and/or updates the calling thread's signal mask.
///
/// If `oldset_ptr` is non-zero, the current mask is written there. If
/// `set_ptr` is non-zero, a `SigMask` is read from it and combined with the
/// current mask according to `mask_op`:
///
/// - `Block`: the new mask is the old mask plus the given set;
/// - `Unblock`: the new mask is the old mask minus the given set;
/// - `SetMask`: the new mask is the given set.
///
/// `SIGKILL` and `SIGSTOP` are removed from the given set for both `Block` and
/// `SetMask`, so neither operation can block them.
///
/// # Errors
///
/// Propagates any error from accessing user memory.
fn do_rt_sigprocmask(
    mask_op: MaskOp,
    set_ptr: Vaddr,
    oldset_ptr: Vaddr,
    ctx: &Context,
) -> Result<()> {
    let old_sig_mask_value = ctx.posix_thread.sig_mask().load(Ordering::Relaxed);
    debug!("old sig mask value: 0x{:x}", old_sig_mask_value);
    if oldset_ptr != 0 {
        ctx.get_user_space()
            .write_val(oldset_ptr, &old_sig_mask_value)?;
    }

    let sig_mask_ref = ctx.posix_thread.sig_mask();
    if set_ptr != 0 {
        let mut read_mask = ctx.get_user_space().read_val::<SigMask>(set_ptr)?;

        // According to man pages, "it is not possible to block SIGKILL or SIGSTOP.
        // Attempts to do so are silently ignored." This must hold for every
        // operation that can add signals to the mask, i.e. SetMask as well as
        // Block.
        if matches!(mask_op, MaskOp::Block | MaskOp::SetMask) {
            read_mask -= SIGKILL;
            read_mask -= SIGSTOP;
        }

        let new_mask = match mask_op {
            MaskOp::Block => old_sig_mask_value + read_mask,
            MaskOp::Unblock => old_sig_mask_value - read_mask,
            MaskOp::SetMask => read_mask,
        };
        sig_mask_ref.store(new_mask, Ordering::Relaxed);
    }
    debug!("new set = {:x?}", sig_mask_ref.load(Ordering::Relaxed));
    Ok(())
}
/// The operation selected by the `how` argument of `rt_sigprocmask`.
///
/// The discriminants are the raw `u32` values accepted from user space.
#[derive(Debug, Clone, Copy, PartialEq, Eq, TryFromInt)]
#[repr(u32)]
pub enum MaskOp {
/// Add the given set to the current signal mask.
Block = 0,
/// Remove the given set from the current signal mask.
Unblock = 1,
/// Replace the current signal mask with the given set.
SetMask = 2,
}

View File

@ -0,0 +1,58 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use ostd::{cpu::UserContext, user::UserContextApi};
use super::SyscallReturn;
use crate::{prelude::*, process::signal::c_types::ucontext_t};
pub fn sys_rt_sigreturn(ctx: &Context, user_ctx: &mut UserContext) -> Result<SyscallReturn> {
let Context {
process: _,
posix_thread,
thread: _,
task: _,
} = ctx;
let mut sig_context = posix_thread.sig_context().lock();
if (*sig_context).is_none() {
return_errno_with_message!(Errno::EINVAL, "sigreturn should not been called");
}
let sig_context_addr = sig_context.unwrap();
// FIXME: This assertion is not always true, if RESTORER flag is not presented.
// In this case, we will put restorer code on user stack, then the assertion will fail.
// However, for most glibc applications, the restorer codes is provided by glibc and RESTORER flag is set.
debug_assert!(sig_context_addr == user_ctx.stack_pointer() as Vaddr);
let ucontext = ctx
.get_user_space()
.read_val::<ucontext_t>(sig_context_addr)?;
// If the sig stack is active and used by current handler, decrease handler counter.
if let Some(sig_stack) = posix_thread.sig_stack().lock().as_mut() {
let rsp = user_ctx.stack_pointer();
if rsp >= sig_stack.base() && rsp <= sig_stack.base() + sig_stack.size() {
sig_stack.decrease_handler_counter();
}
}
// Set previous ucontext address
if ucontext.uc_link == 0 {
*sig_context = None;
} else {
*sig_context = Some(ucontext.uc_link);
};
ucontext
.uc_mcontext
.inner
.gp_regs
.copy_to_raw(user_ctx.general_regs_mut());
// unblock sig mask
let sig_mask = ucontext.uc_sigmask;
let old_mask = posix_thread.sig_mask().load(Ordering::Relaxed);
posix_thread
.sig_mask()
.store(old_mask - sig_mask, Ordering::Relaxed);
Ok(SyscallReturn::NoReturn)
}

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
prelude::*,
process::signal::{
constants::{SIGKILL, SIGSTOP},
sig_mask::SigMask,
Pauser,
},
};
/// Handles the `rt_sigsuspend` system call.
///
/// Reads a `SigMask` from `sigmask_addr`, removes `SIGKILL` and `SIGSTOP`
/// from it, and pauses with that mask until the pause is interrupted.
///
/// # Errors
///
/// Returns `EINVAL` if `sigmask_size` differs from the size of `SigMask`,
/// propagates any error from reading user memory, and otherwise returns the
/// error produced by the interrupted pause.
pub fn sys_rt_sigsuspend(
    sigmask_addr: Vaddr,
    sigmask_size: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "sigmask_addr = 0x{:x}, sigmask_size = {}",
        sigmask_addr, sigmask_size
    );

    // `sigmask_size` is user-controlled, so a mismatch is reported as an
    // error. Asserting on it would let user space panic a debug kernel.
    if sigmask_size != core::mem::size_of::<SigMask>() {
        return_errno_with_message!(Errno::EINVAL, "invalid sigmask size");
    }

    let sigmask = {
        let mut mask: SigMask = ctx.get_user_space().read_val(sigmask_addr)?;
        // It is not possible to block SIGKILL or SIGSTOP,
        // specifying these signals in mask has no effect.
        mask -= SIGKILL;
        mask -= SIGSTOP;
        mask
    };

    // Pause until receiving any signal
    let pauser = Pauser::new_with_mask(sigmask);
    pauser.pause_until(|| None::<()>)?;

    // This syscall should always return `Err(EINTR)`. This path should never be reached.
    unreachable!("rt_sigsuspend always return EINTR");
}

View File

@ -0,0 +1,70 @@
// SPDX-License-Identifier: MPL-2.0
use core::{cmp, mem};
use super::SyscallReturn;
use crate::{
prelude::*,
process::{process_table, Pid},
};
/// Returns the number of CPUs reported to user space.
///
/// This is currently a fixed placeholder value.
fn get_num_cpus() -> usize {
    // TODO: Properly determine the number of available CPUs
    // This could be through a system configuration query.
    const PLACEHOLDER_CPU_COUNT: usize = 1;
    PLACEHOLDER_CPU_COUNT
}
pub fn sys_sched_getaffinity(
pid: Pid,
cpuset_size: usize,
cpu_set_ptr: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
let num_cpus = get_num_cpus();
if cpuset_size < core::mem::size_of::<cpu_set_t>() {
return Err(Error::with_message(Errno::EINVAL, "invalid cpuset size"));
}
match pid {
0 => {
// TODO: Get the current thread's CPU affinity
// Placeholder for future implementation.
}
_ => {
match process_table::get_process(pid) {
Some(_process) => { /* Placeholder if process-specific logic needed */ }
None => return Err(Error::with_message(Errno::ESRCH, "process does not exist")),
}
}
}
let dummy_cpu_set = cpu_set_t::new(num_cpus);
ctx.get_user_space()
.write_val(cpu_set_ptr, &dummy_cpu_set)?;
Ok(SyscallReturn::Return(0))
}
/// Max number of CPU bits.
const CPU_SETSIZE: usize = 1024;
/// Number of CPU bits stored in one `usize` word.
const __NCPUBITS: usize = 8 * mem::size_of::<usize>();

/// A CPU bitmask of `CPU_SETSIZE` bits, stored as an array of `usize` words.
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C, packed)]
struct cpu_set_t {
    __bits: [usize; CPU_SETSIZE / __NCPUBITS],
}

impl cpu_set_t {
    /// Builds a set in which the bits for CPUs `0..num_cpus` are set.
    ///
    /// `num_cpus` is capped at `CPU_SETSIZE`.
    fn new(num_cpus: usize) -> Self {
        let cpu_count = cmp::min(num_cpus, CPU_SETSIZE);
        let mut words = [0usize; CPU_SETSIZE / __NCPUBITS];
        (0..cpu_count).for_each(|cpu| {
            words[cpu / __NCPUBITS] |= 1 << (cpu % __NCPUBITS);
        });
        Self { __bits: words }
    }
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{prelude::*, thread::Thread};
/// Handles the `sched_yield` system call.
///
/// Calls `Thread::yield_now()` and then reports success with a return value
/// of 0. The context argument is unused.
pub fn sys_sched_yield(_ctx: &Context) -> Result<SyscallReturn> {
Thread::yield_now();
Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,240 @@
// SPDX-License-Identifier: MPL-2.0
use core::time::Duration;
use super::{
poll::{do_poll, PollFd},
SyscallReturn,
};
use crate::{events::IoEvents, fs::file_table::FileDesc, prelude::*, time::timeval_t};
/// Handles the `select` system call.
///
/// A zero `timeval_addr` means no timeout. Otherwise a `timeval_t` is read
/// from user memory and converted into a `Duration`. The rest of the work is
/// done by [`do_sys_select`].
///
/// # Errors
///
/// Propagates any error from reading the timeout or from [`do_sys_select`].
pub fn sys_select(
    nfds: FileDesc,
    readfds_addr: Vaddr,
    writefds_addr: Vaddr,
    exceptfds_addr: Vaddr,
    timeval_addr: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let timeout = match timeval_addr {
        0 => None,
        addr => {
            let timeval = ctx.get_user_space().read_val::<timeval_t>(addr)?;
            Some(Duration::from(timeval))
        }
    };

    do_sys_select(
        nfds,
        readfds_addr,
        writefds_addr,
        exceptfds_addr,
        timeout,
        ctx,
    )
}
/// Shared implementation of `select`, taking an already-decoded timeout.
///
/// Each non-zero fd-set address is read from user memory, the three sets are
/// passed to [`do_select`], and every set that was read is written back to
/// its original address. The return value is the count reported by
/// [`do_select`].
///
/// # Errors
///
/// Returns `EINVAL` if `nfds` is negative or larger than `FD_SETSIZE`, and
/// propagates any error from accessing user memory or from [`do_select`].
pub fn do_sys_select(
    nfds: FileDesc,
    readfds_addr: Vaddr,
    writefds_addr: Vaddr,
    exceptfds_addr: Vaddr,
    timeout: Option<Duration>,
    ctx: &Context,
) -> Result<SyscallReturn> {
    if nfds < 0 || nfds as usize > FD_SETSIZE {
        return_errno_with_message!(Errno::EINVAL, "nfds is negative or exceeds the FD_SETSIZE");
    }

    let user_space = ctx.get_user_space();

    // A null address means the caller is not interested in that set.
    let load_fdset = |addr: Vaddr| -> Result<Option<FdSet>> {
        if addr == 0 {
            return Ok(None);
        }
        Ok(Some(user_space.read_val::<FdSet>(addr)?))
    };
    let mut readfds = load_fdset(readfds_addr)?;
    let mut writefds = load_fdset(writefds_addr)?;
    let mut exceptfds = load_fdset(exceptfds_addr)?;

    debug!(
        "nfds = {}, readfds = {:?}, writefds = {:?}, exceptfds = {:?}, timeout = {:?}",
        nfds, readfds, writefds, exceptfds, timeout
    );

    let num_revents = do_select(
        nfds,
        readfds.as_mut(),
        writefds.as_mut(),
        exceptfds.as_mut(),
        timeout,
        ctx,
    )?;

    // FIXME: The Linux select() and pselect6() system call
    // modifies its timeout argument to reflect the amount of time not slept.
    // However, the glibc wrapper function hides this behavior.
    // Maybe we should follow the Linux behavior.

    // Write the result sets back in read/write/except order, stopping at the
    // first failure.
    let results = [
        (readfds_addr, readfds),
        (writefds_addr, writefds),
        (exceptfds_addr, exceptfds),
    ];
    for (addr, fdset) in results {
        let Some(fdset) = fdset else {
            continue;
        };
        debug_assert!(addr != 0);
        user_space.write_val(addr, &fdset)?;
    }

    Ok(SyscallReturn::Return(num_revents as _))
}
/// Implements `select` on top of `do_poll`.
///
/// The three optional sets are both input and output. For every fd below
/// `nfds` that is set in at least one input set, a `PollFd` is built with the
/// matching events. The sets are then cleared, `do_poll` is invoked, and each
/// reported event is translated back into bits in the output sets.
///
/// A bit is only ever set in an output set if the same fd was set in the
/// corresponding input set. Events that poll reports unconditionally (such as
/// hang-up or error) therefore cannot mark an fd as ready in a set in which
/// the caller never requested it.
///
/// Returns the total number of bits set across the three output sets.
///
/// # Errors
///
/// Propagates any error from `do_poll`.
fn do_select(
    nfds: FileDesc,
    mut readfds: Option<&mut FdSet>,
    mut writefds: Option<&mut FdSet>,
    mut exceptfds: Option<&mut FdSet>,
    timeout: Option<Duration>,
    ctx: &Context,
) -> Result<usize> {
    // Snapshot the requested sets: the originals are cleared below and reused
    // for output, but the request is still needed to filter the results.
    let wanted_read: Option<FdSet> = readfds.as_deref().copied();
    let wanted_write: Option<FdSet> = writefds.as_deref().copied();
    let wanted_except: Option<FdSet> = exceptfds.as_deref().copied();
    let is_wanted = |set: &Option<FdSet>, fd: FileDesc| -> bool {
        set.as_ref().map_or(false, |fds| fds.is_set(fd))
    };

    // Convert the FdSet to an array of PollFd
    let poll_fds = {
        let mut poll_fds = Vec::with_capacity(nfds as usize);
        for fd in 0..nfds {
            let events = convert_rwe_to_events(
                is_wanted(&wanted_read, fd),
                is_wanted(&wanted_write, fd),
                is_wanted(&wanted_except, fd),
            );
            if events.is_empty() {
                continue;
            }
            poll_fds.push(PollFd::new(Some(fd), events));
        }
        poll_fds
    };

    // Clear up the three input fd_set's, which will be used for output as well
    if let Some(fds) = readfds.as_mut() {
        fds.clear();
    }
    if let Some(fds) = writefds.as_mut() {
        fds.clear();
    }
    if let Some(fds) = exceptfds.as_mut() {
        fds.clear();
    }

    // Do the poll syscall that is equivalent to the select syscall
    let num_revents = do_poll(&poll_fds, timeout, ctx)?;
    if num_revents == 0 {
        return Ok(0);
    }

    // Convert poll's pollfd results to select's fd_set results
    let mut total_revents = 0;
    for poll_fd in &poll_fds {
        // Every `PollFd` above was constructed with `Some(fd)`.
        let fd = poll_fd.fd().unwrap();
        let revents = poll_fd.revents().get();
        let (readable, writable, except) = convert_events_to_rwe(&revents);

        if readable && is_wanted(&wanted_read, fd) {
            if let Some(fds) = readfds.as_mut() {
                fds.set(fd)?;
                total_revents += 1;
            }
        }
        if writable && is_wanted(&wanted_write, fd) {
            if let Some(fds) = writefds.as_mut() {
                fds.set(fd)?;
                total_revents += 1;
            }
        }
        if except && is_wanted(&wanted_except, fd) {
            if let Some(fds) = exceptfds.as_mut() {
                fds.set(fd)?;
                total_revents += 1;
            }
        }
    }
    Ok(total_revents)
}
// Convert select's rwe input to poll's IoEvents input according to Linux's
// behavior: readable maps to IN, writable to OUT, and except to PRI.
fn convert_rwe_to_events(readable: bool, writable: bool, except: bool) -> IoEvents {
    [
        (readable, IoEvents::IN),
        (writable, IoEvents::OUT),
        (except, IoEvents::PRI),
    ]
    .into_iter()
    .filter(|(requested, _)| *requested)
    .fold(IoEvents::empty(), |acc, (_, event)| acc | event)
}
// Convert poll's IoEvents results to select's rwe results according to Linux's
// behavior. The result is `(readable, writable, except)`.
fn convert_events_to_rwe(events: &IoEvents) -> (bool, bool, bool) {
    // Hang-up and error count as readable; error also counts as writable.
    let read_mask = IoEvents::IN | IoEvents::HUP | IoEvents::ERR;
    let write_mask = IoEvents::OUT | IoEvents::ERR;

    (
        events.intersects(read_mask),
        events.intersects(write_mask),
        events.contains(IoEvents::PRI),
    )
}
const FD_SETSIZE: usize = 1024;
const USIZE_BITS: usize = core::mem::size_of::<usize>() * 8;
#[derive(Debug, Clone, Copy, Pod)]
#[repr(C)]
struct FdSet {
fds_bits: [usize; FD_SETSIZE / USIZE_BITS],
}
impl FdSet {
/// Equivalent to FD_SET.
pub fn set(&mut self, fd: FileDesc) -> Result<()> {
let fd = fd as usize;
if fd >= FD_SETSIZE {
return_errno_with_message!(Errno::EINVAL, "fd exceeds FD_SETSIZE");
}
self.fds_bits[fd / USIZE_BITS] |= 1 << (fd % USIZE_BITS);
Ok(())
}
/// Equivalent to FD_CLR.
#[allow(unused)]
pub fn unset(&mut self, fd: FileDesc) -> Result<()> {
let fd = fd as usize;
if fd >= FD_SETSIZE {
return_errno_with_message!(Errno::EINVAL, "fd exceeds FD_SETSIZE");
}
self.fds_bits[fd / USIZE_BITS] &= !(1 << (fd % USIZE_BITS));
Ok(())
}
/// Equivalent to FD_ISSET.
pub fn is_set(&self, fd: FileDesc) -> bool {
let fd = fd as usize;
if fd >= FD_SETSIZE {
return false;
}
(self.fds_bits[fd / USIZE_BITS] & (1 << (fd % USIZE_BITS))) != 0
}
/// Equivalent to FD_ZERO.
pub fn clear(&mut self) {
for slot in self.fds_bits.iter_mut() {
*slot = 0;
}
}
}

View File

@ -0,0 +1,121 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
ipc::{
semaphore::system_v::{
sem_set::{check_sem, sem_sets, sem_sets_mut, SemaphoreSet},
PermissionMode,
},
IpcControlCmd,
},
prelude::*,
process::Pid,
};
pub fn sys_semctl(
semid: i32,
semnum: i32,
cmd: i32,
arg: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
if semid <= 0 || semnum < 0 {
return_errno!(Errno::EINVAL)
}
let cmd = IpcControlCmd::try_from(cmd)?;
debug!(
"[sys_semctl] semid = {}, semnum = {}, cmd = {:?}, arg = {:x}",
semid, semnum, cmd, arg
);
match cmd {
IpcControlCmd::IPC_RMID => {
let mut sem_sets_mut = sem_sets_mut();
let sem_set = sem_sets_mut.get(&semid).ok_or(Error::new(Errno::EINVAL))?;
let euid = ctx.posix_thread.credentials().euid();
let permission = sem_set.permission();
let can_removed = (euid == permission.uid()) || (euid == permission.cuid());
if !can_removed {
return_errno!(Errno::EPERM);
}
sem_sets_mut
.remove(&semid)
.ok_or(Error::new(Errno::EINVAL))?;
}
IpcControlCmd::SEM_SETVAL => {
// In setval, arg is parse as i32
let val = arg as i32;
if val < 0 {
return_errno!(Errno::ERANGE);
}
check_and_ctl(semid, PermissionMode::ALTER, |sem_set| {
let sem = sem_set
.get(semnum as usize)
.ok_or(Error::new(Errno::EINVAL))?;
sem.set_val(val)?;
sem_set.update_ctime();
Ok(())
})?;
}
IpcControlCmd::SEM_GETVAL => {
let val: i32 = check_and_ctl(semid, PermissionMode::READ, |sem_set| {
Ok(sem_set
.get(semnum as usize)
.ok_or(Error::new(Errno::EINVAL))?
.val())
})?;
return Ok(SyscallReturn::Return(val as isize));
}
IpcControlCmd::SEM_GETPID => {
let pid: Pid = check_and_ctl(semid, PermissionMode::READ, |sem_set| {
Ok(sem_set
.get(semnum as usize)
.ok_or(Error::new(Errno::EINVAL))?
.last_modified_pid())
})?;
return Ok(SyscallReturn::Return(pid as isize));
}
IpcControlCmd::SEM_GETZCNT => {
let cnt: usize = check_and_ctl(semid, PermissionMode::READ, |sem_set| {
Ok(sem_set
.get(semnum as usize)
.ok_or(Error::new(Errno::EINVAL))?
.pending_zero_count())
})?;
return Ok(SyscallReturn::Return(cnt as isize));
}
IpcControlCmd::SEM_GETNCNT => {
let cnt: usize = check_and_ctl(semid, PermissionMode::READ, |sem_set| {
Ok(sem_set
.get(semnum as usize)
.ok_or(Error::new(Errno::EINVAL))?
.pending_alter_count())
})?;
return Ok(SyscallReturn::Return(cnt as isize));
}
_ => todo!("Need to support {:?} in SYS_SEMCTL", cmd),
}
Ok(SyscallReturn::Return(0))
}
fn check_and_ctl<T, F>(semid: i32, permission: PermissionMode, ctl_func: F) -> Result<T>
where
F: FnOnce(&SemaphoreSet) -> Result<T>,
{
check_sem(semid, None, permission)?;
let sem_sets = sem_sets();
let sem_set = sem_sets.get(&semid).ok_or(Error::new(Errno::EINVAL))?;
ctl_func.call_once((sem_set,))
}

View File

@ -0,0 +1,69 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
ipc::{
semaphore::system_v::{
sem_set::{check_sem, create_sem_set, create_sem_set_with_id, SEMMSL},
PermissionMode,
},
IpcFlags,
},
prelude::*,
};
/// Handles the `semget` system call.
///
/// With `key == 0` a new semaphore set is always created and its identifier
/// returned. Otherwise the set identified by `key` is checked for
/// `ALTER | READ` permission. If that check fails with `ENOENT` and
/// `IPC_CREAT` is set, a set with that id is created. On this path `key`
/// itself is returned.
///
/// The low nine bits of `semflags` are used as the permission mode of a
/// newly created set.
///
/// # Errors
///
/// Returns `EINVAL` if `nsems` is negative or above `SEMMSL`, if `key` is
/// negative, or if a set must be created with `nsems == 0`; `EEXIST` if the
/// set exists and both `IPC_CREAT` and `IPC_EXCL` are set. Other errors from
/// the permission check or from set creation are propagated.
pub fn sys_semget(key: i32, nsems: i32, semflags: i32, ctx: &Context) -> Result<SyscallReturn> {
    // Create a new semaphore set directly
    const IPC_NEW: i32 = 0;

    if nsems < 0 || nsems as usize > SEMMSL {
        return_errno!(Errno::EINVAL);
    }
    if key < 0 {
        return_errno!(Errno::EINVAL);
    }

    let raw_flags = semflags as u32;
    let flags = IpcFlags::from_bits_truncate(raw_flags);
    let mode: u16 = (raw_flags & 0x1FF) as u16;
    let nsems = nsems as usize;
    let credentials = ctx.posix_thread.credentials();
    debug!(
        "[sys_semget] key = {}, nsems = {}, flags = {:?}",
        key, nsems, semflags
    );

    if key == IPC_NEW {
        if nsems == 0 {
            return_errno!(Errno::EINVAL);
        }
        let new_id = create_sem_set(nsems, mode, credentials)?;
        return Ok(SyscallReturn::Return(new_id as isize));
    }

    // Get a semaphore set, and create if necessary
    let lookup = check_sem(
        key,
        Some(nsems),
        PermissionMode::ALTER | PermissionMode::READ,
    );
    if let Err(err) = lookup {
        let need_create = err.error() == Errno::ENOENT && flags.contains(IpcFlags::IPC_CREAT);
        if !need_create {
            return Err(err);
        }
        if nsems == 0 {
            return_errno!(Errno::EINVAL);
        }
        create_sem_set_with_id(key, nsems, mode, credentials)?;
    } else if flags.contains(IpcFlags::IPC_CREAT | IpcFlags::IPC_EXCL) {
        return_errno!(Errno::EEXIST);
    }

    Ok(SyscallReturn::Return(key as isize))
}

View File

@ -0,0 +1,71 @@
// SPDX-License-Identifier: MPL-2.0
use core::time::Duration;
use super::SyscallReturn;
use crate::{
ipc::semaphore::system_v::{
sem::{sem_op, SemBuf},
sem_set::{check_sem, SEMOPM},
PermissionMode,
},
prelude::*,
time::timespec_t,
};
pub fn sys_semop(sem_id: i32, tsops: Vaddr, nsops: usize, _ctx: &Context) -> Result<SyscallReturn> {
debug!(
"[sys_semop] sem_id = {:?}, tsops_vaddr = {:x?}, nsops = {:?}",
sem_id, tsops, nsops
);
do_sys_semtimedop(sem_id, tsops, nsops, None)
}
/// Handles the `semtimedop` system call.
///
/// A zero `timeout` address means no timeout. Otherwise a `timespec_t` is
/// read from user memory and converted into a `Duration`. The operations are
/// then carried out by [`do_sys_semtimedop`].
///
/// # Errors
///
/// Propagates errors from reading or converting the timeout and from
/// [`do_sys_semtimedop`].
pub fn sys_semtimedop(
    sem_id: i32,
    tsops: Vaddr,
    nsops: usize,
    timeout: Vaddr,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "[sys_semtimedop] sem_id = {:?}, tsops_vaddr = {:x?}, nsops = {:?}, timeout_vaddr = {:x?}",
        sem_id, tsops, nsops, timeout
    );

    let timeout = match timeout {
        0 => None,
        addr => {
            let timespec = ctx.get_user_space().read_val::<timespec_t>(addr)?;
            Some(Duration::try_from(timespec)?)
        }
    };

    do_sys_semtimedop(sem_id, tsops, nsops, timeout)
}
/// Performs `nsops` semaphore operations read from the user array at `tsops`.
///
/// The `SemBuf` entries are read and applied one at a time, in order. For an
/// entry whose operation value is non-zero, `ALTER` permission on the set is
/// checked before the operation is applied.
///
/// # Errors
///
/// Returns `EINVAL` if `sem_id` is not positive or `nsops` is zero, and
/// `E2BIG` if `nsops` exceeds `SEMOPM`. Errors from reading user memory, the
/// permission check, or `sem_op` stop processing and are propagated.
fn do_sys_semtimedop(
    sem_id: i32,
    tsops: Vaddr,
    nsops: usize,
    timeout: Option<Duration>,
) -> Result<SyscallReturn> {
    if sem_id <= 0 || nsops == 0 {
        return_errno!(Errno::EINVAL);
    }
    if nsops > SEMOPM {
        return_errno!(Errno::E2BIG);
    }

    let stride = size_of::<SemBuf>();
    for entry_addr in (0..nsops).map(|index| tsops + stride * index) {
        let sem_buf = CurrentUserSpace::get().read_val::<SemBuf>(entry_addr)?;

        if sem_buf.sem_op() != 0 {
            check_sem(sem_id, None, PermissionMode::ALTER)?;
        }

        sem_op(sem_id, sem_buf, timeout)?;
    }

    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,117 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{fs::file_table::FileDesc, prelude::*};
/// Handles the `sendfile` system call.
///
/// Copies up to `count` bytes (capped at `MAX_COUNT`) from `in_fd` to
/// `out_fd` through a page-sized bounce buffer.
///
/// If `offset_ptr` is non-zero, data is read from `in_fd` starting at the
/// offset stored there, using positional reads. On return the offset is
/// advanced by the number of bytes actually written to `out_fd` and stored
/// back to `offset_ptr`. If `offset_ptr` is zero, data is read with
/// `read_bytes`, i.e. from `in_fd`'s own file offset.
///
/// Returns the number of bytes written to `out_fd`. Partial transfers are
/// allowed: once some data has been transferred, a later read or write error
/// ends the transfer and the partial count is returned.
///
/// # Errors
///
/// Returns `EINVAL` if the user-provided offset or `count` is negative.
/// Propagates errors from user-memory access, from looking up either file,
/// and from a read or write that fails before any data was transferred.
pub fn sys_sendfile(
    out_fd: FileDesc,
    in_fd: FileDesc,
    offset_ptr: Vaddr,
    count: isize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    trace!("raw offset ptr = 0x{:x}", offset_ptr);

    let offset = if offset_ptr == 0 {
        None
    } else {
        let offset: isize = ctx.get_user_space().read_val(offset_ptr)?;
        if offset < 0 {
            return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
        }
        Some(offset)
    };

    debug!(
        "out_fd = {}, in_fd = {}, offset = {:x?}, count = 0x{:x}",
        out_fd, in_fd, offset, count
    );

    let mut count = if count < 0 {
        return_errno_with_message!(Errno::EINVAL, "count cannot be negative");
    } else {
        count as usize
    };

    let (out_file, in_file) = {
        let file_table = ctx.process.file_table().lock();
        let out_file = file_table.get_file(out_fd)?.clone();
        // FIXME: the in_file must support mmap-like operations (i.e., it cannot be a socket).
        let in_file = file_table.get_file(in_fd)?.clone();
        (out_file, in_file)
    };

    // sendfile can send at most `MAX_COUNT` bytes
    const MAX_COUNT: usize = 0x7fff_f000;
    if count > MAX_COUNT {
        count = MAX_COUNT;
    }

    const BUFFER_SIZE: usize = PAGE_SIZE;
    let mut buffer = vec![0u8; BUFFER_SIZE].into_boxed_slice();
    let mut total_len = 0;
    let mut offset = offset.map(|offset| offset as usize);

    while total_len < count {
        // The offset decides how to read from `in_file`.
        // If offset is `Some(_)`, the data will be read from the given offset,
        // and after reading, the file offset of `in_file` will remain unchanged.
        // If offset is `None`, the data will be read from the file offset,
        // and the file offset of `in_file` is adjusted
        // to reflect the number of bytes read from `in_file`.
        let max_readlen = buffer.len().min(count - total_len);

        // Read from `in_file`
        let read_res = match offset {
            Some(position) => in_file.read_bytes_at(position, &mut buffer[..max_readlen]),
            None => in_file.read_bytes(&mut buffer[..max_readlen]),
        };

        let read_len = match read_res {
            Ok(len) => len,
            Err(e) => {
                if total_len > 0 {
                    warn!("error occurs when trying to read file: {:?}", e);
                    break;
                }
                return Err(e);
            }
        };

        if read_len == 0 {
            break;
        }

        // Note: `sendfile` allows sending partial data,
        // so short reads and short writes are all acceptable
        let write_res = out_file.write_bytes(&buffer[..read_len]);

        match write_res {
            Ok(len) => {
                total_len += len;
                // Advance the explicit offset by what was actually written,
                // not by what was read. After a short write the bytes that
                // were read but not sent must not be skipped.
                // NOTE(review): when no explicit offset is given, `in_file`'s
                // own offset has already moved by `read_len`; it is not
                // rewound here after a short write.
                if let Some(position) = offset.as_mut() {
                    *position += len;
                }
                if len < BUFFER_SIZE {
                    break;
                }
            }
            Err(e) => {
                if total_len > 0 {
                    warn!("error occurs when trying to write file: {:?}", e);
                    break;
                }
                return Err(e);
            }
        }
    }

    if let Some(offset) = offset {
        ctx.get_user_space()
            .write_val(offset_ptr, &(offset as isize))?;
    }

    Ok(SyscallReturn::Return(total_len as _))
}

View File

@ -0,0 +1,45 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
net::socket::{MessageHeader, SendRecvFlags},
prelude::*,
util::net::{get_socket_from_fd, CUserMsgHdr},
};
/// Handles the `sendmsg` system call.
///
/// Reads a `CUserMsgHdr` from user memory, resolves `sockfd` to a socket,
/// reads the destination address and the I/O vectors described by the
/// header, and sends them via the socket's `sendmsg`. Control messages are
/// not supported: a non-zero `msg_control` only produces a warning.
///
/// Returns the number of bytes reported by the socket.
///
/// # Errors
///
/// Propagates errors from reading user memory, resolving the socket, and the
/// send itself.
pub fn sys_sendmsg(
    sockfd: FileDesc,
    user_msghdr_ptr: Vaddr,
    flags: i32,
    ctx: &Context,
) -> Result<SyscallReturn> {
    let msghdr: CUserMsgHdr = ctx.get_user_space().read_val(user_msghdr_ptr)?;
    let flags = SendRecvFlags::from_bits_truncate(flags);
    debug!(
        "sockfd = {}, user_msghdr = {:x?}, flags = {:?}",
        sockfd, msghdr, flags
    );

    let socket = get_socket_from_fd(sockfd)?;

    let addr = msghdr.read_socket_addr_from_user()?;
    let io_vecs = msghdr.copy_iovs_from_user()?;
    if msghdr.msg_control != 0 {
        // TODO: support sending control message
        warn!("control message is not supported now");
    }
    let message_header = MessageHeader::new(addr, None);

    let total_bytes = socket.sendmsg(&io_vecs, message_header, flags)?;
    Ok(SyscallReturn::Return(total_bytes as _))
}

View File

@ -0,0 +1,40 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::file_table::FileDesc,
net::socket::{MessageHeader, SendRecvFlags},
prelude::*,
util::{
net::{get_socket_from_fd, read_socket_addr_from_user},
IoVec,
},
};
/// Handles the `sendto` system call.
///
/// Sends the user buffer `[buf, buf + len)` on the socket behind `sockfd` as
/// a single I/O vector. A non-zero `dest_addr` is read as a socket address of
/// length `addrlen`; a zero `dest_addr` means no explicit destination.
///
/// Returns the number of bytes reported by the socket.
///
/// # Errors
///
/// Propagates errors from reading the address, resolving the socket, and the
/// send itself.
pub fn sys_sendto(
    sockfd: FileDesc,
    buf: Vaddr,
    len: usize,
    flags: i32,
    dest_addr: Vaddr,
    addrlen: usize,
    _ctx: &Context,
) -> Result<SyscallReturn> {
    let flags = SendRecvFlags::from_bits_truncate(flags);
    let socket_addr = match dest_addr {
        0 => None,
        addr => Some(read_socket_addr_from_user(addr, addrlen)?),
    };
    debug!("sockfd = {sockfd}, buf = 0x{buf:x}, len = 0x{len:x}, flags = {flags:?}, socket_addr = {socket_addr:?}");

    let socket = get_socket_from_fd(sockfd)?;

    let payload = [IoVec::new(buf, len)];
    let header = MessageHeader::new(socket_addr, None);
    let send_size = socket.sendmsg(&payload, header, flags)?;

    Ok(SyscallReturn::Return(send_size as _))
}

View File

@ -0,0 +1,140 @@
// SPDX-License-Identifier: MPL-2.0
use core::sync::atomic::Ordering;
use super::SyscallReturn;
use crate::{
prelude::*,
process::{posix_thread::PosixThreadExt, process_table, Pgid, Pid, Process, Uid},
sched::nice::Nice,
};
/// Handles the `setpriority` system call.
///
/// Resolves `which`/`who` to a set of processes and stores the new nice value
/// into each of them. `prio` is clamped to the `i8` range before being turned
/// into a `Nice`.
///
/// # Errors
///
/// Propagates errors from decoding the target and from collecting the
/// matching processes.
pub fn sys_set_priority(which: i32, who: u32, prio: i32, ctx: &Context) -> Result<SyscallReturn> {
    let prio_target = PriorityTarget::new(which, who, ctx)?;

    let clamped_prio = prio.clamp(i8::MIN as i32, i8::MAX as i32) as i8;
    let new_nice = Nice::new(clamped_prio);

    debug!(
        "set_priority prio_target: {:?}, new_nice: {:?}",
        prio_target, new_nice
    );

    let targets = get_processes(prio_target)?;
    targets
        .iter()
        .for_each(|process| process.nice().store(new_nice, Ordering::Relaxed));

    Ok(SyscallReturn::Return(0))
}
/// Handles the `getpriority` system call.
///
/// Resolves `which`/`who` to a set of processes, takes the lowest nice value
/// among them (i.e. the highest priority), and returns `20 - nice`.
///
/// # Errors
///
/// Propagates errors from decoding the target and from collecting the
/// matching processes.
pub fn sys_get_priority(which: i32, who: u32, ctx: &Context) -> Result<SyscallReturn> {
    let prio_target = PriorityTarget::new(which, who, ctx)?;
    debug!("get_priority prio_target: {:?}", prio_target);

    let targets = get_processes(prio_target)?;

    // Returns the highest priority enjoyed by the processes
    let best_nice = targets
        .iter()
        .map(|process| process.nice().load(Ordering::Relaxed))
        .fold(Nice::MAX, |best, candidate| {
            if candidate < best {
                candidate
            } else {
                best
            }
        });

    // The system call returns nice values translated to the range 40 to 1,
    // since a negative return value would be interpreted as an error.
    let highest_prio = 20 - best_nice.to_raw();

    Ok(SyscallReturn::Return(highest_prio as _))
}
fn get_processes(prio_target: PriorityTarget) -> Result<Vec<Arc<Process>>> {
Ok(match prio_target {
PriorityTarget::Process(pid) => {
let process = process_table::get_process(pid).ok_or(Error::new(Errno::ESRCH))?;
vec![process]
}
PriorityTarget::ProcessGroup(pgid) => {
let process_group =
process_table::get_process_group(&pgid).ok_or(Error::new(Errno::ESRCH))?;
let processes: Vec<Arc<Process>> = process_group.lock().iter().cloned().collect();
if processes.is_empty() {
return_errno!(Errno::ESRCH);
}
processes
}
PriorityTarget::User(uid) => {
// Get the processes that are running under the specified user
let processes: Vec<Arc<Process>> = process_table::process_table()
.iter()
.filter(|process| {
let Some(main_thread) = process.main_thread() else {
return false;
};
let Some(posix_thread) = main_thread.as_posix_thread() else {
return false;
};
uid == posix_thread.credentials().ruid()
})
.cloned()
.collect();
if processes.is_empty() {
return_errno!(Errno::ESRCH);
}
processes
}
})
}
#[derive(Debug)]
enum PriorityTarget {
Process(Pid),
ProcessGroup(Pgid),
User(Uid),
}
impl PriorityTarget {
fn new(which: i32, who: u32, ctx: &Context) -> Result<Self> {
let which = Which::try_from(which)
.map_err(|_| Error::with_message(Errno::EINVAL, "invalid which value"))?;
Ok(match which {
Which::PRIO_PROCESS => {
let pid = if who == 0 {
ctx.process.pid()
} else {
who as Pid
};
Self::Process(pid)
}
Which::PRIO_PGRP => {
let pgid = if who == 0 {
ctx.process.pgid()
} else {
who as Pgid
};
Self::ProcessGroup(pgid)
}
Which::PRIO_USER => {
let uid = if who == 0 {
ctx.posix_thread.credentials().ruid()
} else {
Uid::new(who)
};
Self::User(uid)
}
})
}
}
/// The raw `which` argument of `getpriority`/`setpriority`, selecting how
/// `who` is interpreted.
// The variant names are kept in their C spelling, hence the lint exemption.
#[allow(non_camel_case_types)]
#[derive(Clone, Debug, TryFromInt)]
#[repr(i32)]
enum Which {
/// `who` is a process ID.
PRIO_PROCESS = 0,
/// `who` is a process group ID.
PRIO_PGRP = 1,
/// `who` is a user ID.
PRIO_USER = 2,
}

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{prelude::*, process::posix_thread::RobustListHead};
/// Handles the `set_robust_list` system call.
///
/// Reads a `RobustListHead` from `robust_list_head_ptr` and records it as the
/// calling thread's robust list.
///
/// # Errors
///
/// Returns `EINVAL` if `len` differs from the size of `RobustListHead`, and
/// propagates any error from reading user memory.
pub fn sys_set_robust_list(
    robust_list_head_ptr: Vaddr,
    len: usize,
    ctx: &Context,
) -> Result<SyscallReturn> {
    debug!(
        "robust list head ptr: 0x{:x}, len = {}",
        robust_list_head_ptr, len
    );

    let expected_len = core::mem::size_of::<RobustListHead>();
    if len != expected_len {
        return_errno_with_message!(
            Errno::EINVAL,
            "The len is not equal to the size of robust list head"
        );
    }

    let head: RobustListHead = ctx.get_user_space().read_val(robust_list_head_ptr)?;
    debug!("{:x?}", head);

    *ctx.posix_thread.robust_list().lock() = Some(head);
    Ok(SyscallReturn::Return(0))
}

View File

@ -0,0 +1,18 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::prelude::*;
/// Handles the `set_tid_address` system call.
///
/// Stores `tidptr` as the calling thread's `clear_child_tid` pointer,
/// replacing any previous value, and returns the caller's thread ID.
///
/// The "write 0 and wake the futex" step described in the manual applies when
/// a thread with a non-null `clear_child_tid` terminates, not when this
/// system call is made. An already-set pointer is therefore simply
/// overwritten here.
pub fn sys_set_tid_address(tidptr: Vaddr, ctx: &Context) -> Result<SyscallReturn> {
    debug!("tidptr = 0x{:x}", tidptr);

    *ctx.posix_thread.clear_child_tid().lock() = tidptr;

    let tid = ctx.thread.tid();
    Ok(SyscallReturn::Return(tid as _))
}

Some files were not shown because too many files have changed in this diff Show More