From 07fbbcfd8c22459719c9af7eeeb3dee5ad24ba92 Mon Sep 17 00:00:00 2001 From: Jianfeng Jiang Date: Thu, 14 Dec 2023 02:33:23 +0000 Subject: [PATCH] Add syscall clone3 --- docs/src/kernel/linux-compatibility.md | 1 + framework/aster-frame/src/task/task.rs | 1 + kernel/aster-nix/src/process/clone.rs | 64 ++++++++-------- kernel/aster-nix/src/syscall/arch/x86.rs | 3 +- kernel/aster-nix/src/syscall/clone.rs | 94 +++++++++++++++++++++++- kernel/aster-nix/src/syscall/fork.rs | 5 +- osdk/Cargo.lock | 2 +- regression/apps/Makefile | 1 + regression/apps/clone3/Makefile | 5 ++ regression/apps/clone3/clone_process.c | 66 +++++++++++++++++ regression/apps/scripts/process.sh | 17 ++++- 11 files changed, 220 insertions(+), 39 deletions(-) create mode 100644 regression/apps/clone3/Makefile create mode 100644 regression/apps/clone3/clone_process.c diff --git a/docs/src/kernel/linux-compatibility.md b/docs/src/kernel/linux-compatibility.md index 246b5d61..841e7f96 100644 --- a/docs/src/kernel/linux-compatibility.md +++ b/docs/src/kernel/linux-compatibility.md @@ -336,6 +336,7 @@ provided by Linux on x86-64 architecture. | 313 | finit_module | ❌ | | 318 | getrandom | ✅ | | 322 | execveat | ✅ | +| 435 | clone3 | ✅ | ## File Systems diff --git a/framework/aster-frame/src/task/task.rs b/framework/aster-frame/src/task/task.rs index 9aff9b8e..55e020fe 100644 --- a/framework/aster-frame/src/task/task.rs +++ b/framework/aster-frame/src/task/task.rs @@ -161,6 +161,7 @@ impl Task { schedule(); } + /// Runs the task. pub fn run(self: &Arc) { add_task(self.clone()); schedule(); diff --git a/kernel/aster-nix/src/process/clone.rs b/kernel/aster-nix/src/process/clone.rs index 3f6d6f56..22141f31 100644 --- a/kernel/aster-nix/src/process/clone.rs +++ b/kernel/aster-nix/src/process/clone.rs @@ -59,6 +59,7 @@ bitflags! 
{ #[derive(Debug, Clone, Copy)] pub struct CloneArgs { new_sp: u64, + stack_size: usize, parent_tidptr: Vaddr, child_tidptr: Vaddr, tls: u64, @@ -66,9 +67,12 @@ pub struct CloneArgs { } impl CloneArgs { - pub const fn default() -> Self { + /// Clone Args for syscall fork. + /// TODO: set the correct values + pub const fn for_fork() -> Self { CloneArgs { new_sp: 0, + stack_size: 0, parent_tidptr: 0, child_tidptr: 0, tls: 0, @@ -78,6 +82,7 @@ impl CloneArgs { pub const fn new( new_sp: u64, + stack_size: usize, parent_tidptr: Vaddr, child_tidptr: Vaddr, tls: u64, @@ -85,6 +90,7 @@ impl CloneArgs { ) -> Self { CloneArgs { new_sp, + stack_size, parent_tidptr, child_tidptr, tls, @@ -121,43 +127,28 @@ impl CloneFlags { } } -/// Clone a child thread. Without schedule it to run. -pub fn clone_child(parent_context: UserContext, clone_args: CloneArgs) -> Result { +/// Clone a child thread or child process. +/// +/// FIXME: currently, the child process or thread will be scheduled to run at once, +/// but this may not be the expected behavior. 
+pub fn clone_child(parent_context: &UserContext, clone_args: CloneArgs) -> Result { clone_args.clone_flags.check_unsupported_flags()?; if clone_args.clone_flags.contains(CloneFlags::CLONE_THREAD) { let child_thread = clone_child_thread(parent_context, clone_args)?; - let child_tid = child_thread.tid(); - debug!( - "*********schedule child thread, current tid = {}, child pid = {}**********", - current_thread!().tid(), - child_tid - ); child_thread.run(); - debug!( - "*********return to parent thread, current tid = {}, child pid = {}*********", - current_thread!().tid(), - child_tid - ); + + let child_tid = child_thread.tid(); Ok(child_tid) } else { let child_process = clone_child_process(parent_context, clone_args)?; - let child_pid = child_process.pid(); - debug!( - "*********schedule child process, current pid = {}, child pid = {}**********", - current!().pid(), - child_pid - ); child_process.run(); - debug!( - "*********return to parent process, current pid = {}, child pid = {}*********", - current!().pid(), - child_pid - ); + + let child_pid = child_process.pid(); Ok(child_pid) } } -fn clone_child_thread(parent_context: UserContext, clone_args: CloneArgs) -> Result> { +fn clone_child_thread(parent_context: &UserContext, clone_args: CloneArgs) -> Result> { let clone_flags = clone_args.clone_flags; let current = current!(); debug_assert!(clone_flags.contains(CloneFlags::CLONE_VM)); @@ -170,6 +161,7 @@ fn clone_child_thread(parent_context: UserContext, clone_args: CloneArgs) -> Res let child_cpu_context = clone_cpu_context( parent_context, clone_args.new_sp, + clone_args.stack_size, clone_args.tls, clone_flags, ); @@ -215,7 +207,10 @@ fn clone_child_thread(parent_context: UserContext, clone_args: CloneArgs) -> Res Ok(child_thread) } -fn clone_child_process(parent_context: UserContext, clone_args: CloneArgs) -> Result> { +fn clone_child_process( + parent_context: &UserContext, + clone_args: CloneArgs, +) -> Result> { let current = current!(); let parent = 
Arc::downgrade(&current); let clone_flags = clone_args.clone_flags; @@ -231,6 +226,7 @@ fn clone_child_process(parent_context: UserContext, clone_args: CloneArgs) -> Re let child_cpu_context = clone_cpu_context( parent_context, clone_args.new_sp, + clone_args.stack_size, clone_args.tls, clone_flags, ); @@ -368,12 +364,13 @@ fn clone_vm(parent_process_vm: &ProcessVm, clone_flags: CloneFlags) -> Result UserContext { - let mut child_context = parent_context; + let mut child_context = *parent_context; // The return value of child thread is zero child_context.set_syscall_ret(0); @@ -382,7 +379,14 @@ fn clone_cpu_context( debug_assert!(new_sp != 0); } if new_sp != 0 { - child_context.set_stack_pointer(new_sp as usize); + // If stack size is not 0, the `new_sp` points to the BOTTOMMOST byte of stack. + if stack_size != 0 { + child_context.set_stack_pointer(new_sp as usize + stack_size); + } + // If stack size is 0, the new_sp points to the TOPMOST byte of stack. + else { + child_context.set_stack_pointer(new_sp as usize); + } } if clone_flags.contains(CloneFlags::CLONE_SETTLS) { child_context.set_tls_pointer(tls as usize); diff --git a/kernel/aster-nix/src/syscall/arch/x86.rs b/kernel/aster-nix/src/syscall/arch/x86.rs index 8a5dc5aa..c830c3b8 100644 --- a/kernel/aster-nix/src/syscall/arch/x86.rs +++ b/kernel/aster-nix/src/syscall/arch/x86.rs @@ -12,7 +12,7 @@ use crate::syscall::{ chown::{sys_chown, sys_fchown, sys_fchownat, sys_lchown}, chroot::sys_chroot, clock_gettime::sys_clock_gettime, - clone::sys_clone, + clone::{sys_clone, sys_clone3}, close::sys_close, connect::sys_connect, dup::{sys_dup, sys_dup2}, @@ -248,4 +248,5 @@ impl_syscall_nums_and_dispatch_fn! 
{ SYS_PRLIMIT64 = 302 => sys_prlimit64(args[..4]); SYS_GETRANDOM = 318 => sys_getrandom(args[..3]); SYS_EXECVEAT = 322 => sys_execveat(args[..5], &mut context); + SYS_CLONE3 = 435 => sys_clone3(args[..2], &context); } diff --git a/kernel/aster-nix/src/syscall/clone.rs b/kernel/aster-nix/src/syscall/clone.rs index 27354de7..5547f837 100644 --- a/kernel/aster-nix/src/syscall/clone.rs +++ b/kernel/aster-nix/src/syscall/clone.rs @@ -5,7 +5,8 @@ use aster_frame::cpu::UserContext; use super::SyscallReturn; use crate::{ prelude::*, - process::{clone_child, CloneArgs, CloneFlags}, + process::{clone_child, signal::constants::SIGCHLD, CloneArgs, CloneFlags}, + util::read_val_from_user, }; // The order of arguments for clone differs in different architecture. @@ -20,7 +21,94 @@ pub fn sys_clone( ) -> Result<SyscallReturn> { let clone_flags = CloneFlags::from(clone_flags); debug!("flags = {:?}, child_stack_ptr = 0x{:x}, parent_tid_ptr = 0x{:x}, child tid ptr = 0x{:x}, tls = 0x{:x}", clone_flags, new_sp, parent_tidptr, child_tidptr, tls); - let clone_args = CloneArgs::new(new_sp, parent_tidptr, child_tidptr, tls, clone_flags); - let child_pid = clone_child(*parent_context, clone_args).unwrap(); + let clone_args = CloneArgs::new(new_sp, 0, parent_tidptr, child_tidptr, tls, clone_flags); + let child_pid = clone_child(parent_context, clone_args).unwrap(); Ok(SyscallReturn::Return(child_pid as _)) } + +pub fn sys_clone3( + clong_args_addr: Vaddr, + size: usize, + parent_context: &UserContext, +) -> Result<SyscallReturn> { + trace!( + "clone args addr = 0x{:x}, size = 0x{:x}", + clong_args_addr, + size + ); + if size != core::mem::size_of::<Clone3Args>() { + return_errno_with_message!(Errno::EINVAL, "invalid size"); + } + + let clone_args = { + let args: Clone3Args = read_val_from_user(clong_args_addr)?; + trace!("clone3 args = {:x?}", args); + CloneArgs::from(args) + }; + debug!("clone args = {:x?}", clone_args); + + let child_pid = clone_child(parent_context, clone_args)?; + trace!("child pid = {}", child_pid); + 
Ok(SyscallReturn::Return(child_pid as _)) +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pod)] +struct Clone3Args { + /// Flags bit mask + flags: u64, + /// Where to store PID file descriptor + pidfd: u64, + /// Where to store child TID in child's memory + child_tid: u64, + /// Where to store child TID in parent's memory + parent_tid: u64, + /// Signal to deliver to parent on child termination + exit_signal: u64, + /// Pointer to lowest byte of stack + stack: u64, + /// Size of stack + stack_size: u64, + /// Location of new TLS + tls: u64, + /// Pointer to a pid_t array + set_tid: u64, + /// Number of elements in set_tid + set_tid_size: u64, + /// File descriptor for target cgroup of child + cgroup: u64, +} + +impl From<Clone3Args> for CloneArgs { + fn from(value: Clone3Args) -> Self { + const FLAGS_MASK: u64 = 0xff; + let clone_flags = + CloneFlags::from(value.exit_signal & FLAGS_MASK | value.flags & !FLAGS_MASK); + + // TODO: deal with pidfd, exit_signal, set_tid, set_tid_size, cgroup + if value.exit_signal != 0 || value.exit_signal as u8 != SIGCHLD.as_u8() { + warn!("exit signal is not supported"); + } + + if value.pidfd != 0 { + warn!("pidfd is not supported"); + } + + if value.set_tid != 0 || value.set_tid_size != 0 { + warn!("set_tid is not supported"); + } + + if value.cgroup != 0 { + warn!("cgroup is not supported"); + } + + CloneArgs::new( + value.stack, + value.stack_size as _, + value.parent_tid as _, + value.child_tid as _, + value.tls, + clone_flags, + ) + } +} diff --git a/kernel/aster-nix/src/syscall/fork.rs b/kernel/aster-nix/src/syscall/fork.rs index 4ce5880a..034a7323 100644 --- a/kernel/aster-nix/src/syscall/fork.rs +++ b/kernel/aster-nix/src/syscall/fork.rs @@ -10,8 +10,7 @@ use crate::{ pub fn sys_fork(parent_context: &UserContext) -> Result<SyscallReturn> { let current = current!(); - // FIXME: set correct args for fork - let clone_args = CloneArgs::default(); - let child_pid = clone_child(*parent_context, clone_args).unwrap(); + let clone_args = CloneArgs::for_fork(); 
+ let child_pid = clone_child(parent_context, clone_args).unwrap(); Ok(SyscallReturn::Return(child_pid as _)) } diff --git a/osdk/Cargo.lock b/osdk/Cargo.lock index 75cedb07..b362d3ea 100644 --- a/osdk/Cargo.lock +++ b/osdk/Cargo.lock @@ -321,7 +321,7 @@ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "linux-bzimage-builder" version = "0.1.0" -source = "git+https://github.com/asterinas/asterinas?rev=cc4111c#cc4111cab227f188a1170dea0aa729b410b1b509" +source = "git+https://github.com/asterinas/asterinas?rev=c9b66bd#c9b66bddd6b77dcddcbe1b66337a76ee1e16b640" dependencies = [ "bitflags", "bytemuck", diff --git a/regression/apps/Makefile b/regression/apps/Makefile index 43c34678..79bb0394 100644 --- a/regression/apps/Makefile +++ b/regression/apps/Makefile @@ -10,6 +10,7 @@ REGRESSION_BUILD_DIR ?= $(INITRAMFS)/regression # These test apps are sorted by name TEST_APPS := \ + clone3 \ eventfd2 \ execve \ file_io \ diff --git a/regression/apps/clone3/Makefile b/regression/apps/clone3/Makefile new file mode 100644 index 00000000..9a664a9a --- /dev/null +++ b/regression/apps/clone3/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MPL-2.0 + +include ../test_common.mk + +EXTRA_C_FLAGS := \ No newline at end of file diff --git a/regression/apps/clone3/clone_process.c b/regression/apps/clone3/clone_process.c new file mode 100644 index 00000000..6d8339d9 --- /dev/null +++ b/regression/apps/clone3/clone_process.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: MPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CLONE_PIDFD +#define CLONE_PIDFD 0x00001000 +#endif + +static pid_t sys_clone3(struct clone_args *args) +{ + return syscall(SYS_clone3, args, sizeof(struct clone_args)); +} + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +int main(int argc, char *argv[]) +{ + 
int pidfd = -1; + pid_t parent_tid = -1, pid = -1; + struct clone_args args = { 0 }; + + args.parent_tid = ptr_to_u64(&parent_tid); /* CLONE_PARENT_SETTID */ + args.flags = CLONE_PARENT_SETTID; + args.exit_signal = SIGCHLD; + + pid = sys_clone3(&args); + if (pid < 0) { + printf("%s - Failed to create new process\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + if (pid == 0) { + printf("Child process with pid %d\n", getpid()); + exit(EXIT_SUCCESS); + } + + printf("Parent process: child's pid %d\n", pid); + printf("Parent process: child's pid %d in parent_tid\n", + *(pid_t *)args.parent_tid); + + waitpid(pid, NULL, 0); + + if (pid != *(pid_t *)args.parent_tid) + exit(EXIT_FAILURE); + + close(pidfd); + + return 0; +} diff --git a/regression/apps/scripts/process.sh b/regression/apps/scripts/process.sh index d6d20f1a..2b2fa73a 100755 --- a/regression/apps/scripts/process.sh +++ b/regression/apps/scripts/process.sh @@ -8,7 +8,22 @@ SCRIPT_DIR=/regression cd ${SCRIPT_DIR}/.. echo "Start process test......" -tests="hello_world/hello_world fork/fork execve/execve fork_c/fork signal_c/signal_test pthread/pthread_test hello_pie/hello pty/open_pty getpid/getpid eventfd2/eventfd2 mmap/map_shared_anon" +# These test programs are sorted by name. +tests=" +clone3/clone_process +execve/execve +eventfd2/eventfd2 +fork/fork +fork_c/fork +getpid/getpid +hello_pie/hello +hello_world/hello_world +mmap/map_shared_anon +pthread/pthread_test +pty/open_pty +signal_c/signal_test +" + for testcase in ${tests} do echo "Running test ${testcase}......"