From 0b471ef370d6333553b675fcf40ad5a8f4ba65fe Mon Sep 17 00:00:00 2001 From: Cautreoxit Date: Mon, 2 Jun 2025 13:26:14 +0800 Subject: [PATCH] Add sys_epoll_pwait2 --- docs/src/kernel/linux-compatibility.md | 1 + kernel/src/syscall/arch/riscv.rs | 3 +- kernel/src/syscall/arch/x86.rs | 6 +- kernel/src/syscall/epoll.rs | 135 +++++++++++++++--------- test/syscall_test/ltp/testcases/all.txt | 6 +- 5 files changed, 98 insertions(+), 53 deletions(-) diff --git a/docs/src/kernel/linux-compatibility.md b/docs/src/kernel/linux-compatibility.md index 6ba2b3711..2523d0a6e 100644 --- a/docs/src/kernel/linux-compatibility.md +++ b/docs/src/kernel/linux-compatibility.md @@ -343,6 +343,7 @@ provided by Linux on x86-64 architecture. | 332 | statx | ✅ | | 435 | clone3 | ✅ | | 439 | faccessat2 | ✅ | +| 441 | epoll_pwait2 | ✅ | ## File Systems diff --git a/kernel/src/syscall/arch/riscv.rs b/kernel/src/syscall/arch/riscv.rs index 63fccddac..1b8db7e8c 100644 --- a/kernel/src/syscall/arch/riscv.rs +++ b/kernel/src/syscall/arch/riscv.rs @@ -16,7 +16,7 @@ use crate::syscall::{ close::sys_close, connect::sys_connect, dup::{sys_dup, sys_dup3}, - epoll::{sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait}, + epoll::{sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_pwait2}, eventfd::sys_eventfd2, execve::{sys_execve, sys_execveat}, exit::sys_exit, @@ -303,4 +303,5 @@ impl_syscall_nums_and_dispatch_fn! { SYS_SEMTIMEDOP = 420 => sys_semtimedop(args[..4]); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); + SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]); } diff --git a/kernel/src/syscall/arch/x86.rs b/kernel/src/syscall/arch/x86.rs index 956d845a0..9433102ca 100644 --- a/kernel/src/syscall/arch/x86.rs +++ b/kernel/src/syscall/arch/x86.rs @@ -18,7 +18,10 @@ use crate::syscall::{ close::sys_close, connect::sys_connect, dup::{sys_dup, sys_dup2, sys_dup3}, - epoll::{sys_epoll_create, sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_wait}, + epoll::{ + sys_epoll_create, sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_pwait2, + sys_epoll_wait, + }, eventfd::{sys_eventfd, sys_eventfd2}, execve::{sys_execve, sys_execveat}, exit::sys_exit, @@ -369,4 +372,5 @@ impl_syscall_nums_and_dispatch_fn! { SYS_STATX = 332 => sys_statx(args[..5]); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); + SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]); } diff --git a/kernel/src/syscall/epoll.rs b/kernel/src/syscall/epoll.rs index f13b953f6..1a89c63e0 100644 --- a/kernel/src/syscall/epoll.rs +++ b/kernel/src/syscall/epoll.rs @@ -11,7 +11,8 @@ use crate::{ utils::CreationFlags, }, prelude::*, - process::signal::sig_mask::SigMask, + process::signal::sig_mask::{SigMask, SigSet}, + time::timespec_t, }; // See: https://elixir.bootlin.com/linux/v6.11.5/source/fs/eventpoll.c#L2437 @@ -92,22 +93,31 @@ pub fn sys_epoll_ctl( Ok(SyscallReturn::Return(0 as _)) } -fn do_epoll_wait( +fn do_epoll_pwait2( epfd: FileDesc, + events_addr: Vaddr, max_events: i32, - timeout: i32, + timeout: Option, + sigmask: Vaddr, + sigset_size: usize, ctx: &Context, -) -> Result> { +) -> Result { let max_events = { if max_events <= 0 || max_events as usize > EP_MAX_EVENTS { return_errno_with_message!(Errno::EINVAL, "max_events is not valid"); } max_events as usize }; - let timeout = if timeout >= 0 { - Some(Duration::from_millis(timeout as _)) + + let sigset = sigmask != 0; + if sigset && sigset_size != 8 { + return_errno_with_message!(Errno::EINVAL, "sigset size is not equal to 8"); + } + + let old_sig_mask_value = if sigset { + set_signal_mask(sigmask, ctx)? } else { - None + SigSet::from(0) }; let mut file_table = ctx.thread_local.borrow_file_table_mut(); @@ -118,17 +128,34 @@ fn do_epoll_wait( let result = epoll_file.wait(max_events, timeout.as_ref()); + if sigset { + restore_signal_mask(old_sig_mask_value, ctx); + } + // As mentioned in the manual, the return value should be zero if no file descriptor becomes ready // during the requested `timeout` milliseconds. So we ignore `Err(ETIME)` and return an empty vector. // // Manual: - if result - .as_ref() - .is_err_and(|err| err.error() == Errno::ETIME) - { - return Ok(Vec::new()); + let epoll_events = match result { + Ok(events) => events, + Err(e) if e.error() == Errno::ETIME => { + return Ok(0); + } + Err(e) => { + return Err(e); + } + }; + + // Write back + let mut write_addr = events_addr; + let user_space = ctx.user_space(); + for epoll_event in epoll_events.iter() { + let c_epoll_event = c_epoll_event::from(epoll_event); + user_space.write_val(write_addr, &c_epoll_event)?; + write_addr += core::mem::size_of::(); } - result + + Ok(epoll_events.len()) } pub fn sys_epoll_wait( @@ -143,18 +170,15 @@ pub fn sys_epoll_wait( epfd, events_addr, max_events, timeout ); - let epoll_events = do_epoll_wait(epfd, max_events, timeout, ctx)?; + let timeout = if timeout >= 0 { + Some(Duration::from_millis(timeout as _)) + } else { + None + }; - // Write back - let mut write_addr = events_addr; - let user_space = ctx.user_space(); - for epoll_event in epoll_events.iter() { - let c_epoll_event = c_epoll_event::from(epoll_event); - user_space.write_val(write_addr, &c_epoll_event)?; - write_addr += core::mem::size_of::(); - } + let events_len = do_epoll_pwait2(epfd, events_addr, max_events, timeout, 0, 0, ctx)?; - Ok(SyscallReturn::Return(epoll_events.len() as _)) + Ok(SyscallReturn::Return(events_len as _)) } fn set_signal_mask(set_ptr: Vaddr, ctx: &Context) -> Result { @@ -195,34 +219,49 @@ pub fn sys_epoll_pwait( epfd, events_addr, max_events, timeout, sigmask, sigset_size ); - if sigmask != 0 && sigset_size != 8 { - return_errno_with_message!(Errno::EINVAL, "sigset size is not equal to 8"); - } - - let old_sig_mask_value = set_signal_mask(sigmask, ctx)?; - - let ready_events = match do_epoll_wait(epfd, max_events, timeout, ctx) { - Ok(events) => { - restore_signal_mask(old_sig_mask_value, ctx); - events - } - Err(e) => { - // Restore the signal mask even if an error occurs - restore_signal_mask(old_sig_mask_value, ctx); - return Err(e); - } + let timeout = if timeout >= 0 { + Some(Duration::from_millis(timeout as _)) + } else { + None }; - // Write back - let mut write_addr = events_addr; - let user_space = ctx.user_space(); - for event in ready_events.iter() { - let c_event = c_epoll_event::from(event); - user_space.write_val(write_addr, &c_event)?; - write_addr += core::mem::size_of::(); - } + let events_len = do_epoll_pwait2( + epfd, + events_addr, + max_events, + timeout, + sigmask, + sigset_size, + ctx, + )?; - Ok(SyscallReturn::Return(ready_events.len() as _)) + Ok(SyscallReturn::Return(events_len as _)) +} + +pub fn sys_epoll_pwait2( + epfd: FileDesc, + events_addr: Vaddr, + max_events: i32, + timeout_addr: Vaddr, + sigmask: Vaddr, + ctx: &Context, +) -> Result { + debug!( + "epfd = {}, events_addr = 0x{:x}, max_events = {}, timeout_ts = 0x{:x}, sigmask = 0x{:x}", + epfd, events_addr, max_events, timeout_addr, sigmask, + ); + + let timeout: Option = if timeout_addr == 0 { + None + } else { + let ts: timespec_t = ctx.user_space().read_val(timeout_addr)?; + let duration = Duration::try_from(ts)?; + Some(duration) + }; + + let events_len = do_epoll_pwait2(epfd, events_addr, max_events, timeout, sigmask, 8, ctx)?; + + Ok(SyscallReturn::Return(events_len as _)) } #[derive(Debug, Clone, Copy, Pod)] diff --git a/test/syscall_test/ltp/testcases/all.txt b/test/syscall_test/ltp/testcases/all.txt index dd411e98b..5f520717c 100644 --- a/test/syscall_test/ltp/testcases/all.txt +++ b/test/syscall_test/ltp/testcases/all.txt @@ -190,10 +190,10 @@ epoll_wait04 # epoll_wait06 epoll_wait07 # epoll_pwait01 -# epoll_pwait02 +epoll_pwait02 # epoll_pwait03 -# epoll_pwait04 -# epoll_pwait05 +epoll_pwait04 +epoll_pwait05 # eventfd01 # eventfd02