Add sys_epoll_pwait2

This commit is contained in:
Cautreoxit
2025-06-02 13:26:14 +08:00
committed by Tate, Hongliang Tian
parent ead5a70444
commit 0b471ef370
5 changed files with 98 additions and 53 deletions

View File

@ -343,6 +343,7 @@ provided by Linux on x86-64 architecture.
| 332 | statx | ✅ | | 332 | statx | ✅ |
| 435 | clone3 | ✅ | | 435 | clone3 | ✅ |
| 439 | faccessat2 | ✅ | | 439 | faccessat2 | ✅ |
| 441 | epoll_pwait2 | ✅ |
## File Systems ## File Systems

View File

@ -16,7 +16,7 @@ use crate::syscall::{
close::sys_close, close::sys_close,
connect::sys_connect, connect::sys_connect,
dup::{sys_dup, sys_dup3}, dup::{sys_dup, sys_dup3},
epoll::{sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait}, epoll::{sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_pwait2},
eventfd::sys_eventfd2, eventfd::sys_eventfd2,
execve::{sys_execve, sys_execveat}, execve::{sys_execve, sys_execveat},
exit::sys_exit, exit::sys_exit,
@ -303,4 +303,5 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_SEMTIMEDOP = 420 => sys_semtimedop(args[..4]); SYS_SEMTIMEDOP = 420 => sys_semtimedop(args[..4]);
SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx);
SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]);
SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]);
} }

View File

@ -18,7 +18,10 @@ use crate::syscall::{
close::sys_close, close::sys_close,
connect::sys_connect, connect::sys_connect,
dup::{sys_dup, sys_dup2, sys_dup3}, dup::{sys_dup, sys_dup2, sys_dup3},
epoll::{sys_epoll_create, sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_wait}, epoll::{
sys_epoll_create, sys_epoll_create1, sys_epoll_ctl, sys_epoll_pwait, sys_epoll_pwait2,
sys_epoll_wait,
},
eventfd::{sys_eventfd, sys_eventfd2}, eventfd::{sys_eventfd, sys_eventfd2},
execve::{sys_execve, sys_execveat}, execve::{sys_execve, sys_execveat},
exit::sys_exit, exit::sys_exit,
@ -369,4 +372,5 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_STATX = 332 => sys_statx(args[..5]); SYS_STATX = 332 => sys_statx(args[..5]);
SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx); SYS_CLONE3 = 435 => sys_clone3(args[..2], &user_ctx);
SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]); SYS_FACCESSAT2 = 439 => sys_faccessat2(args[..4]);
SYS_EPOLL_PWAIT2 = 441 => sys_epoll_pwait2(args[..5]);
} }

View File

@ -11,7 +11,8 @@ use crate::{
utils::CreationFlags, utils::CreationFlags,
}, },
prelude::*, prelude::*,
process::signal::sig_mask::SigMask, process::signal::sig_mask::{SigMask, SigSet},
time::timespec_t,
}; };
// See: https://elixir.bootlin.com/linux/v6.11.5/source/fs/eventpoll.c#L2437 // See: https://elixir.bootlin.com/linux/v6.11.5/source/fs/eventpoll.c#L2437
@ -92,22 +93,31 @@ pub fn sys_epoll_ctl(
Ok(SyscallReturn::Return(0 as _)) Ok(SyscallReturn::Return(0 as _))
} }
fn do_epoll_wait( fn do_epoll_pwait2(
epfd: FileDesc, epfd: FileDesc,
events_addr: Vaddr,
max_events: i32, max_events: i32,
timeout: i32, timeout: Option<Duration>,
sigmask: Vaddr,
sigset_size: usize,
ctx: &Context, ctx: &Context,
) -> Result<Vec<EpollEvent>> { ) -> Result<usize> {
let max_events = { let max_events = {
if max_events <= 0 || max_events as usize > EP_MAX_EVENTS { if max_events <= 0 || max_events as usize > EP_MAX_EVENTS {
return_errno_with_message!(Errno::EINVAL, "max_events is not valid"); return_errno_with_message!(Errno::EINVAL, "max_events is not valid");
} }
max_events as usize max_events as usize
}; };
let timeout = if timeout >= 0 {
Some(Duration::from_millis(timeout as _)) let sigset = sigmask != 0;
if sigset && sigset_size != 8 {
return_errno_with_message!(Errno::EINVAL, "sigset size is not equal to 8");
}
let old_sig_mask_value = if sigset {
set_signal_mask(sigmask, ctx)?
} else { } else {
None SigSet::from(0)
}; };
let mut file_table = ctx.thread_local.borrow_file_table_mut(); let mut file_table = ctx.thread_local.borrow_file_table_mut();
@ -118,17 +128,34 @@ fn do_epoll_wait(
let result = epoll_file.wait(max_events, timeout.as_ref()); let result = epoll_file.wait(max_events, timeout.as_ref());
if sigset {
restore_signal_mask(old_sig_mask_value, ctx);
}
// As mentioned in the manual, the return value should be zero if no file descriptor becomes ready // As mentioned in the manual, the return value should be zero if no file descriptor becomes ready
// during the requested `timeout` milliseconds. So we ignore `Err(ETIME)` and return an empty vector. // during the requested `timeout`. So we ignore `Err(ETIME)` and report zero ready events (`Ok(0)`).
// //
// Manual: <https://www.man7.org/linux/man-pages/man2/epoll_wait.2.html> // Manual: <https://www.man7.org/linux/man-pages/man2/epoll_wait.2.html>
if result let epoll_events = match result {
.as_ref() Ok(events) => events,
.is_err_and(|err| err.error() == Errno::ETIME) Err(e) if e.error() == Errno::ETIME => {
{ return Ok(0);
return Ok(Vec::new());
} }
result Err(e) => {
return Err(e);
}
};
// Write back
let mut write_addr = events_addr;
let user_space = ctx.user_space();
for epoll_event in epoll_events.iter() {
let c_epoll_event = c_epoll_event::from(epoll_event);
user_space.write_val(write_addr, &c_epoll_event)?;
write_addr += core::mem::size_of::<c_epoll_event>();
}
Ok(epoll_events.len())
} }
pub fn sys_epoll_wait( pub fn sys_epoll_wait(
@ -143,18 +170,15 @@ pub fn sys_epoll_wait(
epfd, events_addr, max_events, timeout epfd, events_addr, max_events, timeout
); );
let epoll_events = do_epoll_wait(epfd, max_events, timeout, ctx)?; let timeout = if timeout >= 0 {
Some(Duration::from_millis(timeout as _))
} else {
None
};
// Write back let events_len = do_epoll_pwait2(epfd, events_addr, max_events, timeout, 0, 0, ctx)?;
let mut write_addr = events_addr;
let user_space = ctx.user_space();
for epoll_event in epoll_events.iter() {
let c_epoll_event = c_epoll_event::from(epoll_event);
user_space.write_val(write_addr, &c_epoll_event)?;
write_addr += core::mem::size_of::<c_epoll_event>();
}
Ok(SyscallReturn::Return(epoll_events.len() as _)) Ok(SyscallReturn::Return(events_len as _))
} }
fn set_signal_mask(set_ptr: Vaddr, ctx: &Context) -> Result<SigMask> { fn set_signal_mask(set_ptr: Vaddr, ctx: &Context) -> Result<SigMask> {
@ -195,34 +219,49 @@ pub fn sys_epoll_pwait(
epfd, events_addr, max_events, timeout, sigmask, sigset_size epfd, events_addr, max_events, timeout, sigmask, sigset_size
); );
if sigmask != 0 && sigset_size != 8 { let timeout = if timeout >= 0 {
return_errno_with_message!(Errno::EINVAL, "sigset size is not equal to 8"); Some(Duration::from_millis(timeout as _))
} } else {
None
let old_sig_mask_value = set_signal_mask(sigmask, ctx)?;
let ready_events = match do_epoll_wait(epfd, max_events, timeout, ctx) {
Ok(events) => {
restore_signal_mask(old_sig_mask_value, ctx);
events
}
Err(e) => {
// Restore the signal mask even if an error occurs
restore_signal_mask(old_sig_mask_value, ctx);
return Err(e);
}
}; };
// Write back let events_len = do_epoll_pwait2(
let mut write_addr = events_addr; epfd,
let user_space = ctx.user_space(); events_addr,
for event in ready_events.iter() { max_events,
let c_event = c_epoll_event::from(event); timeout,
user_space.write_val(write_addr, &c_event)?; sigmask,
write_addr += core::mem::size_of::<c_epoll_event>(); sigset_size,
ctx,
)?;
Ok(SyscallReturn::Return(events_len as _))
} }
Ok(SyscallReturn::Return(ready_events.len() as _)) pub fn sys_epoll_pwait2(
epfd: FileDesc,
events_addr: Vaddr,
max_events: i32,
timeout_addr: Vaddr,
sigmask: Vaddr,
ctx: &Context,
) -> Result<SyscallReturn> {
debug!(
"epfd = {}, events_addr = 0x{:x}, max_events = {}, timeout_ts = 0x{:x}, sigmask = 0x{:x}",
epfd, events_addr, max_events, timeout_addr, sigmask,
);
let timeout: Option<Duration> = if timeout_addr == 0 {
None
} else {
let ts: timespec_t = ctx.user_space().read_val(timeout_addr)?;
let duration = Duration::try_from(ts)?;
Some(duration)
};
let events_len = do_epoll_pwait2(epfd, events_addr, max_events, timeout, sigmask, 8, ctx)?;
Ok(SyscallReturn::Return(events_len as _))
} }
#[derive(Debug, Clone, Copy, Pod)] #[derive(Debug, Clone, Copy, Pod)]

View File

@ -190,10 +190,10 @@ epoll_wait04
# epoll_wait06 # epoll_wait06
epoll_wait07 epoll_wait07
# epoll_pwait01 # epoll_pwait01
# epoll_pwait02 epoll_pwait02
# epoll_pwait03 # epoll_pwait03
# epoll_pwait04 epoll_pwait04
# epoll_pwait05 epoll_pwait05
# eventfd01 # eventfd01
# eventfd02 # eventfd02