Add syscall eventfd and eventfd2

This commit is contained in:
Jianfeng Jiang
2023-12-04 07:21:31 +00:00
committed by Tate, Hongliang Tian
parent ccc4e6ec6b
commit 078f9a8891
12 changed files with 378 additions and 19 deletions

View File

@ -304,13 +304,13 @@ provided by Linux on x86-64 architecture.
| 281 | epoll_pwait | ✅ | | 281 | epoll_pwait | ✅ |
| 282 | signalfd | ❌ | | 282 | signalfd | ❌ |
| 283 | timerfd_create | ❌ | | 283 | timerfd_create | ❌ |
| 284 | eventfd | ❌ | | 284 | eventfd | ✅ |
| 285 | fallocate | ❌ | | 285 | fallocate | ❌ |
| 286 | timerfd_settime | ❌ | | 286 | timerfd_settime | ❌ |
| 287 | timerfd_gettime | ❌ | | 287 | timerfd_gettime | ❌ |
| 288 | accept4 | ❌ | | 288 | accept4 | ❌ |
| 289 | signalfd4 | ❌ | | 289 | signalfd4 | ❌ |
| 290 | eventfd2 | ❌ | | 290 | eventfd2 | ✅ |
| 291 | epoll_create1 | ✅ | | 291 | epoll_create1 | ✅ |
| 292 | dup3 | ❌ | | 292 | dup3 | ❌ |
| 293 | pipe2 | ✅ | | 293 | pipe2 | ✅ |

View File

@ -80,7 +80,7 @@ pub trait FileLike: Send + Sync + Any {
} }
fn seek(&self, seek_from: SeekFrom) -> Result<usize> { fn seek(&self, seek_from: SeekFrom) -> Result<usize> {
return_errno_with_message!(Errno::EINVAL, "seek is not supported"); return_errno_with_message!(Errno::ESPIPE, "seek is not supported");
} }
fn clean_for_close(&self) -> Result<()> { fn clean_for_close(&self) -> Result<()> {

View File

@ -0,0 +1,259 @@
// SPDX-License-Identifier: MPL-2.0
//! `eventfd()` creates an "eventfd object" (we name it as `EventFile`)
//! which serves as a mechanism for event wait/notify.
//!
//! `EventFile` holds a u64 integer counter.
//! Writing to `EventFile` increments the counter by the written value.
//! Reading from `EventFile` returns the current counter value and resets it to zero.
//! If the `EFD_SEMAPHORE` flag is set, a read instead returns 1
//! and decrements the counter by 1.
//! Read and write operations may block, depending on the file flags.
//!
//! For more detailed information about this syscall,
//! refer to the `eventfd(2)` man page.
//!
use super::{SyscallReturn, SYS_EVENTFD, SYS_EVENTFD2};
use crate::{
events::{IoEvents, Observer},
fs::{
file_handle::FileLike,
file_table::{FdFlags, FileDesc},
utils::{CreationFlags, StatusFlags},
},
log_syscall_entry,
prelude::*,
process::signal::{Pauser, Pollee, Poller},
};
/// Entry point of the `eventfd` syscall.
///
/// Creates an event file whose counter starts at `init_val`, with no flags
/// set, and returns the new file descriptor.
pub fn sys_eventfd(init_val: u64) -> Result<SyscallReturn> {
    log_syscall_entry!(SYS_EVENTFD);
    debug!("init_val = 0x{:x}", init_val);

    // The legacy syscall carries no flags argument, so start from an empty set.
    let no_flags = Flags::empty();
    let new_fd = do_sys_eventfd2(init_val, no_flags);

    Ok(SyscallReturn::Return(new_fd as _))
}
pub fn sys_eventfd2(init_val: u64, flags: u32) -> Result<SyscallReturn> {
log_syscall_entry!(SYS_EVENTFD2);
trace!("raw flags = {}", flags);
let flags = Flags::from_bits(flags)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown flags"))?;
debug!("init_val = 0x{:x}, flags = {:?}", init_val, flags);
let fd = do_sys_eventfd2(init_val, flags);
Ok(SyscallReturn::Return(fd as _))
}
/// Creates an `EventFile` and installs it in the current process's file table.
///
/// `EFD_CLOEXEC` in `flags` is translated to `FdFlags::CLOEXEC` on the new
/// descriptor; the returned value is the descriptor chosen by the file table.
fn do_sys_eventfd2(init_val: u64, flags: Flags) -> FileDesc {
    let new_file = Arc::new(EventFile::new(init_val, flags));

    // Close-on-exec is a property of the descriptor, not of the file itself.
    let descriptor_flags = if flags.contains(Flags::EFD_CLOEXEC) {
        FdFlags::CLOEXEC
    } else {
        FdFlags::empty()
    };

    let current = current!();
    let mut table = current.file_table().lock();
    table.insert(new_file, descriptor_flags)
}
bitflags! {
/// Flags accepted by `eventfd2` and stored in every `EventFile`.
struct Flags: u32 {
/// When set, a read returns 1 and decrements the counter by 1
/// instead of returning the whole counter and resetting it to 0.
const EFD_SEMAPHORE = 1;
/// Shares its bit value with `CreationFlags::O_CLOEXEC`; mapped to
/// `FdFlags::CLOEXEC` when the descriptor is inserted into the file table.
const EFD_CLOEXEC = CreationFlags::O_CLOEXEC.bits();
/// Shares its bit value with `StatusFlags::O_NONBLOCK`; when set, reads and
/// writes that cannot proceed return `EAGAIN` instead of blocking.
const EFD_NONBLOCK = StatusFlags::O_NONBLOCK.bits();
}
}
/// The file object behind an eventfd descriptor: a lock-protected `u64`
/// counter plus the state needed to poll and block on it.
struct EventFile {
/// Current counter value; never exceeds `EventFile::MAX_COUNTER_VALUE`.
counter: Mutex<u64>,
/// Readiness state (`IN` / `OUT`) that pollers and blocked readers wait on.
pollee: Pollee,
/// Flags given at creation; `EFD_NONBLOCK` can be toggled later through
/// `set_status_flags`, hence the lock.
flags: Mutex<Flags>,
/// Blocked writers wait here and are resumed whenever the counter is
/// observed to be writable.
write_pauser: Arc<Pauser>,
}
impl EventFile {
const MAX_COUNTER_VALUE: u64 = u64::MAX - 1;
fn new(init_val: u64, flags: Flags) -> Self {
let counter = Mutex::new(init_val);
let pollee = Pollee::new(IoEvents::OUT);
let write_pauser = Pauser::new();
Self {
counter,
pollee,
flags: Mutex::new(flags),
write_pauser,
}
}
fn is_nonblocking(&self) -> bool {
self.flags.lock().contains(Flags::EFD_NONBLOCK)
}
fn update_io_state(&self, counter: &MutexGuard<u64>) {
let is_readable = **counter != 0;
// if it is possible to write a value of at least "1"
// without blocking, the file is writable
let is_writable = **counter < Self::MAX_COUNTER_VALUE;
if is_writable {
if is_readable {
self.pollee.add_events(IoEvents::IN | IoEvents::OUT);
} else {
self.pollee.add_events(IoEvents::OUT);
self.pollee.del_events(IoEvents::IN);
}
self.write_pauser.resume_all();
return;
}
if is_readable {
self.pollee.add_events(IoEvents::IN);
self.pollee.del_events(IoEvents::OUT);
return;
}
self.pollee.del_events(IoEvents::IN | IoEvents::OUT);
// TODO: deal with overflow logic
}
/// Adds val to the counter.
///
/// If the new_value is overflowed or exceeds MAX_COUNTER_VALUE, the counter value
/// will not be modified, and this method returns `Err(EINVAL)`.
fn add_counter_val(&self, val: u64) -> Result<()> {
let mut counter = self.counter.lock();
let new_value = (*counter)
.checked_add(val)
.ok_or_else(|| Error::with_message(Errno::EINVAL, "arithmetic overflow"))?;
if new_value <= Self::MAX_COUNTER_VALUE {
*counter = new_value;
self.update_io_state(&counter);
return Ok(());
}
return_errno_with_message!(Errno::EINVAL, "new value exceeds MAX_COUNTER_VALUE");
}
}
impl FileLike for EventFile {
    /// Reads the counter into the first 8 bytes of `buf` and returns 8.
    ///
    /// With `EFD_SEMAPHORE` set, the value 1 is stored and the counter is
    /// decremented by 1; otherwise the whole counter is stored and reset to 0.
    /// While the counter is 0 the call waits for `IoEvents::IN`, unless the
    /// file is non-blocking.
    ///
    /// # Errors
    ///
    /// * `EINVAL` if `buf` is shorter than 8 bytes.
    /// * `EAGAIN` if the counter is 0 and the file is non-blocking.
    /// * Any error returned by `Poller::wait` while blocked.
    fn read(&self, buf: &mut [u8]) -> Result<usize> {
        let read_len = core::mem::size_of::<u64>();
        if buf.len() < read_len {
            return_errno_with_message!(Errno::EINVAL, "buf len is less than the size of u64");
        }

        loop {
            let mut counter = self.counter.lock();

            // Wait until the counter becomes non-zero
            if *counter == 0 {
                if self.is_nonblocking() {
                    return_errno_with_message!(Errno::EAGAIN, "try reading event file again");
                }

                self.update_io_state(&counter);
                // Release the counter lock before waiting so that writers
                // can update the counter.
                drop(counter);

                let poller = Poller::new();
                if self.pollee.poll(IoEvents::IN, Some(&poller)).is_empty() {
                    poller.wait()?;
                }
                continue;
            }

            // Copy value from counter, and set the new counter value
            if self.flags.lock().contains(Flags::EFD_SEMAPHORE) {
                buf[..read_len].copy_from_slice(1u64.as_bytes());
                *counter -= 1;
            } else {
                buf[..read_len].copy_from_slice((*counter).as_bytes());
                *counter = 0;
            }

            self.update_io_state(&counter);
            break;
        }

        Ok(read_len)
    }

    /// Adds the `u64` stored in the first 8 bytes of `buf` to the counter and
    /// returns 8.
    ///
    /// If the addition would push the counter above `MAX_COUNTER_VALUE`, the
    /// call waits on the write pauser until it fits, unless the file is
    /// non-blocking.
    ///
    /// # Errors
    ///
    /// * `EINVAL` if `buf` is shorter than 8 bytes.
    /// * `EINVAL` if the supplied value is `u64::MAX`.
    /// * `EAGAIN` if the value does not fit right now and the file is
    ///   non-blocking.
    /// * Any error returned by `Pauser::pause_until` while blocked.
    fn write(&self, buf: &[u8]) -> Result<usize> {
        let write_len = core::mem::size_of::<u64>();
        if buf.len() < write_len {
            return_errno_with_message!(Errno::EINVAL, "buf len is less than the size of u64");
        }

        // Only the first 8 bytes carry the value; any extra bytes are ignored.
        let supplied_value = u64::from_bytes(&buf[..write_len]);

        // The counter is capped at `MAX_COUNTER_VALUE` (`u64::MAX - 1`), so
        // adding `u64::MAX` can never succeed, not even to a zero counter.
        // Without this check a blocking write would sleep forever and a
        // non-blocking one would report a misleading `EAGAIN`.
        if supplied_value == u64::MAX {
            return_errno_with_message!(Errno::EINVAL, "cannot write u64::MAX to event file");
        }

        // Try to add counter val at first
        if self.add_counter_val(supplied_value).is_ok() {
            return Ok(write_len);
        }

        if self.is_nonblocking() {
            return_errno_with_message!(Errno::EAGAIN, "try writing to event file again");
        }

        // Wait until counter can be added val to
        self.write_pauser
            .pause_until(|| self.add_counter_val(supplied_value).ok())?;

        Ok(write_len)
    }

    /// Delegates polling to the internal pollee.
    fn poll(&self, mask: IoEvents, poller: Option<&Poller>) -> IoEvents {
        self.pollee.poll(mask, poller)
    }

    /// Reports `O_NONBLOCK` if `EFD_NONBLOCK` is set, and no flags otherwise.
    fn status_flags(&self) -> StatusFlags {
        if self.is_nonblocking() {
            StatusFlags::O_NONBLOCK
        } else {
            StatusFlags::empty()
        }
    }

    /// Sets or clears `EFD_NONBLOCK` according to whether `new_flags`
    /// contains `O_NONBLOCK`. All other status flags are currently ignored.
    fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> {
        let mut flags = self.flags.lock();

        if new_flags.contains(StatusFlags::O_NONBLOCK) {
            *flags |= Flags::EFD_NONBLOCK;
        } else {
            *flags &= !Flags::EFD_NONBLOCK;
        }

        // TODO: deal with other flags

        Ok(())
    }

    /// Registers `observer` on the internal pollee for the events in `mask`.
    fn register_observer(
        &self,
        observer: Weak<dyn Observer<IoEvents>>,
        mask: IoEvents,
    ) -> Result<()> {
        self.pollee.register_observer(observer, mask);
        Ok(())
    }

    /// Removes `observer` from the internal pollee, forwarding the pollee's
    /// return value.
    fn unregister_observer(
        &self,
        observer: &Weak<dyn Observer<IoEvents>>,
    ) -> Option<Weak<dyn Observer<IoEvents>>> {
        self.pollee.unregister_observer(observer)
    }
}

View File

@ -5,15 +5,38 @@
use aster_frame::cpu::UserContext; use aster_frame::cpu::UserContext;
use self::{ use self::{
accept::sys_accept, alarm::sys_alarm, bind::sys_bind, connect::sys_connect, accept::sys_accept,
execve::sys_execveat, getgroups::sys_getgroups, getpeername::sys_getpeername, alarm::sys_alarm,
getrandom::sys_getrandom, getresgid::sys_getresgid, getresuid::sys_getresuid, bind::sys_bind,
getsid::sys_getsid, getsockname::sys_getsockname, getsockopt::sys_getsockopt, connect::sys_connect,
listen::sys_listen, pread64::sys_pread64, recvfrom::sys_recvfrom, sendto::sys_sendto, eventfd::{sys_eventfd, sys_eventfd2},
setfsgid::sys_setfsgid, setfsuid::sys_setfsuid, setgid::sys_setgid, setgroups::sys_setgroups, execve::sys_execveat,
setregid::sys_setregid, setresgid::sys_setresgid, setresuid::sys_setresuid, getgroups::sys_getgroups,
setreuid::sys_setreuid, setsid::sys_setsid, setsockopt::sys_setsockopt, setuid::sys_setuid, getpeername::sys_getpeername,
shutdown::sys_shutdown, sigaltstack::sys_sigaltstack, socket::sys_socket, getrandom::sys_getrandom,
getresgid::sys_getresgid,
getresuid::sys_getresuid,
getsid::sys_getsid,
getsockname::sys_getsockname,
getsockopt::sys_getsockopt,
listen::sys_listen,
pread64::sys_pread64,
recvfrom::sys_recvfrom,
sendto::sys_sendto,
setfsgid::sys_setfsgid,
setfsuid::sys_setfsuid,
setgid::sys_setgid,
setgroups::sys_setgroups,
setregid::sys_setregid,
setresgid::sys_setresgid,
setresuid::sys_setresuid,
setreuid::sys_setreuid,
setsid::sys_setsid,
setsockopt::sys_setsockopt,
setuid::sys_setuid,
shutdown::sys_shutdown,
sigaltstack::sys_sigaltstack,
socket::sys_socket,
socketpair::sys_socketpair, socketpair::sys_socketpair,
}; };
use crate::{ use crate::{
@ -115,6 +138,7 @@ mod connect;
mod constants; mod constants;
mod dup; mod dup;
mod epoll; mod epoll;
mod eventfd;
mod execve; mod execve;
mod exit; mod exit;
mod exit_group; mod exit_group;
@ -363,6 +387,8 @@ define_syscall_nums!(
SYS_SET_ROBUST_LIST = 273, SYS_SET_ROBUST_LIST = 273,
SYS_UTIMENSAT = 280, SYS_UTIMENSAT = 280,
SYS_EPOLL_PWAIT = 281, SYS_EPOLL_PWAIT = 281,
SYS_EVENTFD = 284,
SYS_EVENTFD2 = 290,
SYS_EPOLL_CREATE1 = 291, SYS_EPOLL_CREATE1 = 291,
SYS_PIPE2 = 293, SYS_PIPE2 = 293,
SYS_PRLIMIT64 = 302, SYS_PRLIMIT64 = 302,
@ -553,6 +579,8 @@ pub fn syscall_dispatch(
SYS_SET_ROBUST_LIST => syscall_handler!(2, sys_set_robust_list, args), SYS_SET_ROBUST_LIST => syscall_handler!(2, sys_set_robust_list, args),
SYS_UTIMENSAT => syscall_handler!(4, sys_utimensat, args), SYS_UTIMENSAT => syscall_handler!(4, sys_utimensat, args),
SYS_EPOLL_PWAIT => syscall_handler!(5, sys_epoll_pwait, args), SYS_EPOLL_PWAIT => syscall_handler!(5, sys_epoll_pwait, args),
SYS_EVENTFD => syscall_handler!(1, sys_eventfd, args),
SYS_EVENTFD2 => syscall_handler!(2, sys_eventfd2, args),
SYS_EPOLL_CREATE1 => syscall_handler!(1, sys_epoll_create1, args), SYS_EPOLL_CREATE1 => syscall_handler!(1, sys_epoll_create1, args),
SYS_PIPE2 => syscall_handler!(2, sys_pipe2, args), SYS_PIPE2 => syscall_handler!(2, sys_pipe2, args),
SYS_PRLIMIT64 => syscall_handler!(4, sys_prlimit64, args), SYS_PRLIMIT64 => syscall_handler!(4, sys_prlimit64, args),

View File

@ -9,9 +9,13 @@ pub fn sys_read(fd: FileDesc, user_buf_addr: Vaddr, buf_len: usize) -> Result<Sy
"fd = {}, user_buf_ptr = 0x{:x}, buf_len = 0x{:x}", "fd = {}, user_buf_ptr = 0x{:x}, buf_len = 0x{:x}",
fd, user_buf_addr, buf_len fd, user_buf_addr, buf_len
); );
let file = {
let current = current!(); let current = current!();
let file_table = current.file_table().lock(); let file_table = current.file_table().lock();
let file = file_table.get_file(fd)?; file_table.get_file(fd)?.clone()
};
let mut read_buf = vec![0u8; buf_len]; let mut read_buf = vec![0u8; buf_len];
let read_len = file.read(&mut read_buf)?; let read_len = file.read(&mut read_buf)?;
write_bytes_to_user(user_buf_addr, &read_buf)?; write_bytes_to_user(user_buf_addr, &read_buf)?;

View File

@ -16,9 +16,12 @@ pub fn sys_write(fd: FileDesc, user_buf_ptr: Vaddr, user_buf_len: usize) -> Resu
fd, user_buf_ptr, user_buf_len fd, user_buf_ptr, user_buf_len
); );
let file = {
let current = current!(); let current = current!();
let file_table = current.file_table().lock(); let file_table = current.file_table().lock();
let file = file_table.get_file(fd)?; file_table.get_file(fd)?.clone()
};
if user_buf_len == 0 { if user_buf_len == 0 {
return Ok(SyscallReturn::Return(0)); return Ok(SyscallReturn::Return(0));
} }

View File

@ -11,6 +11,7 @@ REGRESSION_BUILD_DIR ?= $(INITRAMFS)/regression
# These test apps are sorted by name # These test apps are sorted by name
TEST_APPS := \ TEST_APPS := \
execve \ execve \
eventfd2 \
fork \ fork \
fork_c \ fork_c \
getpid \ getpid \

View File

@ -0,0 +1,5 @@
# SPDX-License-Identifier: MPL-2.0
include ../test_common.mk
EXTRA_C_FLAGS :=

View File

@ -0,0 +1,55 @@
// SPDX-License-Identifier: MPL-2.0
#include <err.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <unistd.h>
/*
 * Regression test for eventfd: a child process writes several values to an
 * eventfd created with no flags, and the parent then performs a single read,
 * which must return the sum of all written values.
 */
int main()
{
	int efd;
	uint64_t u;
	ssize_t s;
	uint64_t values[] = { 11, 222, 3333 };
	size_t length = sizeof(values) / sizeof(values[0]);

	efd = eventfd(0, 0);
	if (efd == -1)
		err(EXIT_FAILURE, "eventfd");

	switch (fork()) {
	case 0:
		/* Child: add each value to the eventfd counter. */
		for (size_t j = 0; j < length; j++) {
			printf("Child writing %" PRIu64 " to efd\n", values[j]);
			u = values[j];
			s = write(efd, &u, sizeof(uint64_t));
			if (s != sizeof(uint64_t))
				err(EXIT_FAILURE, "write");
		}
		printf("Child completed write loop\n");

		exit(EXIT_SUCCESS);

	default:
		/* Parent: give the child time to finish all of its writes. */
		sleep(2);

		printf("Parent about to read\n");
		s = read(efd, &u, sizeof(uint64_t));
		if (s != sizeof(uint64_t))
			err(EXIT_FAILURE, "read");
		printf("Parent read %" PRIu64 " (%#" PRIx64 ") from efd\n", u,
		       u);
		/*
		 * A value mismatch is not a failed system call, so use errx():
		 * err() would append an unrelated errno description.
		 */
		if (u != 11 + 222 + 3333)
			errx(EXIT_FAILURE, "read eventfd");
		exit(EXIT_SUCCESS);

	case -1:
		err(EXIT_FAILURE, "fork");
	}
}

View File

@ -8,7 +8,7 @@ SCRIPT_DIR=/regression
cd ${SCRIPT_DIR}/.. cd ${SCRIPT_DIR}/..
echo "Start process test......" echo "Start process test......"
tests="hello_world/hello_world fork/fork execve/execve fork_c/fork signal_c/signal_test pthread/pthread_test hello_pie/hello pty/open_pty getpid/getpid" tests="hello_world/hello_world fork/fork execve/execve fork_c/fork signal_c/signal_test pthread/pthread_test hello_pie/hello pty/open_pty getpid/getpid eventfd2/eventfd2"
for testcase in ${tests} for testcase in ${tests}
do do
echo "Running test ${testcase}......" echo "Running test ${testcase}......"

View File

@ -11,6 +11,7 @@ TESTS ?= \
chown_test \ chown_test \
chroot_test \ chroot_test \
epoll_test \ epoll_test \
eventfd_test \
fsync_test \ fsync_test \
getdents_test \ getdents_test \
link_test \ link_test \

View File

@ -0,0 +1,3 @@
EventfdTest.IllegalPwrite
EventfdTest.SpliceFromPipePartialSucceeds
EventfdTest.NotifyNonZero_NoRandomSave