mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-19 20:46:35 +00:00
Add syscall eventfd and eventfd2
This commit is contained in:
committed by
Tate, Hongliang Tian
parent
ccc4e6ec6b
commit
078f9a8891
@ -304,13 +304,13 @@ provided by Linux on x86-64 architecture.
|
||||
| 281 | epoll_pwait | ✅ |
|
||||
| 282 | signalfd | ❌ |
|
||||
| 283 | timerfd_create | ❌ |
|
||||
| 284 | eventfd | ❌ |
|
||||
| 284 | eventfd | ✅ |
|
||||
| 285 | fallocate | ❌ |
|
||||
| 286 | timerfd_settime | ❌ |
|
||||
| 287 | timerfd_gettime | ❌ |
|
||||
| 288 | accept4 | ❌ |
|
||||
| 289 | signalfd4 | ❌ |
|
||||
| 290 | eventfd2 | ❌ |
|
||||
| 290 | eventfd2 | ✅ |
|
||||
| 291 | epoll_create1 | ✅ |
|
||||
| 292 | dup3 | ❌ |
|
||||
| 293 | pipe2 | ✅ |
|
||||
|
@ -80,7 +80,7 @@ pub trait FileLike: Send + Sync + Any {
|
||||
}
|
||||
|
||||
fn seek(&self, seek_from: SeekFrom) -> Result<usize> {
|
||||
return_errno_with_message!(Errno::EINVAL, "seek is not supported");
|
||||
return_errno_with_message!(Errno::ESPIPE, "seek is not supported");
|
||||
}
|
||||
|
||||
fn clean_for_close(&self) -> Result<()> {
|
||||
|
259
kernel/aster-nix/src/syscall/eventfd.rs
Normal file
259
kernel/aster-nix/src/syscall/eventfd.rs
Normal file
@ -0,0 +1,259 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
//! `eventfd()` creates an "eventfd object" (we name it as `EventFile`)
|
||||
//! which serves as a mechanism for event wait/notify.
|
||||
//!
|
||||
//! `EventFile` holds a u64 integer counter.
|
||||
//! Writing to `EventFile` increments the counter by the written value.
|
||||
//! Reading from `EventFile` returns the current counter value and resets it to zero
|
||||
//! (or, if the `EFD_SEMAPHORE` flag is set, returns 1 and
|
||||
//! decrements the counter by one).
|
||||
//! Read and write operations may block unless the `EFD_NONBLOCK` flag is set.
|
||||
//!
|
||||
//! For more detailed information about this syscall,
|
||||
//! refer to the man 2 eventfd documentation.
|
||||
//!
|
||||
|
||||
use super::{SyscallReturn, SYS_EVENTFD, SYS_EVENTFD2};
|
||||
use crate::{
|
||||
events::{IoEvents, Observer},
|
||||
fs::{
|
||||
file_handle::FileLike,
|
||||
file_table::{FdFlags, FileDesc},
|
||||
utils::{CreationFlags, StatusFlags},
|
||||
},
|
||||
log_syscall_entry,
|
||||
prelude::*,
|
||||
process::signal::{Pauser, Pollee, Poller},
|
||||
};
|
||||
|
||||
pub fn sys_eventfd(init_val: u64) -> Result<SyscallReturn> {
|
||||
log_syscall_entry!(SYS_EVENTFD);
|
||||
debug!("init_val = 0x{:x}", init_val);
|
||||
|
||||
let fd = do_sys_eventfd2(init_val, Flags::empty());
|
||||
|
||||
Ok(SyscallReturn::Return(fd as _))
|
||||
}
|
||||
|
||||
pub fn sys_eventfd2(init_val: u64, flags: u32) -> Result<SyscallReturn> {
|
||||
log_syscall_entry!(SYS_EVENTFD2);
|
||||
trace!("raw flags = {}", flags);
|
||||
let flags = Flags::from_bits(flags)
|
||||
.ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown flags"))?;
|
||||
debug!("init_val = 0x{:x}, flags = {:?}", init_val, flags);
|
||||
|
||||
let fd = do_sys_eventfd2(init_val, flags);
|
||||
|
||||
Ok(SyscallReturn::Return(fd as _))
|
||||
}
|
||||
|
||||
fn do_sys_eventfd2(init_val: u64, flags: Flags) -> FileDesc {
|
||||
let event_file = EventFile::new(init_val, flags);
|
||||
let fd = {
|
||||
let current = current!();
|
||||
let mut file_table = current.file_table().lock();
|
||||
let fd_flags = if flags.contains(Flags::EFD_CLOEXEC) {
|
||||
FdFlags::CLOEXEC
|
||||
} else {
|
||||
FdFlags::empty()
|
||||
};
|
||||
file_table.insert(Arc::new(event_file), fd_flags)
|
||||
};
|
||||
fd
|
||||
}
|
||||
|
||||
bitflags! {
    /// Flags accepted by `eventfd2` and stored in an `EventFile`.
    struct Flags: u32 {
        /// Reads return 1 and decrement the counter by one instead of
        /// returning the whole counter and resetting it.
        const EFD_SEMAPHORE = 1 << 0;
        /// The new descriptor is inserted with `FdFlags::CLOEXEC`.
        const EFD_CLOEXEC = CreationFlags::O_CLOEXEC.bits();
        /// Reads and writes return `EAGAIN` instead of blocking.
        const EFD_NONBLOCK = StatusFlags::O_NONBLOCK.bits();
    }
}
|
||||
|
||||
struct EventFile {
|
||||
counter: Mutex<u64>,
|
||||
pollee: Pollee,
|
||||
flags: Mutex<Flags>,
|
||||
write_pauser: Arc<Pauser>,
|
||||
}
|
||||
|
||||
impl EventFile {
|
||||
const MAX_COUNTER_VALUE: u64 = u64::MAX - 1;
|
||||
|
||||
fn new(init_val: u64, flags: Flags) -> Self {
|
||||
let counter = Mutex::new(init_val);
|
||||
let pollee = Pollee::new(IoEvents::OUT);
|
||||
let write_pauser = Pauser::new();
|
||||
Self {
|
||||
counter,
|
||||
pollee,
|
||||
flags: Mutex::new(flags),
|
||||
write_pauser,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_nonblocking(&self) -> bool {
|
||||
self.flags.lock().contains(Flags::EFD_NONBLOCK)
|
||||
}
|
||||
|
||||
fn update_io_state(&self, counter: &MutexGuard<u64>) {
|
||||
let is_readable = **counter != 0;
|
||||
|
||||
// if it is possible to write a value of at least "1"
|
||||
// without blocking, the file is writable
|
||||
let is_writable = **counter < Self::MAX_COUNTER_VALUE;
|
||||
|
||||
if is_writable {
|
||||
if is_readable {
|
||||
self.pollee.add_events(IoEvents::IN | IoEvents::OUT);
|
||||
} else {
|
||||
self.pollee.add_events(IoEvents::OUT);
|
||||
self.pollee.del_events(IoEvents::IN);
|
||||
}
|
||||
|
||||
self.write_pauser.resume_all();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if is_readable {
|
||||
self.pollee.add_events(IoEvents::IN);
|
||||
self.pollee.del_events(IoEvents::OUT);
|
||||
return;
|
||||
}
|
||||
|
||||
self.pollee.del_events(IoEvents::IN | IoEvents::OUT);
|
||||
|
||||
// TODO: deal with overflow logic
|
||||
}
|
||||
|
||||
/// Adds val to the counter.
|
||||
///
|
||||
/// If the new_value is overflowed or exceeds MAX_COUNTER_VALUE, the counter value
|
||||
/// will not be modified, and this method returns `Err(EINVAL)`.
|
||||
fn add_counter_val(&self, val: u64) -> Result<()> {
|
||||
let mut counter = self.counter.lock();
|
||||
|
||||
let new_value = (*counter)
|
||||
.checked_add(val)
|
||||
.ok_or_else(|| Error::with_message(Errno::EINVAL, "arithmetic overflow"))?;
|
||||
|
||||
if new_value <= Self::MAX_COUNTER_VALUE {
|
||||
*counter = new_value;
|
||||
self.update_io_state(&counter);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
return_errno_with_message!(Errno::EINVAL, "new value exceeds MAX_COUNTER_VALUE");
|
||||
}
|
||||
}
|
||||
|
||||
impl FileLike for EventFile {
|
||||
fn read(&self, buf: &mut [u8]) -> Result<usize> {
|
||||
let read_len = core::mem::size_of::<u64>();
|
||||
if buf.len() < read_len {
|
||||
return_errno_with_message!(Errno::EINVAL, "buf len is less len u64 size");
|
||||
}
|
||||
|
||||
loop {
|
||||
let mut counter = self.counter.lock();
|
||||
|
||||
// Wait until the counter becomes non-zero
|
||||
if *counter == 0 {
|
||||
if self.is_nonblocking() {
|
||||
return_errno_with_message!(Errno::EAGAIN, "try reading event file again");
|
||||
}
|
||||
|
||||
self.update_io_state(&counter);
|
||||
drop(counter);
|
||||
|
||||
let poller = Poller::new();
|
||||
if self.pollee.poll(IoEvents::IN, Some(&poller)).is_empty() {
|
||||
poller.wait()?;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Copy value from counter, and set the new counter value
|
||||
if self.flags.lock().contains(Flags::EFD_SEMAPHORE) {
|
||||
buf[..read_len].copy_from_slice(1u64.as_bytes());
|
||||
*counter -= 1;
|
||||
} else {
|
||||
buf[..read_len].copy_from_slice((*counter).as_bytes());
|
||||
*counter = 0;
|
||||
}
|
||||
|
||||
self.update_io_state(&counter);
|
||||
break;
|
||||
}
|
||||
|
||||
Ok(read_len)
|
||||
}
|
||||
|
||||
fn write(&self, buf: &[u8]) -> Result<usize> {
|
||||
let write_len = core::mem::size_of::<u64>();
|
||||
if buf.len() < write_len {
|
||||
return_errno_with_message!(Errno::EINVAL, "buf len is less than the size of u64");
|
||||
}
|
||||
|
||||
let supplied_value = u64::from_bytes(buf);
|
||||
|
||||
// Try to add counter val at first
|
||||
if self.add_counter_val(supplied_value).is_ok() {
|
||||
return Ok(write_len);
|
||||
}
|
||||
|
||||
if self.is_nonblocking() {
|
||||
return_errno_with_message!(Errno::EAGAIN, "try writing to event file again");
|
||||
}
|
||||
|
||||
// Wait until counter can be added val to
|
||||
self.write_pauser
|
||||
.pause_until(|| self.add_counter_val(supplied_value).ok())?;
|
||||
|
||||
Ok(write_len)
|
||||
}
|
||||
|
||||
fn poll(&self, mask: IoEvents, poller: Option<&Poller>) -> IoEvents {
|
||||
self.pollee.poll(mask, poller)
|
||||
}
|
||||
|
||||
fn status_flags(&self) -> StatusFlags {
|
||||
if self.is_nonblocking() {
|
||||
StatusFlags::O_NONBLOCK
|
||||
} else {
|
||||
StatusFlags::empty()
|
||||
}
|
||||
}
|
||||
|
||||
fn set_status_flags(&self, new_flags: StatusFlags) -> Result<()> {
|
||||
let mut flags = self.flags.lock();
|
||||
|
||||
if new_flags.contains(StatusFlags::O_NONBLOCK) {
|
||||
*flags |= Flags::EFD_NONBLOCK;
|
||||
} else {
|
||||
*flags &= !Flags::EFD_NONBLOCK;
|
||||
}
|
||||
|
||||
// TODO: deal with other flags
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn register_observer(
|
||||
&self,
|
||||
observer: Weak<dyn crate::events::Observer<IoEvents>>,
|
||||
mask: IoEvents,
|
||||
) -> Result<()> {
|
||||
self.pollee.register_observer(observer, mask);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn unregister_observer(
|
||||
&self,
|
||||
observer: &Weak<dyn Observer<IoEvents>>,
|
||||
) -> Option<Weak<dyn Observer<IoEvents>>> {
|
||||
self.pollee.unregister_observer(observer)
|
||||
}
|
||||
}
|
@ -5,15 +5,38 @@
|
||||
use aster_frame::cpu::UserContext;
|
||||
|
||||
use self::{
|
||||
accept::sys_accept, alarm::sys_alarm, bind::sys_bind, connect::sys_connect,
|
||||
execve::sys_execveat, getgroups::sys_getgroups, getpeername::sys_getpeername,
|
||||
getrandom::sys_getrandom, getresgid::sys_getresgid, getresuid::sys_getresuid,
|
||||
getsid::sys_getsid, getsockname::sys_getsockname, getsockopt::sys_getsockopt,
|
||||
listen::sys_listen, pread64::sys_pread64, recvfrom::sys_recvfrom, sendto::sys_sendto,
|
||||
setfsgid::sys_setfsgid, setfsuid::sys_setfsuid, setgid::sys_setgid, setgroups::sys_setgroups,
|
||||
setregid::sys_setregid, setresgid::sys_setresgid, setresuid::sys_setresuid,
|
||||
setreuid::sys_setreuid, setsid::sys_setsid, setsockopt::sys_setsockopt, setuid::sys_setuid,
|
||||
shutdown::sys_shutdown, sigaltstack::sys_sigaltstack, socket::sys_socket,
|
||||
accept::sys_accept,
|
||||
alarm::sys_alarm,
|
||||
bind::sys_bind,
|
||||
connect::sys_connect,
|
||||
eventfd::{sys_eventfd, sys_eventfd2},
|
||||
execve::sys_execveat,
|
||||
getgroups::sys_getgroups,
|
||||
getpeername::sys_getpeername,
|
||||
getrandom::sys_getrandom,
|
||||
getresgid::sys_getresgid,
|
||||
getresuid::sys_getresuid,
|
||||
getsid::sys_getsid,
|
||||
getsockname::sys_getsockname,
|
||||
getsockopt::sys_getsockopt,
|
||||
listen::sys_listen,
|
||||
pread64::sys_pread64,
|
||||
recvfrom::sys_recvfrom,
|
||||
sendto::sys_sendto,
|
||||
setfsgid::sys_setfsgid,
|
||||
setfsuid::sys_setfsuid,
|
||||
setgid::sys_setgid,
|
||||
setgroups::sys_setgroups,
|
||||
setregid::sys_setregid,
|
||||
setresgid::sys_setresgid,
|
||||
setresuid::sys_setresuid,
|
||||
setreuid::sys_setreuid,
|
||||
setsid::sys_setsid,
|
||||
setsockopt::sys_setsockopt,
|
||||
setuid::sys_setuid,
|
||||
shutdown::sys_shutdown,
|
||||
sigaltstack::sys_sigaltstack,
|
||||
socket::sys_socket,
|
||||
socketpair::sys_socketpair,
|
||||
};
|
||||
use crate::{
|
||||
@ -115,6 +138,7 @@ mod connect;
|
||||
mod constants;
|
||||
mod dup;
|
||||
mod epoll;
|
||||
mod eventfd;
|
||||
mod execve;
|
||||
mod exit;
|
||||
mod exit_group;
|
||||
@ -363,6 +387,8 @@ define_syscall_nums!(
|
||||
SYS_SET_ROBUST_LIST = 273,
|
||||
SYS_UTIMENSAT = 280,
|
||||
SYS_EPOLL_PWAIT = 281,
|
||||
SYS_EVENTFD = 284,
|
||||
SYS_EVENTFD2 = 290,
|
||||
SYS_EPOLL_CREATE1 = 291,
|
||||
SYS_PIPE2 = 293,
|
||||
SYS_PRLIMIT64 = 302,
|
||||
@ -553,6 +579,8 @@ pub fn syscall_dispatch(
|
||||
SYS_SET_ROBUST_LIST => syscall_handler!(2, sys_set_robust_list, args),
|
||||
SYS_UTIMENSAT => syscall_handler!(4, sys_utimensat, args),
|
||||
SYS_EPOLL_PWAIT => syscall_handler!(5, sys_epoll_pwait, args),
|
||||
SYS_EVENTFD => syscall_handler!(1, sys_eventfd, args),
|
||||
SYS_EVENTFD2 => syscall_handler!(2, sys_eventfd2, args),
|
||||
SYS_EPOLL_CREATE1 => syscall_handler!(1, sys_epoll_create1, args),
|
||||
SYS_PIPE2 => syscall_handler!(2, sys_pipe2, args),
|
||||
SYS_PRLIMIT64 => syscall_handler!(4, sys_prlimit64, args),
|
||||
|
@ -9,9 +9,13 @@ pub fn sys_read(fd: FileDesc, user_buf_addr: Vaddr, buf_len: usize) -> Result<Sy
|
||||
"fd = {}, user_buf_ptr = 0x{:x}, buf_len = 0x{:x}",
|
||||
fd, user_buf_addr, buf_len
|
||||
);
|
||||
|
||||
let file = {
|
||||
let current = current!();
|
||||
let file_table = current.file_table().lock();
|
||||
let file = file_table.get_file(fd)?;
|
||||
file_table.get_file(fd)?.clone()
|
||||
};
|
||||
|
||||
let mut read_buf = vec![0u8; buf_len];
|
||||
let read_len = file.read(&mut read_buf)?;
|
||||
write_bytes_to_user(user_buf_addr, &read_buf)?;
|
||||
|
@ -16,9 +16,12 @@ pub fn sys_write(fd: FileDesc, user_buf_ptr: Vaddr, user_buf_len: usize) -> Resu
|
||||
fd, user_buf_ptr, user_buf_len
|
||||
);
|
||||
|
||||
let file = {
|
||||
let current = current!();
|
||||
let file_table = current.file_table().lock();
|
||||
let file = file_table.get_file(fd)?;
|
||||
file_table.get_file(fd)?.clone()
|
||||
};
|
||||
|
||||
if user_buf_len == 0 {
|
||||
return Ok(SyscallReturn::Return(0));
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ REGRESSION_BUILD_DIR ?= $(INITRAMFS)/regression
|
||||
# These test apps are sorted by name
|
||||
TEST_APPS := \
|
||||
execve \
|
||||
eventfd2 \
|
||||
fork \
|
||||
fork_c \
|
||||
getpid \
|
||||
|
5
regression/apps/eventfd2/Makefile
Normal file
5
regression/apps/eventfd2/Makefile
Normal file
@ -0,0 +1,5 @@
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
include ../test_common.mk
|
||||
|
||||
EXTRA_C_FLAGS :=
|
55
regression/apps/eventfd2/eventfd2.c
Normal file
55
regression/apps/eventfd2/eventfd2.c
Normal file
@ -0,0 +1,55 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
#include <err.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * Regression test for eventfd.
 *
 * The child writes three values to a shared eventfd; the parent then reads
 * once and expects to get the sum of all three values back.
 */
int main()
{
	int efd;
	uint64_t u;
	ssize_t s;

	uint64_t values[] = { 11, 222, 3333 };
	size_t length = sizeof(values) / sizeof(values[0]);

	efd = eventfd(0, 0);
	if (efd == -1)
		err(EXIT_FAILURE, "eventfd");

	switch (fork()) {
	case 0:
		for (size_t j = 0; j < length; j++) {
			/* values[] holds uint64_t, so print with PRIu64 rather than %ld */
			printf("Child writing %" PRIu64 " to efd\n", values[j]);
			u = values[j];
			s = write(efd, &u, sizeof(uint64_t));
			if (s != sizeof(uint64_t))
				err(EXIT_FAILURE, "write");
		}

		printf("Child completed write loop\n");

		exit(EXIT_SUCCESS);

	default:
		/*
		 * Give the child time to finish all of its writes so that the
		 * single read below observes the accumulated sum.
		 */
		sleep(2);

		printf("Parent about to read\n");
		s = read(efd, &u, sizeof(uint64_t));
		if (s != sizeof(uint64_t))
			err(EXIT_FAILURE, "read");
		printf("Parent read %" PRIu64 " (%#" PRIx64 ") from efd\n", u,
		       u);
		if (u != 11 + 222 + 3333) {
			/*
			 * A wrong value is not a libc failure, so use errx()
			 * to avoid appending an unrelated errno message.
			 */
			errx(EXIT_FAILURE, "read eventfd");
		}
		exit(EXIT_SUCCESS);

	case -1:
		err(EXIT_FAILURE, "fork");
	}
}
|
@ -8,7 +8,7 @@ SCRIPT_DIR=/regression
|
||||
cd ${SCRIPT_DIR}/..
|
||||
|
||||
echo "Start process test......"
|
||||
tests="hello_world/hello_world fork/fork execve/execve fork_c/fork signal_c/signal_test pthread/pthread_test hello_pie/hello pty/open_pty getpid/getpid"
|
||||
tests="hello_world/hello_world fork/fork execve/execve fork_c/fork signal_c/signal_test pthread/pthread_test hello_pie/hello pty/open_pty getpid/getpid eventfd2/eventfd2"
|
||||
for testcase in ${tests}
|
||||
do
|
||||
echo "Running test ${testcase}......"
|
||||
|
@ -11,6 +11,7 @@ TESTS ?= \
|
||||
chown_test \
|
||||
chroot_test \
|
||||
epoll_test \
|
||||
eventfd_test \
|
||||
fsync_test \
|
||||
getdents_test \
|
||||
link_test \
|
||||
|
3
regression/syscall_test/blocklists/eventfd_test
Normal file
3
regression/syscall_test/blocklists/eventfd_test
Normal file
@ -0,0 +1,3 @@
|
||||
EventfdTest.IllegalPwrite
|
||||
EventfdTest.SpliceFromPipePartialSucceeds
|
||||
EventfdTest.NotifyNonZero_NoRandomSave
|
Reference in New Issue
Block a user