diff --git a/services/libs/jinux-std/src/syscall/mod.rs b/services/libs/jinux-std/src/syscall/mod.rs
index 7e60c784f..6ee8634f1 100644
--- a/services/libs/jinux-std/src/syscall/mod.rs
+++ b/services/libs/jinux-std/src/syscall/mod.rs
@@ -49,6 +49,7 @@ use crate::syscall::rt_sigaction::sys_rt_sigaction;
 use crate::syscall::rt_sigprocmask::sys_rt_sigprocmask;
 use crate::syscall::rt_sigreturn::sys_rt_sigreturn;
 use crate::syscall::sched_yield::sys_sched_yield;
+use crate::syscall::select::sys_select;
 use crate::syscall::set_robust_list::sys_set_robust_list;
 use crate::syscall::set_tid_address::sys_set_tid_address;
 use crate::syscall::setpgid::sys_setpgid;
@@ -118,6 +119,7 @@ mod rt_sigaction;
 mod rt_sigprocmask;
 mod rt_sigreturn;
 mod sched_yield;
+mod select;
 mod set_robust_list;
 mod set_tid_address;
 mod setpgid;
@@ -186,6 +188,7 @@ define_syscall_nums!(
     SYS_WRITEV = 20,
     SYS_ACCESS = 21,
     SYS_PIPE = 22,
+    SYS_SELECT = 23,
     SYS_SCHED_YIELD = 24,
     SYS_MADVISE = 28,
     SYS_DUP = 32,
@@ -324,6 +327,7 @@ pub fn syscall_dispatch(
         SYS_WRITEV => syscall_handler!(3, sys_writev, args),
         SYS_ACCESS => syscall_handler!(2, sys_access, args),
         SYS_PIPE => syscall_handler!(1, sys_pipe, args),
+        SYS_SELECT => syscall_handler!(5, sys_select, args),
         SYS_SCHED_YIELD => syscall_handler!(0, sys_sched_yield),
         SYS_MADVISE => syscall_handler!(3, sys_madvise, args),
         SYS_DUP => syscall_handler!(1, sys_dup, args),
diff --git a/services/libs/jinux-std/src/syscall/poll.rs b/services/libs/jinux-std/src/syscall/poll.rs
index 730699e56..c1cdebd2b 100644
--- a/services/libs/jinux-std/src/syscall/poll.rs
+++ b/services/libs/jinux-std/src/syscall/poll.rs
@@ -51,7 +51,7 @@ pub fn sys_poll(fds: Vaddr, nfds: u64, timeout: i32) -> Result<SyscallReturn> {
     Ok(SyscallReturn::Return(num_revents as _))
 }
 
-fn do_poll(poll_fds: &[PollFd], timeout: Option<Duration>) -> Result<usize> {
+pub fn do_poll(poll_fds: &[PollFd], timeout: Option<Duration>) -> Result<usize> {
     // The main loop of polling
     let poller = Poller::new();
     loop {
@@ -106,13 +106,22 @@ struct c_pollfd {
 }
 
 #[derive(Debug, Clone)]
-struct PollFd {
+pub struct PollFd {
     fd: Option<FileDescripter>,
     events: IoEvents,
     revents: Cell<IoEvents>,
 }
 
 impl PollFd {
+    pub fn new(fd: Option<FileDescripter>, events: IoEvents) -> Self {
+        let revents = Cell::new(IoEvents::empty());
+        Self {
+            fd,
+            events,
+            revents,
+        }
+    }
+
     pub fn fd(&self) -> Option<FileDescripter> {
         self.fd
     }
diff --git a/services/libs/jinux-std/src/syscall/select.rs b/services/libs/jinux-std/src/syscall/select.rs
new file mode 100644
index 000000000..b4fa4375c
--- /dev/null
+++ b/services/libs/jinux-std/src/syscall/select.rs
@@ -0,0 +1,205 @@
+use core::time::Duration;
+
+use crate::fs::file_table::FileDescripter;
+use crate::fs::utils::IoEvents;
+use crate::log_syscall_entry;
+use crate::prelude::*;
+use crate::time::timeval_t;
+use crate::util::{read_val_from_user, write_val_to_user};
+
+use super::poll::{do_poll, PollFd};
+use super::SyscallReturn;
+use super::SYS_SELECT;
+
+pub fn sys_select(
+    nfds: FileDescripter,
+    readfds_addr: Vaddr,
+    writefds_addr: Vaddr,
+    exceptfds_addr: Vaddr,
+    timeval_addr: Vaddr,
+) -> Result<SyscallReturn> {
+    log_syscall_entry!(SYS_SELECT);
+
+    if nfds < 0 || nfds as usize > FD_SETSIZE {
+        return_errno_with_message!(Errno::EINVAL, "nfds is negative or exceeds the FD_SETSIZE");
+    }
+
+    let get_fdset = |fdset_addr: Vaddr| -> Result<Option<FdSet>> {
+        let fdset = if fdset_addr == 0 {
+            None
+        } else {
+            let fdset = read_val_from_user::<FdSet>(fdset_addr)?;
+            Some(fdset)
+        };
+        Ok(fdset)
+    };
+    let mut readfds = get_fdset(readfds_addr)?;
+    let mut writefds = get_fdset(writefds_addr)?;
+    let mut exceptfds = get_fdset(exceptfds_addr)?;
+
+    let timeout = if timeval_addr == 0 {
+        None
+    } else {
+        let timeval = read_val_from_user::<timeval_t>(timeval_addr)?;
+        Some(Duration::from(timeval))
+    };
+
+    debug!(
+        "nfds = {}, readfds = {:?}, writefds = {:?}, exceptfds = {:?}, timeout = {:?}",
+        nfds, readfds, writefds, exceptfds, timeout
+    );
+
+    let num_revents = do_select(
+        nfds,
+        readfds.as_mut(),
+        writefds.as_mut(),
+        exceptfds.as_mut(),
+        timeout,
+    )?;
+
+    let set_fdset = |fdset_addr: Vaddr, fdset: Option<FdSet>| -> Result<()> {
+        if let Some(fdset) = fdset {
+            debug_assert!(fdset_addr != 0);
+            write_val_to_user(fdset_addr, &fdset)?;
+        }
+        Ok(())
+    };
+    set_fdset(readfds_addr, readfds)?;
+    set_fdset(writefds_addr, writefds)?;
+    set_fdset(exceptfds_addr, exceptfds)?;
+
+    Ok(SyscallReturn::Return(num_revents as _))
+}
+
+fn do_select(
+    nfds: FileDescripter,
+    mut readfds: Option<&mut FdSet>,
+    mut writefds: Option<&mut FdSet>,
+    mut exceptfds: Option<&mut FdSet>,
+    timeout: Option<Duration>,
+) -> Result<usize> {
+    // Convert the FdSet to an array of PollFd
+    let poll_fds = {
+        let mut poll_fds = Vec::new();
+        for fd in 0..nfds {
+            let events = {
+                let readable = readfds.as_ref().map_or(false, |fds| fds.is_set(fd));
+                let writable = writefds.as_ref().map_or(false, |fds| fds.is_set(fd));
+                let except = exceptfds.as_ref().map_or(false, |fds| fds.is_set(fd));
+                convert_rwe_to_events(readable, writable, except)
+            };
+
+            if events.is_empty() {
+                continue;
+            }
+
+            let poll_fd = PollFd::new(Some(fd), events);
+            poll_fds.push(poll_fd);
+        }
+        poll_fds
+    };
+
+    // Clear up the three input fd_set's, which will be used for output as well
+    readfds.as_mut().map_or((), |fds| fds.clear());
+    writefds.as_mut().map_or((), |fds| fds.clear());
+    exceptfds.as_mut().map_or((), |fds| fds.clear());
+
+    // Do the poll syscall that is equivalent to the select syscall
+    let num_revents = do_poll(&poll_fds, timeout)?;
+    if num_revents == 0 {
+        return Ok(0);
+    }
+
+    // Convert poll's pollfd results to select's fd_set results
+    let mut total_revents = 0;
+    for poll_fd in &poll_fds {
+        let fd = poll_fd.fd().unwrap();
+        let revents = poll_fd.revents().get();
+        let (readable, writable, except) = convert_events_to_rwe(&revents);
+        if let Some(ref mut fds) = readfds && readable {
+            fds.set(fd)?;
+            total_revents += 1;
+        }
+        if let Some(ref mut fds) = writefds && writable {
+            fds.set(fd)?;
+            total_revents += 1;
+        }
+        if let Some(ref mut fds) = exceptfds && except {
+            fds.set(fd)?;
+            total_revents += 1;
+        }
+    }
+    Ok(total_revents)
+}
+
+// Convert select's rwe input to poll's IoEvents input according to Linux's
+// behavior.
+fn convert_rwe_to_events(readable: bool, writable: bool, except: bool) -> IoEvents {
+    let mut events = IoEvents::empty();
+    if readable {
+        events |= IoEvents::IN;
+    }
+    if writable {
+        events |= IoEvents::OUT;
+    }
+    if except {
+        events |= IoEvents::PRI;
+    }
+    events
+}
+
+// Convert poll's IoEvents results to select's rwe results according to Linux's
+// behavior.
+fn convert_events_to_rwe(events: &IoEvents) -> (bool, bool, bool) {
+    let readable = events.intersects(IoEvents::IN | IoEvents::HUP | IoEvents::ERR);
+    let writable = events.intersects(IoEvents::OUT | IoEvents::ERR);
+    let except = events.contains(IoEvents::PRI);
+    (readable, writable, except)
+}
+
+const FD_SETSIZE: usize = 1024;
+const USIZE_BITS: usize = core::mem::size_of::<usize>() * 8;
+
+#[derive(Debug, Clone, Copy, Pod)]
+#[repr(C)]
+struct FdSet {
+    fds_bits: [usize; FD_SETSIZE / USIZE_BITS],
+}
+
+impl FdSet {
+    /// Equivalent to FD_SET.
+    pub fn set(&mut self, fd: FileDescripter) -> Result<()> {
+        let fd = fd as usize;
+        if fd >= FD_SETSIZE {
+            return_errno_with_message!(Errno::EINVAL, "fd exceeds FD_SETSIZE");
+        }
+        self.fds_bits[fd / USIZE_BITS] |= 1 << (fd % USIZE_BITS);
+        Ok(())
+    }
+
+    /// Equivalent to FD_CLR.
+    pub fn unset(&mut self, fd: FileDescripter) -> Result<()> {
+        let fd = fd as usize;
+        if fd >= FD_SETSIZE {
+            return_errno_with_message!(Errno::EINVAL, "fd exceeds FD_SETSIZE");
+        }
+        self.fds_bits[fd / USIZE_BITS] &= !(1 << (fd % USIZE_BITS));
+        Ok(())
+    }
+
+    /// Equivalent to FD_ISSET.
+    pub fn is_set(&self, fd: FileDescripter) -> bool {
+        let fd = fd as usize;
+        if fd >= FD_SETSIZE {
+            return false;
+        }
+        (self.fds_bits[fd / USIZE_BITS] & (1 << (fd % USIZE_BITS))) != 0
+    }
+
+    /// Equivalent to FD_ZERO.
+    pub fn clear(&mut self) {
+        for slot in self.fds_bits.iter_mut() {
+            *slot = 0;
+        }
+    }
+}
diff --git a/services/libs/jinux-std/src/time/mod.rs b/services/libs/jinux-std/src/time/mod.rs
index cd823f551..54e172f15 100644
--- a/services/libs/jinux-std/src/time/mod.rs
+++ b/services/libs/jinux-std/src/time/mod.rs
@@ -46,5 +46,27 @@ impl From for Duration {
     }
 }
 
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Pod)]
+pub struct timeval_t {
+    pub sec: time_t,
+    pub usec: suseconds_t,
+}
+
+impl From<Duration> for timeval_t {
+    fn from(duration: Duration) -> timeval_t {
+        let sec = duration.as_secs() as time_t;
+        let usec = duration.subsec_micros() as suseconds_t;
+        debug_assert!(sec >= 0); // usec >= 0 always holds
+        timeval_t { sec, usec }
+    }
+}
+
+impl From<timeval_t> for Duration {
+    fn from(timeval: timeval_t) -> Self {
+        Duration::new(timeval.sec as u64, (timeval.usec * 1000) as u32)
+    }
+}
+
 /// The various flags for setting POSIX.1b interval timers:
 pub const TIMER_ABSTIME: i32 = 0x01;