diff --git a/docs/src/kernel/linux-compatibility.md b/docs/src/kernel/linux-compatibility.md
index 2defccf12..3ee773370 100644
--- a/docs/src/kernel/linux-compatibility.md
+++ b/docs/src/kernel/linux-compatibility.md
@@ -60,7 +60,7 @@ provided by Linux on x86-64 architecture.
 | 37 | alarm | ✅ |
 | 38 | setitimer | ❌ |
 | 39 | getpid | ✅ |
-| 40 | sendfile | ❌ |
+| 40 | sendfile | ✅ |
 | 41 | socket | ✅ |
 | 42 | connect | ✅ |
 | 43 | accept | ✅ |
diff --git a/kernel/aster-nix/src/syscall/arch/x86.rs b/kernel/aster-nix/src/syscall/arch/x86.rs
index d26ba7c3c..46ccd03fb 100644
--- a/kernel/aster-nix/src/syscall/arch/x86.rs
+++ b/kernel/aster-nix/src/syscall/arch/x86.rs
@@ -74,6 +74,7 @@ use crate::syscall::{
     rt_sigsuspend::sys_rt_sigsuspend,
     sched_yield::sys_sched_yield,
     select::sys_select,
+    sendfile::sys_sendfile,
     sendto::sys_sendto,
     set_get_priority::{sys_get_priority, sys_set_priority},
     set_robust_list::sys_set_robust_list,
@@ -142,6 +143,7 @@ impl_syscall_nums_and_dispatch_fn! {
     SYS_NANOSLEEP = 35 => sys_nanosleep(args[..2]);
     SYS_ALARM = 37 => sys_alarm(args[..1]);
     SYS_GETPID = 39 => sys_getpid(args[..0]);
+    SYS_SENDFILE = 40 => sys_sendfile(args[..4]);
     SYS_SOCKET = 41 => sys_socket(args[..3]);
     SYS_CONNECT = 42 => sys_connect(args[..3]);
     SYS_ACCEPT = 43 => sys_accept(args[..3]);
diff --git a/kernel/aster-nix/src/syscall/mod.rs b/kernel/aster-nix/src/syscall/mod.rs
index 29e84d878..625d52c36 100644
--- a/kernel/aster-nix/src/syscall/mod.rs
+++ b/kernel/aster-nix/src/syscall/mod.rs
@@ -81,6 +81,7 @@ mod rt_sigreturn;
 mod rt_sigsuspend;
 mod sched_yield;
 mod select;
+mod sendfile;
 mod sendto;
 mod set_get_priority;
 mod set_robust_list;
diff --git a/kernel/aster-nix/src/syscall/sendfile.rs b/kernel/aster-nix/src/syscall/sendfile.rs
new file mode 100644
index 000000000..0ab0a03af
--- /dev/null
+++ b/kernel/aster-nix/src/syscall/sendfile.rs
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use super::SyscallReturn;
+use crate::{
+    fs::file_table::FileDesc,
+    prelude::*,
+    util::{read_val_from_user, write_val_to_user},
+};
+
+/// Copies up to `count` bytes from `in_fd` to `out_fd` through a page-sized buffer.
+///
+/// If `offset_ptr` is null, data is read from the current file offset of `in_fd`.
+/// Otherwise, the starting offset is read from user space at `offset_ptr`, data is
+/// read from `in_fd` at that offset, and the offset just past the last byte written
+/// to `out_fd` is stored back to `offset_ptr` before returning.
+///
+/// Returns the number of bytes written to `out_fd`, which may be less than `count`.
+///
+/// # Errors
+///
+/// Returns `EINVAL` if `count` or the user-supplied offset is negative. Errors from
+/// looking up either file descriptor or from accessing `offset_ptr` are propagated.
+/// A read or write error is propagated only if no data has been written yet;
+/// otherwise it is logged and the partial byte count is returned.
+pub fn sys_sendfile(
+    out_fd: FileDesc,
+    in_fd: FileDesc,
+    offset_ptr: Vaddr,
+    count: isize,
+) -> Result<SyscallReturn> {
+    trace!("raw offset ptr = 0x{:x}", offset_ptr);
+
+    let offset = if offset_ptr == 0 {
+        None
+    } else {
+        let offset: isize = read_val_from_user(offset_ptr)?;
+        if offset < 0 {
+            return_errno_with_message!(Errno::EINVAL, "offset cannot be negative");
+        }
+        Some(offset)
+    };
+
+    debug!(
+        "out_fd = {}, in_fd = {}, offset = {:x?}, count = 0x{:x}",
+        out_fd, in_fd, offset, count
+    );
+
+    if count < 0 {
+        return_errno_with_message!(Errno::EINVAL, "count cannot be negative");
+    }
+
+    let (out_file, in_file) = {
+        let current = current!();
+        let file_table = current.file_table().lock();
+        let out_file = file_table.get_file(out_fd)?.clone();
+        // FIXME: the in_file must support mmap-like operations (i.e., it cannot be a socket).
+        let in_file = file_table.get_file(in_fd)?.clone();
+        (out_file, in_file)
+    };
+
+    // sendfile can send at most `MAX_COUNT` bytes
+    const MAX_COUNT: usize = 0x7fff_f000;
+    let count = (count as usize).min(MAX_COUNT);
+
+    const BUFFER_SIZE: usize = PAGE_SIZE;
+    let mut buffer = vec![0u8; BUFFER_SIZE].into_boxed_slice();
+    let mut total_len = 0;
+    let mut offset = offset.map(|offset| offset as usize);
+
+    while total_len < count {
+        // The offset decides how to read from `in_file`.
+        // If offset is `Some(_)`, the data will be read from the given offset,
+        // and after reading, the file offset of `in_file` will remain unchanged.
+        // If offset is `None`, the data will be read from the file offset,
+        // and the file offset of `in_file` is adjusted
+        // to reflect the number of bytes read from `in_file`.
+        let max_readlen = buffer.len().min(count - total_len);
+
+        // Read from `in_file`
+        let read_res = match offset {
+            Some(offset) => in_file.read_at(offset, &mut buffer[..max_readlen]),
+            None => in_file.read(&mut buffer[..max_readlen]),
+        };
+
+        let read_len = match read_res {
+            Ok(len) => len,
+            Err(e) => {
+                if total_len > 0 {
+                    warn!("error occurs when trying to read file: {:?}", e);
+                    break;
+                }
+                return Err(e);
+            }
+        };
+
+        if read_len == 0 {
+            break;
+        }
+
+        // Note: `sendfile` allows sending partial data,
+        // so short reads and short writes are all acceptable
+        let write_res = out_file.write(&buffer[..read_len]);
+
+        match write_res {
+            Ok(len) => {
+                total_len += len;
+                // Advance the explicit offset by the number of bytes written, not the
+                // number read, so the offset reported to user space never covers data
+                // that a short write failed to deliver.
+                // FIXME: with no explicit offset, `read` has already advanced the file
+                // offset of `in_file`, so bytes dropped by a short write are skipped.
+                if let Some(offset) = offset.as_mut() {
+                    *offset += len;
+                }
+                if len < BUFFER_SIZE {
+                    break;
+                }
+            }
+            Err(e) => {
+                if total_len > 0 {
+                    warn!("error occurs when trying to write file: {:?}", e);
+                    break;
+                }
+                return Err(e);
+            }
+        }
+    }
+
+    if let Some(offset) = offset {
+        write_val_to_user(offset_ptr, &(offset as isize))?;
+    }
+
+    Ok(SyscallReturn::Return(total_len as _))
+}
diff --git a/regression/syscall_test/Makefile b/regression/syscall_test/Makefile
index 27c441112..387313c93 100644
--- a/regression/syscall_test/Makefile
+++ b/regression/syscall_test/Makefile
@@ -24,6 +24,7 @@ TESTS ?= \
 	pty_test \
 	read_test \
 	rename_test \
+	sendfile_test \
 	stat_test \
 	statfs_test \
 	symlink_test \
diff --git a/regression/syscall_test/blocklists/sendfile_test b/regression/syscall_test/blocklists/sendfile_test
new file mode 100644
index 000000000..f8f2b9a4f
--- /dev/null
+++ b/regression/syscall_test/blocklists/sendfile_test
@@ -0,0 +1,6 @@
+SendFileTest.Overflow
+SendFileTest.DoNotSendfileIfOutfileIsAppendOnly
+SendFileTest.SendPipeWouldBlock
+SendFileTest.SendPipeBlocks
+SendFileTest.SendToNotARegularFile
+SendFileTest.SendToSpecialFile