Add sys_fallocate

This commit is contained in:
Shaowei Song 2024-06-28 10:45:16 +00:00 committed by Tate, Hongliang Tian
parent 8dc08dccba
commit 8dce83605f
18 changed files with 349 additions and 18 deletions

View File

@ -305,7 +305,7 @@ provided by Linux on x86-64 architecture.
| 282 | signalfd | ❌ |
| 283 | timerfd_create | ❌ |
| 284 | eventfd | ✅ |
| 285 | fallocate | ❌ |
| 285 | fallocate | ✅ |
| 286 | timerfd_settime | ❌ |
| 287 | timerfd_gettime | ❌ |
| 288 | accept4 | ✅ |

View File

@ -10,7 +10,9 @@ use crate::{
fs::{
device::Device,
ext2::{FilePerm, FileType, Inode as Ext2Inode},
utils::{DirentVisitor, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata},
utils::{
DirentVisitor, FallocMode, FileSystem, Inode, InodeMode, InodeType, IoctlCmd, Metadata,
},
},
prelude::*,
process::{Gid, Uid},
@ -172,6 +174,10 @@ impl Inode for Ext2Inode {
self.write_link(target)
}
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
self.fallocate(mode, offset, len)
}
fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32> {
Err(Error::new(Errno::EINVAL))
}

View File

@ -16,6 +16,7 @@ use super::{
prelude::*,
utils::now,
};
use crate::fs::utils::FallocMode;
/// Max length of file name.
pub const MAX_FNAME_LEN: usize = 255;
@ -734,6 +735,54 @@ impl Inode {
inner.set_gid(gid);
inner.set_ctime(now());
}
/// Manipulates the space of this file in the byte range that starts at
/// `offset` and spans `len` bytes, according to `mode`.
///
/// Only regular files are accepted; any other file type is rejected with
/// `EISDIR`. Modes other than `Allocate`, `AllocateKeepSize` and
/// `PunchHoleKeepSize` are rejected with `EOPNOTSUPP`.
pub fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
    if self.file_type() != FileType::File {
        return_errno_with_message!(Errno::EISDIR, "not regular file");
    }

    match mode {
        // Ext2 in Linux has no native support for `Allocate` and
        // `AllocateKeepSize`; they are accepted here for wider compatibility.
        FallocMode::Allocate => {
            // Grow the file when the requested range ends past the current size.
            let required_size = offset + len;
            if required_size > self.file_size() {
                self.resize(required_size)?;
            }
            Ok(())
        }
        FallocMode::AllocateKeepSize => Ok(()),
        FallocMode::PunchHoleKeepSize => {
            // Keep the write guard for the whole arm so the operation is atomic.
            let inner = self.inner.write();

            let file_size = inner.file_size();
            if offset >= file_size {
                // The range lies entirely past the end of the file.
                return Ok(());
            }
            // Never punch beyond the end of the file: the size must not change.
            let punch_end = file_size.min(offset + len);

            // TODO: Think of a more light-weight approach
            inner.page_cache.fill_zeros(offset..punch_end)?;

            // Only blocks that are fully covered by the range become holes;
            // partially covered blocks at either edge are just zero-filled above.
            let inode_impl = inner.inode_impl.0.read();
            let mut hole_map = inode_impl.blocks_hole_desc.write();
            let first_full_block = Bid::from_offset(offset.align_up(BLOCK_SIZE));
            let end_full_block = Bid::from_offset(punch_end.align_down(BLOCK_SIZE));
            for bid in first_full_block..end_full_block {
                hole_map.set(bid.to_raw() as _);
            }
            Ok(())
        }
        FallocMode::AllocateUnshareRange
        | FallocMode::ZeroRange
        | FallocMode::ZeroRangeKeepSize
        | FallocMode::CollapseRange
        | FallocMode::InsertRange => {
            return_errno_with_message!(
                Errno::EOPNOTSUPP,
                "fallocate with the specified flags is not supported"
            );
        }
    }
}
}
#[inherit_methods(from = "self.inner.read()")]

View File

@ -8,7 +8,7 @@ use crate::{
events::{IoEvents, Observer},
fs::{
device::Device,
utils::{AccessMode, InodeMode, IoctlCmd, Metadata, SeekFrom, StatusFlags},
utils::{AccessMode, FallocMode, InodeMode, IoctlCmd, Metadata, SeekFrom, StatusFlags},
},
net::socket::Socket,
prelude::*,
@ -98,6 +98,10 @@ pub trait FileLike: Pollable + Send + Sync + Any {
return_errno_with_message!(Errno::ESPIPE, "seek is not supported");
}
/// Manipulates the space of the file in the range that starts at `offset`
/// and spans `len` bytes, according to `mode`.
///
/// This default implementation supports no mode and always fails with
/// `EOPNOTSUPP`; implementors that support the operation override it.
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
return_errno_with_message!(Errno::EOPNOTSUPP, "fallocate is not supported");
}
fn register_observer(
&self,
observer: Weak<dyn Observer<IoEvents>>,

View File

@ -14,10 +14,10 @@ impl InodeHandle<Rights> {
) -> Result<Self> {
let inode = dentry.inode();
if access_mode.is_readable() && !inode.mode()?.is_readable() {
return_errno_with_message!(Errno::EACCES, "File is not readable");
return_errno_with_message!(Errno::EACCES, "file is not readable");
}
if access_mode.is_writable() && !inode.mode()?.is_writable() {
return_errno_with_message!(Errno::EACCES, "File is not writable");
return_errno_with_message!(Errno::EACCES, "file is not writable");
}
Self::new_unchecked_access(dentry, access_mode, status_flags)
@ -30,7 +30,7 @@ impl InodeHandle<Rights> {
) -> Result<Self> {
let inode = dentry.inode();
if access_mode.is_writable() && inode.type_() == InodeType::Dir {
return_errno_with_message!(Errno::EISDIR, "Directory cannot open to write");
return_errno_with_message!(Errno::EISDIR, "directory cannot open to write");
}
let file_io = if let Some(device) = inode.as_device() {
@ -59,7 +59,7 @@ impl InodeHandle<Rights> {
pub fn read_to_end(&self, buf: &mut Vec<u8>) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
return_errno_with_message!(Errno::EBADF, "file is not readable");
}
self.0.read_to_end(buf)
@ -67,7 +67,7 @@ impl InodeHandle<Rights> {
pub fn readdir(&self, visitor: &mut dyn DirentVisitor) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
return_errno_with_message!(Errno::EBADF, "file is not readable");
}
self.0.readdir(visitor)
}
@ -100,14 +100,14 @@ impl FileLike for InodeHandle<Rights> {
fn read(&self, buf: &mut [u8]) -> Result<usize> {
if !self.1.contains(Rights::READ) {
return_errno_with_message!(Errno::EBADF, "File is not readable");
return_errno_with_message!(Errno::EBADF, "file is not readable");
}
self.0.read(buf)
}
fn write(&self, buf: &[u8]) -> Result<usize> {
if !self.1.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EBADF, "File is not writable");
return_errno_with_message!(Errno::EBADF, "file is not writable");
}
self.0.write(buf)
}
@ -128,7 +128,7 @@ impl FileLike for InodeHandle<Rights> {
fn resize(&self, new_size: usize) -> Result<()> {
if !self.1.contains(Rights::WRITE) {
return_errno_with_message!(Errno::EINVAL, "File is not writable");
return_errno_with_message!(Errno::EINVAL, "file is not writable");
}
self.0.resize(new_size)
}
@ -138,6 +138,13 @@ impl FileLike for InodeHandle<Rights> {
Ok(())
}
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
    // The handle must carry the WRITE right; otherwise the request is
    // rejected with `EBADF` before it reaches the inner handle.
    let can_write = self.1.contains(Rights::WRITE);
    if !can_write {
        return_errno_with_message!(Errno::EBADF, "file is not writable");
    }

    self.0.fallocate(mode, offset, len)
}
fn as_device(&self) -> Option<Arc<dyn Device>> {
self.dentry().inode().as_device()
}

View File

@ -19,8 +19,8 @@ use crate::{
file_handle::FileLike,
path::Dentry,
utils::{
AccessMode, DirentVisitor, InodeMode, InodeType, IoctlCmd, Metadata, SeekFrom,
StatusFlags,
AccessMode, DirentVisitor, FallocMode, InodeMode, InodeType, IoctlCmd, Metadata,
SeekFrom, StatusFlags,
},
},
prelude::*,
@ -184,6 +184,30 @@ impl InodeHandle_ {
self.dentry.inode().poll(mask, poller)
}
/// Checks the status flags of this handle against `mode` and then forwards
/// the request to the underlying inode.
///
/// Fails with `EPERM` when the handle is in append mode and `mode` is one of
/// `PunchHoleKeepSize`, `CollapseRange` or `InsertRange`, and with `EBADF`
/// when the handle was opened with `O_DIRECT` or `O_PATH`.
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
    let flags = self.status_flags();

    // These modes are refused on a handle opened with `O_APPEND`.
    let refused_in_append_mode = matches!(
        mode,
        FallocMode::PunchHoleKeepSize | FallocMode::CollapseRange | FallocMode::InsertRange
    );
    if flags.contains(StatusFlags::O_APPEND) && refused_in_append_mode {
        return_errno_with_message!(
            Errno::EPERM,
            "the flags do not work on the append-only file"
        );
    }

    let opened_direct = flags.contains(StatusFlags::O_DIRECT);
    let opened_path_only = flags.contains(StatusFlags::O_PATH);
    if opened_direct || opened_path_only {
        return_errno_with_message!(
            Errno::EBADF,
            "currently fallocate file with O_DIRECT or O_PATH is not supported"
        );
    }

    self.dentry.inode().fallocate(mode, offset, len)
}
fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32> {
if let Some(ref file_io) = self.file_io {
return file_io.ioctl(cmd, arg);

View File

@ -19,8 +19,8 @@ use crate::{
fs::{
device::Device,
utils::{
CStr256, DirentVisitor, FileSystem, FsFlags, Inode, InodeMode, InodeType, IoctlCmd,
Metadata, PageCache, PageCacheBackend, SuperBlock,
CStr256, DirentVisitor, FallocMode, FileSystem, FsFlags, Inode, InodeMode, InodeType,
IoctlCmd, Metadata, PageCache, PageCacheBackend, SuperBlock,
},
},
prelude::*,
@ -1105,6 +1105,43 @@ impl Inode for RamInode {
Weak::upgrade(&self.fs).unwrap()
}
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
    if self.typ != InodeType::File {
        return_errno_with_message!(Errno::EISDIR, "not regular file");
    }

    // The support for flags is consistent with Linux
    match mode {
        FallocMode::AllocateKeepSize => {
            // Nothing to do: the file size must stay as it is.
            Ok(())
        }
        FallocMode::Allocate => {
            // Grow the file when the requested range ends past the current size.
            let required_size = offset + len;
            if required_size > self.size() {
                self.resize(required_size)?;
            }
            Ok(())
        }
        FallocMode::PunchHoleKeepSize => {
            let node = self.node.read();
            let file_size = node.metadata.size;
            if offset >= file_size {
                // The range lies entirely past the end of the file.
                return Ok(());
            }
            // Clamp the range to the file so that the size is not changed.
            let punch_end = file_size.min(offset + len);
            // TODO: Think of a more light-weight approach
            node.inner.as_file().unwrap().fill_zeros(offset..punch_end)
        }
        FallocMode::AllocateUnshareRange
        | FallocMode::ZeroRange
        | FallocMode::ZeroRangeKeepSize
        | FallocMode::CollapseRange
        | FallocMode::InsertRange => {
            return_errno_with_message!(
                Errno::EOPNOTSUPP,
                "fallocate with the specified flags is not supported"
            );
        }
    }
}
fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32> {
if let Some(device) = self.node.read().inner.as_device() {
return device.ioctl(cmd, arg);

View File

@ -0,0 +1,27 @@
// SPDX-License-Identifier: MPL-2.0
use crate::prelude::*;
/// The operation that a `fallocate` request performs on a byte range of a file.
///
/// Depending on the mode, the disk space backing the range is allocated,
/// deallocated or zeroed, or the range itself is removed from or inserted
/// into the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FallocMode {
    /// Allocate disk space for the given range.
    Allocate,
    /// Same as `Allocate`, except that the file size is left unchanged.
    AllocateKeepSize,
    /// Turn shared file data extents in the range into private ones, so that
    /// subsequent writes to the range are guaranteed.
    AllocateUnshareRange,
    /// Deallocate the space of the range (i.e. create a hole) without
    /// changing the file size.
    PunchHoleKeepSize,
    /// Zero the range, growing the file if the range requires it.
    ZeroRange,
    /// Same as `ZeroRange`, except that the file size is left unchanged.
    ZeroRangeKeepSize,
    /// Remove the range from the file without leaving a hole behind.
    CollapseRange,
    /// Insert space into the file without overwriting existing data.
    InsertRange,
}

View File

@ -7,7 +7,7 @@ use core::time::Duration;
use aster_rights::Full;
use core2::io::{Error as IoError, ErrorKind as IoErrorKind, Result as IoResult, Write};
use super::{DirentVisitor, FileSystem, IoctlCmd};
use super::{DirentVisitor, FallocMode, FileSystem, IoctlCmd};
use crate::{
events::IoEvents,
fs::device::{Device, DeviceType},
@ -348,6 +348,12 @@ pub trait Inode: Any + Sync + Send {
Ok(())
}
/// Manipulates a range of space of the file according to the specified allocate mode,
/// the manipulated range starts at `offset` and continues for `len` bytes.
///
/// # Errors
///
/// This default implementation supports no mode and always returns
/// `EOPNOTSUPP`; inodes that support the operation override it.
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
return_errno!(Errno::EOPNOTSUPP);
}
fn poll(&self, mask: IoEvents, _poller: Option<&mut Poller>) -> IoEvents {
let events = IoEvents::IN | IoEvents::OUT;
events & mask

View File

@ -7,6 +7,7 @@ pub use channel::{Channel, Consumer, Producer};
pub use creation_flags::CreationFlags;
pub use dirent_visitor::DirentVisitor;
pub use direntry_vec::DirEntryVecExt;
pub use falloc_mode::FallocMode;
pub use file_creation_mask::FileCreationMask;
pub use fs::{FileSystem, FsFlags, SuperBlock};
pub use inode::{Inode, InodeMode, InodeType, Metadata};
@ -20,6 +21,7 @@ mod channel;
mod creation_flags;
mod dirent_visitor;
mod direntry_vec;
mod falloc_mode;
mod file_creation_mask;
mod fs;
mod inode;

View File

@ -23,6 +23,7 @@ use crate::syscall::{
execve::{sys_execve, sys_execveat},
exit::sys_exit,
exit_group::sys_exit_group,
fallocate::sys_fallocate,
fcntl::sys_fcntl,
fork::sys_fork,
fsync::{sys_fdatasync, sys_fsync},
@ -286,6 +287,7 @@ impl_syscall_nums_and_dispatch_fn! {
SYS_UTIMENSAT = 280 => sys_utimensat(args[..4]);
SYS_EPOLL_PWAIT = 281 => sys_epoll_pwait(args[..6]);
SYS_EVENTFD = 284 => sys_eventfd(args[..1]);
SYS_FALLOCATE = 285 => sys_fallocate(args[..4]);
SYS_ACCEPT4 = 288 => sys_accept4(args[..4]);
SYS_EVENTFD2 = 290 => sys_eventfd2(args[..2]);
SYS_EPOLL_CREATE1 = 291 => sys_epoll_create1(args[..1]);

View File

@ -0,0 +1,156 @@
// SPDX-License-Identifier: MPL-2.0
use super::SyscallReturn;
use crate::{
fs::{file_table::FileDesc, utils::FallocMode},
prelude::*,
process::ResourceType,
};
pub fn sys_fallocate(fd: FileDesc, mode: u64, offset: i64, len: i64) -> Result<SyscallReturn> {
debug!(
"fd = {}, mode = {}, offset = {}, len = {}",
fd, mode, offset, len
);
check_offset_and_len(offset, len)?;
let file = {
let current = current!();
let file_table = current.file_table().lock();
file_table.get_file(fd)?.clone()
};
let falloc_mode = FallocMode::try_from(
RawFallocMode::from_bits(mode as _)
.ok_or_else(|| Error::with_message(Errno::EOPNOTSUPP, "invalid fallocate mode"))?,
)?;
file.fallocate(falloc_mode, offset as usize, len as usize)?;
Ok(SyscallReturn::Return(0))
}
fn check_offset_and_len(offset: i64, len: i64) -> Result<()> {
if offset < 0 || len <= 0 {
return_errno_with_message!(
Errno::EINVAL,
"offset is less than 0, or len is less than or equal to 0"
);
}
if offset.checked_add(len).is_none() {
return_errno_with_message!(Errno::EINVAL, "offset+len has overflowed");
}
let max_file_size = {
let current = current!();
let resource_limits = current.resource_limits().lock();
resource_limits
.get_rlimit(ResourceType::RLIMIT_FSIZE)
.get_cur() as usize
};
if (offset + len) as usize > max_file_size {
return_errno_with_message!(Errno::EFBIG, "offset+len exceeds the maximum file size");
}
Ok(())
}
bitflags! {
    /// The raw `mode` bits accepted by the `fallocate` system call.
    ///
    /// The combination of bits selects the operation that is applied to the
    /// requested byte range.
    struct RawFallocMode: u32 {
        /// Do not change the file size when the operation extends the file.
        const FALLOC_FL_KEEP_SIZE = 1 << 0; // 0x01
        /// Deallocate the range, i.e. create a hole.
        ///
        /// Has to be OR-ed with `FALLOC_FL_KEEP_SIZE`.
        const FALLOC_FL_PUNCH_HOLE = 1 << 1; // 0x02
        /// Remove the range from the file without leaving a hole.
        ///
        /// Offset and length must be multiples of the filesystem block size.
        const FALLOC_FL_COLLAPSE_RANGE = 1 << 3; // 0x08
        /// Convert the range to zeros.
        ///
        /// Blocks within the range are preallocated and converted to
        /// unwritten extents.
        const FALLOC_FL_ZERO_RANGE = 1 << 4; // 0x10
        /// Insert space within the file size without overwriting any
        /// existing data.
        ///
        /// Offset and length must be multiples of the filesystem block size.
        const FALLOC_FL_INSERT_RANGE = 1 << 5; // 0x20
        /// Unshare shared blocks within the file size without overwriting
        /// any existing data.
        ///
        /// Guarantees that subsequent writes will not fail due to lack of
        /// space.
        const FALLOC_FL_UNSHARE_RANGE = 1 << 6; // 0x40
    }
}
impl TryFrom<RawFallocMode> for FallocMode {
type Error = crate::error::Error;
fn try_from(raw_mode: RawFallocMode) -> Result<Self> {
// Check for invalid combinations of flags
if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE)
&& raw_mode.contains(RawFallocMode::FALLOC_FL_ZERO_RANGE)
{
return_errno_with_message!(
Errno::EOPNOTSUPP,
"PUNCH_HOLE and ZERO_RANGE cannot be used together"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE)
&& !raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE)
{
return_errno_with_message!(
Errno::EOPNOTSUPP,
"PUNCH_HOLE must be combined with KEEP_SIZE"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_COLLAPSE_RANGE)
&& !(raw_mode - RawFallocMode::FALLOC_FL_COLLAPSE_RANGE).is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"COLLAPSE_RANGE must be used exclusively without any other flags"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_INSERT_RANGE)
&& !(raw_mode - RawFallocMode::FALLOC_FL_INSERT_RANGE).is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"INSERT_RANGE must be used exclusively without any other flags"
);
}
if raw_mode.contains(RawFallocMode::FALLOC_FL_UNSHARE_RANGE)
&& !(raw_mode
- (RawFallocMode::FALLOC_FL_UNSHARE_RANGE | RawFallocMode::FALLOC_FL_KEEP_SIZE))
.is_empty()
{
return_errno_with_message!(
Errno::EINVAL,
"UNSHARE_RANGE can only be combined with KEEP_SIZE."
);
}
// Transform valid flags into the fallocate mode
let mode = if raw_mode.contains(RawFallocMode::FALLOC_FL_PUNCH_HOLE) {
FallocMode::PunchHoleKeepSize
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_ZERO_RANGE) {
if raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE) {
FallocMode::ZeroRangeKeepSize
} else {
FallocMode::ZeroRange
}
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_COLLAPSE_RANGE) {
FallocMode::CollapseRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_INSERT_RANGE) {
FallocMode::InsertRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_UNSHARE_RANGE) {
FallocMode::AllocateUnshareRange
} else if raw_mode.contains(RawFallocMode::FALLOC_FL_KEEP_SIZE) {
FallocMode::AllocateKeepSize
} else {
FallocMode::Allocate
};
Ok(mode)
}
}

View File

@ -31,6 +31,7 @@ mod eventfd;
mod execve;
mod exit;
mod exit_group;
mod fallocate;
mod fcntl;
mod fork;
mod fsync;

View File

@ -220,7 +220,7 @@ fn general_complete_fn(bio: &SubmittedBio) {
match bio.status() {
BioStatus::Complete => (),
err_status => log::error!(
"faild to do {:?} on the device with error status: {:?}",
"failed to do {:?} on the device with error status: {:?}",
bio.type_(),
err_status
),

View File

@ -12,7 +12,7 @@
//! and merge requests within the queue.
//!
//! This crate also offers the `Bio` related data structures and APIs to accomplish
//! safe and convenient block I/O operations, for exmaple:
//! safe and convenient block I/O operations, for example:
//!
//! ```no_run
//! // Creates a bio request.

View File

@ -47,6 +47,7 @@ TESTS ?= \
utimes_test \
vdso_clock_gettime_test \
write_test \
fallocate_test \
# The end of the list
MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))

View File

@ -0,0 +1,5 @@
AllocateTest.Fallocate
AllocateTest.FallocatePipe
AllocateTest.FallocateChar
AllocateTest.FallocateRlimit
AllocateTest.FallocateOtherFDs

View File

@ -0,0 +1,4 @@
AllocateTest.FallocatePipe
AllocateTest.FallocateChar
AllocateTest.FallocateRlimit
AllocateTest.FallocateOtherFDs