diff --git a/kernel/aster-nix/src/fs/inode_handle/dyn_cap.rs b/kernel/aster-nix/src/fs/inode_handle/dyn_cap.rs index c370240c..87f76707 100644 --- a/kernel/aster-nix/src/fs/inode_handle/dyn_cap.rs +++ b/kernel/aster-nix/src/fs/inode_handle/dyn_cap.rs @@ -54,7 +54,7 @@ impl InodeHandle { if !self.1.contains(rights) { return_errno_with_message!(Errno::EBADF, "check rights failed"); } - Ok(InodeHandle(self.0, R1::new())) + Ok(InodeHandle(self.0.clone(), R1::new())) } pub fn read_to_end(&self, buf: &mut Vec) -> Result { diff --git a/kernel/aster-nix/src/fs/inode_handle/mod.rs b/kernel/aster-nix/src/fs/inode_handle/mod.rs index 4ad3a9cb..5900b067 100644 --- a/kernel/aster-nix/src/fs/inode_handle/mod.rs +++ b/kernel/aster-nix/src/fs/inode_handle/mod.rs @@ -19,8 +19,9 @@ use crate::{ file_handle::FileLike, path::Dentry, utils::{ - AccessMode, DirentVisitor, FallocMode, InodeMode, InodeType, IoctlCmd, Metadata, - SeekFrom, StatusFlags, + AccessMode, DirentVisitor, FallocMode, FileRange, FlockItem, FlockList, InodeMode, + InodeType, IoctlCmd, Metadata, RangeLockItem, RangeLockItemBuilder, RangeLockList, + RangeLockType, SeekFrom, StatusFlags, OFFSET_MAX, }, }, prelude::*, @@ -215,6 +216,105 @@ impl InodeHandle_ { self.dentry.inode().ioctl(cmd, arg) } + + fn test_range_lock(&self, lock: RangeLockItem) -> Result { + let mut req_lock = lock.clone(); + if let Some(extension) = self.dentry.inode().extension() { + if let Some(range_lock_list) = extension.get::() { + req_lock = range_lock_list.test_lock(lock); + } else { + // The range lock could be placed if there is no lock list + req_lock.set_type(RangeLockType::Unlock); + } + } else { + debug!("Inode extension is not supported, the lock could be placed"); + // Some file systems may not support range lock like procfs and sysfs + // Returns Ok if extension is not supported. 
+ req_lock.set_type(RangeLockType::Unlock); + } + Ok(req_lock) + } + + fn set_range_lock(&self, lock: &RangeLockItem, is_nonblocking: bool) -> Result<()> { + if RangeLockType::Unlock == lock.type_() { + self.unlock_range_lock(lock); + return Ok(()); + } + + self.check_range_lock_with_access_mode(lock)?; + if let Some(extension) = self.dentry.inode().extension() { + let range_lock_list = match extension.get::() { + Some(list) => list, + None => extension.get_or_put_default::(), + }; + + range_lock_list.set_lock(lock, is_nonblocking) + } else { + debug!("Inode extension is not supported, let the lock could be acquired"); + // Some file systems may not support range lock like procfs and sysfs + // Returns Ok if extension is not supported. + Ok(()) + } + } + + fn release_range_locks(&self) { + let range_lock = RangeLockItemBuilder::new() + .type_(RangeLockType::Unlock) + .range(FileRange::new(0, OFFSET_MAX).unwrap()) + .build() + .unwrap(); + + self.unlock_range_lock(&range_lock) + } + + fn unlock_range_lock(&self, lock: &RangeLockItem) { + if let Some(extension) = self.dentry.inode().extension() { + if let Some(range_lock_list) = extension.get::() { + range_lock_list.unlock(lock); + } + } + } + + fn check_range_lock_with_access_mode(&self, lock: &RangeLockItem) -> Result<()> { + match lock.type_() { + RangeLockType::ReadLock => { + if !self.access_mode().is_readable() { + return_errno_with_message!(Errno::EBADF, "file not readable"); + } + } + RangeLockType::WriteLock => { + if !self.access_mode().is_writable() { + return_errno_with_message!(Errno::EBADF, "file not writable"); + } + } + _ => (), + } + Ok(()) + } + + fn set_flock(&self, lock: FlockItem, is_nonblocking: bool) -> Result<()> { + if let Some(extension) = self.dentry.inode().extension() { + let flock_list = match extension.get::() { + Some(list) => list, + None => extension.get_or_put_default::(), + }; + + flock_list.set_lock(lock, is_nonblocking) + } else { + debug!("Inode extension is not supported, let 
the lock could be acquired"); + // Some file systems may not support flock like procfs and sysfs + // Returns Ok if extension is not supported. + Ok(()) + } + } + + fn unlock_flock(&self, req_owner: &InodeHandle) { + if let Some(extension) = self.dentry.inode().extension() { + if let Some(flock_list) = extension.get::() { + flock_list.unlock(req_owner); + } + } + } } #[inherit_methods(from = "self.dentry")] @@ -245,6 +345,36 @@ impl InodeHandle { pub fn dentry(&self) -> &Arc { &self.0.dentry } + + pub fn test_range_lock(&self, lock: RangeLockItem) -> Result { + self.0.test_range_lock(lock) + } + + pub fn set_range_lock(&self, lock: &RangeLockItem, is_nonblocking: bool) -> Result<()> { + self.0.set_range_lock(lock, is_nonblocking) + } + + pub fn release_range_locks(&self) { + self.0.release_range_locks() + } + + pub fn set_flock(&self, lock: FlockItem, is_nonblocking: bool) -> Result<()> { + self.0.set_flock(lock, is_nonblocking) + } + + pub fn unlock_flock(&self) { + self.0.unlock_flock(self); + } + + pub fn offset(&self) -> usize { + self.0.offset() + } +} + +impl Drop for InodeHandle { + fn drop(&mut self) { + self.unlock_flock(); + } } pub trait FileIo: Send + Sync + 'static { diff --git a/kernel/aster-nix/src/fs/utils/mod.rs b/kernel/aster-nix/src/fs/utils/mod.rs index 1aa67713..bfcd022b 100644 --- a/kernel/aster-nix/src/fs/utils/mod.rs +++ b/kernel/aster-nix/src/fs/utils/mod.rs @@ -9,11 +9,15 @@ pub use dirent_visitor::DirentVisitor; pub use direntry_vec::DirEntryVecExt; pub use falloc_mode::FallocMode; pub use file_creation_mask::FileCreationMask; +pub use flock::{FlockItem, FlockList, FlockType}; pub use fs::{FileSystem, FsFlags, SuperBlock}; -pub use inode::{Inode, InodeMode, InodeType, Metadata}; +pub use inode::{Extension, Inode, InodeMode, InodeType, Metadata}; pub use ioctl::IoctlCmd; pub use page_cache::{PageCache, PageCacheBackend}; pub use random_test::{generate_random_operation, new_fs_in_memory}; +pub use range_lock::{ + FileRange, RangeLockItem, 
RangeLockItemBuilder, RangeLockList, RangeLockType, OFFSET_MAX, +}; pub use status_flags::StatusFlags; mod access_mode; @@ -23,11 +27,13 @@ mod dirent_visitor; mod direntry_vec; mod falloc_mode; mod file_creation_mask; +mod flock; mod fs; mod inode; mod ioctl; mod page_cache; mod random_test; +mod range_lock; mod status_flags; use crate::prelude::*; diff --git a/kernel/aster-nix/src/fs/utils/range_lock/builder.rs b/kernel/aster-nix/src/fs/utils/range_lock/builder.rs new file mode 100644 index 00000000..5774afd6 --- /dev/null +++ b/kernel/aster-nix/src/fs/utils/range_lock/builder.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: MPL-2.0 + +use super::*; +use crate::process::Pid; + +/// Builder for `RangeLockItem`. +/// +/// # Example +/// +/// ```no_run +/// let mut lock = RangeLockItemBuilder::new() +/// .type_(lock_type) +/// .range(from_c_flock_and_file(&lock_mut_c, file.clone())?) +/// .build()?; +/// ``` +pub struct RangeLockItemBuilder { + // Mandatory field + type_: Option, + range: Option, + // Optional fields + owner: Option, + waitqueue: Option, +} + +impl Default for RangeLockItemBuilder { + fn default() -> Self { + Self::new() + } +} + +impl RangeLockItemBuilder { + pub fn new() -> Self { + Self { + owner: None, + type_: None, + range: None, + waitqueue: None, + } + } + + pub fn owner(mut self, owner: Pid) -> Self { + self.owner = Some(owner); + self + } + + pub fn type_(mut self, type_: RangeLockType) -> Self { + self.type_ = Some(type_); + self + } + + pub fn range(mut self, range: FileRange) -> Self { + self.range = Some(range); + self + } + + pub fn waitqueue(mut self, waitqueue: WaitQueue) -> Self { + self.waitqueue = Some(waitqueue); + self + } + + pub fn build(self) -> Result { + let owner = self.owner.unwrap_or_else(|| current!().pid()); + let type_ = if let Some(type_) = self.type_ { + type_ + } else { + return_errno_with_message!(Errno::EINVAL, "type_ is mandatory"); + }; + let range = if let Some(range) = self.range { + range + } else { + 
return_errno_with_message!(Errno::EINVAL, "range is mandatory"); + }; + let waitqueue = match self.waitqueue { + Some(waitqueue) => Arc::new(waitqueue), + None => Arc::new(WaitQueue::new()), + }; + Ok(RangeLockItem { + lock: RangeLock { + owner, + type_, + range, + }, + waitqueue, + }) + } +} diff --git a/kernel/aster-nix/src/fs/utils/range_lock/mod.rs b/kernel/aster-nix/src/fs/utils/range_lock/mod.rs new file mode 100644 index 00000000..4ecd830b --- /dev/null +++ b/kernel/aster-nix/src/fs/utils/range_lock/mod.rs @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: MPL-2.0 + +use core::fmt; + +use ostd::sync::{RwMutexWriteGuard, WaitQueue}; + +use self::range::FileRangeChange; +pub use self::{ + builder::RangeLockItemBuilder, + range::{FileRange, OverlapWith, OFFSET_MAX}, +}; +use crate::{prelude::*, process::Pid}; + +mod builder; +mod range; + +/// The metadata of a POSIX advisory file range lock. +#[derive(Debug, Clone)] +struct RangeLock { + /// Owner of the lock, representing the process holding the lock + owner: Pid, + /// Type of lock: can be F_RDLCK (read lock), F_WRLCK (write lock), or F_UNLCK (unlock) + type_: RangeLockType, + /// Range of the lock which specifies the portion of the file being locked + range: FileRange, +} + +/// Represents a POSIX advisory file range lock in the kernel. +/// Contains metadata about the lock and the processes waiting for it. +/// The lock is associated with a specific range of the file. 
+pub struct RangeLockItem { + /// The lock data including its properties + lock: RangeLock, + /// Waiters that are being blocked by this lock + waitqueue: Arc, +} + +impl RangeLockItem { + /// Returns the type of the lock (READ/WRITE/UNLOCK) + pub fn type_(&self) -> RangeLockType { + self.lock.type_ + } + + /// Sets the type of the lock to the specified type + pub fn set_type(&mut self, type_: RangeLockType) { + self.lock.type_ = type_; + } + + /// Returns the owner (process ID) of the lock + pub fn owner(&self) -> Pid { + self.lock.owner + } + + /// Sets the owner of the lock to the specified process ID + pub fn set_owner(&mut self, owner: Pid) { + self.lock.owner = owner; + } + + /// Returns the range of the lock + pub fn range(&self) -> FileRange { + self.lock.range + } + + /// Sets the range of the lock to the specified range + pub fn set_range(&mut self, range: FileRange) { + self.lock.range = range; + } + + /// Checks if this lock conflicts with another lock + /// Returns true if there is a conflict, otherwise false + pub fn conflict_with(&self, other: &Self) -> bool { + // If locks are owned by the same process, they do not conflict + if self.owner() == other.owner() { + return false; + } + // If the ranges do not overlap, they do not conflict + if self.overlap_with(other).is_none() { + return false; + } + // Write locks are exclusive and conflict with any other lock + if self.type_() == RangeLockType::WriteLock || other.type_() == RangeLockType::WriteLock { + return true; + } + false + } + + /// Checks if this lock overlaps with another lock + /// Returns an Option that contains the overlap details if they overlap + pub fn overlap_with(&self, other: &Self) -> Option { + self.range().overlap_with(&other.range()) + } + + /// Merges the range of this lock with another lock's range + /// If the merge fails, it will trigger a panic + pub fn merge_with(&mut self, other: &Self) { + self.lock + .range + .merge(&other.range()) + .expect("merge range failed"); + } + 
+ /// Returns the starting position of the lock range + pub fn start(&self) -> usize { + self.range().start() + } + + /// Returns the ending position of the lock range + pub fn end(&self) -> usize { + self.range().end() + } + + /// Sets a new starting position for the lock range + /// If the range shrinks, it will wake all waiting processes + pub fn set_start(&mut self, new_start: usize) { + let change = self + .lock + .range + .set_start(new_start) + .expect("invalid new start"); + if let FileRangeChange::Shrinked = change { + self.wake_all(); + } + } + + /// Sets a new ending position for the stored lock range (`range()` only returns a copy) + /// If the range shrinks, it will wake all waiting processes + pub fn set_end(&mut self, new_end: usize) { + let change = self.lock.range.set_end(new_end).expect("invalid new end"); + if let FileRangeChange::Shrinked = change { + self.wake_all(); + } + } + + /// Blocks the caller on this lock's wait queue + pub fn wait(&mut self) { + let cond = || None::<()>; // NOTE(review): never `Some`; confirm `wait_until` can return + self.waitqueue.wait_until(cond); + } + + /// Wakes all the processes waiting on this lock + /// Returns the number of processes that were woken + pub fn wake_all(&self) -> usize { + self.waitqueue.wake_all() + } +} + +/// Implements the drop trait for RangeLockItem +/// Ensures that all waiting processes are woken when this item goes out of scope +impl Drop for RangeLockItem { + fn drop(&mut self) { + self.wake_all(); + } +} + +/// Implements the Debug trait for RangeLockItem +/// Customizes the output when the item is printed in debug mode +impl Debug for RangeLockItem { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RangeLock") + .field("owner", &self.owner()) + .field("type_", &self.type_()) + .field("range", &self.range()) + .finish() + } +} + +/// Implements the Clone trait for RangeLockItem +/// Allows creating a copy of the item with the same properties +impl Clone for RangeLockItem { + fn clone(&self) -> Self { + Self { + lock: 
self.lock.clone(), + waitqueue: self.waitqueue.clone(), + } + } +} + +/// List of POSIX advisory file range locks. +/// +/// Rule of ordering: +/// Locks are sorted by owner process, then by the starting offset. +/// +/// Rule of merging: +/// Adjacent and overlapping locks with the same owner and type will be merged. +/// +/// Rule of updating: +/// New locks with a different type will replace or split the overlapping locks +/// if they have the same owner. +pub struct RangeLockList { + inner: RwMutex>, +} + +impl RangeLockList { + pub fn new() -> Self { + Self { + inner: RwMutex::new(VecDeque::new()), + } + } + + /// Test whether `lock` may be set. + /// + /// If there is a conflict, return the conflicting lock. + /// Otherwise, return a lock with type `Unlock`. + pub fn test_lock(&self, lock: RangeLockItem) -> RangeLockItem { + debug!("test_lock with RangeLock: {:?}", lock); + let mut req_lock = lock.clone(); + let list = self.inner.read(); + for existing_lock in list.iter() { + if lock.conflict_with(existing_lock) { + req_lock.set_owner(existing_lock.owner()); + req_lock.set_type(existing_lock.type_()); + req_lock.set_range(existing_lock.range()); + return req_lock; + } + } + req_lock.set_type(RangeLockType::Unlock); + req_lock + } + + /// Set a lock on the file. + /// + /// If the lock is non-blocking and there is a conflict, return `Err(Errno::EAGAIN)`. + /// Otherwise, block the current process until the lock can be set. 
+ pub fn set_lock(&self, req_lock: &RangeLockItem, is_nonblocking: bool) -> Result<()> { + debug!( + "set_lock with RangeLock: {:?}, is_nonblocking: {}", + req_lock, is_nonblocking + ); + loop { + let mut conflict_lock; + + { + let mut list = self.inner.write(); + if let Some(existing_lock) = list.iter().find(|l| req_lock.conflict_with(l)) { + if is_nonblocking { + return_errno_with_message!(Errno::EAGAIN, "the file is locked"); + } + + conflict_lock = existing_lock.clone(); + } else { + Self::insert_lock_into_list(&mut list, req_lock); + return Ok(()); + } + } + + conflict_lock.wait(); + } + } + + /// Insert a lock into the list. + fn insert_lock_into_list( + list: &mut RwMutexWriteGuard>, + lock: &RangeLockItem, + ) { + let first_same_owner_idx = match list.iter().position(|lk| lk.owner() == lock.owner()) { + Some(idx) => idx, + None => { + // Can't find existing locks with same owner. + list.push_front(lock.clone()); + return; + } + }; + // Insert the lock at the start position with same owner, may breaking + // the rules of RangeLockList. + // We will handle the inserted lock with next one to adjust the list to + // obey the rules. 
+ list.insert(first_same_owner_idx, lock.clone()); + let mut pre_idx = first_same_owner_idx; + let mut next_idx = pre_idx + 1; + loop { + if next_idx >= list.len() { + break; + } + let pre_lock = list[pre_idx].clone(); + let next_lock = list[next_idx].clone(); + + if next_lock.owner() != pre_lock.owner() { + break; + } + if next_lock.type_() == pre_lock.type_() { + // Same type + if pre_lock.end() < next_lock.start() { + break; + } else if next_lock.end() < pre_lock.start() { + list.swap(pre_idx, next_idx); + pre_idx += 1; + next_idx += 1; + } else { + // Merge adjacent or overlapping locks + list[next_idx].merge_with(&pre_lock); + list.remove(pre_idx); + } + } else { + // Different type + if pre_lock.end() <= next_lock.start() { + break; + } else if next_lock.end() <= pre_lock.start() { + list.swap(pre_idx, next_idx); + pre_idx += 1; + next_idx += 1; + } else { + // Split overlapping locks + let overlap_with = pre_lock.overlap_with(&next_lock).unwrap(); + match overlap_with { + OverlapWith::ToLeft => { + list[next_idx].set_start(pre_lock.end()); + break; + } + OverlapWith::InMiddle => { + let right_lk = { + let mut r_lk = next_lock.clone(); + r_lk.set_start(pre_lock.end()); + r_lk + }; + list[next_idx].set_end(pre_lock.start()); + list.swap(pre_idx, next_idx); + list.insert(next_idx + 1, right_lk); + break; + } + OverlapWith::ToRight => { + list[next_idx].set_end(pre_lock.start()); + list.swap(pre_idx, next_idx); + pre_idx += 1; + next_idx += 1; + } + OverlapWith::Includes => { + // New lock can replace the old one + list.remove(next_idx); + } + } + } + } + } + } + + /// Unlock the lock. + /// + /// The lock will be removed from the list. + /// Adjacent locks will be merged if they have the same owner and type. + /// Overlapping locks will be split or merged if they have the same owner. 
+ pub fn unlock(&self, lock: &RangeLockItem) { + debug!("unlock with RangeLock: {:?}", lock); + let mut list = self.inner.write(); + let mut skipped = 0; + while let Some(idx) = list + .iter() + .skip(skipped) + .position(|lk| lk.owner() == lock.owner()) + { + // (idx + skipped) is the original position in list + let idx = idx + skipped; + let existing_lock = &mut list[idx]; + + let overlap_with = match lock.overlap_with(existing_lock) { + Some(overlap) => overlap, + None => { + skipped = idx + 1; + continue; + } + }; + + match overlap_with { + OverlapWith::ToLeft => { + existing_lock.set_start(lock.end()); + break; + } + OverlapWith::InMiddle => { + // Split the lock + let right_lk = { + let mut r_lk = existing_lock.clone(); + r_lk.set_start(lock.end()); + r_lk + }; + existing_lock.set_end(lock.start()); + list.insert(idx + 1, right_lk); + break; + } + OverlapWith::ToRight => { + existing_lock.set_end(lock.start()); + skipped = idx + 1; + } + OverlapWith::Includes => { + // The lock can be deleted from the list + list.remove(idx); + skipped = idx; + } + } + } + } +} + +impl Default for RangeLockList { + fn default() -> Self { + Self::new() + } +} + +/// Type of file range lock, aligned with Linux kernel. +/// F_RDLCK = 0, F_WRLCK = 1, F_UNLCK = 2, +#[derive(Debug, Copy, Clone, PartialEq, TryFromInt)] +#[repr(u16)] +pub enum RangeLockType { + ReadLock = 0, + WriteLock = 1, + Unlock = 2, +} diff --git a/kernel/aster-nix/src/fs/utils/range_lock/range.rs b/kernel/aster-nix/src/fs/utils/range_lock/range.rs new file mode 100644 index 00000000..23af67d3 --- /dev/null +++ b/kernel/aster-nix/src/fs/utils/range_lock/range.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MPL-2.0 + +use super::*; + +/// The maximum offset in a file. +pub const OFFSET_MAX: usize = i64::MAX as usize; + +/// A range in a file. +/// The range is [start, end). +/// The range is valid if start < end. +/// The range is empty if start == end. +/// The range is [0, OFFSET_MAX] if it is not set. 
+/// The range is [start, OFFSET_MAX] if only start is set. +/// The range is [0, end] if only end is set. +/// The range is [start, end] if both start and end are set. +#[derive(Debug, Copy, Clone)] +pub struct FileRange { + start: usize, + end: usize, +} + +impl FileRange { + pub fn new(start: usize, end: usize) -> Result { + if start >= end { + return_errno_with_message!(Errno::EINVAL, "invalid parameters"); + } + Ok(Self { start, end }) + } + + pub fn len(&self) -> usize { + self.end - self.start + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } + + pub fn set_start(&mut self, new_start: usize) -> Result { + if new_start >= self.end { + return_errno_with_message!(Errno::EINVAL, "invalid new start"); + } + let old_start = self.start; + self.start = new_start; + let change = match new_start { + new_start if new_start > old_start => FileRangeChange::Shrinked, + new_start if new_start < old_start => FileRangeChange::Expanded, + _ => FileRangeChange::Same, + }; + Ok(change) + } + + pub fn set_end(&mut self, new_end: usize) -> Result { + if new_end <= self.start { + return_errno_with_message!(Errno::EINVAL, "invalid new end"); + } + let old_end = self.end; + self.end = new_end; + let change = match new_end { + new_end if new_end < old_end => FileRangeChange::Shrinked, + new_end if new_end > old_end => FileRangeChange::Expanded, + _ => FileRangeChange::Same, + }; + Ok(change) + } + + pub fn overlap_with(&self, other: &Self) -> Option { + if self.start >= other.end || self.end <= other.start { + return None; + } + + let overlap = if self.start <= other.start && self.end < other.end { + OverlapWith::ToLeft + } else if self.start > other.start && self.end < other.end { + OverlapWith::InMiddle + } else if self.start > other.start && self.end >= other.end { + OverlapWith::ToRight + } else { + OverlapWith::Includes + }; + Some(overlap) + } + + pub fn merge(&mut 
self, other: &Self) -> Result { + if self.end < other.start || other.end < self.start { + return_errno_with_message!(Errno::EINVAL, "can not merge separated ranges"); + } + + let mut change = FileRangeChange::Same; + if other.start < self.start { + self.start = other.start; + change = FileRangeChange::Expanded; + } + if other.end > self.end { + self.end = other.end; + change = FileRangeChange::Expanded; + } + Ok(change) + } +} + +#[derive(Debug)] +pub enum FileRangeChange { + Same, + Expanded, + Shrinked, +} + +/// The position of a range (say A) relative another overlapping range (say B). +#[derive(Debug)] +pub enum OverlapWith { + /// The position where range A is to the left of B (A.start <= B.start && A.end < B.end). + ToLeft, + /// The position where range A is to the right of B (A.start > B.start && A.end >= B.end). + ToRight, + /// The position where range A is in the middle of B (A.start > B.start && A.end < B.end). + InMiddle, + /// The position where range A includes B (A.start <= B.start && A.end >= B.end). 
+ Includes, +} diff --git a/kernel/aster-nix/src/syscall/fcntl.rs b/kernel/aster-nix/src/syscall/fcntl.rs index be914a02..323bb28c 100644 --- a/kernel/aster-nix/src/syscall/fcntl.rs +++ b/kernel/aster-nix/src/syscall/fcntl.rs @@ -3,8 +3,12 @@ use super::SyscallReturn; use crate::{ fs::{ + file_handle::FileLike, file_table::{FdFlags, FileDesc}, - utils::StatusFlags, + inode_handle::InodeHandle, + utils::{ + FileRange, RangeLockItem, RangeLockItemBuilder, RangeLockType, StatusFlags, OFFSET_MAX, + }, }, prelude::*, process::Pid, @@ -13,91 +17,144 @@ use crate::{ pub fn sys_fcntl(fd: FileDesc, cmd: i32, arg: u64, ctx: &Context) -> Result { let fcntl_cmd = FcntlCmd::try_from(cmd)?; debug!("fd = {}, cmd = {:?}, arg = {}", fd, fcntl_cmd, arg); - let current = ctx.process; match fcntl_cmd { - FcntlCmd::F_DUPFD => { - let mut file_table = current.file_table().lock(); - let new_fd = file_table.dup(fd, arg as FileDesc, FdFlags::empty())?; - Ok(SyscallReturn::Return(new_fd as _)) - } - FcntlCmd::F_DUPFD_CLOEXEC => { - let mut file_table = current.file_table().lock(); - let new_fd = file_table.dup(fd, arg as FileDesc, FdFlags::CLOEXEC)?; - Ok(SyscallReturn::Return(new_fd as _)) - } - FcntlCmd::F_GETFD => { - let file_table = current.file_table().lock(); - let entry = file_table.get_entry(fd)?; - let fd_flags = entry.flags(); - Ok(SyscallReturn::Return(fd_flags.bits() as _)) - } - FcntlCmd::F_SETFD => { - let flags = { - if arg > u8::MAX.into() { - return_errno_with_message!(Errno::EINVAL, "invalid fd flags"); - } - FdFlags::from_bits(arg as u8) - .ok_or(Error::with_message(Errno::EINVAL, "invalid flags"))? 
- }; - let file_table = current.file_table().lock(); - let entry = file_table.get_entry(fd)?; - entry.set_flags(flags); - Ok(SyscallReturn::Return(0)) - } - FcntlCmd::F_GETFL => { - let file = { - let file_table = current.file_table().lock(); - file_table.get_file(fd)?.clone() - }; - let status_flags = file.status_flags(); - let access_mode = file.access_mode(); - Ok(SyscallReturn::Return( - (status_flags.bits() | access_mode as u32) as _, - )) - } - FcntlCmd::F_SETFL => { - let file = { - let file_table = current.file_table().lock(); - file_table.get_file(fd)?.clone() - }; - let new_status_flags = { - // This cmd can change(set or unset) only the O_APPEND, O_ASYNC, O_DIRECT, - // O_NOATIME and O_NONBLOCK flags. - let valid_flags_mask = StatusFlags::O_APPEND - | StatusFlags::O_ASYNC - | StatusFlags::O_DIRECT - | StatusFlags::O_NOATIME - | StatusFlags::O_NONBLOCK; - let mut status_flags = file.status_flags(); - status_flags.remove(valid_flags_mask); - status_flags.insert(StatusFlags::from_bits_truncate(arg as _) & valid_flags_mask); - status_flags - }; - file.set_status_flags(new_status_flags)?; - Ok(SyscallReturn::Return(0)) - } - FcntlCmd::F_SETOWN => { - let file_table = current.file_table().lock(); - let file_entry = file_table.get_entry(fd)?; - // A process ID is specified as a positive value; a process group ID is specified as a negative value. 
- let abs_arg = (arg as i32).unsigned_abs(); - if abs_arg > i32::MAX as u32 { - return_errno_with_message!(Errno::EINVAL, "process (group) id overflowed"); - } - let pid = Pid::try_from(abs_arg) - .map_err(|_| Error::with_message(Errno::EINVAL, "invalid process (group) id"))?; - file_entry.set_owner(pid)?; - Ok(SyscallReturn::Return(0)) - } - FcntlCmd::F_GETOWN => { - let file_table = current.file_table().lock(); - let file_entry = file_table.get_entry(fd)?; - let pid = file_entry.owner().unwrap_or(0); - Ok(SyscallReturn::Return(pid as _)) - } + FcntlCmd::F_DUPFD => handle_dupfd(fd, arg, FdFlags::empty(), ctx), + FcntlCmd::F_DUPFD_CLOEXEC => handle_dupfd(fd, arg, FdFlags::CLOEXEC, ctx), + FcntlCmd::F_GETFD => handle_getfd(fd, ctx), + FcntlCmd::F_SETFD => handle_setfd(fd, arg, ctx), + FcntlCmd::F_GETFL => handle_getfl(fd, ctx), + FcntlCmd::F_SETFL => handle_setfl(fd, arg, ctx), + FcntlCmd::F_GETLK => handle_getlk(fd, arg, ctx), + FcntlCmd::F_SETLK => handle_setlk(fd, arg, true, ctx), + FcntlCmd::F_SETLKW => handle_setlk(fd, arg, false, ctx), + FcntlCmd::F_GETOWN => handle_getown(fd, ctx), + FcntlCmd::F_SETOWN => handle_setown(fd, arg, ctx), } } +fn handle_dupfd(fd: FileDesc, arg: u64, flags: FdFlags, ctx: &Context) -> Result { + let mut file_table = ctx.process.file_table().lock(); + let new_fd = file_table.dup(fd, arg as FileDesc, flags)?; + Ok(SyscallReturn::Return(new_fd as _)) +} + +fn handle_getfd(fd: FileDesc, ctx: &Context) -> Result { + let file_table = ctx.process.file_table().lock(); + let entry = file_table.get_entry(fd)?; + let fd_flags = entry.flags(); + Ok(SyscallReturn::Return(fd_flags.bits() as _)) +} + +fn handle_setfd(fd: FileDesc, arg: u64, ctx: &Context) -> Result { + let flags = if arg > u8::MAX.into() { + return_errno_with_message!(Errno::EINVAL, "invalid fd flags"); + } else { + FdFlags::from_bits(arg as u8).ok_or(Error::with_message(Errno::EINVAL, "invalid flags"))? 
+ }; + let file_table = ctx.process.file_table().lock(); + let entry = file_table.get_entry(fd)?; + entry.set_flags(flags); + Ok(SyscallReturn::Return(0)) +} + +fn handle_getfl(fd: FileDesc, ctx: &Context) -> Result { + let file = { + let file_table = ctx.process.file_table().lock(); + file_table.get_file(fd)?.clone() + }; + let status_flags = file.status_flags(); + let access_mode = file.access_mode(); + Ok(SyscallReturn::Return( + (status_flags.bits() | access_mode as u32) as _, + )) +} + +fn handle_setfl(fd: FileDesc, arg: u64, ctx: &Context) -> Result { + let file = { + let file_table = ctx.process.file_table().lock(); + file_table.get_file(fd)?.clone() + }; + let valid_flags_mask = StatusFlags::O_APPEND + | StatusFlags::O_ASYNC + | StatusFlags::O_DIRECT + | StatusFlags::O_NOATIME + | StatusFlags::O_NONBLOCK; + let mut status_flags = file.status_flags(); + status_flags.remove(valid_flags_mask); + status_flags.insert(StatusFlags::from_bits_truncate(arg as _) & valid_flags_mask); + file.set_status_flags(status_flags)?; + Ok(SyscallReturn::Return(0)) +} + +fn handle_getlk(fd: FileDesc, arg: u64, ctx: &Context) -> Result { + let file = { + let file_table = ctx.process.file_table().lock(); + file_table.get_file(fd)?.clone() + }; + let lock_mut_ptr = arg as Vaddr; + let mut lock_mut_c = ctx.get_user_space().read_val::(lock_mut_ptr)?; + let lock_type = RangeLockType::try_from(lock_mut_c.l_type)?; + if lock_type == RangeLockType::Unlock { + return_errno_with_message!(Errno::EINVAL, "invalid flock type for getlk"); + } + let mut lock = RangeLockItemBuilder::new() + .type_(lock_type) + .range(from_c_flock_and_file(&lock_mut_c, file.clone())?) 
+ .build()?; + let inode_file = file + .downcast_ref::() + .ok_or(Error::with_message(Errno::EBADF, "not inode"))?; + lock = inode_file.test_range_lock(lock)?; + lock_mut_c.copy_from_range_lock(&lock); + ctx.get_user_space().write_val(lock_mut_ptr, &lock_mut_c)?; + Ok(SyscallReturn::Return(0)) +} + +fn handle_setlk( + fd: FileDesc, + arg: u64, + is_nonblocking: bool, + ctx: &Context, +) -> Result { + let file = { + let file_table = ctx.process.file_table().lock(); + file_table.get_file(fd)?.clone() + }; + let lock_mut_ptr = arg as Vaddr; + let lock_mut_c = ctx.get_user_space().read_val::(lock_mut_ptr)?; + let lock_type = RangeLockType::try_from(lock_mut_c.l_type)?; + let lock = RangeLockItemBuilder::new() + .type_(lock_type) + .range(from_c_flock_and_file(&lock_mut_c, file.clone())?) + .build()?; + let inode_file = file + .downcast_ref::() + .ok_or(Error::with_message(Errno::EBADF, "not inode"))?; + inode_file.set_range_lock(&lock, is_nonblocking)?; + Ok(SyscallReturn::Return(0)) +} + +fn handle_getown(fd: FileDesc, ctx: &Context) -> Result { + let file_table = ctx.process.file_table().lock(); + let file_entry = file_table.get_entry(fd)?; + let pid = file_entry.owner().unwrap_or(0); + Ok(SyscallReturn::Return(pid as _)) +} + +fn handle_setown(fd: FileDesc, arg: u64, ctx: &Context) -> Result { + let file_table = ctx.process.file_table().lock(); + let file_entry = file_table.get_entry(fd)?; + // A process ID is specified as a positive value; a process group ID is specified as a negative value. 
+ let abs_arg = (arg as i32).unsigned_abs(); + if abs_arg > i32::MAX as u32 { + return_errno_with_message!(Errno::EINVAL, "process (group) id overflowed"); + } + let pid = Pid::try_from(abs_arg) + .map_err(|_| Error::with_message(Errno::EINVAL, "invalid process (group) id"))?; + file_entry.set_owner(pid)?; + Ok(SyscallReturn::Return(0)) +} + #[repr(i32)] #[derive(Debug, Clone, Copy, TryFromInt)] #[allow(non_camel_case_types)] @@ -107,7 +164,104 @@ enum FcntlCmd { F_SETFD = 2, F_GETFL = 3, F_SETFL = 4, + // Record-lock commands; the numbers follow Linux's asm-generic/fcntl.h. + F_GETLK = 5, + F_SETLK = 6, + F_SETLKW = 7, F_SETOWN = 8, F_GETOWN = 9, F_DUPFD_CLOEXEC = 1030, } + +#[allow(non_camel_case_types)] +pub type off_t = i64; + +#[allow(non_camel_case_types)] +#[derive(Debug, Copy, Clone, TryFromInt)] +#[repr(u16)] +pub enum RangeLockWhence { + SEEK_SET = 0, + SEEK_CUR = 1, + SEEK_END = 2, +} + +/// C struct for a file range lock in Libc +#[repr(C)] +#[derive(Debug, Copy, Clone, Pod)] +pub struct c_flock { + /// Type of lock: F_RDLCK, F_WRLCK, or F_UNLCK + pub l_type: u16, + /// Where `l_start' is relative to + pub l_whence: u16, + /// Offset where the lock begins + pub l_start: off_t, + /// Size of the locked area, 0 means until EOF + pub l_len: off_t, + /// Process holding the lock + pub l_pid: Pid, +} + +impl c_flock { + pub fn copy_from_range_lock(&mut self, lock: &RangeLockItem) { + self.l_type = lock.type_() as u16; + if RangeLockType::Unlock != lock.type_() { + self.l_whence = RangeLockWhence::SEEK_SET as u16; + self.l_start = lock.start() as off_t; + self.l_len = if lock.end() == OFFSET_MAX { + 0 + } else { + lock.range().len() as off_t + }; + self.l_pid = lock.owner(); + } + } +} + +/// Create the file range through C flock and opened file reference +/// +/// A negative `l_len` locks the `-l_len` bytes that end just before the resolved start offset. +/// Fails with `EINVAL` if the resolved start offset is negative. +fn from_c_flock_and_file(lock: &c_flock, file: Arc) -> Result { + let start = { + let whence = RangeLockWhence::try_from(lock.l_whence)?; + match whence { + RangeLockWhence::SEEK_SET => lock.l_start, + RangeLockWhence::SEEK_CUR => (file + 
.downcast_ref::() + .ok_or(Error::with_message(Errno::EBADF, "not inode"))? + .offset() as off_t) + .checked_add(lock.l_start) + .ok_or(Error::with_message(Errno::EOVERFLOW, "start overflow"))?, + + RangeLockWhence::SEEK_END => (file.metadata().size as off_t) + .checked_add(lock.l_start) + .ok_or(Error::with_message(Errno::EOVERFLOW, "start overflow"))?, + } + }; + + // Reject a negative start up front: cast to `usize` it would wrap into a huge, bogus offset. + if start < 0 { + return Err(Error::with_message(Errno::EINVAL, "invalid start")); + } + + let (start, end) = match lock.l_len { + len if len > 0 => { + let end = start + .checked_add(len) + .ok_or(Error::with_message(Errno::EOVERFLOW, "end overflow"))?; + (start as usize, end as usize) + } + 0 => (start as usize, OFFSET_MAX), + len => { + // `len` is negative and `start` is non-negative here, so the sum cannot overflow. + let end = start; + let new_start = start + len; + if new_start < 0 { + return Err(Error::with_message(Errno::EINVAL, "invalid len")); + } + (new_start as usize, end as usize) + } + }; + + FileRange::new(start, end) +} diff --git a/test/syscall_test/blocklists/fcntl_test b/test/syscall_test/blocklists/fcntl_test index 3c116a08..23aea3ce 100644 --- a/test/syscall_test/blocklists/fcntl_test +++ b/test/syscall_test/blocklists/fcntl_test @@ -1,4 +1,3 @@ -FcntlLockTest.* FcntlTest.GetAllFlags FcntlTest.SetFlags FcntlTest.GetO_ASYNC @@ -19,4 +18,46 @@ FcntlTest.SetOwnExTid FcntlTest.SetOwnExPid FcntlTest.SetOwnExPgrp FcntlTest.SetOwnExUnset -FcntlTest.SetFlSetOwnDoNotRace \ No newline at end of file +FcntlTest.SetFlSetOwnDoNotRaceFcntlTest.GetAllFlags +FcntlTest.SetFlags +FcntlTest.GetO_ASYNC +FcntlTest.SetFlO_ASYNC +FcntlTest.SetFdO_ASYNC +FcntlTest.DupAfterO_ASYNC +FcntlTest.GetOwnNone +FcntlTest.GetOwnExNone +FcntlTest.SetOwnInvalidPid +FcntlTest.SetOwnInvalidPgrp +FcntlTest.SetOwnPid +FcntlTest.SetOwnPgrp +FcntlTest.SetOwnUnset +FcntlTest.SetOwnOverflow +FcntlTest.SetOwnExInvalidType +FcntlTest.SetOwnExInvalidTid +FcntlTest.SetOwnExInvalidPid +FcntlTest.SetOwnExInvalidPgrp +FcntlTest.SetOwnExTid +FcntlTest.SetOwnExPid +FcntlTest.SetOwnExPgrp +FcntlTest.SetOwnExUnset +FcntlTest.GetOwnExTid +FcntlTest.GetOwnExPid 
+FcntlTest.GetOwnExPgrp +FcntlTest.SetFlSetOwnDoNotRace +FcntlLockTest.SetLockSymlink +FcntlLockTest.SetLockProc +FcntlLockTest.SetLockPipe +FcntlLockTest.SetLockSocket +FcntlLockTest.SetReadLockMultiProc +FcntlLockTest.SetReadThenWriteLockMultiProc +FcntlLockTest.SetWriteThenReadLockMultiProc +FcntlLockTest.SetWriteLockMultiProc +FcntlLockTest.SetLockIsRegional +FcntlLockTest.SetLockUpgradeDowngrade +FcntlLockTest.SetLockDroppedOnClose +FcntlLockTest.SetLockUnlock +FcntlLockTest.SetLockAcrossRename +FcntlLockTest.SetWriteLockThenBlockingWriteLock +FcntlLockTest.SetReadLockThenBlockingWriteLock +FcntlLockTest.SetWriteLockThenBlockingReadLock +FcntlLockTest.SetReadLockThenBlockingReadLock \ No newline at end of file