diff --git a/docs/src/kernel/linux-compatibility.md b/docs/src/kernel/linux-compatibility.md index 521468dd..9265ad1a 100644 --- a/docs/src/kernel/linux-compatibility.md +++ b/docs/src/kernel/linux-compatibility.md @@ -145,8 +145,8 @@ provided by Linux on x86-64 architecture. | 122 | setfsuid | ✅ | | 123 | setfsgid | ✅ | | 124 | getsid | ✅ | -| 125 | capget | ❌ | -| 126 | capset | ❌ | +| 125 | capget | ✅ | +| 126 | capset | ✅ | | 127 | rt_sigpending | ✅ | | 128 | rt_sigtimedwait | ❌ | | 129 | rt_sigqueueinfo | ❌ | diff --git a/kernel/aster-nix/src/process/credentials/c_types.rs b/kernel/aster-nix/src/process/credentials/c_types.rs new file mode 100644 index 00000000..62a43ac8 --- /dev/null +++ b/kernel/aster-nix/src/process/credentials/c_types.rs @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: MPL-2.0 +#![allow(non_camel_case_types)] + +use crate::{prelude::*, process::Pid}; + +#[derive(Debug, Clone, Copy, Pod)] +#[repr(C)] +pub struct cap_user_header_t { + pub version: u32, + pub pid: Pid, +} + +#[derive(Debug, Clone, Copy, Pod)] +#[repr(C)] +pub struct cap_user_data_t { + pub effective: u32, + pub permitted: u32, + pub inheritable: u32, +} + +pub const LINUX_CAPABILITY_VERSION_3: u32 = 0x20080522; diff --git a/kernel/aster-nix/src/process/credentials/capabilities.rs b/kernel/aster-nix/src/process/credentials/capabilities.rs new file mode 100644 index 00000000..2f38eaab --- /dev/null +++ b/kernel/aster-nix/src/process/credentials/capabilities.rs @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: MPL-2.0 + +use core::sync::atomic::{AtomicU64, Ordering}; + +use bitflags::bitflags; + +bitflags! { + /// Represents a set of Linux capabilities.
+ pub struct CapSet: u64 { + const CHOWN = 1 << 0; + const DAC_OVERRIDE = 1 << 1; + const DAC_READ_SEARCH = 1 << 2; + const FOWNER = 1 << 3; + const FSETID = 1 << 4; + const KILL = 1 << 5; + const SETGID = 1 << 6; + const SETUID = 1 << 7; + const SETPCAP = 1 << 8; + const LINUX_IMMUTABLE = 1 << 9; + const NET_BIND_SERVICE = 1 << 10; + const NET_BROADCAST = 1 << 11; + const NET_ADMIN = 1 << 12; + const NET_RAW = 1 << 13; + const IPC_LOCK = 1 << 14; + const IPC_OWNER = 1 << 15; + const SYS_MODULE = 1 << 16; + const SYS_RAWIO = 1 << 17; + const SYS_CHROOT = 1 << 18; + const SYS_PTRACE = 1 << 19; + const SYS_PACCT = 1 << 20; + const SYS_ADMIN = 1 << 21; + const SYS_BOOT = 1 << 22; + const SYS_NICE = 1 << 23; + const SYS_RESOURCE = 1 << 24; + const SYS_TIME = 1 << 25; + const SYS_TTY_CONFIG = 1 << 26; + const MKNOD = 1 << 27; + const LEASE = 1 << 28; + const AUDIT_WRITE = 1 << 29; + const AUDIT_CONTROL = 1 << 30; + const SETFCAP = 1 << 31; + const MAC_OVERRIDE = 1 << 32; + const MAC_ADMIN = 1 << 33; + const SYSLOG = 1 << 34; + const WAKE_ALARM = 1 << 35; + const BLOCK_SUSPEND = 1 << 36; + const AUDIT_READ = 1 << 37; + const PERFMON = 1 << 38; + const BPF = 1 << 39; + const CHECKPOINT_RESTORE = 1u64 << 40; + // ... include other capabilities as needed + } +} + +impl CapSet { + /// Converts the capability set to a `u32`. Bits 32 and above are truncated. + pub fn as_u32(&self) -> u32 { + self.bits() as u32 + } + + /// Creates a new `CapSet` containing only the `SYS_ADMIN` capability, typically for a root user.
+ pub const fn new_root() -> Self { + CapSet::SYS_ADMIN + } +} + +#[derive(Debug)] +pub(super) struct AtomicCapSet(AtomicU64); + +impl AtomicCapSet { + pub const fn new(capset: CapSet) -> Self { + Self(AtomicU64::new(capset.bits)) + } + + pub fn set(&self, capset: CapSet) { + self.0.store(capset.bits(), Ordering::Relaxed); + } + + pub fn get(&self) -> CapSet { + CapSet::from_bits_truncate(self.0.load(Ordering::Relaxed)) + } +} + +impl Clone for AtomicCapSet { + fn clone(&self) -> Self { + Self::new(self.get()) + } +} diff --git a/kernel/aster-nix/src/process/credentials/credentials_.rs b/kernel/aster-nix/src/process/credentials/credentials_.rs index 0fbfdde0..a2d9c926 100644 --- a/kernel/aster-nix/src/process/credentials/credentials_.rs +++ b/kernel/aster-nix/src/process/credentials/credentials_.rs @@ -3,7 +3,10 @@ use aster_frame::sync::{RwLockReadGuard, RwLockWriteGuard}; use super::{group::AtomicGid, user::AtomicUid, Gid, Uid}; -use crate::prelude::*; +use crate::{ + prelude::*, + process::credentials::capabilities::{AtomicCapSet, CapSet}, +}; #[derive(Debug)] pub(super) struct Credentials_ { @@ -25,13 +28,27 @@ pub(super) struct Credentials_ { /// Group id used for file system checks. fsgid: AtomicGid, - // A set of additional groups to which a process belongs. + /// A set of additional groups to which a process belongs. supplementary_gids: RwLock>, + + // The Linux capabilities. + // These are not the capabilities (in static_cap.rs) enforced on Rust objects. + + /// Capabilities that child processes can inherit. + inheritable_capset: AtomicCapSet, + + /// Capabilities that a process can potentially be granted. + /// It defines the maximum set of privileges that the process could possibly have. + /// Even if the process is not currently using these privileges, it has the potential ability to enable them. + permitted_capset: AtomicCapSet, + + /// Capabilities that the process can actually use. + effective_capset: AtomicCapSet, } impl Credentials_ { /// Create a new credentials.
ruid, euid, suid will be set as the same uid, and gid is the same. - pub fn new(uid: Uid, gid: Gid) -> Self { + pub fn new(uid: Uid, gid: Gid, capset: CapSet) -> Self { let mut supplementary_gids = BTreeSet::new(); supplementary_gids.insert(gid); @@ -45,6 +62,9 @@ impl Credentials_ { sgid: AtomicGid::new(gid), fsgid: AtomicGid::new(gid), supplementary_gids: RwLock::new(supplementary_gids), + inheritable_capset: AtomicCapSet::new(capset), + permitted_capset: AtomicCapSet::new(capset), + effective_capset: AtomicCapSet::new(capset), } } @@ -364,6 +384,7 @@ impl Credentials_ { } // ******* Supplementary groups methods ******* + pub(super) fn groups(&self) -> RwLockReadGuard> { self.supplementary_gids.read() } @@ -371,6 +392,32 @@ impl Credentials_ { pub(super) fn groups_mut(&self) -> RwLockWriteGuard> { self.supplementary_gids.write() } + + // ******* Linux Capability methods ******* + + pub(super) fn inheritable_capset(&self) -> CapSet { + self.inheritable_capset.get() + } + + pub(super) fn permitted_capset(&self) -> CapSet { + self.permitted_capset.get() + } + + pub(super) fn effective_capset(&self) -> CapSet { + self.effective_capset.get() + } + + pub(super) fn set_inheritable_capset(&self, inheritable_capset: CapSet) { + self.inheritable_capset.set(inheritable_capset); + } + + pub(super) fn set_permitted_capset(&self, permitted_capset: CapSet) { + self.permitted_capset.set(permitted_capset); + } + + pub(super) fn set_effective_capset(&self, effective_capset: CapSet) { + self.effective_capset.set(effective_capset); + } } impl Clone for Credentials_ { @@ -385,6 +432,9 @@ impl Clone for Credentials_ { sgid: self.sgid.clone(), fsgid: self.fsgid.clone(), supplementary_gids: RwLock::new(self.supplementary_gids.read().clone()), + inheritable_capset: self.inheritable_capset.clone(), + permitted_capset: self.permitted_capset.clone(), + effective_capset: self.effective_capset.clone(), } } } diff --git a/kernel/aster-nix/src/process/credentials/mod.rs
b/kernel/aster-nix/src/process/credentials/mod.rs index e6634d66..00efe30c 100644 --- a/kernel/aster-nix/src/process/credentials/mod.rs +++ b/kernel/aster-nix/src/process/credentials/mod.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 +pub mod c_types; +pub mod capabilities; mod credentials_; mod group; mod static_cap; @@ -20,7 +22,8 @@ use crate::prelude::*; /// - effective user ID and group ID; /// - saved-set user ID and saved-set group ID; /// - file system user ID and group ID (Linux-specific); -/// - supplementary group IDs. +/// - supplementary group IDs; +/// - Linux capabilities. pub struct Credentials(Arc, R); /// Gets read-only credentials of current thread. diff --git a/kernel/aster-nix/src/process/credentials/static_cap.rs b/kernel/aster-nix/src/process/credentials/static_cap.rs index e15dd6a9..3a831e16 100644 --- a/kernel/aster-nix/src/process/credentials/static_cap.rs +++ b/kernel/aster-nix/src/process/credentials/static_cap.rs @@ -6,7 +6,7 @@ use aster_frame::sync::{RwLockReadGuard, RwLockWriteGuard}; use aster_rights::{Dup, Read, TRights, Write}; use aster_rights_proc::require; -use super::{credentials_::Credentials_, Credentials, Gid, Uid}; +use super::{capabilities::CapSet, credentials_::Credentials_, Credentials, Gid, Uid}; use crate::prelude::*; impl Credentials { @@ -14,7 +14,8 @@ impl Credentials { pub fn new_root() -> Self { let uid = Uid::new_root(); let gid = Gid::new_root(); - let credentials_ = Arc::new(Credentials_::new(uid, gid)); + let cap = CapSet::new_root(); + let credentials_ = Arc::new(Credentials_::new(uid, gid, cap)); Self(credentials_, R::new()) } @@ -249,4 +250,54 @@ impl Credentials { pub fn groups_mut(&self) -> RwLockWriteGuard> { self.0.groups_mut() } + + // *********** Linux Capability methods ********** + + /// Gets the capabilities that child processes can inherit. + /// + /// This method requires the `Read` right.
+ #[require(R > Read)] + pub fn inheritable_capset(&self) -> CapSet { + self.0.inheritable_capset() + } + + /// Gets the capabilities that are permitted. + /// + /// This method requires the `Read` right. + #[require(R > Read)] + pub fn permitted_capset(&self) -> CapSet { + self.0.permitted_capset() + } + + /// Gets the capabilities that are actually in effect. + /// + /// This method requires the `Read` right. + #[require(R > Read)] + pub fn effective_capset(&self) -> CapSet { + self.0.effective_capset() + } + + /// Sets the capabilities that child processes can inherit. + /// + /// This method requires the `Write` right. + #[require(R > Write)] + pub fn set_inheritable_capset(&self, inheritable_capset: CapSet) { + self.0.set_inheritable_capset(inheritable_capset); + } + + /// Sets the capabilities that are permitted. + /// + /// This method requires the `Write` right. + #[require(R > Write)] + pub fn set_permitted_capset(&self, permitted_capset: CapSet) { + self.0.set_permitted_capset(permitted_capset); + } + + /// Sets the capabilities that are actually in effect. + /// + /// This method requires the `Write` right.
+ #[require(R > Write)] + pub fn set_effective_capset(&self, effective_capset: CapSet) { + self.0.set_effective_capset(effective_capset); + } } diff --git a/kernel/aster-nix/src/process/mod.rs b/kernel/aster-nix/src/process/mod.rs index 82115bac..a7f8715c 100644 --- a/kernel/aster-nix/src/process/mod.rs +++ b/kernel/aster-nix/src/process/mod.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 mod clone; -mod credentials; +pub mod credentials; mod exit; mod kill; pub mod posix_thread; diff --git a/kernel/aster-nix/src/syscall/arch/x86.rs b/kernel/aster-nix/src/syscall/arch/x86.rs index f5ee5e8e..c3f00aa9 100644 --- a/kernel/aster-nix/src/syscall/arch/x86.rs +++ b/kernel/aster-nix/src/syscall/arch/x86.rs @@ -7,6 +7,8 @@ use crate::syscall::{ arch_prctl::sys_arch_prctl, bind::sys_bind, brk::sys_brk, + capget::sys_capget, + capset::sys_capset, chdir::{sys_chdir, sys_fchdir}, chmod::{sys_chmod, sys_fchmod, sys_fchmodat}, chown::{sys_chown, sys_fchown, sys_fchownat, sys_lchown}, @@ -216,6 +218,8 @@ impl_syscall_nums_and_dispatch_fn!
{ SYS_SETFSUID = 122 => sys_setfsuid(args[..1]); SYS_SETFSGID = 123 => sys_setfsgid(args[..1]); SYS_GETSID = 124 => sys_getsid(args[..1]); + SYS_CAPGET = 125 => sys_capget(args[..2]); + SYS_CAPSET = 126 => sys_capset(args[..2]); SYS_RT_SIGPENDING = 127 => sys_rt_sigpending(args[..2]); SYS_RT_SIGSUSPEND = 130 => sys_rt_sigsuspend(args[..2]); SYS_SIGALTSTACK = 131 => sys_sigaltstack(args[..2]); diff --git a/kernel/aster-nix/src/syscall/capget.rs b/kernel/aster-nix/src/syscall/capget.rs new file mode 100644 index 00000000..2c4a99aa --- /dev/null +++ b/kernel/aster-nix/src/syscall/capget.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MPL-2.0 + +use super::SyscallReturn; +use crate::{ + prelude::*, + process::{ + credentials, + credentials::c_types::{cap_user_data_t, cap_user_header_t, LINUX_CAPABILITY_VERSION_3}, + }, + util::{read_val_from_user, write_val_to_user}, +}; + +/// Handles `capget(2)`: copies the low 32 bits of the calling process's effective, permitted +/// and inheritable capability sets to the `cap_user_data_t` at `cap_user_data_addr`. +pub fn sys_capget(cap_user_header_addr: Vaddr, cap_user_data_addr: Vaddr) -> Result<SyscallReturn> { + let cap_user_header: cap_user_header_t = + read_val_from_user::<cap_user_header_t>(cap_user_header_addr)?; + + if cap_user_header.version != LINUX_CAPABILITY_VERSION_3 { + return_errno_with_message!(Errno::EINVAL, "not supported (capability version is not 3)"); + } + + // Extract target pid and validate whether it represents the current process. + let header_pid = cap_user_header.pid; + // Only the calling process can be queried here: header->pid must be 0 or equal to + // getpid(), which are equivalent. Any other pid is rejected with EINVAL. + // See https://linux.die.net/man/2/capget (Section. With VFS capability support) for details.
+ if header_pid != 0 && header_pid != current!().pid() { + return_errno_with_message!(Errno::EINVAL, "invalid pid"); + } + + let credentials = credentials(); + let inheritable_capset = credentials.inheritable_capset(); + let permitted_capset = credentials.permitted_capset(); + let effective_capset = credentials.effective_capset(); + + // Annoying legacy format with 64-bit capabilities exposed as two sets of 32-bit fields, + // so we need to split the capability values up. + let result = cap_user_data_t { + // Only element 0 of the two-element version-3 data array is filled in, so the upper + // capabilities are silently dropped. This is considered fail-safe behavior. + effective: effective_capset.as_u32(), + permitted: permitted_capset.as_u32(), + inheritable: inheritable_capset.as_u32(), + }; + + write_val_to_user(cap_user_data_addr, &result)?; + Ok(SyscallReturn::Return(0)) +} diff --git a/kernel/aster-nix/src/syscall/capset.rs b/kernel/aster-nix/src/syscall/capset.rs new file mode 100644 index 00000000..dc228b07 --- /dev/null +++ b/kernel/aster-nix/src/syscall/capset.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MPL-2.0 + +use super::SyscallReturn; +use crate::{ + prelude::*, + process::{ + credentials::{ + c_types::{cap_user_data_t, cap_user_header_t, LINUX_CAPABILITY_VERSION_3}, + capabilities::CapSet, + }, + credentials_mut, + }, + util::read_val_from_user, +}; + +const CAP_LAST_CAP: u64 = 40; // Number of the last capability (CAP_CHECKPOINT_RESTORE) +const CAP_VALID_MASK: u64 = (1u64 << (CAP_LAST_CAP + 1)) - 1; + +/// Combines two 32-bit halves into a 64-bit capability mask, dropping bits above `CAP_LAST_CAP`. +fn make_kernel_cap(low: u32, high: u32) -> u64 { + ((low as u64) | ((high as u64) << 32)) & CAP_VALID_MASK +} + +/// Handles `capset(2)`: replaces the calling process's capability sets with the values in the +/// first `cap_user_data_t` element at `cap_user_data_addr`. +pub fn sys_capset(cap_user_header_addr: Vaddr, cap_user_data_addr: Vaddr) -> Result<SyscallReturn> { + let cap_user_header: cap_user_header_t = + read_val_from_user::<cap_user_header_t>(cap_user_header_addr)?; + + if cap_user_header.version != LINUX_CAPABILITY_VERSION_3 { + return_errno_with_message!(Errno::EINVAL, "not supported (capability version is not 3)"); + } + + // The ability to set capabilities of any other
process has been deprecated. + // See: https://elixir.bootlin.com/linux/v6.9.3/source/kernel/capability.c#L209 for more details. + let header_pid = cap_user_header.pid; + if header_pid != 0 && header_pid != current!().pid() { + return_errno_with_message!(Errno::EPERM, "cannot set capabilities of other processes"); + } + + // Convert the cap(u32) to u64 + let cap_user_data: cap_user_data_t = read_val_from_user::<cap_user_data_t>(cap_user_data_addr)?; + let inheritable = make_kernel_cap(cap_user_data.inheritable, 0); + let permitted = make_kernel_cap(cap_user_data.permitted, 0); + let effective = make_kernel_cap(cap_user_data.effective, 0); + + // capset(2): the new effective set must be a subset of the new permitted set. + // NOTE(review): the checks against the current permitted/inheritable sets that capset(2) + // also requires are not enforced yet. + if (effective & !permitted) != 0 { + return_errno_with_message!(Errno::EPERM, "effective capset exceeds permitted capset"); + } + + let credentials = credentials_mut(); + + credentials.set_inheritable_capset(CapSet::from_bits_truncate(inheritable)); + credentials.set_permitted_capset(CapSet::from_bits_truncate(permitted)); + credentials.set_effective_capset(CapSet::from_bits_truncate(effective)); + + Ok(SyscallReturn::Return(0)) +} diff --git a/kernel/aster-nix/src/syscall/mod.rs b/kernel/aster-nix/src/syscall/mod.rs index 4d4afcdd..ac199406 100644 --- a/kernel/aster-nix/src/syscall/mod.rs +++ b/kernel/aster-nix/src/syscall/mod.rs @@ -14,6 +14,8 @@ mod arch; mod arch_prctl; mod bind; mod brk; +mod capget; +mod capset; mod chdir; mod chmod; mod chown; diff --git a/regression/apps/Makefile b/regression/apps/Makefile index bb6db564..6427a546 100644 --- a/regression/apps/Makefile +++ b/regression/apps/Makefile @@ -11,6 +11,7 @@ REGRESSION_BUILD_DIR ?= $(INITRAMFS)/regression # These test apps are sorted by name TEST_APPS := \ alarm \ + capability \ clone3 \ cpu_affinity \ eventfd2 \ diff --git a/regression/apps/capability/Makefile b/regression/apps/capability/Makefile new file mode 100644 index 00000000..c603a781 --- /dev/null +++ b/regression/apps/capability/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MPL-2.0 + +include ../test_common.mk + +EXTRA_C_FLAGS := diff --git a/regression/apps/capability/capabilities.c b/regression/apps/capability/capabilities.c new file mode 100644 index
00000000..4e1f22b1 --- /dev/null +++ b/regression/apps/capability/capabilities.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: MPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/capability.h> + +// Sets the effective and permitted sets of `target_pid` to `capabilities` (inheritable is +// cleared) via capset(2). Returns 0 on success and 1 on failure. +int set_caps(__u32 target_pid, __u32 capabilities) +{ + struct __user_cap_header_struct capheader; + struct __user_cap_data_struct capdata[2]; + capheader.version = _LINUX_CAPABILITY_VERSION_3; + capheader.pid = target_pid; + memset(&capdata, 0, sizeof(capdata)); + + // Set specified capabilities + capdata[0].effective = capdata[0].permitted = capabilities; + capdata[0].inheritable = 0; + if (syscall(SYS_capset, &capheader, &capdata) < 0) { + perror("capset failed"); + return 1; + } + printf("Process capabilities set successfully.\n"); + return 0; +} + +// Returns non-zero only if every bit of `capabilities` is present in both the permitted and +// effective sets that capget(2) reports for `target_pid`. Exits the process if capget fails. +int check_caps(__u32 target_pid, __u32 capabilities) +{ + struct __user_cap_header_struct capheader; + struct __user_cap_data_struct capdata[2]; + memset(&capheader, 0, sizeof(capheader)); + memset(&capdata, 0, sizeof(capdata)); + capheader.version = _LINUX_CAPABILITY_VERSION_3; + capheader.pid = target_pid; + if (syscall(SYS_capget, &capheader, &capdata) == -1) { + perror("capget failed"); + exit(EXIT_FAILURE); + } + printf("Process capabilities retrieved successfully.\n"); + return (capdata[0].permitted & capabilities) == capabilities && + (capdata[0].effective & capabilities) == capabilities; +} + +int main(void) +{ + __u32 target_pid = getpid(); + printf("Process Pid: %u.\n", target_pid); + + __u32 caps_to_set = + (1 << CAP_NET_RAW) | + (1 << CAP_NET_ADMIN); // Define the desired capabilities. + + // Try setting the specified capabilities. + if (set_caps(target_pid, caps_to_set) != 0) { + fprintf(stderr, "Failed to set capabilities.\n"); + return 1; + } + + // Check for CAP_NET_RAW among the process's capabilities.
+ if (check_caps(target_pid, 1 << CAP_NET_RAW)) { + printf("Process has CAP_NET_RAW capability.\n"); + } else { + fprintf(stderr, + "Process does NOT have CAP_NET_RAW capability.\n"); + return 1; + } + + // Check for CAP_NET_ADMIN among the process's capabilities. + if (check_caps(target_pid, 1 << CAP_NET_ADMIN)) { + printf("Process has CAP_NET_ADMIN capability.\n"); + } else { + fprintf(stderr, + "Process does NOT have CAP_NET_ADMIN capability.\n"); + return 1; + } + + return 0; +} diff --git a/regression/syscall_test/blocklists/chroot_test b/regression/syscall_test/blocklists/chroot_test index 496343a0..e1d1bb10 100644 --- a/regression/syscall_test/blocklists/chroot_test +++ b/regression/syscall_test/blocklists/chroot_test @@ -1,11 +1 @@ -ChrootTest.Success -ChrootTest.PermissionDenied -ChrootTest.NotDir -ChrootTest.NotExist -ChrootTest.WithoutCapability -ChrootTest.CreatesNewRoot -ChrootTest.DotDotFromOpenFD -ChrootTest.ProcFdLinkResolutionInChroot -ChrootTest.ProcMemSelfFdsNoEscapeProcOpen -ChrootTest.ProcMemSelfMapsNoEscapeProcOpen -ChrootTest.ProcMountsMountinfoNoEscape \ No newline at end of file +ChrootTest.WithoutCapability \ No newline at end of file