ospp project (feature) add namespace overlayfs cgroup (#949)

## 开发进展:
## namespace
- pid_namespace 基本实现,基于pid_struct等数据结构实现隔离
- mnt_namespace 基本实现,挂载点的隔离通过不同的挂载树来实现
- usernamespace 作为支持性的namespace,目前受限实现全局静态
## overlayfs
- 实现若干个文件系统的叠加,在mount中传入多个路径作为多个fs的mount路径以及最后merge层的fs路径
- copy-up机制的,除最上层外其他层为只读层,满足写时拷贝,需要修改的时候copy到上层修改
- whiteout特殊文件,用于标记在下层需要被删除的文件用来掩盖需要删除的文件
## cgroups
- 目前cgroups还处于框架阶段,之后具体实现具体的内存、CPU等子系统
This commit is contained in:
codeironman 2024-10-31 00:50:34 +08:00 committed by GitHub
parent 84c528f53d
commit f5b2038871
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
43 changed files with 2279 additions and 56 deletions

View File

@ -30,10 +30,12 @@
kernel/debug/index
kernel/ktest/index
kernel/cpu_arch/index
kernel/container/index
kernel/libs/index
kernel/trace/index
.. toctree::
:maxdepth: 1
:caption: 应用层

View File

@ -0,0 +1,13 @@
====================================
容器化
====================================
这里是DragonOS中与容器化相关的说明文档。
主要包括 namespace、overlayfs 和 cgroup。
.. toctree::
:maxdepth: 2
namespaces/index
filesystem/unionfs/index

View File

@ -0,0 +1,14 @@
====================================
名称空间
====================================
DragonOS的namespaces目前支持pid_namespace和mnt_namespace 预计之后会继续完善
namespace是容器化实现过程中的重要组成部分
由于目前os是单用户user_namespace为全局静态
.. toctree::
:maxdepth: 1
pid_namespace
mnt_namespace

View File

@ -0,0 +1,19 @@
# 挂载名称空间
## 底层架构
pcb -> nsproxy -> mnt_namespace
每一个挂载的文件系统都有各自独立的挂载点,表现在数据结构上是一个挂载的红黑树,每一个名称空间中的挂载是独立的,所以文件系统的挂载和卸载不会影响别的名称空间
## 系统调用接口
- clone
- CLONE_NEWNS用于创建一个新的 MNT 命名空间。提供独立的文件系统挂载点
- unshare
- 使用 CLONE_NEWPID 标志调用 unshare() 后,后续创建的所有子进程都将在新的命名空间中运行。
- setns
- 将进程加入到指定的名称空间
- chroot
- 将当前进程的根目录更改为指定的路径,提供文件系统隔离。

View File

@ -0,0 +1,21 @@
# 进程名称空间
:::{note} 本文作者:操丰毅 1553389239@qq.com
2024年10月30日 :::
pid_namespace 是内核中的一种名称空间,用于实现进程隔离,允许在不同的名称空间中运行的进程拥有独立的 pid 视图
## 底层架构
pcb -> nsproxy -> pid_namespace
- pid_namespace 内有独立的一套进程分配器以及孤儿进程回收器独立管理内部的pid
- 不同进程的详细信息都存放在proc文件系统中里面的找到对应的pid号里面的信息都在pid中记录的是pid_namespace中的信息
- pid_namespace等限制由ucount来控制管理
## 系统调用接口
- clone
- CLONE_NEWPID用于创建一个新的 PID 命名空间。使用这个标志时,子进程将在新的 PID 命名空间内运行,进程 ID 从 1 开始。
- unshare
- 使用 CLONE_NEWPID 标志调用 unshare() 后,后续创建的所有子进程都将在新的命名空间中运行。
- getpid
- 在命名空间中调用 getpid() 会返回进程在当前 PID 命名空间中的进程 ID

View File

@ -13,4 +13,5 @@ todo: 由于文件系统模块重构文档暂时不可用预计在2023年4
vfs/index
sysfs
kernfs
unionfs/index

View File

@ -0,0 +1,10 @@
====================================
联合文件系统
====================================
Union Filesystem
OverlayFS 将多个文件系统(称为“层”)合并为一个逻辑文件系统,使用户看到一个统一的目录结构。
.. toctree::
:maxdepth: 1
overlayfs

View File

@ -0,0 +1,26 @@
# overlayfs
OverlayFs是目前使用最多的联合文件系统原理简单方便使用主要用于容器中
在 Docker 中OverlayFS 是默认的存储驱动之一。Docker 为每个容器创建一个独立的上层目录,而所有容器共享同一个下层镜像文件。这样的设计使得容器之间的资源共享更加高效,同时减少了存储需求。
## 架构设计
overlayfs主要有两个层以及一个虚拟的合并层
- Lower Layer下层通常是 只读 文件系统。可以包含多层。
- Upper Layer上层为 可写层,所有的写操作都会在这一层上进行。
- Merged Layer合并层上层和下层的逻辑视图合并后向用户呈现的最终文件系统。
## 工作原理
- 读取操作:
- OverlayFS 会优先从 Upper Layer 读取文件。如果文件不存在于上层,则读取 Lower Layer 中的内容。
- 写入操作:
- 如果一个文件位于 Lower Layer 中,并尝试写入该文件,系统会将其 copy-up 到 Upper Layer 并在上层写入。如果文件已经存在于 Upper Layer则直接在该层写入。
- 删除操作:
- 当删除文件时OverlayFS 会在上层创建一个标记为 whiteout 的条目,这会隐藏下层的文件。
## Copy-up
- 写时拷贝
当一个文件从 下层 被修改时,它会被复制到 上层(称为 copy-up。之后的所有修改都会发生在上层的文件副本上。
## 实现逻辑
通过构建 OvlInode 并实现 IndexNode 这个 trait 来代表上层或者下层的 inode,具体的有关文件、文件夹的操作都在该实现中完成

View File

@ -140,6 +140,11 @@ impl IdAllocator {
pub fn used(&self) -> usize {
self.used
}
/// 返回最大id数
pub fn get_max_id(&self) -> usize {
self.max_id
}
}
impl core::fmt::Debug for IdAllocator {

View File

@ -104,7 +104,7 @@ impl ProcessManager {
pub fn copy_thread(
current_pcb: &Arc<ProcessControlBlock>,
new_pcb: &Arc<ProcessControlBlock>,
clone_args: KernelCloneArgs,
clone_args: &KernelCloneArgs,
current_trapframe: &TrapFrame,
) -> Result<(), SystemError> {
let clone_flags = clone_args.flags;

View File

@ -299,7 +299,7 @@ impl ProcessManager {
pub fn copy_thread(
current_pcb: &Arc<ProcessControlBlock>,
new_pcb: &Arc<ProcessControlBlock>,
clone_args: KernelCloneArgs,
clone_args: &KernelCloneArgs,
current_trapframe: &TrapFrame,
) -> Result<(), SystemError> {
let clone_flags = clone_args.flags;

View File

@ -0,0 +1,6 @@
use super::CgroupSubsysState;
/// Memory cgroup controller state.
///
/// Skeleton only: memory accounting/limits are not implemented yet
/// (the cgroup subsystem is still at the framework stage).
struct MemCgroup {
    /// Common per-subsystem state shared by every cgroup controller.
    css: CgroupSubsysState,
    /// Identifier of this memory cgroup.
    id: u32,
}

48
kernel/src/cgroup/mod.rs Normal file
View File

@ -0,0 +1,48 @@
#![allow(dead_code, unused_variables, unused_imports)]
pub mod mem_cgroup;
use alloc::{collections::LinkedList, rc::Weak, sync::Arc, vec::Vec};
use alloc::boxed::Box;
use crate::filesystem::vfs::IndexNode;
/// One node of the cgroup hierarchy (framework stage; concrete
/// controllers such as memory/CPU are not implemented yet).
pub struct Cgroup {
    css: Weak<CgroupSubsysState>,
    /// Depth of this cgroup within the hierarchy
    level: u32,
    /// Maximum depth allowed below this cgroup
    max_depth: u32,
    /// Number of visible descendants
    nr_descendants: u32,
    /// Number of descendants currently being destroyed
    nr_dying_descendants: u32,
    /// Maximum number of descendants allowed
    max_descendants: u32,
    /// Number of css_sets
    nr_populated_csets: u32,
    /// Count of child groups that contain tasks
    nr_populated_domain_children: u32,
    /// Count of threaded child groups that contain tasks
    nr_populated_threaded_children: u32,
    /// Number of live threaded child cgroups
    nr_threaded_children: u32,
    /// Inode (kernfs node) associated with this cgroup
    kernfs_node: Box<dyn IndexNode>,
}
/// Per-(cgroup, controller) state: resource statistics plus the
/// hierarchy links of the owning cgroup.
pub struct CgroupSubsysState {
    cgroup: Arc<Cgroup>,
    /// Sibling nodes
    sibling: LinkedList<Arc<Cgroup>>,
    /// Child nodes
    children: LinkedList<Arc<Cgroup>>,
}
/// A cgroup controller ("subsystem"). Placeholder: concrete controllers
/// are not implemented yet.
pub struct CgroupSubsys {}
/// The set of `CgroupSubsysState`s a task is attached to
/// (the collection of cgroup_sub_state).
pub struct CssSet {
    subsys: Vec<Arc<CgroupSubsysState>>,
}

View File

@ -4,6 +4,7 @@ pub mod eventfd;
pub mod fat;
pub mod kernfs;
pub mod mbr;
pub mod overlayfs;
pub mod procfs;
pub mod ramfs;
pub mod sysfs;

View File

@ -0,0 +1,41 @@
use super::OvlInode;
use crate::{
filesystem::vfs::{IndexNode, Metadata},
libs::spinlock::SpinLock,
};
use alloc::sync::Arc;
use system_error::SystemError;
impl OvlInode {
    /// Copy-on-write: materialize this inode's lower-layer file in the
    /// writable upper layer so that it can be modified.
    ///
    /// # Errors
    /// - `ENOENT` if there is no lower inode to copy from.
    /// - Whatever `create_upper_inode`, `read_at` or `write_at` return.
    pub fn copy_up(&self) -> Result<(), SystemError> {
        // Check-and-release: the guard MUST NOT be held across the call to
        // `create_upper_inode` below, which acquires the same (non-reentrant)
        // spinlock — holding it here self-deadlocked.
        if self.upper_inode.lock().is_some() {
            // Already copied up (or natively upper); nothing to do.
            return Ok(());
        }
        let lower_inode = self.lower_inode.as_ref().ok_or(SystemError::ENOENT)?;
        let metadata = lower_inode.metadata()?;
        let new_upper_inode = self.create_upper_inode(metadata.clone())?;
        // Copy the whole file content from the lower layer into the new
        // upper inode.
        let mut buffer = vec![0u8; metadata.size as usize];
        let lock = SpinLock::new(crate::filesystem::vfs::FilePrivateData::Unused);
        lower_inode.read_at(0, metadata.size as usize, &mut buffer, lock.lock())?;
        new_upper_inode.write_at(0, metadata.size as usize, &buffer, lock.lock())?;
        // Publish the result; if another thread won the race in the
        // meantime, keep the first winner's inode.
        let mut upper_inode = self.upper_inode.lock();
        if upper_inode.is_none() {
            *upper_inode = Some(new_upper_inode);
        }
        Ok(())
    }

    /// Create the upper-layer inode that will receive the copied-up data.
    ///
    /// NOTE(review): this reads `self.upper_inode`, which is `None` on the
    /// only call path (`copy_up` calls us precisely because it is `None`),
    /// so it currently always returns `ENOSYS`. It probably needs to reach
    /// the upper *layer's* root via the owning `OverlayFS` instead — confirm
    /// the intended design.
    fn create_upper_inode(&self, metadata: Metadata) -> Result<Arc<dyn IndexNode>, SystemError> {
        let upper_inode = self.upper_inode.lock();
        let upper_root_inode = upper_inode
            .as_ref()
            .ok_or(SystemError::ENOSYS)?
            .fs()
            .root_inode();
        upper_root_inode.create_with_data(&self.dname()?.0, metadata.file_type, metadata.mode, 0)
    }
}

View File

@ -0,0 +1,32 @@
use alloc::sync::Arc;
use alloc::vec::Vec;
use crate::filesystem::vfs::IndexNode;
use super::{OvlInode, OvlSuperBlock};
/// Per-inode overlay entry: the stack of lower-layer locations backing a file.
#[derive(Debug)]
pub struct OvlEntry {
    numlower: usize, // number of lower layers
    lowerstack: Vec<OvlPath>,
}
impl OvlEntry {
    pub fn new() -> Self {
        Self {
            // NOTE(review): `numlower` is hard-coded to 2 while `lowerstack`
            // starts empty, so the two fields disagree — confirm whether this
            // placeholder value is intentional.
            numlower: 2,
            lowerstack: Vec::new(),
        }
    }
}
/// A (layer, inode) pair locating one file inside a specific overlay layer.
#[derive(Debug)]
pub struct OvlPath {
    layer: Arc<OvlLayer>,
    inode: Arc<dyn IndexNode>,
}
/// One layer of the overlay stack.
#[derive(Debug)]
pub struct OvlLayer {
    pub mnt: Arc<OvlInode>, // mount point of this layer
    pub index: u32,         // 0 is the writable upper layer, >0 are read-only lower layers
    pub fsid: u32,          // filesystem identifier
}

View File

@ -0,0 +1,432 @@
#![allow(dead_code, unused_variables, unused_imports)]
pub mod copy_up;
pub mod entry;
use super::ramfs::{LockedRamFSInode, RamFSInode};
use super::vfs::{self, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock};
use super::vfs::{FSMAKER, ROOT_INODE};
use crate::driver::base::device::device_number::DeviceNumber;
use crate::driver::base::device::device_number::Major;
use crate::filesystem::vfs::{FileSystemMaker, FileSystemMakerData};
use crate::libs::spinlock::SpinLock;
use alloc::string::String;
use alloc::sync::Arc;
use alloc::sync::Weak;
use alloc::vec::Vec;
use entry::{OvlEntry, OvlLayer};
use system_error::SystemError;
const WHITEOUT_MODE: u64 = 0o020000 | 0o600; // whiteout字符设备文件模式与权限
const WHITEOUT_DEV: DeviceNumber = DeviceNumber::new(Major::UNNAMED_MAJOR, 0); // Whiteout 文件设备号
const WHITEOUT_FLAG: u64 = 0x1;
#[distributed_slice(FSMAKER)]
static OVERLAYFSMAKER: FileSystemMaker = FileSystemMaker::new(
"overlay",
&(OverlayFS::make_overlayfs
as fn(
Option<&dyn FileSystemMakerData>,
) -> Result<Arc<dyn FileSystem + 'static>, SystemError>),
);
/// Overlayfs mount options, parsed from the raw `mount(2)` data string of
/// the form `upperdir=U,lowerdir=A:B:C,workdir=W`.
#[derive(Debug)]
pub struct OverlayMountData {
    upper_dir: String,
    lower_dirs: Vec<String>,
    work_dir: String,
}
impl OverlayMountData {
    /// Parse overlay mount options from a raw, NUL-terminated C string
    /// (`upperdir=U,lowerdir=A:B:C,workdir=W`).
    ///
    /// # Errors
    /// `EINVAL` if the pointer is null, no NUL terminator is found within
    /// the size limit, the bytes are not valid UTF-8, or any `key=value`
    /// pair is malformed or uses an unknown key.
    pub fn from_row(raw_data: *const u8) -> Result<Self, SystemError> {
        if raw_data.is_null() {
            return Err(SystemError::EINVAL);
        }
        // Bound the terminator scan: the previous unbounded `(0..)` search
        // would walk arbitrary memory if the buffer is not NUL-terminated.
        const MAX_DATA_LEN: usize = 4096;
        let len = (0..MAX_DATA_LEN)
            .find(|&i| unsafe { raw_data.add(i).read() } == 0)
            .ok_or(SystemError::EINVAL)?;
        let slice = unsafe { core::slice::from_raw_parts(raw_data, len) };
        let raw_str = core::str::from_utf8(slice).map_err(|_| SystemError::EINVAL)?;
        let mut data = OverlayMountData {
            upper_dir: String::new(),
            lower_dirs: Vec::new(),
            work_dir: String::new(),
        };
        // Options are comma-separated `key=value` pairs; `lowerdir` may
        // list several colon-separated directories.
        for pair in raw_str.split(',') {
            let mut parts = pair.split('=');
            let key = parts.next().ok_or(SystemError::EINVAL)?;
            let value = parts.next().ok_or(SystemError::EINVAL)?;
            match key {
                "upperdir" => data.upper_dir = value.into(),
                "lowerdir" => data.lower_dirs = value.split(':').map(|s| s.into()).collect(),
                "workdir" => data.work_dir = value.into(),
                _ => return Err(SystemError::EINVAL),
            }
        }
        Ok(data)
    }
}
impl FileSystemMakerData for OverlayMountData {
fn as_any(&self) -> &dyn core::any::Any {
self
}
}
/// Overlayfs superblock, wrapping the generic VFS `SuperBlock`.
#[derive(Debug)]
pub struct OvlSuperBlock {
    super_block: SuperBlock,
    pseudo_dev: DeviceNumber, // pseudo (virtual) device number
    is_lower: bool,
}
/// The overlay filesystem: one writable upper layer stacked over one or
/// more read-only lower layers, presented as a single merged view.
#[derive(Debug)]
struct OverlayFS {
    numlayer: usize,
    numfs: u32,
    numdatalayer: usize,
    layers: Vec<OvlLayer>, // layer 0 is the writable layer; the rest are read-only
    workdir: Arc<OvlInode>,
    root_inode: Arc<OvlInode>,
}
/// An inode in the merged overlay view, backed by an optional upper
/// (writable) inode and an optional lower (read-only) inode.
#[derive(Debug)]
struct OvlInode {
    redirect: String, // redirect path
    file_type: FileType,
    flags: SpinLock<u64>,
    upper_inode: SpinLock<Option<Arc<dyn IndexNode>>>, // writable layer
    lower_inode: Option<Arc<dyn IndexNode>>,           // read-only layer
    oe: Arc<OvlEntry>,
    fs: Weak<OverlayFS>,
}
impl OvlInode {
    /// Build an overlay inode from optional upper/lower backing inodes.
    ///
    /// The file type defaults to `Dir` and the back-reference to the owning
    /// filesystem starts empty (`Weak::default()`); the caller is expected
    /// to wire it up when constructing the `OverlayFS`.
    pub fn new(
        redirect: String,
        upper: Option<Arc<dyn IndexNode>>,
        lower_inode: Option<Arc<dyn IndexNode>>,
    ) -> Self {
        Self {
            redirect,
            file_type: FileType::Dir,
            flags: SpinLock::new(0),
            upper_inode: SpinLock::new(upper),
            lower_inode,
            oe: Arc::new(OvlEntry::new()),
            fs: Weak::default(),
        }
    }
}
impl FileSystem for OverlayFS {
    /// The merged root is the upper layer's mount inode (see `make_overlayfs`).
    fn root_inode(&self) -> Arc<dyn IndexNode> {
        self.root_inode.clone()
    }
    fn info(&self) -> vfs::FsInfo {
        FsInfo {
            blk_dev_id: 0,
            max_name_len: 255,
        }
    }
    fn as_any_ref(&self) -> &dyn core::any::Any {
        self
    }
    fn name(&self) -> &str {
        "overlayfs"
    }
    // Not implemented yet: overlayfs has no real superblock of its own here.
    fn super_block(&self) -> SuperBlock {
        todo!()
    }
}
impl OverlayFS {
    /// Inode of the writable (upper) layer's mount point — always layer 0.
    pub fn ovl_upper_mnt(&self) -> Arc<dyn IndexNode> {
        self.layers[0].mnt.clone()
    }

    /// Construct an overlayfs from parsed [`OverlayMountData`].
    ///
    /// Layer 0 is the writable upper layer; the following entries are the
    /// read-only lower layers in `lowerdir` order.
    ///
    /// # Errors
    /// `EINVAL` if the mount data is missing or of the wrong type, if no
    /// lower directory was given, or if any referenced directory cannot be
    /// looked up from the root inode.
    pub fn make_overlayfs(
        data: Option<&dyn FileSystemMakerData>,
    ) -> Result<Arc<dyn FileSystem + 'static>, SystemError> {
        let mount_data = data
            .and_then(|d| d.as_any().downcast_ref::<OverlayMountData>())
            .ok_or(SystemError::EINVAL)?;

        // Reject an empty lowerdir up front, before doing any path lookups.
        if mount_data.lower_dirs.is_empty() {
            return Err(SystemError::EINVAL);
        }

        let upper_inode = ROOT_INODE()
            .lookup(&mount_data.upper_dir)
            .map_err(|_| SystemError::EINVAL)?;
        let upper_layer = OvlLayer {
            mnt: Arc::new(OvlInode::new(
                mount_data.upper_dir.clone(),
                Some(upper_inode),
                None,
            )),
            index: 0,
            fsid: 0,
        };

        // Lower layers get indices/fsids starting at 1; any failed lookup
        // aborts the whole mount.
        let lower_layers: Vec<OvlLayer> = mount_data
            .lower_dirs
            .iter()
            .enumerate()
            .map(|(i, dir)| {
                let lower_inode = ROOT_INODE().lookup(dir).map_err(|_| SystemError::EINVAL)?;
                Ok(OvlLayer {
                    mnt: Arc::new(OvlInode::new(dir.clone(), None, Some(lower_inode))),
                    index: (i + 1) as u32,
                    fsid: (i + 1) as u32,
                })
            })
            .collect::<Result<_, SystemError>>()?;

        let workdir = Arc::new(OvlInode::new(mount_data.work_dir.clone(), None, None));

        let mut layers = Vec::with_capacity(1 + lower_layers.len());
        layers.push(upper_layer);
        layers.extend(lower_layers);

        let root_inode = layers[0].mnt.clone();
        let fs = OverlayFS {
            numlayer: layers.len(),
            numfs: 1,
            numdatalayer: layers.len() - 1,
            layers,
            workdir,
            root_inode,
        };
        Ok(Arc::new(fs))
    }
}
impl OvlInode {
    /// Return the redirect path for regular files and directories; other
    /// file types carry no redirect.
    pub fn ovl_lower_redirect(&self) -> Option<&str> {
        if self.file_type == FileType::File || self.file_type == FileType::Dir {
            Some(&self.redirect)
        } else {
            None
        }
    }
    /// Create a whiteout entry named `name` in the upper layer, masking the
    /// lower-layer file of the same name. A whiteout is a character device
    /// node (see `WHITEOUT_DEV`/`WHITEOUT_MODE`).
    ///
    /// NOTE(review): when there is no upper inode we fall back to
    /// `fs.root_inode().create(...)`. If `self` *is* the root inode, that
    /// `create` re-locks `self.upper_inode`, which is already held here —
    /// potential self-deadlock; confirm the root can never take this path.
    pub fn create_whiteout(&self, name: &str) -> Result<(), SystemError> {
        let whiteout_mode = vfs::syscall::ModeType::S_IFCHR;
        let mut upper_inode = self.upper_inode.lock();
        if let Some(ref upper_inode) = *upper_inode {
            upper_inode.mknod(name, whiteout_mode, WHITEOUT_DEV)?;
        } else {
            let new_inode = self
                .fs
                .upgrade()
                .ok_or(SystemError::EROFS)?
                .root_inode()
                .create(name, FileType::CharDevice, whiteout_mode)?;
            *upper_inode = Some(new_inode);
        }
        let mut flags = self.flags.lock();
        *flags |= WHITEOUT_FLAG; // mark this inode as a whiteout
        Ok(())
    }
    /// A whiteout is a character device with the whiteout flag set.
    fn is_whiteout(&self) -> bool {
        let flags = self.flags.lock();
        self.file_type == FileType::CharDevice && (*flags & WHITEOUT_FLAG) != 0
    }
    /// Does the upper layer contain a whiteout entry hiding `name`?
    /// (Locks `upper_inode` — must not be called with that lock held.)
    fn has_whiteout(&self, name: &str) -> bool {
        let upper_inode = self.upper_inode.lock();
        if let Some(ref upper_inode) = *upper_inode {
            if let Ok(inode) = upper_inode.find(name) {
                if let Some(ovl_inode) = inode.as_any_ref().downcast_ref::<OvlInode>() {
                    return ovl_inode.is_whiteout();
                }
            }
        }
        false
    }
}
impl IndexNode for OvlInode {
    /// Read from the upper inode when present, otherwise from the lower one.
    fn read_at(
        &self,
        offset: usize,
        len: usize,
        buf: &mut [u8],
        data: crate::libs::spinlock::SpinLockGuard<vfs::FilePrivateData>,
    ) -> Result<usize, system_error::SystemError> {
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            return upper_inode.read_at(offset, len, buf, data);
        }
        if let Some(lower_inode) = &self.lower_inode {
            return lower_inode.read_at(offset, len, buf, data);
        }
        Err(SystemError::ENOENT)
    }

    /// Writes always go to the upper layer, copying the file up first
    /// (copy-on-write) if it only exists in a lower layer.
    fn write_at(
        &self,
        offset: usize,
        len: usize,
        buf: &[u8],
        data: crate::libs::spinlock::SpinLockGuard<vfs::FilePrivateData>,
    ) -> Result<usize, SystemError> {
        // The temporary guard is dropped at the end of the condition, so
        // copy_up() is free to take the lock itself.
        if (*self.upper_inode.lock()).is_none() {
            self.copy_up()?;
        }
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            return upper_inode.write_at(offset, len, buf, data);
        }
        Err(SystemError::EROFS)
    }

    fn fs(&self) -> Arc<dyn FileSystem> {
        // An OvlInode can only be reached through a live OverlayFS.
        self.fs
            .upgrade()
            .expect("OvlInode: owning OverlayFS has been dropped")
    }

    /// Prefer the upper inode's metadata, fall back to the lower one, and
    /// default for purely virtual nodes.
    fn metadata(&self) -> Result<Metadata, SystemError> {
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            return upper_inode.metadata();
        }
        if let Some(ref lower_inode) = self.lower_inode {
            return lower_inode.metadata();
        }
        Ok(Metadata::default())
    }

    fn as_any_ref(&self) -> &dyn core::any::Any {
        self
    }

    /// Merged directory listing: all upper entries, plus lower entries that
    /// are neither shadowed by an upper entry nor hidden by a whiteout.
    fn list(&self) -> Result<Vec<String>, system_error::SystemError> {
        let mut entries: Vec<String> = Vec::new();
        {
            // Scope the guard: has_whiteout() below re-acquires this same
            // non-reentrant spinlock, so holding it across the lower-layer
            // loop would self-deadlock.
            let upper_inode = self.upper_inode.lock();
            if let Some(ref upper_inode) = *upper_inode {
                entries.extend(upper_inode.list()?);
            }
        }
        if let Some(lower_inode) = &self.lower_inode {
            for entry in lower_inode.list()? {
                if !entries.contains(&entry) && !self.has_whiteout(&entry) {
                    entries.push(entry);
                }
            }
        }
        Ok(entries)
    }

    /// Directory creation is only possible in the writable upper layer.
    fn mkdir(
        &self,
        name: &str,
        mode: vfs::syscall::ModeType,
    ) -> Result<Arc<dyn IndexNode>, system_error::SystemError> {
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            upper_inode.mkdir(name, mode)
        } else {
            Err(SystemError::EROFS)
        }
    }

    /// Remove a directory: delete it in the upper layer, or — when it only
    /// exists in a lower layer — hide it with a whiteout entry.
    fn rmdir(&self, name: &str) -> Result<(), SystemError> {
        // Take and release the lock in its own scope: create_whiteout()
        // locks upper_inode again, so calling it with the guard still held
        // (as the previous code did) self-deadlocked.
        let handled_in_upper = {
            let upper_inode = self.upper_inode.lock();
            if let Some(ref upper_inode) = *upper_inode {
                upper_inode.rmdir(name)?;
                true
            } else {
                false
            }
        };
        if handled_in_upper {
            return Ok(());
        }
        if let Some(lower_inode) = &self.lower_inode {
            if lower_inode.find(name).is_ok() {
                self.create_whiteout(name)?;
                Ok(())
            } else {
                Err(SystemError::ENOENT)
            }
        } else {
            Err(SystemError::ENOENT)
        }
    }

    /// Remove a file, with the same upper-delete / lower-whiteout strategy
    /// (and the same lock-scoping fix) as `rmdir`.
    fn unlink(&self, name: &str) -> Result<(), SystemError> {
        let handled_in_upper = {
            let upper_inode = self.upper_inode.lock();
            if let Some(ref upper_inode) = *upper_inode {
                upper_inode.unlink(name)?;
                true
            } else {
                false
            }
        };
        if handled_in_upper {
            return Ok(());
        }
        if let Some(lower_inode) = &self.lower_inode {
            if lower_inode.find(name).is_ok() {
                self.create_whiteout(name)?;
                Ok(())
            } else {
                Err(SystemError::ENOENT)
            }
        } else {
            Err(SystemError::ENOENT)
        }
    }

    /// Hard links can only be created in the writable upper layer.
    fn link(
        &self,
        name: &str,
        other: &Arc<dyn IndexNode>,
    ) -> Result<(), system_error::SystemError> {
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            upper_inode.link(name, other)
        } else {
            Err(SystemError::EROFS)
        }
    }

    /// New files can only be created in the writable upper layer.
    fn create(
        &self,
        name: &str,
        file_type: vfs::FileType,
        mode: vfs::syscall::ModeType,
    ) -> Result<Arc<dyn IndexNode>, system_error::SystemError> {
        if let Some(ref upper_inode) = *self.upper_inode.lock() {
            upper_inode.create(name, file_type, mode)
        } else {
            Err(SystemError::EROFS)
        }
    }

    /// Lookup order: upper layer first, then the whiteout check, then the
    /// lower layer.
    fn find(&self, name: &str) -> Result<Arc<dyn IndexNode>, system_error::SystemError> {
        {
            // Release the guard before has_whiteout(), which locks
            // upper_inode itself — holding it here self-deadlocked.
            let upper_inode = self.upper_inode.lock();
            if let Some(ref upper) = *upper_inode {
                if let Ok(inode) = upper.find(name) {
                    return Ok(inode);
                }
            }
        }
        if self.has_whiteout(name) {
            return Err(SystemError::ENOENT);
        }
        if let Some(lower) = &self.lower_inode {
            if let Ok(inode) = lower.find(name) {
                return Ok(inode);
            }
        }
        Err(SystemError::ENOENT)
    }

    /// Device nodes can only be created in the writable upper layer.
    fn mknod(
        &self,
        filename: &str,
        mode: vfs::syscall::ModeType,
        dev_t: crate::driver::base::device::device_number::DeviceNumber,
    ) -> Result<Arc<dyn IndexNode>, system_error::SystemError> {
        let upper_inode = self.upper_inode.lock();
        if let Some(ref inode) = *upper_inode {
            inode.mknod(filename, mode, dev_t)
        } else {
            Err(SystemError::EROFS)
        }
    }
}

View File

@ -1,7 +1,7 @@
use core::any::Any;
use core::intrinsics::unlikely;
use crate::filesystem::vfs::FSMAKER;
use crate::filesystem::vfs::{FileSystemMakerData, FSMAKER};
use crate::libs::rwlock::RwLock;
use crate::{
driver::base::device::device_number::DeviceNumber,
@ -35,7 +35,7 @@ const RAMFS_MAX_NAMELEN: usize = 64;
const RAMFS_BLOCK_SIZE: u64 = 512;
/// @brief 内存文件系统的Inode结构体
#[derive(Debug)]
struct LockedRamFSInode(SpinLock<RamFSInode>);
pub struct LockedRamFSInode(pub SpinLock<RamFSInode>);
/// @brief 内存文件系统结构体
#[derive(Debug)]
@ -70,6 +70,35 @@ pub struct RamFSInode {
name: DName,
}
impl RamFSInode {
    /// Build a fresh, empty directory inode with default metadata:
    /// mode 0o777, one link, a newly generated inode id, and empty
    /// parent/self/fs back-references (wired up by the caller).
    pub fn new() -> Self {
        Self {
            parent: Weak::default(),
            self_ref: Weak::default(),
            children: BTreeMap::new(),
            data: Vec::new(),
            metadata: Metadata {
                dev_id: 0,
                inode_id: generate_inode_id(),
                size: 0,
                blk_size: 0,
                blocks: 0,
                atime: PosixTimeSpec::default(),
                mtime: PosixTimeSpec::default(),
                ctime: PosixTimeSpec::default(),
                file_type: FileType::Dir,
                mode: ModeType::from_bits_truncate(0o777),
                nlinks: 1,
                uid: 0,
                gid: 0,
                raw_dev: DeviceNumber::default(),
            },
            fs: Weak::default(),
            special_node: None,
            name: Default::default(),
        }
    }
}
impl FileSystem for RamFS {
fn root_inode(&self) -> Arc<dyn super::vfs::IndexNode> {
return self.root_inode.clone();
@ -105,31 +134,8 @@ impl RamFS {
RAMFS_MAX_NAMELEN as u64,
);
// 初始化root inode
let root: Arc<LockedRamFSInode> = Arc::new(LockedRamFSInode(SpinLock::new(RamFSInode {
parent: Weak::default(),
self_ref: Weak::default(),
children: BTreeMap::new(),
data: Vec::new(),
metadata: Metadata {
dev_id: 0,
inode_id: generate_inode_id(),
size: 0,
blk_size: 0,
blocks: 0,
atime: PosixTimeSpec::default(),
mtime: PosixTimeSpec::default(),
ctime: PosixTimeSpec::default(),
file_type: FileType::Dir,
mode: ModeType::from_bits_truncate(0o777),
nlinks: 1,
uid: 0,
gid: 0,
raw_dev: DeviceNumber::default(),
},
fs: Weak::default(),
special_node: None,
name: Default::default(),
})));
let root: Arc<LockedRamFSInode> =
Arc::new(LockedRamFSInode(SpinLock::new(RamFSInode::new())));
let result: Arc<RamFS> = Arc::new(RamFS {
root_inode: root,
@ -147,7 +153,9 @@ impl RamFS {
return result;
}
pub fn make_ramfs() -> Result<Arc<dyn FileSystem + 'static>, SystemError> {
/// Construct a ramfs instance for the filesystem-maker registry.
/// Ramfs takes no mount options, so the raw mount data is ignored.
pub fn make_ramfs(
    _data: Option<&dyn FileSystemMakerData>,
) -> Result<Arc<dyn FileSystem + 'static>, SystemError> {
    return Ok(RamFS::new());
}
@ -155,7 +163,10 @@ impl RamFS {
#[distributed_slice(FSMAKER)]
static RAMFSMAKER: FileSystemMaker = FileSystemMaker::new(
"ramfs",
&(RamFS::make_ramfs as fn() -> Result<Arc<dyn FileSystem + 'static>, SystemError>),
&(RamFS::make_ramfs
as fn(
Option<&dyn FileSystemMakerData>,
) -> Result<Arc<dyn FileSystem + 'static>, SystemError>),
);
impl IndexNode for LockedRamFSInode {

View File

@ -935,12 +935,20 @@ impl FileSystemMaker {
FileSystemMaker { function, name }
}
pub fn call(&self) -> Result<Arc<dyn FileSystem>, SystemError> {
(self.function)()
/// Invoke the registered filesystem constructor, forwarding the optional
/// filesystem-specific mount data (e.g. overlayfs directory options).
pub fn call(
    &self,
    data: Option<&dyn FileSystemMakerData>,
) -> Result<Arc<dyn FileSystem>, SystemError> {
    (self.function)(data)
}
}
pub type FileSystemNewFunction = fn() -> Result<Arc<dyn FileSystem>, SystemError>;
pub trait FileSystemMakerData: Send + Sync {
fn as_any(&self) -> &dyn Any;
}
pub type FileSystemNewFunction =
fn(data: Option<&dyn FileSystemMakerData>) -> Result<Arc<dyn FileSystem>, SystemError>;
#[macro_export]
macro_rules! define_filesystem_maker_slice {
@ -956,9 +964,18 @@ macro_rules! define_filesystem_maker_slice {
/// 调用指定数组中的所有初始化器
#[macro_export]
macro_rules! producefs {
($initializer_slice:ident,$filesystem:ident) => {
($initializer_slice:ident,$filesystem:ident,$raw_data : ident) => {
match $initializer_slice.iter().find(|&m| m.name == $filesystem) {
Some(maker) => maker.call(),
Some(maker) => {
let mount_data = match $filesystem {
"overlay" => OverlayMountData::from_row($raw_data).ok(),
_ => None,
};
let data: Option<&dyn FileSystemMakerData> =
mount_data.as_ref().map(|d| d as &dyn FileSystemMakerData);
maker.call(data)
}
None => {
log::error!("mismatch filesystem type : {}", $filesystem);
Err(SystemError::EINVAL)

View File

@ -1,4 +1,5 @@
use core::ffi::c_void;
use crate::filesystem::overlayfs::OverlayMountData;
use crate::filesystem::vfs::FileSystemMakerData;
use core::mem::size_of;
use alloc::{string::String, sync::Arc, vec::Vec};
@ -1706,7 +1707,7 @@ impl Syscall {
target: *const u8,
filesystemtype: *const u8,
_mountflags: usize,
_data: *const c_void,
data: *const u8,
) -> Result<usize, SystemError> {
let target = user_access::check_and_clone_cstr(target, Some(MAX_PATHLEN))?
.into_string()
@ -1715,7 +1716,7 @@ impl Syscall {
let fstype_str = user_access::check_and_clone_cstr(filesystemtype, Some(MAX_PATHLEN))?;
let fstype_str = fstype_str.to_str().map_err(|_| SystemError::EINVAL)?;
let fstype = producefs!(FSMAKER, fstype_str)?;
let fstype = producefs!(FSMAKER, fstype_str, data)?;
Vcore::do_mount(fstype, &target)?;

View File

@ -10,6 +10,7 @@ use crate::{
arch::{interrupt::TrapFrame, process::arch_switch_to_user},
driver::{net::e1000e::e1000e::e1000e_init, virtio::virtio::virtio_probe},
filesystem::vfs::core::mount_root_fs,
namespaces::NsProxy,
net::net_core::net_init,
process::{
exec::ProcInitInfo, kthread::KernelThreadMechanism, stdio::stdio_init, ProcessFlags,
@ -140,6 +141,7 @@ fn run_init_process(
trap_frame: &mut TrapFrame,
) -> Result<(), SystemError> {
compiler_fence(Ordering::SeqCst);
ProcessManager::current_pcb().set_nsproxy(NsProxy::new()); // 初始化init进程的namespace
let path = proc_init_info.proc_name.to_str().unwrap();
Syscall::do_execve(
@ -148,5 +150,6 @@ fn run_init_process(
proc_init_info.envs.clone(),
trap_frame,
)?;
Ok(())
}

View File

@ -48,6 +48,7 @@ mod libs;
#[macro_use]
mod include;
mod bpf;
mod cgroup;
mod debug;
mod driver; // 如果driver依赖了libs应该在libs后面导出
mod exception;
@ -56,6 +57,7 @@ mod init;
mod ipc;
mod misc;
mod mm;
mod namespaces;
mod net;
mod perf;
mod process;
@ -93,8 +95,6 @@ extern crate wait_queue_macros;
use crate::mm::allocator::kernel_allocator::KernelAllocator;
use crate::process::ProcessManager;
#[cfg(all(feature = "backtrace", target_arch = "x86_64"))]
extern crate mini_backtrace;
@ -112,6 +112,7 @@ pub static KERNEL_ALLOCATOR: KernelAllocator = KernelAllocator;
#[no_mangle]
pub fn panic(info: &PanicInfo) -> ! {
use log::error;
use process::ProcessManager;
error!("Kernel Panic Occurred.");

View File

@ -0,0 +1,55 @@
#![no_std]
#![feature(core_intrinsics)]
#![allow(clippy::needless_return)]
use core::intrinsics::unlikely;
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
/// id分配器
///
/// TODO: 当前只是为了简单实现功能将来这里应使用类似linux的ida的方式去实现
#[derive(Debug)]
pub struct IdAllocator {
    /// Next id to hand out.
    current_id: AtomicUsize,
    /// Exclusive upper bound of the id space.
    max_id: usize,
    /// Set once the id space is exhausted; all later allocations fail.
    dead: AtomicBool,
}

impl IdAllocator {
    /// Create an allocator that hands out ids in `[initial_id, max_id)`.
    pub const fn new(initial_id: usize, max_id: usize) -> Self {
        Self {
            current_id: AtomicUsize::new(initial_id),
            max_id,
            dead: AtomicBool::new(false),
        }
    }

    /// Allocate a new id.
    ///
    /// ## Returns
    ///
    /// `Some(id)` on success, `None` once the id space is exhausted.
    pub fn alloc(&self) -> Option<usize> {
        if self.dead.load(Ordering::SeqCst) {
            return None;
        }
        let ret = self.current_id.fetch_add(1, Ordering::SeqCst);
        // `>=` rather than `==`: several threads may pass the `dead` check
        // concurrently and drive the counter past `max_id`; every such
        // over-allocation must be rejected, not just the one that lands on
        // `max_id` exactly.
        if ret >= self.max_id {
            self.dead.store(true, Ordering::SeqCst);
            return None;
        }
        return Some(ret);
    }

    /// Release an id. Not implemented yet (ids are never reused).
    pub fn free(&self, _id: usize) {
        // todo: free
    }

    /// Exclusive upper bound of the id space.
    pub fn get_max_id(&self) -> usize {
        self.max_id
    }
}

View File

@ -0,0 +1,206 @@
#![allow(dead_code, unused_variables, unused_imports)]
use core::sync::atomic::AtomicU64;
use core::sync::atomic::Ordering;
use alloc::boxed::Box;
use alloc::string::ToString;
use alloc::string::String;
use alloc::sync::Arc;
use system_error::SystemError;
use super::namespace::Namespace;
use super::namespace::NsOperations;
use super::ucount::Ucount::MntNamespaces;
use super::{namespace::NsCommon, ucount::UCounts, user_namespace::UserNamespace};
use crate::container_of;
use crate::filesystem::vfs::mount::MountFSInode;
use crate::filesystem::vfs::IndexNode;
use crate::filesystem::vfs::InodeId;
use crate::filesystem::vfs::MountFS;
use crate::filesystem::vfs::ROOT_INODE;
use crate::libs::rbtree::RBTree;
use crate::libs::wait_queue::WaitQueue;
use crate::process::fork::CloneFlags;
use crate::process::ProcessManager;
use crate::syscall::Syscall;
#[allow(dead_code)]
/// A mount namespace: an independent tree of mount points, reached from a
/// process via `pcb -> nsproxy -> mnt_namespace`.
#[derive(Debug)]
pub struct MntNamespace {
    /// Part shared by all namespace kinds
    ns_common: Arc<NsCommon>,
    /// Associated user namespace
    user_ns: Arc<UserNamespace>,
    /// Resource counters
    ucounts: Arc<UCounts>,
    /// Root filesystem
    root: Option<Arc<MountFS>>,
    /// Red-black tree holding all mount points of this namespace
    mounts: RBTree<InodeId, MountFSInode>,
    /// Wait queue
    poll: WaitQueue,
    /// Mount sequence number (0 marks an anonymous namespace; see `is_anon_ns`)
    seq: AtomicU64,
    /// Number of mount points
    nr_mounts: u32,
    /// Mount points still pending
    pending_mounts: u32,
}
impl Default for MntNamespace {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug)]
struct MntNsOperations {
name: String,
clone_flags: CloneFlags,
}
/// Per-process filesystem context (root directory, cwd, umask).
/// Callers must wrap this structure in a spinlock when sharing it.
#[derive(Clone, Debug)]
pub struct FsStruct {
    umask: u32, // file permission mask
    pub root: Arc<dyn IndexNode>,
    pub pwd: Arc<dyn IndexNode>,
}
impl Default for FsStruct {
    fn default() -> Self {
        Self::new()
    }
}
impl FsStruct {
    /// New context rooted at the global root inode, with umask 022.
    pub fn new() -> Self {
        Self {
            umask: 0o22,
            root: ROOT_INODE(),
            pwd: ROOT_INODE(),
        }
    }
    /// Change the root directory (used by chroot / setns).
    pub fn set_root(&mut self, inode: Arc<dyn IndexNode>) {
        self.root = inode;
    }
    /// Change the current working directory.
    pub fn set_pwd(&mut self, inode: Arc<dyn IndexNode>) {
        self.pwd = inode;
    }
}
impl Namespace for MntNamespace {
    /// Recover the owning `MntNamespace` from its embedded `NsCommon`.
    ///
    /// NOTE(review): `container_of!` builds an owning `Arc` from a pointer
    /// derived from `Arc::as_ptr(&ns_common)` — that pointer targets the
    /// `NsCommon` heap allocation, not a field inside a `MntNamespace`
    /// allocation (the field is itself an `Arc<NsCommon>`). This looks
    /// unsound; confirm the intended memory layout before relying on it.
    fn ns_common_to_ns(ns_common: Arc<NsCommon>) -> Arc<Self> {
        let ns_common_ptr = Arc::as_ptr(&ns_common);
        container_of!(ns_common_ptr, MntNamespace, ns_common)
    }
}
impl MntNsOperations {
pub fn new(name: String) -> Self {
Self {
name,
clone_flags: CloneFlags::CLONE_NEWNS,
}
}
}
impl NsOperations for MntNsOperations {
    /// Fetch the mount-namespace common block of the process identified by `pid`.
    fn get(&self, pid: crate::process::Pid) -> Option<Arc<NsCommon>> {
        let pcb = ProcessManager::find(pid);
        pcb.map(|pcb| pcb.get_nsproxy().read().mnt_namespace.ns_common.clone())
    }
    // Mount namespaces have no parent relation; this must never be called.
    fn get_parent(&self, _ns_common: Arc<NsCommon>) -> Result<Arc<NsCommon>, SystemError> {
        unreachable!()
    }
    /// Install the mount namespace carried by `ns_common` into `nsset`,
    /// resetting the fs context's root and cwd to the global root.
    fn install(
        &self,
        nsset: &mut super::NsSet,
        ns_common: Arc<NsCommon>,
    ) -> Result<(), SystemError> {
        let nsproxy = &mut nsset.nsproxy;
        let mnt_ns = MntNamespace::ns_common_to_ns(ns_common);
        // Anonymous mount namespaces cannot be entered via setns().
        if mnt_ns.is_anon_ns() {
            return Err(SystemError::EINVAL);
        }
        nsproxy.mnt_namespace = mnt_ns;
        nsset.fs.lock().set_pwd(ROOT_INODE());
        nsset.fs.lock().set_root(ROOT_INODE());
        Ok(())
    }
    /// The user namespace owning the given mount namespace.
    fn owner(&self, ns_common: Arc<NsCommon>) -> Arc<UserNamespace> {
        let mnt_ns = MntNamespace::ns_common_to_ns(ns_common);
        mnt_ns.user_ns.clone()
    }
    fn put(&self, ns_common: Arc<NsCommon>) {
        // Intentionally a no-op: `Arc` already handles the reference count.
        // The previous code reconstructed an owning Arc via
        // `ns_common_to_ns` and immediately dropped it, decrementing a
        // refcount it never owned (use-after-free risk).
        let _ = ns_common;
    }
}
impl MntNamespace {
    /// Build the initial (root) mount namespace.
    pub fn new() -> Self {
        let ns_common = Arc::new(NsCommon::new(Box::new(MntNsOperations::new(
            "mnt".to_string(),
        ))));
        Self {
            ns_common,
            user_ns: Arc::new(UserNamespace::new()),
            ucounts: Arc::new(UCounts::new()),
            root: None,
            mounts: RBTree::new(),
            poll: WaitQueue::default(),
            seq: AtomicU64::new(0),
            nr_mounts: 0,
            pending_mounts: 0,
        }
    }
    /// Create a child mount namespace.
    ///
    /// `anon` marks an anonymous namespace: its `seq` stays 0, which is
    /// what `is_anon_ns` tests. How anonymous namespaces should be handled
    /// beyond this is still an open question.
    pub fn create_mnt_namespace(
        &self,
        user_ns: Arc<UserNamespace>,
        anon: bool,
    ) -> Result<Self, SystemError> {
        // Charge the new namespace against the user's ucount limit first.
        let ucounts = self.inc_mnt_namespace(user_ns.clone())?;
        if ucounts.is_none() {
            return Err(SystemError::ENOSPC);
        }
        let ucounts = ucounts.unwrap();
        let ns_common = Arc::new(NsCommon::new(Box::new(MntNsOperations::new(
            "mnt".to_string(),
        ))));
        let seq = AtomicU64::new(0);
        if !anon {
            seq.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
        }
        Ok(Self {
            ns_common,
            user_ns,
            ucounts,
            root: None,
            mounts: RBTree::new(),
            poll: WaitQueue::default(),
            seq,
            nr_mounts: 0,
            pending_mounts: 0,
        })
    }
    /// Bump the per-user count of mount namespaces; `Ok(None)` means the
    /// user's limit has been reached.
    pub fn inc_mnt_namespace(
        &self,
        user_ns: Arc<UserNamespace>,
    ) -> Result<Option<Arc<UCounts>>, SystemError> {
        Ok(self
            .ucounts
            .inc_ucounts(user_ns, Syscall::geteuid()?, MntNamespaces))
    }
    /// Drop the per-user count of mount namespaces.
    pub fn dec_mnt_namespace(&self, uc: Arc<UCounts>) {
        UCounts::dec_ucount(uc, super::ucount::Ucount::MntNamespaces)
    }
    /// An anonymous namespace is identified by a mount sequence number of 0.
    pub fn is_anon_ns(&self) -> bool {
        self.seq.load(Ordering::SeqCst) == 0
    }
}

View File

@ -0,0 +1,92 @@
use alloc::sync::Arc;
use mnt_namespace::{FsStruct, MntNamespace};
use pid_namespace::PidNamespace;
use system_error::SystemError;
use user_namespace::UserNamespace;
use crate::{
libs::spinlock::SpinLock,
process::{fork::CloneFlags, ProcessControlBlock},
};
pub mod mnt_namespace;
pub mod namespace;
pub mod pid_namespace;
pub mod syscall;
pub mod ucount;
pub mod user_namespace;
/// Aggregates all namespace state for an unshare/setns operation:
/// the clone flags, the namespace proxy, and the fs context.
#[derive(Clone)]
pub struct NsSet {
    flags: u64,
    nsproxy: NsProxy,
    pub fs: Arc<SpinLock<FsStruct>>,
}
/// Per-process namespace proxy: the set of namespaces a process belongs to
/// (currently pid and mnt; others are planned).
#[derive(Debug, Clone)]
pub struct NsProxy {
    pub pid_namespace: Arc<PidNamespace>,
    pub mnt_namespace: Arc<MntNamespace>,
}
impl Default for NsProxy {
    fn default() -> Self {
        Self::new()
    }
}
impl NsProxy {
    /// Proxy pointing at fresh root pid/mnt namespaces.
    pub fn new() -> Self {
        Self {
            pid_namespace: Arc::new(PidNamespace::new()),
            mnt_namespace: Arc::new(MntNamespace::new()),
        }
    }
    pub fn set_pid_namespace(&mut self, new_pid_ns: Arc<PidNamespace>) {
        self.pid_namespace = new_pid_ns;
    }
    pub fn set_mnt_namespace(&mut self, new_mnt_ns: Arc<MntNamespace>) {
        self.mnt_namespace = new_mnt_ns;
    }
}
/// Build a new `NsProxy` for `pcb` according to `clone_flags`.
///
/// For each namespace kind: if the corresponding `CLONE_NEW*` flag is set,
/// a fresh child namespace is created under `user_ns`; otherwise the
/// process's current namespace is shared (Arc-cloned).
pub fn create_new_namespaces(
    clone_flags: u64,
    pcb: &Arc<ProcessControlBlock>,
    user_ns: Arc<UserNamespace>,
) -> Result<NsProxy, SystemError> {
    let mut nsproxy = NsProxy::new();
    // pid_namespace
    let new_pid_ns = if (clone_flags & CloneFlags::CLONE_NEWPID.bits()) != 0 {
        Arc::new(PidNamespace::new().create_pid_namespace(
            pcb.get_nsproxy().read().pid_namespace.clone(),
            user_ns.clone(),
        )?)
    } else {
        pcb.get_nsproxy().read().pid_namespace.clone()
    };
    nsproxy.set_pid_namespace(new_pid_ns);
    // mnt_namespace
    let new_mnt_ns = if clone_flags & CloneFlags::CLONE_NEWNS.bits() != 0 {
        Arc::new(MntNamespace::new().create_mnt_namespace(user_ns.clone(), false)?)
    } else {
        pcb.get_nsproxy().read().mnt_namespace.clone()
    };
    nsproxy.set_mnt_namespace(new_mnt_ns);
    Ok(nsproxy)
}
/// Recover an `Arc<$struct>` from a raw pointer to its `$field` member.
///
/// NOTE(review): this is only sound when `$ptr` really points at the
/// `$field` member *inside an Arc-allocated `$struct`*. Current call sites
/// pass `Arc::as_ptr` of an `Arc<NsCommon>` field, which points into a
/// separate allocation — audit every use.
#[macro_export]
macro_rules! container_of {
    ($ptr:expr, $struct:path, $field:ident) => {
        unsafe {
            let dummy = core::mem::MaybeUninit::<$struct>::uninit();
            let dummy_ptr = dummy.as_ptr();
            // addr_of! yields a raw pointer without creating a reference to
            // uninitialized memory (`&(*dummy_ptr).$field` was UB).
            let field_ptr = core::ptr::addr_of!((*dummy_ptr).$field) as usize;
            let offset = field_ptr - dummy_ptr as usize;
            let outer = ($ptr as *const u8).wrapping_sub(offset) as *const $struct;
            // `Arc::from_raw` alone would assume ownership of a refcount the
            // caller never relinquished, causing a double-free when both
            // Arcs drop; bump the strong count first.
            Arc::increment_strong_count(outer);
            Arc::from_raw(outer)
        }
    };
}

View File

@ -0,0 +1,119 @@
#![allow(dead_code, unused_variables, unused_imports)]
use core::fmt::Debug;
use crate::filesystem::procfs::ProcFSInode;
use crate::filesystem::vfs::{IndexNode, ROOT_INODE};
use crate::namespaces::user_namespace::UserNamespace;
use crate::process::fork::CloneFlags;
use crate::process::{Pid, ProcessControlBlock, ProcessManager};
use alloc::boxed::Box;
use alloc::sync::Arc;
use system_error::SystemError;
// 目前无credit功能采用全局静态的user_namespace
lazy_static! {
pub static ref USER_NS: Arc<UserNamespace> = Arc::new(UserNamespace::new());
}
use super::{create_new_namespaces, NsProxy, NsSet};
/// Per-namespace-type operations, modeled after Linux's
/// `proc_ns_operations` dispatch table.
pub trait NsOperations: Send + Sync + Debug {
    /// Look up the namespace common part attached to process `pid`.
    fn get(&self, pid: Pid) -> Option<Arc<NsCommon>>;
    /// Release a reference to the namespace.
    fn put(&self, ns_common: Arc<NsCommon>);
    /// Install the namespace into `nsset` (used by setns()).
    fn install(&self, nsset: &mut NsSet, ns_common: Arc<NsCommon>) -> Result<(), SystemError>;
    /// The user namespace that owns this namespace.
    fn owner(&self, ns_common: Arc<NsCommon>) -> Arc<UserNamespace>;
    /// The parent namespace, when the hierarchy permits access to it.
    fn get_parent(&self, ns_common: Arc<NsCommon>) -> Result<Arc<NsCommon>, SystemError>;
}
/// Common part embedded in every namespace object.
#[derive(Debug)]
pub struct NsCommon {
    // dispatch table for the concrete namespace type
    ops: Box<dyn NsOperations>,
    // inode stashed for /proc-style access to the namespace
    stashed: Arc<dyn IndexNode>,
}
impl NsCommon {
    /// Build a common part for the given operations, stashing the /proc
    /// inode (falls back to the root inode when /proc is not mounted).
    pub fn new(ops: Box<dyn NsOperations>) -> Self {
        let inode = ROOT_INODE().find("proc").unwrap_or_else(|_| ROOT_INODE());
        Self {
            ops,
            stashed: inode,
        }
    }
}
/// Kinds of namespaces known to the kernel (mirrors Linux's set).
pub enum NsType {
    Pid,
    User,
    Uts,
    Ipc,
    Net,
    Mnt,
    Cgroup,
    Time,
}
/// Conversion from an embedded `NsCommon` back to the concrete namespace.
pub trait Namespace {
    fn ns_common_to_ns(ns_common: Arc<NsCommon>) -> Arc<Self>;
}
/// Validate that `unshare_flags` contains only flags unshare(2) accepts.
///
/// Returns `Ok(0)` when every set bit is supported, `EINVAL` otherwise.
pub fn check_unshare_flags(unshare_flags: u64) -> Result<usize, SystemError> {
    // union of all clone flags the unshare path understands
    let accepted = [
        CloneFlags::CLONE_THREAD,
        CloneFlags::CLONE_FS,
        CloneFlags::CLONE_NEWNS,
        CloneFlags::CLONE_SIGHAND,
        CloneFlags::CLONE_VM,
        CloneFlags::CLONE_FILES,
        CloneFlags::CLONE_SYSVSEM,
        CloneFlags::CLONE_NEWUTS,
        CloneFlags::CLONE_NEWIPC,
        CloneFlags::CLONE_NEWNET,
        CloneFlags::CLONE_NEWUSER,
        CloneFlags::CLONE_NEWPID,
        CloneFlags::CLONE_NEWCGROUP,
    ]
    .iter()
    .fold(0u64, |mask, flag| mask | flag.bits());

    if unshare_flags & !accepted == 0 {
        Ok(0)
    } else {
        Err(SystemError::EINVAL)
    }
}
/// Build a fresh `NsProxy` for unshare(2) when any namespace-creating
/// flag is present.
///
/// Returns `Ok(None)` when no CLONE_NEW* flag is set (nothing to do),
/// otherwise the new proxy for the calling process.
pub fn unshare_nsproxy_namespaces(unshare_flags: u64) -> Result<Option<NsProxy>, SystemError> {
    let ns_mask = CloneFlags::CLONE_NEWNS.bits()
        | CloneFlags::CLONE_NEWUTS.bits()
        | CloneFlags::CLONE_NEWIPC.bits()
        | CloneFlags::CLONE_NEWNET.bits()
        | CloneFlags::CLONE_NEWPID.bits()
        | CloneFlags::CLONE_NEWCGROUP.bits();
    if unshare_flags & ns_mask == 0 {
        return Ok(None);
    }
    // Use current_pcb() directly instead of find(current_pid()).unwrap():
    // the lookup cannot fail for the running process, and this removes a
    // panic path.
    let pcb = ProcessManager::current_pcb();
    let new_nsproxy = create_new_namespaces(unshare_flags, &pcb, USER_NS.clone())?;
    Ok(Some(new_nsproxy))
}
/// Replace `pcb`'s namespace proxy with `new_nsproxy`, taking ownership
/// of the proxy. The previous proxy is dropped once its last reference
/// goes away.
pub fn switch_task_namespace(pcb: Arc<ProcessControlBlock>, new_nsproxy: NsProxy) {
    // The original read the old proxy handle into an unused binding and
    // immediately discarded it; setting directly is equivalent.
    pcb.set_nsproxy(new_nsproxy);
}
/// Assemble an `NsSet` for setns(): a handle to the caller's fs_struct
/// plus a freshly created namespace proxy honoring `flags`.
pub fn prepare_nsset(flags: u64) -> Result<NsSet, SystemError> {
    let current = ProcessManager::current_pcb();
    Ok(NsSet {
        flags,
        // shares the caller's fs_struct Arc (not a deep copy)
        fs: current.fs_struct(),
        nsproxy: create_new_namespaces(flags, &current, USER_NS.clone())?,
    })
}
pub fn commit_nsset(nsset: NsSet) {
let flags = CloneFlags::from_bits_truncate(nsset.flags);
let current = ProcessManager::current_pcb();
if flags.contains(CloneFlags::CLONE_NEWNS) {
let fs = current.fs_struct();
let nsset_fs = nsset.fs.lock();
fs.lock().set_pwd(nsset_fs.pwd.clone());
fs.lock().set_root(nsset_fs.root.clone());
}
switch_task_namespace(current, nsset.nsproxy); // 转移所有权
}

View File

@ -0,0 +1,273 @@
#![allow(dead_code, unused_variables, unused_imports)]
use alloc::vec::Vec;
use super::namespace::Namespace;
use super::ucount::Ucount::PidNamespaces;
use super::NsSet;
use super::{namespace::NsCommon, ucount::UCounts, user_namespace::UserNamespace};
use crate::container_of;
use crate::filesystem::vfs::{IndexNode, ROOT_INODE};
use crate::namespaces::namespace::NsOperations;
use crate::process::fork::CloneFlags;
use crate::process::ProcessManager;
use crate::syscall::Syscall;
use crate::{libs::rwlock::RwLock, process::Pid};
use alloc::boxed::Box;
use alloc::string::String;
use alloc::string::ToString;
use alloc::sync::Arc;
use ida::IdAllocator;
use system_error::SystemError;
use system_error::SystemError::ENOSPC;
const INT16_MAX: u32 = 32767;
const MAX_PID_NS_LEVEL: usize = 32;
const PIDNS_ADDING: u32 = 1 << 31;
const PID_MAX: usize = 4096;
static PID_IDA: ida::IdAllocator = ida::IdAllocator::new(1, usize::MAX).unwrap();
/// A pid namespace: an isolated pid number space plus an orphan reaper.
#[derive(Debug)]
#[repr(C)]
pub struct PidNamespace {
    /// Allocator handing out pid numbers local to this namespace
    id_alloctor: RwLock<IdAllocator>,
    /// Number of pids already allocated in this namespace
    pid_allocated: u32,
    /// Depth of this namespace in the hierarchy (root is 0)
    pub level: usize,
    /// Parent namespace (None for the root namespace)
    parent: Option<Arc<PidNamespace>>,
    /// Resource counters charged against the owning user namespace
    ucounts: Arc<UCounts>,
    /// The user namespace that owns this pid namespace
    user_ns: Arc<UserNamespace>,
    /// The init process that reaps orphaned children in this namespace
    child_reaper: Arc<RwLock<Pid>>,
    /// Common namespace part shared by all namespace types
    pub ns_common: Arc<NsCommon>,
}
impl Default for PidNamespace {
    fn default() -> Self {
        Self::new()
    }
}
/// Per-process pid bookkeeping: one `UPid` per namespace level.
///
/// NOTE(review): the name is a typo of `PidStruct`; kept as-is because
/// callers across the crate reference it by this name.
#[derive(Debug, Clone)]
pub struct PidStrcut {
    /// Deepest namespace level this pid is visible in
    pub level: usize,
    /// Pid numbers indexed by namespace level (see `ns_of_pid`)
    pub numbers: Vec<UPid>,
    /// Inode stashed for /proc-style access
    pub stashed: Arc<dyn IndexNode>,
}
impl Default for PidStrcut {
    fn default() -> Self {
        Self::new()
    }
}
/// A pid number as seen from one particular pid namespace.
#[derive(Debug, Clone)]
pub struct UPid {
    /// the pid number valid inside `ns`
    pub nr: Pid,
    /// the namespace in which `nr` is valid
    pub ns: Arc<PidNamespace>,
}
impl PidStrcut {
    /// Bookkeeping for a root-level pid: a single `UPid`, numbered 0, in a
    /// fresh root pid namespace.
    pub fn new() -> Self {
        Self {
            level: 0,
            numbers: vec![UPid {
                nr: Pid::new(0),
                ns: Arc::new(PidNamespace::new()),
            }],
            stashed: ROOT_INODE(),
        }
    }

    /// Return the pid number held in `pid`'s deepest namespace back to
    /// that namespace's id allocator.
    pub fn put_pid(pid: PidStrcut) {
        let ns = pid.numbers[pid.level].ns.clone();
        let id = pid.numbers[pid.level].nr.data();
        ns.id_alloctor.write().free(id);
    }

    /// Allocate a pid in `ns` and in every ancestor namespace up to the
    /// root.
    ///
    /// `set_tid` optionally pins the numeric pid per level (entries are
    /// paired with levels in reverse order — TODO confirm this matches
    /// clone3's `set_tid` contract); pinned values must lie in
    /// `1..=INT16_MAX`. An empty vec lets every level's allocator choose.
    ///
    /// # Errors
    /// * `EINVAL` — `set_tid` longer than the namespace chain, or a pinned
    ///   value out of range.
    /// * `ENOSPC` — a namespace's id allocator is exhausted.
    pub fn alloc_pid(ns: Arc<PidNamespace>, set_tid: Vec<usize>) -> Result<PidStrcut, SystemError> {
        let mut set_tid_size = set_tid.len();
        if set_tid_size > ns.level + 1 {
            return Err(SystemError::EINVAL);
        }

        let mut numbers = Vec::<UPid>::with_capacity(ns.level + 1);
        let mut tid_iter = set_tid.into_iter().rev();
        let mut pid_ns = ns.clone(); // namespace currently being charged
        for _level in (0..=ns.level).rev() {
            let tid = tid_iter.next().unwrap_or(0);
            if set_tid_size > 0 {
                if tid < 1 || tid > INT16_MAX as usize {
                    return Err(SystemError::EINVAL);
                }
                set_tid_size -= 1;
            }

            let mut nr = tid;
            if tid == 0 {
                // NOTE(review): pids already handed out in deeper levels are
                // not rolled back when an outer allocation fails — TODO.
                nr = pid_ns
                    .id_alloctor
                    .write()
                    .alloc()
                    .ok_or(SystemError::ENOSPC)?;
            }
            // Prepend so the entry computed last (level 0, the root) ends up
            // at index 0 and `numbers` is indexed by level. The original
            // `insert(i, ..)` panicked for nested namespaces: the vec is
            // still empty on the first iteration when i == ns.level > 0.
            numbers.insert(
                0,
                UPid {
                    nr: Pid::from(nr),
                    ns: pid_ns.clone(),
                },
            );

            if let Some(parent_ns) = &pid_ns.parent {
                pid_ns = parent_ns.clone();
            } else {
                break; // reached the root namespace
            }
        }
        Ok(PidStrcut {
            level: ns.level,
            numbers,
            stashed: ROOT_INODE(),
        })
    }

    /// The namespace this pid was allocated in (its deepest level).
    pub fn ns_of_pid(&self) -> Arc<PidNamespace> {
        self.numbers[self.level].ns.clone()
    }
}
/// `NsOperations` descriptor for pid namespaces.
#[derive(Debug)]
struct PidNsOperations {
    // human-readable namespace type name ("pid")
    name: String,
    // the clone flag that creates this namespace type
    clone_flags: CloneFlags,
}
impl PidNsOperations {
    pub fn new(name: String) -> Self {
        Self {
            name,
            clone_flags: CloneFlags::CLONE_NEWPID,
        }
    }
}
impl Namespace for PidNamespace {
    // NOTE(review): unsound as written. `ns_common` is an `Arc<NsCommon>`
    // field, so `Arc::as_ptr(&ns_common)` points into the NsCommon's *own*
    // heap allocation, not at the `ns_common` field inside `PidNamespace`.
    // Subtracting the field offset therefore lands in unrelated memory and
    // the reconstructed `Arc<PidNamespace>` is invalid. Embedding `NsCommon`
    // by value (intrusive layout) would be required for container_of to be
    // applicable here.
    fn ns_common_to_ns(ns_common: Arc<NsCommon>) -> Arc<Self> {
        container_of!(Arc::as_ptr(&ns_common), PidNamespace, ns_common)
    }
}
impl NsOperations for PidNsOperations {
    fn put(&self, ns_common: Arc<NsCommon>) {
        let _pid_ns = PidNamespace::ns_common_to_ns(ns_common);
        // the namespace is dropped when it leaves scope, recursively
        // releasing its parent chain
    }

    /// The user namespace that owns this pid namespace.
    fn owner(&self, ns_common: Arc<NsCommon>) -> Arc<UserNamespace> {
        let pid_ns = PidNamespace::ns_common_to_ns(ns_common);
        pid_ns.user_ns.clone()
    }

    /// Walk up the parent chain of `ns_common`'s namespace; succeed only
    /// when the caller's active pid namespace is one of its ancestors
    /// (EPERM otherwise).
    fn get_parent(&self, ns_common: Arc<NsCommon>) -> Result<Arc<NsCommon>, SystemError> {
        let current = ProcessManager::current_pid();
        let pcb = ProcessManager::find(current).unwrap();
        let active = pcb.pid_strcut().read().ns_of_pid();
        let mut pid_ns = &PidNamespace::ns_common_to_ns(ns_common).parent;

        while let Some(ns) = pid_ns {
            if Arc::ptr_eq(&active, ns) {
                return Ok(ns.ns_common.clone());
            }
            pid_ns = &ns.parent;
        }
        Err(SystemError::EPERM)
    }

    /// Namespace common part of the process identified by `pid`, if any.
    fn get(&self, pid: Pid) -> Option<Arc<NsCommon>> {
        let pcb = ProcessManager::find(pid);
        pcb.map(|pcb| pcb.get_nsproxy().read().pid_namespace.ns_common.clone())
    }

    /// setns() into a pid namespace: the target must be at least as deep
    /// as the caller's active namespace.
    fn install(&self, nsset: &mut NsSet, ns_common: Arc<NsCommon>) -> Result<(), SystemError> {
        let nsproxy = &mut nsset.nsproxy;
        let current = ProcessManager::current_pid();
        let pcb = ProcessManager::find(current).unwrap();
        let active = pcb.pid_strcut().read().ns_of_pid();
        let mut pid_ns = PidNamespace::ns_common_to_ns(ns_common);

        if pid_ns.level < active.level {
            return Err(SystemError::EINVAL);
        }
        // climb until the target's ancestor sits at the caller's level
        while pid_ns.level > active.level {
            if let Some(ns) = &pid_ns.parent {
                pid_ns = ns.clone();
            } else {
                break;
            }
        }
        // NOTE(review): this rejects when the climbed ancestor EQUALS the
        // active namespace. Linux's pidns_install rejects when it does NOT
        // equal it (target must descend from the active ns) — TODO confirm
        // the intended semantics.
        if Arc::ptr_eq(&pid_ns, &active) {
            return Err(SystemError::EINVAL);
        }
        nsproxy.pid_namespace = pid_ns.clone();
        Ok(())
    }
}
impl PidNamespace {
pub fn new() -> Self {
Self {
id_alloctor: RwLock::new(IdAllocator::new(1, PID_MAX).unwrap()),
pid_allocated: 1,
level: 0,
child_reaper: Arc::new(RwLock::new(Pid::from(1))),
parent: None,
ucounts: Arc::new(UCounts::new()),
user_ns: Arc::new(UserNamespace::new()),
ns_common: Arc::new(NsCommon::new(Box::new(PidNsOperations::new(
"pid".to_string(),
)))),
}
}
pub fn create_pid_namespace(
&self,
parent: Arc<PidNamespace>,
user_ns: Arc<UserNamespace>,
) -> Result<Self, SystemError> {
let level = parent.level + 1;
if level > MAX_PID_NS_LEVEL {
return Err(ENOSPC);
}
let ucounts = self.inc_pid_namespaces(user_ns.clone())?;
if ucounts.is_none() {
return Err(SystemError::ENOSPC);
}
let ucounts = ucounts.unwrap();
let ns_common = Arc::new(NsCommon::new(Box::new(PidNsOperations::new(
"pid".to_string(),
))));
let child_reaper = parent.child_reaper.clone();
Ok(Self {
id_alloctor: RwLock::new(IdAllocator::new(1, PID_MAX).unwrap()),
pid_allocated: PIDNS_ADDING,
level,
ucounts,
parent: Some(parent),
user_ns,
ns_common,
child_reaper,
})
}
pub fn inc_pid_namespaces(
&self,
user_ns: Arc<UserNamespace>,
) -> Result<Option<Arc<UCounts>>, SystemError> {
Ok(self
.ucounts
.inc_ucounts(user_ns, Syscall::geteuid()?, PidNamespaces))
}
pub fn dec_pid_namespaces(&mut self, uc: Arc<UCounts>) {
UCounts::dec_ucount(uc, PidNamespaces)
}
}

View File

@ -0,0 +1,50 @@
use system_error::SystemError;
use crate::{
process::{fork::CloneFlags, ProcessManager},
syscall::Syscall,
};
use super::namespace::{
check_unshare_flags, commit_nsset, prepare_nsset, unshare_nsproxy_namespaces,
};
impl Syscall {
    /// unshare(2): move the caller into fresh namespaces/resources
    /// according to `unshare_flags`.
    ///
    /// Flag implication rules mirror Linux: NEWUSER implies THREAD|FS,
    /// VM implies SIGHAND, SIGHAND implies THREAD, NEWNS implies FS.
    /// The implication order matters (VM -> SIGHAND -> THREAD chains).
    pub fn sys_unshare(mut unshare_flags: u64) -> Result<usize, SystemError> {
        if unshare_flags & CloneFlags::CLONE_NEWUSER.bits() != 0 {
            unshare_flags |= CloneFlags::CLONE_THREAD.bits() | CloneFlags::CLONE_FS.bits();
        }

        if unshare_flags & CloneFlags::CLONE_VM.bits() != 0 {
            unshare_flags |= CloneFlags::CLONE_SIGHAND.bits();
        }

        if unshare_flags & CloneFlags::CLONE_SIGHAND.bits() != 0 {
            unshare_flags |= CloneFlags::CLONE_THREAD.bits();
        }

        if unshare_flags & CloneFlags::CLONE_NEWNS.bits() != 0 {
            unshare_flags |= CloneFlags::CLONE_FS.bits();
        }

        let check = check_unshare_flags(unshare_flags)?;

        let current = ProcessManager::current_pcb();
        if let Some(nsproxy) = unshare_nsproxy_namespaces(unshare_flags)? {
            *current.get_nsproxy().write() = nsproxy;
        }
        Ok(check)
    }

    /// setns(2) — framework stage: `_fd` is not yet resolved to a
    /// namespace file; only flag validation and the nsset commit happen.
    pub fn sys_setns(_fd: i32, flags: u64) -> Result<usize, SystemError> {
        let check = check_unshare_flags(flags)?;

        let nsset = prepare_nsset(flags)?;

        // check is 0 whenever check_unshare_flags returned Ok
        if check == 0 {
            commit_nsset(nsset)
        };
        Ok(0)
    }
}

View File

@ -0,0 +1,187 @@
#![allow(dead_code, unused_variables, unused_imports)]
use alloc::vec::Vec;
use core::{hash::Hash, sync::atomic::AtomicU32};
use system_error::SystemError;
use alloc::sync::Arc;
use hashbrown::HashMap;
use log::warn;
use super::user_namespace::UserNamespace;
use crate::libs::mutex::Mutex;
/// Per-user-namespace resource counter indices.
///
/// Discriminants start at 1 and `Counts` is used as the backing array
/// length, so index 0 stays unused (mirrors Linux's `enum ucount`).
#[derive(Clone, Copy)]
pub enum Ucount {
    UserNamespaces = 1,
    PidNamespaces = 2,
    UtsNamespaces = 3,
    IpcNamespaces = 4,
    NetNamespaces = 5,
    MntNamespaces = 6,
    CgroupNamespaces = 7,
    TimeNamespaces = 8,
    Counts = 9,
}
/// Per-user-namespace rlimit counter indices (same 1-based scheme).
pub enum UcountRlimit {
    Nproc = 1,
    Msgqueue = 2,
    Sigpending = 3,
    Memlock = 4,
    Counts = 5,
}
lazy_static! {
static ref COUNT_MANAGER: Arc<CountManager> = Arc::new(CountManager::new());
}
/// Resource usage counters charged to one (user namespace, uid) pair.
#[derive(Debug)]
pub struct UCounts {
    /// The user namespace these counters belong to
    ns: Arc<UserNamespace>,
    /// The uid within that namespace being charged
    uid: usize,
    // reference count of this entry in the global COUNT_MANAGER table
    count: AtomicU32,
    ucount: Vec<AtomicU32>, //[AtomicU32; UCOUNT_COUNTS as usize],
    rlimit: Vec<AtomicU32>, //[AtomicU32; UCOUNT_RLIMIT_COUNTS as usize],
}
impl Default for UCounts {
    fn default() -> Self {
        Self::new()
    }
}
impl UCounts {
    /// Fresh counter set for the root user namespace, uid 0.
    pub fn new() -> Self {
        Self {
            ns: Arc::new(UserNamespace::new()),
            uid: 0,
            count: AtomicU32::new(1),
            ucount: (0..Ucount::Counts as usize)
                .map(|_| AtomicU32::new(0))
                .collect(),
            rlimit: (0..UcountRlimit::Counts as usize)
                .map(|_| AtomicU32::new(0))
                .collect(),
        }
    }

    /// Look up (or create) the counter entry for `(ns, uid)` in the
    /// global table, bumping that entry's reference count.
    fn alloc_ucounts(&self, ns: Arc<UserNamespace>, uid: usize) -> Arc<Self> {
        let mut counts = COUNT_MANAGER.counts.lock();
        let key = UKey {
            user_ns: ns.clone(),
            uid,
        };
        if let Some(uc) = counts.get(&key) {
            // Bump the refcount of the entry we found. (The original
            // incremented `self.count` — the caller's own counter — which
            // left the table entry's refcount wrong.)
            uc.count.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
            return uc.clone();
        }
        let uc = Arc::new(Self {
            ns,
            uid,
            count: AtomicU32::new(1),
            ucount: (0..Ucount::Counts as usize)
                .map(|_| AtomicU32::new(0))
                .collect(),
            rlimit: (0..UcountRlimit::Counts as usize)
                .map(|_| AtomicU32::new(0))
                .collect(),
        });
        // only newly created entries need inserting
        counts.insert(key, uc.clone());
        uc
    }

    /// Charge one unit of `ucount_type` for `(user_ns, uid)` and every
    /// ancestor user namespace. Returns `None` — after rolling the
    /// charges back — when any level would exceed its configured maximum.
    pub fn inc_ucounts(
        &self,
        user_ns: Arc<UserNamespace>,
        uid: usize,
        ucount_type: Ucount,
    ) -> Option<Arc<UCounts>> {
        let uc_type = ucount_type as usize;
        let uc = self.alloc_ucounts(user_ns, uid);
        let mut uc_iter = Some(uc.clone());
        let mut ucounts_add = vec![];
        while let Some(iter) = uc_iter {
            let num = iter.ucount[uc_type].fetch_add(1, core::sync::atomic::Ordering::SeqCst);
            ucounts_add.push(iter.clone());
            // `num` is the value *before* the add, so the new value is
            // num + 1; fail when that would exceed the maximum. (The
            // original compared `num > max`, letting the counter reach
            // max + 1.)
            if num >= iter.ns.ucount_max[uc_type] {
                // roll back every charge made so far, including this one
                for add_iter in &ucounts_add {
                    add_iter.ucount[uc_type].fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
                }
                return None;
            }
            uc_iter = iter.ns.ucounts.clone();
        }
        Some(uc)
    }

    /// Entry lookup without creating or charging anything.
    fn find_ucounts(user_ns: Arc<UserNamespace>, uid: usize) -> Option<Arc<UCounts>> {
        let counts = COUNT_MANAGER.counts.lock();
        let key = UKey { user_ns, uid };
        counts.get(&key).cloned()
    }

    /// (Re-)register `uc` in the global table under its own key.
    fn get_ucounts(uc: Arc<UCounts>) {
        let mut counts = COUNT_MANAGER.counts.lock();
        let ukey = UKey {
            user_ns: uc.ns.clone(),
            uid: uc.uid,
        };
        counts.insert(ukey, uc);
    }

    /// Release one unit of `ucount_type` along the namespace chain, then
    /// drop `uc`'s table entry.
    pub fn dec_ucount(uc: Arc<UCounts>, ucount_type: Ucount) {
        let mut uc_iter = Some(uc.clone());
        let uc_type = ucount_type as usize;
        while let Some(iter) = uc_iter {
            // fetch_sub returns the previous value: 0 means the counter
            // just underflowed — a charge/release imbalance.
            let num = iter.ucount[uc_type].fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
            if num == 0 {
                warn!("count has reached zero");
            }
            uc_iter = iter.ns.ucounts.clone();
        }
        Self::put_ucounts(uc);
    }

    /// Remove `uc`'s entry from the global table.
    /// NOTE(review): removal ignores `count`; an entry shared by several
    /// holders is dropped on the first put — TODO confirm intended.
    fn put_ucounts(uc: Arc<UCounts>) {
        let mut counts = COUNT_MANAGER.counts.lock();
        let key = UKey {
            user_ns: uc.ns.clone(),
            uid: uc.uid,
        };
        counts.remove(&key);
    }
}
/// Key for the global ucounts table: the *identity* of a user namespace
/// (compared by Arc pointer, not by value) plus a uid.
struct UKey {
    user_ns: Arc<UserNamespace>,
    uid: usize,
}
impl Hash for UKey {
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        // hash the Arc's address so Hash agrees with the pointer-based Eq
        let user_ns_ptr = Arc::as_ptr(&self.user_ns);
        user_ns_ptr.hash(state);
        self.uid.hash(state)
    }
}
impl Eq for UKey {}
impl PartialEq for UKey {
    fn eq(&self, other: &Self) -> bool {
        // equal iff same user-namespace object and same uid
        Arc::ptr_eq(&self.user_ns, &other.user_ns) && self.uid == other.uid
    }
}
/// Global registry of per-(user namespace, uid) counter entries.
struct CountManager {
    counts: Mutex<HashMap<UKey, Arc<UCounts>>>,
}
impl CountManager {
    fn new() -> Self {
        Self {
            counts: Mutex::new(HashMap::new()),
        }
    }
}

View File

@ -0,0 +1,135 @@
#![allow(dead_code, unused_variables, unused_imports)]
use alloc::boxed::Box;
use crate::libs::rwlock::RwLock;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use system_error::SystemError;
use crate::namespaces::namespace::NsCommon;
use crate::namespaces::ucount::UCounts;
use crate::process::fork::CloneFlags;
use crate::process::Pid;
use alloc::sync::Arc;
use super::namespace::NsOperations;
use super::ucount::Ucount::Counts;
/// Maximum number of base extents in a uid/gid map (mirrors Linux).
const UID_GID_MAP_MAX_BASE_EXTENTS: usize = 5;
/// Default per-type ucount maximum.
const UCOUNT_MAX: u32 = 62636;

/// Manages uid/gid mappings between a namespace and its parent.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct UidGidMap {
    nr_extents: u32,
    extent: Vec<UidGidExtent>,
}

/// One contiguous id-range mapping.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct UidGidExtent {
    /// first id in this namespace
    first: u32,
    /// first id in the lower (parent) namespace it maps to
    lower_first: u32,
    /// number of consecutive ids covered
    count: u32,
}
#[derive(Debug)]
pub struct UserNamespace {
    uid_map: UidGidMap,
    gid_map: UidGidMap,
    /// project-id mapping
    progid_map: UidGidMap,
    /// Parent user namespace (None for the root)
    parent: Option<Arc<UserNamespace>>,
    level: u32,
    owner: usize,
    group: usize,
    ns_common: Arc<NsCommon>,
    flags: u32,
    pid: Arc<RwLock<Pid>>,
    pub ucounts: Option<Arc<UCounts>>,
    pub ucount_max: Vec<u32>, //vec![u32; UCOUNT_COUNTS as usize],
    pub rlimit_max: Vec<u32>, // vec![u32; UCOUNT_RLIMIT_COUNTS as usize],
}
impl Default for UserNamespace {
    fn default() -> Self {
        Self::new()
    }
}
/// `NsOperations` descriptor for user namespaces.
/// All operations are still unimplemented: the OS is single-user for now,
/// so the user namespace is a global static.
#[derive(Debug)]
struct UserNsOperations {
    name: String,
    clone_flags: CloneFlags,
}
impl UserNsOperations {
    pub fn new(name: String) -> Self {
        Self {
            name,
            clone_flags: CloneFlags::CLONE_NEWUSER,
        }
    }
}
impl NsOperations for UserNsOperations {
    fn get(&self, pid: Pid) -> Option<Arc<NsCommon>> {
        unimplemented!()
    }
    fn get_parent(&self, ns_common: Arc<NsCommon>) -> Result<Arc<NsCommon>, SystemError> {
        unimplemented!()
    }
    fn install(
        &self,
        nsset: &mut super::NsSet,
        ns_common: Arc<NsCommon>,
    ) -> Result<(), SystemError> {
        unimplemented!()
    }
    fn owner(&self, ns_common: Arc<NsCommon>) -> Arc<UserNamespace> {
        unimplemented!()
    }
    fn put(&self, ns_common: Arc<NsCommon>) {
        unimplemented!()
    }
}
impl UidGidMap {
pub fn new() -> Self {
Self {
nr_extents: 1,
extent: vec![UidGidExtent::new(); UID_GID_MAP_MAX_BASE_EXTENTS],
}
}
}
impl UidGidExtent {
pub fn new() -> Self {
Self {
first: 0,
lower_first: 0,
count: u32::MAX,
}
}
}
impl UserNamespace {
    /// Root user namespace: identity uid/gid maps, owned by uid/gid 0.
    pub fn new() -> Self {
        Self {
            uid_map: UidGidMap::new(),
            gid_map: UidGidMap::new(),
            progid_map: UidGidMap::new(),
            owner: 0,
            level: 0,
            group: 0,
            flags: 1,
            parent: None,
            ns_common: Arc::new(NsCommon::new(Box::new(UserNsOperations::new(
                "User".to_string(),
            )))),
            pid: Arc::new(RwLock::new(Pid::new(1))),
            ucount_max: vec![UCOUNT_MAX; Counts as usize],
            ucounts: None,
            // NOTE(review): only 4 entries, but UcountRlimit::Counts is 5
            // (discriminants are 1-based) — indexing by
            // UcountRlimit::Memlock (= 4) would be out of bounds.
            // TODO confirm the intended length.
            rlimit_max: vec![65535, 10, 32000, 64 * 1024],
        }
    }
}

View File

@ -1,3 +1,4 @@
use alloc::vec::Vec;
use core::{intrinsics::unlikely, sync::atomic::Ordering};
use alloc::{string::ToString, sync::Arc};
@ -10,6 +11,7 @@ use crate::{
ipc::signal::flush_signal_handlers,
libs::rwlock::RwLock,
mm::VirtAddr,
namespaces::{create_new_namespaces, namespace::USER_NS, pid_namespace::PidStrcut},
process::ProcessFlags,
sched::{sched_cgroup_fork, sched_fork},
smp::core::smp_get_processor_id,
@ -20,6 +22,7 @@ use super::{
kthread::{KernelThreadPcbPrivate, WorkerPrivate},
KernelStack, Pid, ProcessControlBlock, ProcessManager,
};
const MAX_PID_NS_LEVEL: usize = 32;
bitflags! {
/// 进程克隆标志
@ -84,8 +87,8 @@ bitflags! {
/// 因为这两个系统调用的参数很多,所以有这样一个载体更灵活
///
/// 仅仅作为参数传递
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct KernelCloneArgs {
pub flags: CloneFlags,
@ -93,7 +96,7 @@ pub struct KernelCloneArgs {
pub pidfd: VirtAddr,
pub child_tid: VirtAddr,
pub parent_tid: VirtAddr,
pub set_tid: VirtAddr,
pub set_tid: Vec<usize>,
/// 进程退出时发送的信号
pub exit_signal: Signal,
@ -122,7 +125,7 @@ impl KernelCloneArgs {
pidfd: null_addr,
child_tid: null_addr,
parent_tid: null_addr,
set_tid: null_addr,
set_tid: Vec::with_capacity(MAX_PID_NS_LEVEL),
exit_signal: Signal::SIGCHLD,
stack: 0,
stack_size: 0,
@ -260,6 +263,34 @@ impl ProcessManager {
return Ok(());
}
#[inline(never)]
/// Set up the new pcb's namespace proxy during fork/clone.
///
/// Without any CLONE_NEW* flag the parent's proxy is shared; otherwise
/// fresh namespaces are created as requested by `clone_flags`.
fn copy_namespaces(
    clone_flags: &CloneFlags,
    current_pcb: &Arc<ProcessControlBlock>,
    new_pcb: &Arc<ProcessControlBlock>,
) -> Result<(), SystemError> {
    // fast path: nothing to unshare, reuse the parent's proxy
    if !clone_flags.contains(CloneFlags::CLONE_NEWNS)
        && !clone_flags.contains(CloneFlags::CLONE_NEWUTS)
        && !clone_flags.contains(CloneFlags::CLONE_NEWIPC)
        && !clone_flags.contains(CloneFlags::CLONE_NEWPID)
        && !clone_flags.contains(CloneFlags::CLONE_NEWNET)
        && !clone_flags.contains(CloneFlags::CLONE_NEWCGROUP)
    {
        new_pcb.set_nsproxy(current_pcb.get_nsproxy().read().clone());
        return Ok(());
    }
    // CLONE_NEWIPC together with CLONE_SYSVSEM is rejected, as in Linux
    if clone_flags.contains(CloneFlags::CLONE_NEWIPC)
        && clone_flags.contains(CloneFlags::CLONE_SYSVSEM)
    {
        return Err(SystemError::EINVAL);
    }
    let new_nsproxy = create_new_namespaces(clone_flags.bits(), current_pcb, USER_NS.clone())?;
    *new_pcb.nsproxy.write() = new_nsproxy;
    Ok(())
}
#[inline(never)]
fn copy_files(
clone_flags: &CloneFlags,
@ -422,6 +453,11 @@ impl ProcessManager {
)
});
Self::copy_namespaces(&clone_flags, current_pcb, pcb).unwrap_or_else(|e|{
panic!("fork: Failed to copy namespace form current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e)
});
// 拷贝文件描述符表
Self::copy_files(&clone_flags, current_pcb, pcb).unwrap_or_else(|e| {
panic!(
@ -439,13 +475,19 @@ impl ProcessManager {
});
// 拷贝线程
Self::copy_thread(current_pcb, pcb, clone_args,current_trapframe).unwrap_or_else(|e| {
Self::copy_thread(current_pcb, pcb, &clone_args, current_trapframe).unwrap_or_else(|e| {
panic!(
"fork: Failed to copy thread from current process, current pid: [{:?}], new pid: [{:?}]. Error: {:?}",
current_pcb.pid(), pcb.pid(), e
)
});
if current_pcb.pid() != Pid(0) {
let new_pid = PidStrcut::alloc_pid(
pcb.get_nsproxy().read().pid_namespace.clone(), // 获取命名空间
clone_args.set_tid.clone(),
)?;
*pcb.thread_pid.write() = new_pid;
}
// 设置线程组id、组长
if clone_flags.contains(CloneFlags::CLONE_THREAD) {
pcb.thread.write_irqsave().group_leader =

View File

@ -50,11 +50,11 @@ use crate::{
ucontext::AddressSpace,
VirtAddr,
},
namespaces::{mnt_namespace::FsStruct, pid_namespace::PidStrcut, NsProxy},
net::socket::SocketInode,
sched::completion::Completion,
sched::{
cpu_rq, fair::FairSchedEntity, prio::MAX_PRIO, DequeueFlag, EnqueueFlag, OnRq, SchedMode,
WakeupFlags, __schedule,
completion::Completion, cpu_rq, fair::FairSchedEntity, prio::MAX_PRIO, DequeueFlag,
EnqueueFlag, OnRq, SchedMode, WakeupFlags, __schedule,
},
smp::{
core::smp_get_processor_id,
@ -90,7 +90,6 @@ pub static mut PROCESS_SWITCH_RESULT: Option<PerCpuVar<SwitchResult>> = None;
/// 一个只改变1次的全局变量标志进程管理器是否已经初始化完成
static mut __PROCESS_MANAGEMENT_INIT_DONE: bool = false;
#[derive(Debug)]
pub struct SwitchResult {
pub prev_pcb: Option<Arc<ProcessControlBlock>>,
pub next_pcb: Option<Arc<ProcessControlBlock>>,
@ -609,14 +608,14 @@ bitflags! {
const RANDOMIZE = 1 << 8;
}
}
#[derive(Debug)]
pub struct ProcessControlBlock {
/// 当前进程的pid
pid: Pid,
/// 当前进程的线程组id这个值在同一个线程组内永远不变
tgid: Pid,
/// 有关Pid的相关的信息
thread_pid: Arc<RwLock<PidStrcut>>,
basic: RwLock<ProcessBasicInfo>,
/// 当前进程的自旋锁持有计数
preempt_count: AtomicUsize,
@ -654,12 +653,18 @@ pub struct ProcessControlBlock {
/// 线程信息
thread: RwLock<ThreadInfo>,
/// 进程文件系统的状态
fs: Arc<SpinLock<FsStruct>>,
///闹钟定时器
alarm_timer: SpinLock<Option<AlarmTimer>>,
/// 进程的robust lock列表
robust_list: RwLock<Option<RobustListHead>>,
/// namespace的指针
nsproxy: Arc<RwLock<NsProxy>>,
/// 进程作为主体的凭证集
cred: SpinLock<Cred>,
}
@ -722,10 +727,10 @@ impl ProcessControlBlock {
let ppcb: Weak<ProcessControlBlock> = ProcessManager::find(ppid)
.map(|p| Arc::downgrade(&p))
.unwrap_or_default();
let pcb = Self {
pid,
tgid: pid,
thread_pid: Arc::new(RwLock::new(PidStrcut::new())),
basic: basic_info,
preempt_count,
flags,
@ -742,8 +747,10 @@ impl ProcessControlBlock {
children: RwLock::new(Vec::new()),
wait_queue: WaitQueue::default(),
thread: RwLock::new(ThreadInfo::new()),
fs: Arc::new(SpinLock::new(FsStruct::new())),
alarm_timer: SpinLock::new(None),
robust_list: RwLock::new(None),
nsproxy: Arc::new(RwLock::new(NsProxy::new())),
cred: SpinLock::new(cred),
};
@ -886,11 +893,21 @@ impl ProcessControlBlock {
return self.pid;
}
#[inline(always)]
pub fn pid_strcut(&self) -> Arc<RwLock<PidStrcut>> {
self.thread_pid.clone()
}
#[inline(always)]
pub fn tgid(&self) -> Pid {
return self.tgid;
}
#[inline(always)]
pub fn fs_struct(&self) -> Arc<SpinLock<FsStruct>> {
self.fs.clone()
}
/// 获取文件描述符表的Arc指针
#[inline(always)]
pub fn fd_table(&self) -> Arc<RwLock<FileDescriptorVec>> {
@ -1022,6 +1039,14 @@ impl ProcessControlBlock {
pub fn alarm_timer_irqsave(&self) -> SpinLockGuard<Option<AlarmTimer>> {
return self.alarm_timer.lock_irqsave();
}
pub fn get_nsproxy(&self) -> Arc<RwLock<NsProxy>> {
self.nsproxy.clone()
}
pub fn set_nsproxy(&self, nsprsy: NsProxy) {
*self.nsproxy.write() = nsprsy;
}
}
impl Drop for ProcessControlBlock {

View File

@ -188,7 +188,13 @@ impl Syscall {
/// @brief Get the pid (thread-group id) of the calling process.
pub fn getpid() -> Result<Pid, SystemError> {
    let current_pcb = ProcessManager::current_pcb();
    // Namespace-aware variant, kept for future use:
    // if let Some(pid_ns) = &current_pcb.get_nsproxy().read().pid_namespace {
    //     // return the PID as seen from inside the namespace
    //     return Ok(current_pcb.pid_strcut().read().numbers[pid_ns.level].nr);
    // }
    // For now always return the global tgid.
    Ok(current_pcb.tgid())
}
/// @brief 获取指定进程的pgid

View File

@ -1,6 +1,5 @@
use core::{
ffi::{c_int, c_void},
ptr::null,
sync::atomic::{AtomicBool, Ordering},
};
@ -1090,7 +1089,9 @@ impl Syscall {
let source = args[0] as *const u8;
let target = args[1] as *const u8;
let filesystemtype = args[2] as *const u8;
return Self::mount(source, target, filesystemtype, 0, null());
let mountflags = args[3];
let data = args[4] as *const u8; // 额外的mount参数实现自己的mountdata来获取
return Self::mount(source, target, filesystemtype, mountflags, data);
}
SYS_UMOUNT2 => {
@ -1183,6 +1184,7 @@ impl Syscall {
let flags = args[1] as u32;
Self::sys_eventfd(initval, flags)
}
SYS_UNSHARE => Self::sys_unshare(args[0] as u64),
SYS_BPF => {
let cmd = args[0] as u32;
let attr = args[1] as *mut u8;

View File

@ -0,0 +1,7 @@
[package]
name = "test-namespace"
version = "0.1.0"
edition = "2021"
[dependencies]
nix = { version = "0.29.0", features = ["sched", "process"] }

View File

@ -0,0 +1,56 @@
TOOLCHAIN="+nightly-2023-08-15-x86_64-unknown-linux-gnu"
RUSTFLAGS+=""

ifdef DADK_CURRENT_BUILD_DIR
# building inside dadk: install into dadk's build directory
INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR)
else
# local build: install into ./install under the current directory
INSTALL_DIR = ./install
endif

ifeq ($(ARCH), x86_64)
export RUST_TARGET=x86_64-unknown-linux-musl
else ifeq ($(ARCH), riscv64)
export RUST_TARGET=riscv64gc-unknown-linux-gnu
else
# default to x86_64 for local builds
export RUST_TARGET=x86_64-unknown-linux-musl
endif

# Declare every target phony so that files named "build", "test", etc.
# can never shadow them (the original only declared "install").
.PHONY: run build clean test doc fmt fmt-check run-release build-release clean-release test-release install

run:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)

build:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)

clean:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)

test:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET)

doc:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET)

fmt:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt

fmt-check:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check

run-release:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release

build-release:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release

# NOTE(review): `cargo clean` with --release cleans only release
# artifacts; verify this is the intended behavior.
clean-release:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release

test-release:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release

install:
	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force

View File

@ -0,0 +1,63 @@
# Makefile.toml
# NOTE(review): the inline `{ default = { if = ... } }` conditional syntax
# used in [env] below is not standard cargo-make; cargo-make expresses
# conditional env vars via `condition`/`env_set` mappings or scripts.
# Verify this file actually loads before relying on it.
[env]
TOOLCHAIN = "+nightly-2023-08-15-x86_64-unknown-linux-gnu"
ARCH = { default = "x86_64" }
RUST_TARGET = { default = { if = "eq(env.ARCH, 'riscv64')", value = "riscv64gc-unknown-linux-gnu", else = "x86_64-unknown-linux-musl" } }
INSTALL_DIR = { default = { if = "defined(env.DADK_CURRENT_BUILD_DIR)", value = "${DADK_CURRENT_BUILD_DIR}", else = "./install" } }

[tasks.build]
description = "Build the project"
command = "cargo"
args = ["${TOOLCHAIN}", "build", "--target", "${RUST_TARGET}"]

[tasks.run]
description = "Run the project"
command = "cargo"
args = ["${TOOLCHAIN}", "run", "--target", "${RUST_TARGET}"]

[tasks.clean]
description = "Clean the project"
command = "cargo"
args = ["${TOOLCHAIN}", "clean", "--target", "${RUST_TARGET}"]

[tasks.test]
description = "Run the tests"
command = "cargo"
args = ["${TOOLCHAIN}", "test", "--target", "${RUST_TARGET}"]

[tasks.doc]
description = "Generate documentation"
command = "cargo"
args = ["${TOOLCHAIN}", "doc", "--target", "${RUST_TARGET}"]

[tasks.fmt]
description = "Format the code"
command = "cargo"
args = ["${TOOLCHAIN}", "fmt"]

[tasks.fmt-check]
description = "Check code format"
command = "cargo"
args = ["${TOOLCHAIN}", "fmt", "--check"]

[tasks.run-release]
description = "Run the project in release mode"
command = "cargo"
args = ["${TOOLCHAIN}", "run", "--target", "${RUST_TARGET}", "--release"]

[tasks.build-release]
description = "Build the project in release mode"
command = "cargo"
args = ["${TOOLCHAIN}", "build", "--target", "${RUST_TARGET}", "--release"]

[tasks.test-release]
description = "Test the project in release mode"
command = "cargo"
args = ["${TOOLCHAIN}", "test", "--target", "${RUST_TARGET}", "--release"]

[tasks.install]
description = "Install the project"
command = "cargo"
args = ["${TOOLCHAIN}", "install", "--target", "${RUST_TARGET}", "--path", ".", "--no-track", "--root", "${INSTALL_DIR}", "--force"]

View File

@ -0,0 +1,38 @@
extern crate nix;
use nix::sched::{self, CloneFlags};
use nix::sys::wait::{waitpid, WaitStatus};
use nix::unistd::{self, fork, ForkResult};
use std::process;
/// Smoke test for pid/mount namespaces: fork a child that unshares into
/// new namespaces, then reap it from the parent.
fn main() {
    // request new pid + mount namespaces for the child
    let clone_flags = CloneFlags::CLONE_NEWPID | CloneFlags::CLONE_NEWNS;
    println!("Parent process. PID: {}", unistd::getpid());
    unsafe {
        match fork() {
            Ok(ForkResult::Parent { child }) => {
                println!("Parent process. Child PID: {}", child);
                match waitpid(child, None) {
                    Ok(WaitStatus::Exited(pid, status)) => {
                        println!("Child {} exited with status: {}", pid, status);
                    }
                    Ok(_) => println!("Child process did not exit normally."),
                    Err(e) => println!("Error waiting for child process: {:?}", e),
                }
            }
            Ok(ForkResult::Child) => {
                // detach the child into the new namespaces via unshare
                println!("Child process. PID: {}", unistd::getpid());
                if let Err(e) = sched::unshare(clone_flags) {
                    println!("Failed to unshare: {:?}", e);
                    process::exit(1);
                }
                // NOTE: in Linux, unshare(CLONE_NEWPID) affects children
                // forked afterwards; this process's own pid is unchanged
                println!("Child process. PID: {}", unistd::getpid());
            }
            Err(err) => {
                println!("Fork failed: {:?}", err);
                process::exit(1);
            }
        }
    }
}

1
user/apps/test_overlayfs/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
test_overlayfs

View File

@ -0,0 +1,20 @@
ifeq ($(ARCH), x86_64)
CROSS_COMPILE=x86_64-linux-musl-
else ifeq ($(ARCH), riscv64)
CROSS_COMPILE=riscv64-linux-musl-
endif

CC=$(CROSS_COMPILE)gcc

.PHONY: all
all: main.c
	$(CC) -static -o test_overlayfs main.c

# fmt is declared phony too so a file named "fmt" cannot shadow it
.PHONY: install clean fmt
install: all
	mv test_overlayfs $(DADK_CURRENT_BUILD_DIR)/test_overlayfs

clean:
	# -f: do not fail when the artifacts are already gone
	rm -f test_overlayfs *.o

fmt:
View File

@ -0,0 +1,92 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
// #define LOWERDIR "/tmp/overlayfs/lower"
// #define UPPERDIR "/tmp/overlayfs/upper"
// #define WORKDIR "/tmp/overlayfs/work"
// #define MERGEDDIR "/tmp/overlayfs/merged"
// void create_directories()
// {
// mkdir(LOWERDIR, 0755);
// mkdir(UPPERDIR, 0755);
// mkdir(WORKDIR, 0755);
// mkdir(MERGEDDIR, 0755);
// }
#define TMPDIR "/tmp"
#define OVERLAYFSDIR "/tmp/overlayfs"
#define LOWERDIR "/tmp/overlayfs/lower"
#define UPPERDIR "/tmp/overlayfs/upper"
#define WORKDIR "/tmp/overlayfs/work"
#define MERGEDDIR "/tmp/overlayfs/merged"
/* Create the overlayfs directory tree under /tmp.
 * mkdir() results are deliberately ignored: EEXIST on reruns is harmless
 * (presumably — TODO: check errno for failures other than EEXIST). */
void create_directories()
{
    mkdir(TMPDIR, 0755);
    mkdir(OVERLAYFSDIR, 0755);
    mkdir(LOWERDIR, 0755);
    mkdir(UPPERDIR, 0755);
    mkdir(WORKDIR, 0755);
    mkdir(MERGEDDIR, 0755);
    printf("step1 : success\n");
}
/* Populate the lower layer with one file so the merged view has content
 * to expose. Exits on any failure. */
void create_lower_file()
{
    char filepath[256];
    snprintf(filepath, sizeof(filepath), "%s/lowerfile.txt", LOWERDIR);
    int fd = open(filepath, O_CREAT | O_WRONLY, 0644);
    if (fd < 0)
    {
        perror("Failed to create file in lowerdir");
        exit(EXIT_FAILURE);
    }
    /* check the write result — the original ignored it, so a short or
     * failed write went unnoticed */
    const char msg[] = "This is a lower layer file.\n";
    if (write(fd, msg, sizeof(msg) - 1) != (ssize_t)(sizeof(msg) - 1))
    {
        perror("Failed to write file in lowerdir");
        close(fd);
        exit(EXIT_FAILURE);
    }
    close(fd);
    printf("step2 : success\n");
}
/* Mount an overlay filesystem at MERGEDDIR, stacking UPPERDIR over
 * LOWERDIR with WORKDIR as the copy-up scratch area. Exits on failure. */
void mount_overlayfs()
{
    char options[1024];
    snprintf(options, sizeof(options),
             "lowerdir=%s,upperdir=%s,workdir=%s",
             LOWERDIR, UPPERDIR, WORKDIR);
    if (mount("overlay", MERGEDDIR, "overlay", 0, options) != 0)
    {
        perror("Mount failed");
        exit(EXIT_FAILURE);
    }
    printf("OverlayFS mounted successfully.\n");
    printf("step3 : success\n");
}
/* Create a directory in the upper layer.
 * NOTE(review): despite the name and the messages, the directory is
 * created under UPPERDIR, not MERGEDDIR — presumably to verify it shows
 * up in the merged view; confirm the intent. */
void create_directory_in_merged()
{
    char dirpath[256];
    snprintf(dirpath, sizeof(dirpath), "%s/newdir", UPPERDIR);
    if (mkdir(dirpath, 0755) != 0)
    {
        perror("Failed to create directory in merged dir");
        exit(EXIT_FAILURE);
    }
    printf("Directory created in merged: %s\n", dirpath);
    printf("step4 : success\n");
}
/* Drive the overlayfs smoke test: build the tree, populate the lower
 * layer, mount the overlay, then write through the upper layer. */
int main()
{
    create_directories();
    /* step2 — create_lower_file was defined but never invoked, so the
     * lower layer stayed empty and "step2 : success" never printed */
    create_lower_file();
    mount_overlayfs();
    create_directory_in_merged();
    return 0;
}

View File

@ -0,0 +1,25 @@
{
"name": "test_namespace",
"version": "0.1.0",
"description": "test namespace",
"rust_target": null,
"task_type": {
"BuildFromSource": {
"Local": {
"path": "apps/test_namespace"
}
}
},
"depends": [],
"build": {
"build_command": "make install"
},
"install": {
"in_dragonos_path": "/bin"
},
"clean": {
"clean_command": "make clean"
},
"envs": [],
"target_arch": ["x86_64"]
}

View File

@ -0,0 +1,25 @@
{
"name": "test_overlayfs",
"version": "0.1.0",
"description": "test overlayfs",
"rust_target": null,
"task_type": {
"BuildFromSource": {
"Local": {
"path": "apps/test_overlayfs"
}
}
},
"depends": [],
"build": {
"build_command": "make install"
},
"install": {
"in_dragonos_path": "/bin"
},
"clean": {
"clean_command": "make clean"
},
"envs": [],
"target_arch": ["x86_64"]
}