Implement UNIX abstract address namespace

This commit is contained in:
Ruihan Li 2024-07-27 11:21:26 +08:00 committed by Tate, Hongliang Tian
parent aae9fdb331
commit 65aa156e92
9 changed files with 338 additions and 116 deletions

View File

@ -1,6 +1,13 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{fs::path::Dentry, net::socket::util::socket_addr::SocketAddr, prelude::*};
use keyable_arc::KeyableArc;
use super::ns::{self, AbstractHandle};
use crate::{
fs::{path::Dentry, utils::Inode},
net::socket::util::socket_addr::SocketAddr,
prelude::*,
};
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum UnixSocketAddr {
@ -9,10 +16,36 @@ pub enum UnixSocketAddr {
Abstract(Arc<[u8]>),
}
#[derive(Clone, Debug)]
pub(super) enum UnixSocketAddrBound {
Path(Arc<str>, Arc<Dentry>),
Abstract(Arc<[u8]>),
impl UnixSocketAddr {
    /// Binds this address and returns the bound form that a socket keeps.
    ///
    /// - `Unnamed` requests an ephemeral abstract name from the namespace.
    /// - `Path` creates the socket file and keeps the resulting dentry.
    /// - `Abstract` registers the given abstract name.
    ///
    /// # Errors
    ///
    /// Any error reported by the underlying `ns` helper is propagated unchanged.
    pub(super) fn bind(self) -> Result<UnixSocketAddrBound> {
        match self {
            Self::Unnamed => {
                let handle = ns::alloc_ephemeral_abstract_name()?;
                Ok(UnixSocketAddrBound::Abstract(handle))
            }
            Self::Path(path) => {
                let dentry = ns::create_socket_file(&path)?;
                Ok(UnixSocketAddrBound::Path(path, dentry))
            }
            Self::Abstract(name) => {
                let handle = ns::create_abstract_name(name)?;
                Ok(UnixSocketAddrBound::Abstract(handle))
            }
        }
    }

    /// Resolves this address to the key that identifies the peer to connect to.
    ///
    /// - `Path` looks up the socket file and keys on its inode.
    /// - `Abstract` looks up the abstract name and keys on its handle.
    ///
    /// # Errors
    ///
    /// Returns `EINVAL` for `Unnamed`, which cannot be a connection target.
    /// Lookup errors from the `ns` helpers are propagated unchanged.
    pub(super) fn connect(&self) -> Result<UnixSocketAddrKey> {
        match self {
            Self::Unnamed => return_errno_with_message!(
                Errno::EINVAL,
                "the unnamed UNIX domain socket address is not valid for connecting"
            ),
            Self::Path(path) => {
                let dentry = ns::lookup_socket_file(path)?;
                let inode = dentry.inode().clone();
                Ok(UnixSocketAddrKey::Path(KeyableArc::from(inode)))
            }
            Self::Abstract(name) => {
                let handle = ns::lookup_abstract_name(name)?;
                Ok(UnixSocketAddrKey::Abstract(KeyableArc::from(handle)))
            }
        }
    }
}
impl TryFrom<SocketAddr> for UnixSocketAddr {
@ -26,11 +59,34 @@ impl TryFrom<SocketAddr> for UnixSocketAddr {
}
}
/// A UNIX socket address after it has been bound.
///
/// Values of this type are produced by `UnixSocketAddr::bind`.
#[derive(Clone, Debug)]
pub(super) enum UnixSocketAddrBound {
/// A pathname address: the path string together with the dentry of the socket file.
Path(Arc<str>, Arc<Dentry>),
/// An abstract address, held as the handle returned by the abstract namespace.
Abstract(Arc<AbstractHandle>),
}
/// A comparable, hashable key that identifies a bound UNIX socket address.
///
/// It is obtained either from a bound address (`UnixSocketAddrBound::to_key`) or by resolving
/// an address to connect to (`UnixSocketAddr::connect`).
///
/// NOTE(review): comparison is delegated to `KeyableArc`; presumably that compares by pointer
/// identity rather than by content -- confirm against the `keyable_arc` crate.
#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub(super) enum UnixSocketAddrKey {
/// A pathname address, keyed by the inode of the socket file.
Path(KeyableArc<dyn Inode>),
/// An abstract address, keyed by its namespace handle.
Abstract(KeyableArc<AbstractHandle>),
}
impl UnixSocketAddrBound {
pub(super) fn to_key(&self) -> UnixSocketAddrKey {
match self {
Self::Path(_, dentry) => {
UnixSocketAddrKey::Path(KeyableArc::from(dentry.inode().clone()))
}
Self::Abstract(handle) => UnixSocketAddrKey::Abstract(KeyableArc::from(handle.clone())),
}
}
}
impl From<UnixSocketAddrBound> for UnixSocketAddr {
fn from(value: UnixSocketAddrBound) -> Self {
match value {
UnixSocketAddrBound::Path(path, _) => Self::Path(path),
UnixSocketAddrBound::Abstract(name) => Self::Abstract(name),
UnixSocketAddrBound::Abstract(name) => Self::Abstract(name.name()),
}
}
}

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
mod addr;
mod ns;
mod stream;
pub use addr::UnixSocketAddr;

View File

@ -0,0 +1,122 @@
// SPDX-License-Identifier: MPL-2.0
use alloc::{collections::btree_map::Entry, format};
use keyable_arc::KeyableArc;
use crate::prelude::*;
/// A handle to a name in the abstract UNIX socket namespace.
///
/// The handle wraps the name it was created for. Dropping the handle asks the global handle
/// table to remove the entry for that name (see the `Drop` implementation).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct AbstractHandle(KeyableArc<[u8]>);
impl AbstractHandle {
    /// Wraps `name` in a new handle.
    ///
    /// This does not register the name anywhere; registration is done by `HandleTable::create`.
    fn new(name: Arc<[u8]>) -> Self {
        let keyed_name = KeyableArc::from(name);
        Self(keyed_name)
    }

    /// Returns the abstract name this handle was created for.
    pub fn name(&self) -> Arc<[u8]> {
        let keyed_name = self.0.clone();
        keyed_name.into()
    }
}
impl Drop for AbstractHandle {
    /// Asks the global table to forget this handle's name.
    ///
    /// `HandleTable::remove` re-checks the entry under the lock, so an entry that has already
    /// been taken over by a newer live handle is left in place.
    fn drop(&mut self) {
        let name = self.name();
        HANDLE_TABLE.remove(name);
    }
}
/// The global table that tracks every abstract name created through this module.
static HANDLE_TABLE: HandleTable = HandleTable::new();
/// A table mapping abstract names to the handles that currently own them.
struct HandleTable {
// Each name maps to a weak reference, so the table itself never keeps a handle alive.
// An entry whose strong count has dropped to zero is treated as free.
handles: RwLock<BTreeMap<Arc<[u8]>, Weak<AbstractHandle>>>,
}
impl HandleTable {
const fn new() -> Self {
Self {
handles: RwLock::new(BTreeMap::new()),
}
}
fn create(&self, name: Arc<[u8]>) -> Option<Arc<AbstractHandle>> {
let mut handles = self.handles.write();
let mut entry = handles.entry(name.clone());
if let Entry::Occupied(ref occupied) = entry {
// The handle is in use only if its strong count is greater than zero.
if occupied.get().strong_count() > 0 {
return None;
}
}
let new_handle = Arc::new(AbstractHandle::new(name));
let weak_handle = Arc::downgrade(&new_handle);
match entry {
Entry::Occupied(ref mut occupied) => {
occupied.insert(weak_handle);
}
Entry::Vacant(vacant) => {
vacant.insert(weak_handle);
}
}
Some(new_handle)
}
fn remove(&self, name: Arc<[u8]>) {
let mut handles = self.handles.write();
let Entry::Occupied(occupied) = handles.entry(name) else {
return;
};
// Due to race conditions between `AbstractHandle::drop` and `HandleTable::create`, the
// entry may be occupied by another handle.
//
// Therefore, before removing the entry, we must check again if the entry should be removed.
if occupied.get().strong_count() == 0 {
occupied.remove();
}
}
fn lookup(&self, name: &[u8]) -> Option<Arc<AbstractHandle>> {
let handles = self.handles.read();
handles.get(name).and_then(Weak::upgrade)
}
fn alloc_ephemeral(&self) -> Option<Arc<AbstractHandle>> {
// See "Autobind feature" in the man pages:
// <https://man7.org/linux/man-pages/man7/unix.7.html>.
//
// Note that false negatives are fine here. So we don't mind race conditions.
//
// TODO: Always starting with the first name is inefficient and leads to contention.
// Instead, we should generate some random names and check their availability.
(0..(1 << 20))
.map(|num| format!("{:05x}", num))
.map(|name| Arc::from(name.as_bytes()))
.filter_map(|name| self.create(name))
.next()
}
}
pub fn create_abstract_name(name: Arc<[u8]>) -> Result<Arc<AbstractHandle>> {
HANDLE_TABLE.create(name).ok_or_else(|| {
Error::with_message(Errno::EADDRINUSE, "the abstract name is already in use")
})
}
pub fn alloc_ephemeral_abstract_name() -> Result<Arc<AbstractHandle>> {
HANDLE_TABLE.alloc_ephemeral().ok_or_else(|| {
Error::with_message(Errno::ENOSPC, "no ephemeral abstract name is available")
})
}
/// Looks up the live handle that owns `name` in the abstract namespace.
///
/// # Errors
///
/// Returns `ECONNREFUSED` if no live handle owns the name.
pub fn lookup_abstract_name(name: &[u8]) -> Result<Arc<AbstractHandle>> {
    let Some(handle) = HANDLE_TABLE.lookup(name) else {
        return Err(Error::with_message(
            Errno::ECONNREFUSED,
            "the abstract name does not exist",
        ));
    };
    Ok(handle)
}

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
pub(super) use abs::{
alloc_ephemeral_abstract_name, create_abstract_name, lookup_abstract_name, AbstractHandle,
};
pub(super) use path::{create_socket_file, lookup_socket_file};
mod abs;
mod path;

View File

@ -0,0 +1,57 @@
// SPDX-License-Identifier: MPL-2.0
use crate::{
fs::{
fs_resolver::{split_path, FsPath},
path::Dentry,
utils::{InodeMode, InodeType},
},
prelude::*,
};
/// Resolves `path` to the dentry of an existing socket file that can be connected to.
///
/// The path is resolved against the current process's filesystem view.
///
/// # Errors
///
/// - Path conversion and lookup errors are propagated unchanged.
/// - `EACCES` if the file is not both readable and writable.
/// - `ECONNREFUSED` if the file is not a socket file.
///
/// The permission check comes first on purpose: an inaccessible non-socket file reports
/// `EACCES`, not `ECONNREFUSED`.
pub fn lookup_socket_file(path: &str) -> Result<Arc<Dentry>> {
    let dentry = {
        let current = current!();
        let fs = current.fs().read();
        let fs_path = FsPath::try_from(path)?;
        fs.lookup(&fs_path)?
    };

    // Read the mode once so that both permission bits come from the same snapshot and the
    // fallible lookup is not repeated.
    let mode = dentry.mode()?;
    if !mode.is_readable() || !mode.is_writable() {
        return_errno_with_message!(Errno::EACCES, "the socket file cannot be read or written")
    }

    if dentry.type_() != InodeType::Socket {
        return_errno_with_message!(
            Errno::ECONNREFUSED,
            "the specified file is not a socket file"
        )
    }

    Ok(dentry)
}
pub fn create_socket_file(path: &str) -> Result<Arc<Dentry>> {
let (parent_pathname, file_name) = split_path(path);
let parent = {
let current = current!();
let fs = current.fs().read();
let parent_path = FsPath::try_from(parent_pathname)?;
fs.lookup(&parent_path)?
};
parent
.new_fs_child(
file_name,
InodeType::Socket,
InodeMode::S_IRUSR | InodeMode::S_IWUSR,
)
.map_err(|err| {
if err.error() == Errno::EEXIST {
Error::with_message(Errno::EADDRINUSE, "the socket file already exists")
} else {
err
}
})
}

View File

@ -2,11 +2,6 @@
use crate::{
events::{IoEvents, Observer},
fs::{
fs_resolver::{split_path, FsPath},
path::Dentry,
utils::{InodeMode, InodeType},
},
net::socket::unix::addr::{UnixSocketAddr, UnixSocketAddrBound},
prelude::*,
process::signal::{Pollee, Poller},
@ -30,14 +25,7 @@ impl Init {
return_errno_with_message!(Errno::EINVAL, "the socket is already bound");
}
let bound_addr = match addr_to_bind {
UnixSocketAddr::Unnamed => todo!(),
UnixSocketAddr::Abstract(_) => todo!(),
UnixSocketAddr::Path(path) => {
let dentry = create_socket_file(&path)?;
UnixSocketAddrBound::Path(path, dentry)
}
};
let bound_addr = addr_to_bind.bind()?;
self.addr = Some(bound_addr);
Ok(())
@ -67,19 +55,3 @@ impl Init {
self.pollee.unregister_observer(observer)
}
}
fn create_socket_file(path: &str) -> Result<Arc<Dentry>> {
let (parent_pathname, file_name) = split_path(path);
let parent = {
let current = current!();
let fs = current.fs().read();
let parent_path = FsPath::try_from(parent_pathname)?;
fs.lookup(&parent_path)?
};
let dentry = parent.new_fs_child(
file_name,
InodeType::Socket,
InodeMode::S_IRUSR | InodeMode::S_IWUSR,
)?;
Ok(dentry)
}

View File

@ -2,13 +2,14 @@
use core::sync::atomic::{AtomicUsize, Ordering};
use keyable_arc::KeyableWeak;
use super::{connected::Connected, UnixStreamSocket};
use crate::{
events::{IoEvents, Observer},
fs::{file_handle::FileLike, path::Dentry, utils::Inode},
net::socket::{unix::addr::UnixSocketAddrBound, SocketAddr},
fs::file_handle::FileLike,
net::socket::{
unix::addr::{UnixSocketAddrBound, UnixSocketAddrKey},
SocketAddr,
},
prelude::*,
process::signal::{Pollee, Poller},
};
@ -62,15 +63,14 @@ impl Listener {
impl Drop for Listener {
fn drop(&mut self) {
unregister_backlog(self.backlog.addr())
unregister_backlog(&self.backlog.addr().to_key())
}
}
static BACKLOG_TABLE: BacklogTable = BacklogTable::new();
struct BacklogTable {
backlog_sockets: RwLock<BTreeMap<KeyableWeak<dyn Inode>, Arc<Backlog>>>,
// TODO: For linux, there is also abstract socket domain that a socket addr is not bound to an inode.
backlog_sockets: RwLock<BTreeMap<UnixSocketAddrKey, Arc<Backlog>>>,
}
impl BacklogTable {
@ -81,41 +81,30 @@ impl BacklogTable {
}
fn add_backlog(&self, addr: UnixSocketAddrBound, backlog: usize) -> Option<Arc<Backlog>> {
let inode = {
let UnixSocketAddrBound::Path(_, ref dentry) = addr else {
todo!()
};
create_keyable_inode(dentry)
};
let new_backlog = Arc::new(Backlog::new(addr, backlog));
let addr_key = addr.to_key();
let mut backlog_sockets = self.backlog_sockets.write();
if backlog_sockets.contains_key(&inode) {
if backlog_sockets.contains_key(&addr_key) {
return None;
}
backlog_sockets.insert(inode, new_backlog.clone());
let new_backlog = Arc::new(Backlog::new(addr, backlog));
backlog_sockets.insert(addr_key, new_backlog.clone());
Some(new_backlog)
}
fn get_backlog(&self, addr: &UnixSocketAddrBound) -> Option<Arc<Backlog>> {
let inode = {
let UnixSocketAddrBound::Path(_, dentry) = addr else {
todo!()
};
create_keyable_inode(dentry)
};
let backlog_sockets = self.backlog_sockets.read();
backlog_sockets.get(&inode).cloned()
fn get_backlog(&self, addr: &UnixSocketAddrKey) -> Option<Arc<Backlog>> {
self.backlog_sockets.read().get(addr).cloned()
}
fn push_incoming(
&self,
server_addr: &UnixSocketAddrBound,
server_key: &UnixSocketAddrKey,
client_addr: Option<UnixSocketAddrBound>,
) -> Result<Connected> {
let backlog = self.get_backlog(server_addr).ok_or_else(|| {
let backlog = self.get_backlog(server_key).ok_or_else(|| {
Error::with_message(
Errno::ECONNREFUSED,
"no socket is listening at the remote address",
@ -125,13 +114,8 @@ impl BacklogTable {
backlog.push_incoming(client_addr)
}
fn remove_backlog(&self, addr: &UnixSocketAddrBound) {
let UnixSocketAddrBound::Path(_, dentry) = addr else {
todo!()
};
let inode = create_keyable_inode(dentry);
self.backlog_sockets.write().remove(&inode);
fn remove_backlog(&self, addr_key: &UnixSocketAddrKey) {
self.backlog_sockets.write().remove(addr_key);
}
}
@ -210,18 +194,13 @@ impl Backlog {
}
}
fn create_keyable_inode(dentry: &Arc<Dentry>) -> KeyableWeak<dyn Inode> {
let weak_inode = Arc::downgrade(dentry.inode());
KeyableWeak::from(weak_inode)
}
fn unregister_backlog(addr: &UnixSocketAddrBound) {
fn unregister_backlog(addr: &UnixSocketAddrKey) {
BACKLOG_TABLE.remove_backlog(addr);
}
pub(super) fn push_incoming(
server_addr: &UnixSocketAddrBound,
server_key: &UnixSocketAddrKey,
client_addr: Option<UnixSocketAddrBound>,
) -> Result<Connected> {
BACKLOG_TABLE.push_incoming(server_addr, client_addr)
BACKLOG_TABLE.push_incoming(server_key, client_addr)
}

View File

@ -11,14 +11,9 @@ use super::{
};
use crate::{
events::{IoEvents, Observer},
fs::{
file_handle::FileLike,
fs_resolver::FsPath,
path::Dentry,
utils::{InodeType, StatusFlags},
},
fs::{file_handle::FileLike, utils::StatusFlags},
net::socket::{
unix::{addr::UnixSocketAddrBound, UnixSocketAddr},
unix::UnixSocketAddr,
util::{
copy_message_from_user, copy_message_to_user, create_message_buffer,
send_recv_flags::SendRecvFlags, socket_addr::SocketAddr, MessageHeader,
@ -203,19 +198,7 @@ impl Socket for UnixStreamSocket {
}
fn connect(&self, socket_addr: SocketAddr) -> Result<()> {
let remote_addr = {
let unix_socket_addr = UnixSocketAddr::try_from(socket_addr)?;
match unix_socket_addr {
UnixSocketAddr::Unnamed => todo!(),
UnixSocketAddr::Abstract(abstract_name) => {
UnixSocketAddrBound::Abstract(abstract_name)
}
UnixSocketAddr::Path(path) => {
let dentry = lookup_socket_file(&path)?;
UnixSocketAddrBound::Path(path, dentry)
}
}
};
let remote_addr = UnixSocketAddr::try_from(socket_addr)?.connect()?;
// Note that the Linux kernel implementation locks the remote socket and checks to see if
// it is listening first. This is different from our implementation, which locks the local
@ -356,21 +339,3 @@ impl Socket for UnixStreamSocket {
Ok((copied_bytes, message_header))
}
}
fn lookup_socket_file(path: &str) -> Result<Arc<Dentry>> {
let dentry = {
let current = current!();
let fs = current.fs().read();
let fs_path = FsPath::try_from(path)?;
fs.lookup(&fs_path)?
};
if dentry.type_() != InodeType::Socket {
return_errno_with_message!(Errno::ENOTSOCK, "not a socket file")
}
if !dentry.mode()?.is_readable() || !dentry.mode()?.is_writable() {
return_errno_with_message!(Errno::EACCES, "the socket cannot be read or written")
}
Ok(dentry)
}

View File

@ -3,6 +3,7 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/poll.h>
#include <fcntl.h>
#include <unistd.h>
#include <stddef.h>
@ -233,3 +234,63 @@ FN_TEST(listen)
TEST_ERRNO(listen(sk_accepted, 10), EINVAL);
}
END_TEST()
// Checks the errors reported when a pathname address refers to an existing file that is
// not a socket.
FN_TEST(ns_path)
{
int fd;
// Case 1: a regular file created with mode 0644.
fd = TEST_SUCC(creat("/tmp/.good", 0644));
// Binding to a path that already exists must fail with EADDRINUSE.
TEST_ERRNO(bind(sk_unbound, (struct sockaddr *)&UNIX_ADDR("/tmp/.good"),
sizeof(struct sockaddr)),
EADDRINUSE);
// Connecting to this non-socket file must fail with ECONNREFUSED.
TEST_ERRNO(connect(sk_unbound,
(struct sockaddr *)&UNIX_ADDR("/tmp/.good"),
sizeof(struct sockaddr)),
ECONNREFUSED);
TEST_SUCC(close(fd));
TEST_SUCC(unlink("/tmp/.good"));
// Case 2: a regular file created with mode 0000 (no permission bits).
fd = TEST_SUCC(creat("/tmp/.bad", 0000));
// Binding still fails with EADDRINUSE because the path exists.
TEST_ERRNO(bind(sk_unbound, (struct sockaddr *)&UNIX_ADDR("/tmp/.bad"),
sizeof(struct sockaddr)),
EADDRINUSE);
// Connecting now fails with EACCES rather than ECONNREFUSED.
TEST_ERRNO(connect(sk_unbound,
(struct sockaddr *)&UNIX_ADDR("/tmp/.bad"),
sizeof(struct sockaddr)),
EACCES);
TEST_SUCC(close(fd));
TEST_SUCC(unlink("/tmp/.bad"));
}
END_TEST()
// Checks the abstract namespace: binding with an empty address, name exclusivity,
// connecting before and after listen(), and release of the name on close.
FN_TEST(ns_abs)
{
int sk, sk2;
struct sockaddr_un addr;
socklen_t addrlen;
sk = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0));
// Bind with an address length of PATH_OFFSET, i.e. without any path bytes.
// NOTE(review): PATH_OFFSET is defined elsewhere; presumably the offset of sun_path.
TEST_SUCC(bind(sk, (struct sockaddr *)&UNIX_ADDR(""), PATH_OFFSET));
addrlen = sizeof(addr);
// The socket must now report a name of 6 bytes that starts with a NUL byte.
TEST_RES(getsockname(sk, (struct sockaddr *)&addr, &addrlen),
addrlen == PATH_OFFSET + 6 && addr.sun_path[0] == '\0');
sk2 = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0));
// A second socket cannot bind the same name while the first one holds it.
TEST_ERRNO(bind(sk2, (struct sockaddr *)&addr, addrlen), EADDRINUSE);
// The name is bound but not listening yet, so connecting is refused.
TEST_ERRNO(connect(sk2, (struct sockaddr *)&addr, addrlen),
ECONNREFUSED);
TEST_SUCC(listen(sk, 1));
// Once the first socket listens, the same connect succeeds.
TEST_SUCC(connect(sk2, (struct sockaddr *)&addr, addrlen));
TEST_SUCC(close(sk));
TEST_SUCC(close(sk2));
sk = TEST_SUCC(socket(PF_UNIX, SOCK_STREAM, 0));
// After both sockets are closed, connecting to the name is refused again.
TEST_ERRNO(connect(sk, (struct sockaddr *)&addr, addrlen),
ECONNREFUSED);
// The name can also be bound again by a new socket.
TEST_SUCC(bind(sk, (struct sockaddr *)&addr, addrlen));
TEST_SUCC(close(sk));
}
END_TEST()